diff --git a/be/src/agent/agent_server.cpp b/be/src/agent/agent_server.cpp index 57ed17a00d77ea..fe0ce00bcf6321 100644 --- a/be/src/agent/agent_server.cpp +++ b/be/src/agent/agent_server.cpp @@ -17,15 +17,14 @@ #include "agent/agent_server.h" -#include - #include +#include #include "agent/task_worker_pool.h" #include "agent/topic_subscriber.h" #include "agent/user_resource_listener.h" -#include "common/status.h" #include "common/logging.h" +#include "common/status.h" #include "gutil/strings/substitute.h" #include "olap/snapshot_manager.h" #include "runtime/etl_job_mgr.h" @@ -35,10 +34,8 @@ using std::vector; namespace doris { -AgentServer::AgentServer(ExecEnv* exec_env, const TMasterInfo& master_info) : - _exec_env(exec_env), - _master_info(master_info), - _topic_subscriber(new TopicSubscriber()) { +AgentServer::AgentServer(ExecEnv* exec_env, const TMasterInfo& master_info) + : _exec_env(exec_env), _master_info(master_info), _topic_subscriber(new TopicSubscriber()) { for (auto& path : exec_env->store_paths()) { try { string dpp_download_path_str = path.path + DPP_PREFIX; @@ -55,11 +52,9 @@ AgentServer::AgentServer(ExecEnv* exec_env, const TMasterInfo& master_info) : // to make code to be more readable. #ifndef BE_TEST -#define CREATE_AND_START_POOL(type, pool_name) \ - pool_name.reset(new TaskWorkerPool( \ - TaskWorkerPool::TaskWorkerType::type, \ - _exec_env, \ - master_info)); \ +#define CREATE_AND_START_POOL(type, pool_name) \ + pool_name.reset( \ + new TaskWorkerPool(TaskWorkerPool::TaskWorkerType::type, _exec_env, master_info)); \ pool_name->start(); #else #define CREATE_AND_START_POOL(type, pool_name) @@ -95,11 +90,12 @@ AgentServer::AgentServer(ExecEnv* exec_env, const TMasterInfo& master_info) : #endif } -AgentServer::~AgentServer() { } +AgentServer::~AgentServer() {} // TODO(lingbin): each task in the batch may have it own status or FE must check and // resend request when something is wrong(BE may need some logic to guarantee idempotence. 
-void AgentServer::submit_tasks(TAgentResult& agent_result, const std::vector& tasks) { +void AgentServer::submit_tasks(TAgentResult& agent_result, + const std::vector& tasks) { Status ret_st; // TODO check master_info here if it is the same with that of heartbeat rpc @@ -126,27 +122,24 @@ void AgentServer::submit_tasks(TAgentResult& agent_result, const std::vectorsubmit_task(task); } else if (task.push_req.push_type == TPushType::DELETE) { _delete_workers->submit_task(task); } else { ret_st = Status::InvalidArgument(strings::Substitute( - "task(signature=$0, type=$1, push_type=$2) has wrong push_type", - signature, task_type, task.push_req.push_type)); + "task(signature=$0, type=$1, push_type=$2) has wrong push_type", signature, + task_type, task.push_req.push_type)); } break; case TTaskType::ALTER: @@ -184,7 +177,7 @@ void AgentServer::submit_tasks(TAgentResult& agent_result, const std::vectoretl_job_mgr()->erase_job(request); if (!status.ok()) { LOG(WARNING) << "fail to delete etl files. because " << status.get_error_msg() - << " with request " << request; + << " with request " << request; } VLOG_RPC << "success to delete etl files. request=" << request; status.to_thrift(&t_agent_result.status); } -} // namespace doris +} // namespace doris diff --git a/be/src/agent/agent_server.h b/be/src/agent/agent_server.h index 3ed126de4587af..b4c0f155ba8e86 100644 --- a/be/src/agent/agent_server.h +++ b/be/src/agent/agent_server.h @@ -89,7 +89,6 @@ class AgentServer { std::unique_ptr _topic_subscriber; }; -} // end namespace doris - -#endif // DORIS_BE_SRC_AGENT_AGENT_SERVER_H +} // end namespace doris +#endif // DORIS_BE_SRC_AGENT_AGENT_SERVER_H diff --git a/be/src/agent/cgroups_mgr.cpp b/be/src/agent/cgroups_mgr.cpp index 128be59397de64..53308fb44f23cd 100644 --- a/be/src/agent/cgroups_mgr.cpp +++ b/be/src/agent/cgroups_mgr.cpp @@ -16,16 +16,19 @@ // under the License. 
#include "agent/cgroups_mgr.h" -#include -#include -#include -#include -#include + #include -#include +#include #include #include #include +#include + +#include +#include +#include +#include + #include "boost/filesystem.hpp" #include "common/logging.h" #include "olap/data_dir.h" @@ -42,42 +45,39 @@ using apache::thrift::transport::TTransportException; namespace doris { -static CgroupsMgr *s_global_cg_mgr; +static CgroupsMgr* s_global_cg_mgr; const std::string CgroupsMgr::_s_system_user = "system"; const std::string CgroupsMgr::_s_system_group = "normal"; -std::map CgroupsMgr::_s_resource_cgroups = - {{TResourceType::type::TRESOURCE_CPU_SHARE, "cpu.shares"}, - {TResourceType::type::TRESOURCE_IO_SHARE, "blkio.weight"}}; +std::map CgroupsMgr::_s_resource_cgroups = { + {TResourceType::type::TRESOURCE_CPU_SHARE, "cpu.shares"}, + {TResourceType::type::TRESOURCE_IO_SHARE, "blkio.weight"}}; CgroupsMgr::CgroupsMgr(ExecEnv* exec_env, const string& root_cgroups_path) - : _exec_env(exec_env), - _root_cgroups_path(root_cgroups_path), - _is_cgroups_init_success(false), - _cur_version(-1) { + : _exec_env(exec_env), + _root_cgroups_path(root_cgroups_path), + _is_cgroups_init_success(false), + _cur_version(-1) { if (s_global_cg_mgr == nullptr) { s_global_cg_mgr = this; } } -CgroupsMgr::~CgroupsMgr() { -} - -AgentStatus CgroupsMgr::update_local_cgroups(const TFetchResourceResult& new_fetched_resource) { +CgroupsMgr::~CgroupsMgr() {} +AgentStatus CgroupsMgr::update_local_cgroups(const TFetchResourceResult& new_fetched_resource) { std::lock_guard lck(_update_cgroups_mtx); if (!_is_cgroups_init_success) { return AgentStatus::DORIS_ERROR; } - if (_cur_version >= new_fetched_resource.resourceVersion) { return AgentStatus::DORIS_SUCCESS; } - const std::map& new_user_resource - = new_fetched_resource.resourceByUser; + const std::map& new_user_resource = + new_fetched_resource.resourceByUser; if (!_local_users.empty()) { std::set::const_iterator old_it = _local_users.begin(); @@ -97,12 
+97,11 @@ AgentStatus CgroupsMgr::update_local_cgroups(const TFetchResourceResult& new_fe const std::map& level_share = new_it->second.shareByGroup; std::map user_share; const std::map& resource_share = - new_it->second.resource.resourceByType; + new_it->second.resource.resourceByType; std::map::const_iterator resource_it = resource_share.begin(); for (; resource_it != resource_share.end(); ++resource_it) { if (_s_resource_cgroups.count(resource_it->first) > 0) { - user_share[_s_resource_cgroups[resource_it->first]] = - resource_it->second; + user_share[_s_resource_cgroups[resource_it->first]] = resource_it->second; } } @@ -118,45 +117,42 @@ AgentStatus CgroupsMgr::update_local_cgroups(const TFetchResourceResult& new_fe return AgentStatus::DORIS_SUCCESS; } -void CgroupsMgr::_config_user_disk_throttle(std::string user_name, - const std::map& resource_share) { - int64_t hdd_read_iops = _get_resource_value(TResourceType::type::TRESOURCE_HDD_READ_IOPS, - resource_share); - int64_t hdd_write_iops = _get_resource_value(TResourceType::type::TRESOURCE_HDD_WRITE_IOPS, - resource_share); - int64_t hdd_read_mbps = _get_resource_value(TResourceType::type::TRESOURCE_HDD_READ_MBPS, - resource_share); - int64_t hdd_write_mbps = _get_resource_value(TResourceType::type::TRESOURCE_HDD_WRITE_MBPS, - resource_share); - int64_t ssd_read_iops = _get_resource_value(TResourceType::type::TRESOURCE_SSD_READ_IOPS, - resource_share); - int64_t ssd_write_iops = _get_resource_value(TResourceType::type::TRESOURCE_SSD_WRITE_IOPS, - resource_share); - int64_t ssd_read_mbps = _get_resource_value(TResourceType::type::TRESOURCE_SSD_READ_MBPS, - resource_share); - int64_t ssd_write_mbps = _get_resource_value(TResourceType::type::TRESOURCE_SSD_WRITE_MBPS, - resource_share); - - _config_disk_throttle(user_name, "", hdd_read_iops, hdd_write_iops, - hdd_read_mbps, hdd_write_mbps, - ssd_read_iops, ssd_write_iops, - ssd_read_mbps, ssd_write_mbps); - _config_disk_throttle(user_name, "low", hdd_read_iops, 
hdd_write_iops, - hdd_read_mbps, hdd_write_mbps, - ssd_read_iops, ssd_write_iops, - ssd_read_mbps, ssd_write_mbps); - _config_disk_throttle(user_name, "normal", hdd_read_iops, hdd_write_iops, - hdd_read_mbps, hdd_write_mbps, - ssd_read_iops, ssd_write_iops, - ssd_read_mbps, ssd_write_mbps); - _config_disk_throttle(user_name, "high", hdd_read_iops, hdd_write_iops, - hdd_read_mbps, hdd_write_mbps, - ssd_read_iops, ssd_write_iops, - ssd_read_mbps, ssd_write_mbps); +void CgroupsMgr::_config_user_disk_throttle( + std::string user_name, const std::map& resource_share) { + int64_t hdd_read_iops = + _get_resource_value(TResourceType::type::TRESOURCE_HDD_READ_IOPS, resource_share); + int64_t hdd_write_iops = + _get_resource_value(TResourceType::type::TRESOURCE_HDD_WRITE_IOPS, resource_share); + int64_t hdd_read_mbps = + _get_resource_value(TResourceType::type::TRESOURCE_HDD_READ_MBPS, resource_share); + int64_t hdd_write_mbps = + _get_resource_value(TResourceType::type::TRESOURCE_HDD_WRITE_MBPS, resource_share); + int64_t ssd_read_iops = + _get_resource_value(TResourceType::type::TRESOURCE_SSD_READ_IOPS, resource_share); + int64_t ssd_write_iops = + _get_resource_value(TResourceType::type::TRESOURCE_SSD_WRITE_IOPS, resource_share); + int64_t ssd_read_mbps = + _get_resource_value(TResourceType::type::TRESOURCE_SSD_READ_MBPS, resource_share); + int64_t ssd_write_mbps = + _get_resource_value(TResourceType::type::TRESOURCE_SSD_WRITE_MBPS, resource_share); + + _config_disk_throttle(user_name, "", hdd_read_iops, hdd_write_iops, hdd_read_mbps, + hdd_write_mbps, ssd_read_iops, ssd_write_iops, ssd_read_mbps, + ssd_write_mbps); + _config_disk_throttle(user_name, "low", hdd_read_iops, hdd_write_iops, hdd_read_mbps, + hdd_write_mbps, ssd_read_iops, ssd_write_iops, ssd_read_mbps, + ssd_write_mbps); + _config_disk_throttle(user_name, "normal", hdd_read_iops, hdd_write_iops, hdd_read_mbps, + hdd_write_mbps, ssd_read_iops, ssd_write_iops, ssd_read_mbps, + ssd_write_mbps); + 
_config_disk_throttle(user_name, "high", hdd_read_iops, hdd_write_iops, hdd_read_mbps, + hdd_write_mbps, ssd_read_iops, ssd_write_iops, ssd_read_mbps, + ssd_write_mbps); } -int64_t CgroupsMgr::_get_resource_value(const TResourceType::type resource_type, - const std::map& resource_share) { +int64_t CgroupsMgr::_get_resource_value( + const TResourceType::type resource_type, + const std::map& resource_share) { int64_t resource_value = -1; std::map::const_iterator it = resource_share.find(resource_type); if (it != resource_share.end()) { @@ -165,16 +161,11 @@ int64_t CgroupsMgr::_get_resource_value(const TResourceType::type resource_type, return resource_value; } -AgentStatus CgroupsMgr::_config_disk_throttle(std::string user_name, - std::string level, - int64_t hdd_read_iops, - int64_t hdd_write_iops, - int64_t hdd_read_mbps, - int64_t hdd_write_mbps, - int64_t ssd_read_iops, - int64_t ssd_write_iops, - int64_t ssd_read_mbps, - int64_t ssd_write_mbps) { +AgentStatus CgroupsMgr::_config_disk_throttle(std::string user_name, std::string level, + int64_t hdd_read_iops, int64_t hdd_write_iops, + int64_t hdd_read_mbps, int64_t hdd_write_mbps, + int64_t ssd_read_iops, int64_t ssd_write_iops, + int64_t ssd_read_mbps, int64_t ssd_write_mbps) { string cgroups_path = this->_root_cgroups_path + "/" + user_name + "/" + level; string read_bps_path = cgroups_path + "/blkio.throttle.read_bps_device"; string write_bps_path = cgroups_path + "/blkio.throttle.write_bps_device"; @@ -215,33 +206,25 @@ AgentStatus CgroupsMgr::_config_disk_throttle(std::string user_name, int minor_number = minor(file_stat.st_dev); minor_number = (minor_number / 16) * 16; if (read_iops != -1) { - ctrl_cmd << major_number << ":" - << minor_number << " " - << read_iops; + ctrl_cmd << major_number << ":" << minor_number << " " << read_iops; _echo_cmd_to_cgroup(ctrl_cmd, read_iops_path); ctrl_cmd.clear(); ctrl_cmd.str(std::string()); } if (write_iops != -1) { - ctrl_cmd << major_number << ":" - << minor_number << 
" " - << write_iops; + ctrl_cmd << major_number << ":" << minor_number << " " << write_iops; _echo_cmd_to_cgroup(ctrl_cmd, write_iops_path); ctrl_cmd.clear(); ctrl_cmd.str(std::string()); } if (read_mbps != -1) { - ctrl_cmd << major_number << ":" - << minor_number << " " - << (read_mbps << 20); + ctrl_cmd << major_number << ":" << minor_number << " " << (read_mbps << 20); _echo_cmd_to_cgroup(ctrl_cmd, read_bps_path); ctrl_cmd.clear(); ctrl_cmd.str(std::string()); } if (write_mbps != -1) { - ctrl_cmd << major_number << ":" - << minor_number << " " - << (write_mbps << 20); + ctrl_cmd << major_number << ":" << minor_number << " " << (write_mbps << 20); _echo_cmd_to_cgroup(ctrl_cmd, write_bps_path); ctrl_cmd.clear(); ctrl_cmd.str(std::string()); @@ -264,43 +247,42 @@ AgentStatus CgroupsMgr::modify_user_cgroups(const string& user_name, // Traverse the user resource share map to append share value to cgroup's file for (map::const_iterator user_resource = user_share.begin(); - user_resource != user_share.end(); ++user_resource){ - string resource_file_name = user_resource->first; - int32_t user_share_weight = user_resource->second; + user_resource != user_share.end(); ++user_resource) { + string resource_file_name = user_resource->first; + int32_t user_share_weight = user_resource->second; + // Append the share_weight value to the file + string user_resource_path = user_cgroups_path + "/" + resource_file_name; + std::ofstream user_cgroups(user_resource_path.c_str(), std::ios::out | std::ios::app); + if (!user_cgroups.is_open()) { + return AgentStatus::DORIS_ERROR; + } + user_cgroups << user_share_weight << std::endl; + user_cgroups.close(); + LOG(INFO) << "Append " << user_share_weight << " to " << user_resource_path; + for (map::const_iterator level_resource = level_share.begin(); + level_resource != level_share.end(); ++level_resource) { + // Append resource share to level shares + string level_name = level_resource->first; + int32_t level_share_weight = 
level_resource->second; + // Check if the level cgroups exist + string level_cgroups_path = user_cgroups_path + "/" + level_name; + if (!is_file_exist(level_cgroups_path.c_str())) { + if (!boost::filesystem::create_directory(level_cgroups_path)) { + return AgentStatus::DORIS_ERROR; + } + } + // Append the share_weight value to the file - string user_resource_path = user_cgroups_path + "/" + resource_file_name; - std::ofstream user_cgroups(user_resource_path.c_str(), std::ios::out | std::ios::app); - if (!user_cgroups.is_open()) { + string level_resource_path = level_cgroups_path + "/" + resource_file_name; + std::ofstream level_cgroups(level_resource_path.c_str(), std::ios::out | std::ios::app); + if (!level_cgroups.is_open()) { return AgentStatus::DORIS_ERROR; } - user_cgroups << user_share_weight << std::endl; - user_cgroups.close(); - LOG(INFO) << "Append " << user_share_weight << " to " << user_resource_path; - for (map::const_iterator level_resource = level_share.begin(); - level_resource != level_share.end(); ++level_resource){ - // Append resource share to level shares - string level_name = level_resource->first; - int32_t level_share_weight = level_resource->second; - // Check if the level cgroups exist - string level_cgroups_path = user_cgroups_path + "/" + level_name; - if (!is_file_exist(level_cgroups_path.c_str())) { - if (!boost::filesystem::create_directory(level_cgroups_path)) { - return AgentStatus::DORIS_ERROR; - } - } - - // Append the share_weight value to the file - string level_resource_path = level_cgroups_path + "/" + resource_file_name; - std::ofstream level_cgroups(level_resource_path.c_str(), - std::ios::out | std::ios::app); - if (!level_cgroups.is_open()) { - return AgentStatus::DORIS_ERROR; - } - level_cgroups << level_share_weight << std::endl; - level_cgroups.close(); - - LOG(INFO) << "Append " << level_share_weight << " to " << level_resource_path; - } + level_cgroups << level_share_weight << std::endl; + level_cgroups.close(); + + 
LOG(INFO) << "Append " << level_share_weight << " to " << level_resource_path; + } } return AgentStatus::DORIS_SUCCESS; } @@ -308,47 +290,44 @@ AgentStatus CgroupsMgr::modify_user_cgroups(const string& user_name, AgentStatus CgroupsMgr::init_cgroups() { std::string root_cgroups_tasks_path = this->_root_cgroups_path + "/tasks"; // Check if the root cgroups exists - if (is_directory(this->_root_cgroups_path.c_str()) - && is_file_exist(root_cgroups_tasks_path.c_str())) { - // Check the folder's virtual filesystem to find whether it is a cgroup file system + if (is_directory(this->_root_cgroups_path.c_str()) && + is_file_exist(root_cgroups_tasks_path.c_str())) { + // Check the folder's virtual filesystem to find whether it is a cgroup file system #ifndef BE_TEST - struct statfs fs_type; - statfs(root_cgroups_tasks_path.c_str(), &fs_type); - if (fs_type.f_type != CGROUP_SUPER_MAGIC) { - LOG(ERROR) << _root_cgroups_path << " is not a cgroups file system."; - _is_cgroups_init_success = false; - return AgentStatus::DORIS_ERROR; - } + struct statfs fs_type; + statfs(root_cgroups_tasks_path.c_str(), &fs_type); + if (fs_type.f_type != CGROUP_SUPER_MAGIC) { + LOG(ERROR) << _root_cgroups_path << " is not a cgroups file system."; + _is_cgroups_init_success = false; + return AgentStatus::DORIS_ERROR; + } #endif - // Check if current user have write permission to cgroup folder - if (access(_root_cgroups_path.c_str(), W_OK) != 0) { - LOG(ERROR) << "Doris does not have write permission to " - << _root_cgroups_path; - _is_cgroups_init_success = false; - return AgentStatus::DORIS_ERROR; - } - // If root folder exists, then delete all subfolders under it - boost::filesystem::directory_iterator item_begin(this->_root_cgroups_path); - boost::filesystem::directory_iterator item_end; - for (; item_begin != item_end; item_begin++) { - if (is_directory(item_begin->path().string().c_str())) { - // Delete the sub folder - if (delete_user_cgroups(item_begin->path().filename().string()) - != 
AgentStatus::DORIS_SUCCESS) { - LOG(ERROR) << "Could not clean subfolder " - << item_begin->path().string(); - _is_cgroups_init_success = false; - return AgentStatus::DORIS_ERROR; - } + // Check if current user have write permission to cgroup folder + if (access(_root_cgroups_path.c_str(), W_OK) != 0) { + LOG(ERROR) << "Doris does not have write permission to " << _root_cgroups_path; + _is_cgroups_init_success = false; + return AgentStatus::DORIS_ERROR; + } + // If root folder exists, then delete all subfolders under it + boost::filesystem::directory_iterator item_begin(this->_root_cgroups_path); + boost::filesystem::directory_iterator item_end; + for (; item_begin != item_end; item_begin++) { + if (is_directory(item_begin->path().string().c_str())) { + // Delete the sub folder + if (delete_user_cgroups(item_begin->path().filename().string()) != + AgentStatus::DORIS_SUCCESS) { + LOG(ERROR) << "Could not clean subfolder " << item_begin->path().string(); + _is_cgroups_init_success = false; + return AgentStatus::DORIS_ERROR; } } - LOG(INFO) << "Initialize doris cgroups successfully under folder " - << _root_cgroups_path; - _is_cgroups_init_success = true; - return AgentStatus::DORIS_SUCCESS; + } + LOG(INFO) << "Initialize doris cgroups successfully under folder " << _root_cgroups_path; + _is_cgroups_init_success = true; + return AgentStatus::DORIS_SUCCESS; } else { LOG(WARNING) << "Could not find a valid cgroups path for resource isolation," - << "current value is " << _root_cgroups_path << ". ignore it."; + << "current value is " << _root_cgroups_path << ". 
ignore it."; _is_cgroups_init_success = false; return AgentStatus::DORIS_ERROR; } @@ -362,8 +341,7 @@ void CgroupsMgr::apply_cgroup(const string& user_name, const string& level) { s_global_cg_mgr->assign_to_cgroups(user_name, level); } -AgentStatus CgroupsMgr::assign_to_cgroups(const string& user_name, - const string& level) { +AgentStatus CgroupsMgr::assign_to_cgroups(const string& user_name, const string& level) { if (!_is_cgroups_init_success) { return AgentStatus::DORIS_ERROR; } @@ -371,17 +349,15 @@ AgentStatus CgroupsMgr::assign_to_cgroups(const string& user_name, return assign_thread_to_cgroups(tid, user_name, level); } -AgentStatus CgroupsMgr::assign_thread_to_cgroups(int64_t thread_id, - const string& user_name, +AgentStatus CgroupsMgr::assign_thread_to_cgroups(int64_t thread_id, const string& user_name, const string& level) { if (!_is_cgroups_init_success) { return AgentStatus::DORIS_ERROR; } string tasks_path = _root_cgroups_path + "/" + user_name + "/" + level + "/tasks"; if (!is_file_exist(_root_cgroups_path + "/" + user_name)) { - tasks_path = this->_root_cgroups_path + "/" - + _default_user_name + "/" - + _default_level + "/tasks"; + tasks_path = this->_root_cgroups_path + "/" + _default_user_name + "/" + _default_level + + "/tasks"; } else if (!is_file_exist(_root_cgroups_path + "/" + user_name + "/" + level)) { tasks_path = this->_root_cgroups_path + "/" + user_name + "/tasks"; } @@ -429,17 +405,15 @@ AgentStatus CgroupsMgr::drop_cgroups(const string& deleted_cgroups_path) { // If failed then there maybe exist active tasks under it and try to relocate them // Currently, try 10 times to relocate and delete the cgroups. 
int32_t i = 0; - while (is_file_exist(deleted_cgroups_path) - && rmdir(deleted_cgroups_path.c_str()) < 0 - && i < this->_drop_retry_times) { + while (is_file_exist(deleted_cgroups_path) && rmdir(deleted_cgroups_path.c_str()) < 0 && + i < this->_drop_retry_times) { this->relocate_tasks(deleted_cgroups_path, this->_root_cgroups_path); ++i; #ifdef BE_TEST boost::filesystem::remove_all(deleted_cgroups_path); #endif - if (i == this->_drop_retry_times){ - LOG(ERROR) << "drop cgroups under path: " << deleted_cgroups_path - << " failed."; + if (i == this->_drop_retry_times) { + LOG(ERROR) << "drop cgroups under path: " << deleted_cgroups_path << " failed."; return AgentStatus::DORIS_ERROR; } } @@ -470,8 +444,7 @@ AgentStatus CgroupsMgr::relocate_tasks(const string& src_cgroups, const string& } void CgroupsMgr::_echo_cmd_to_cgroup(stringstream& ctrl_cmd, string& cgroups_path) { - std::ofstream cgroups_stream(cgroups_path.c_str(), - std::ios::out | std::ios::app); + std::ofstream cgroups_stream(cgroups_path.c_str(), std::ios::out | std::ios::app); if (cgroups_stream.is_open()) { cgroups_stream << ctrl_cmd.str() << std::endl; cgroups_stream.close(); diff --git a/be/src/agent/cgroups_mgr.h b/be/src/agent/cgroups_mgr.h index 1f2afcc2dac799..4c30f190bab27a 100644 --- a/be/src/agent/cgroups_mgr.h +++ b/be/src/agent/cgroups_mgr.h @@ -18,11 +18,13 @@ #ifndef DORIS_BE_SRC_AGENT_CGROUPS_MGR_H #define DORIS_BE_SRC_AGENT_CGROUPS_MGR_H +#include + #include #include #include #include -#include + #include "agent/status.h" #include "gen_cpp/MasterService_types.h" @@ -40,11 +42,11 @@ class CgroupsMgr { // Compare the old user resource and new user resource to find deleted user // then delete nonexisting cgroups, create new user cgroups, update all user cgroups - AgentStatus update_local_cgroups(const TFetchResourceResult& new_fetched_resource); - + AgentStatus update_local_cgroups(const TFetchResourceResult& new_fetched_resource); + // Delete all existing cgroups under root path AgentStatus 
init_cgroups(); - + // Modify cgroup resource shares under cgroups_root_path. // Create related cgroups if it not exist. // @@ -53,34 +55,29 @@ class CgroupsMgr { // // user_share: a mapping for shares for different resource like (cpu.share, 100) // mapping key is resource file name in cgroup; value is share weight - // + // // level_share: a mapping for shares for different levels under the user. // mapping key is level name; value is level's share. Currently, different resource using the same share. - AgentStatus modify_user_cgroups(const std::string& user_name, - const std::map& user_share, + AgentStatus modify_user_cgroups(const std::string& user_name, + const std::map& user_share, const std::map& level_share); - static void apply_cgroup(const std::string& user_name, - const std::string& level); + static void apply_cgroup(const std::string& user_name, const std::string& level); - static void apply_system_cgroup() { - apply_cgroup(_s_system_user, _s_system_group); - } + static void apply_system_cgroup() { apply_cgroup(_s_system_user, _s_system_group); } // Assign the thread calling this funciton to the cgroup identified by user name and level // // Input parameters: // user_name&level: the user name and level used to find the cgroup - AgentStatus assign_to_cgroups(const std::string& user_name, - const std::string& level); + AgentStatus assign_to_cgroups(const std::string& user_name, const std::string& level); // Assign the thread identified by thread id to the cgroup identified by user name and level // // Input parameters: // thread_id: the unique id for the thread // user_name&level: the user name and level used to find the cgroup - AgentStatus assign_thread_to_cgroups(int64_t thread_id, - const std::string& user_name, + AgentStatus assign_thread_to_cgroups(int64_t thread_id, const std::string& user_name, const std::string& level); // Delete the user's cgroups and its sub level cgroups using DropCgroups @@ -88,7 +85,7 @@ class CgroupsMgr { // user name: user 
name to be deleted AgentStatus delete_user_cgroups(const std::string& user_name); // Delete a cgroup - // If there are active tasks in this cgroups, they will be relocated + // If there are active tasks in this cgroups, they will be relocated // to root cgroups. // If there are sub cgroups, it will return error. // Input parameters: @@ -102,19 +99,17 @@ class CgroupsMgr { // dest_cgroups: absolute path for dest cgroups folder AgentStatus relocate_tasks(const std::string& src_cgroups, const std::string& dest_cgroups); - int64_t get_cgroups_version() { - return _cur_version; - } + int64_t get_cgroups_version() { return _cur_version; } // set the disk throttle for the user by getting resource value from the map and echo it to the cgroups. - // currently, both the user and groups under the user are set to the same value + // currently, both the user and groups under the user are set to the same value // because throttle does not support hierachy. // Input parameters: // user_name: name for the user // resource_share: resource value get from fe - void _config_user_disk_throttle(std::string user_name, + void _config_user_disk_throttle(std::string user_name, const std::map& resource_share); - + // get user resource share value from the map int64_t _get_resource_value(const TResourceType::type resource_type, const std::map& resource_share); @@ -130,23 +125,18 @@ class CgroupsMgr { // ssd_write_iops: write iops number for ssd disk. // ssd_read_mbps: read bps number for ssd disk, using mb not byte or kb. // ssd_write_mbps: write bps number for ssd disk, using mb not byte or kb. 
- AgentStatus _config_disk_throttle(std::string user_name, - std::string level, - int64_t hdd_read_iops, - int64_t hdd_write_iops, - int64_t hdd_read_mbps, - int64_t hdd_write_mbps, - int64_t ssd_read_iops, - int64_t ssd_write_iops, - int64_t ssd_read_mbps, - int64_t ssd_write_mbps); - + AgentStatus _config_disk_throttle(std::string user_name, std::string level, + int64_t hdd_read_iops, int64_t hdd_write_iops, + int64_t hdd_read_mbps, int64_t hdd_write_mbps, + int64_t ssd_read_iops, int64_t ssd_write_iops, + int64_t ssd_read_mbps, int64_t ssd_write_mbps); + // echo command in string stream to the cgroup file // Input parameters: // ctrl_cmd: stringstream that contains the string to echo // cgroups_path: target cgroup file path void _echo_cmd_to_cgroup(std::stringstream& ctrl_cmd, std::string& cgroups_path); - + // check if the path exists and it is a directory // Input parameters: // file_path: path to the file @@ -165,6 +155,7 @@ class CgroupsMgr { public: const static std::string _s_system_user; const static std::string _s_system_group; + private: ExecEnv* _exec_env; std::string _root_cgroups_path; @@ -173,11 +164,11 @@ class CgroupsMgr { std::string _default_user_name = "default"; std::string _default_level = "normal"; int64_t _cur_version; - std::set _local_users; - std::mutex _update_cgroups_mtx; + std::set _local_users; + std::mutex _update_cgroups_mtx; // A static mapping from fe's resource type to cgroups file - static std::map _s_resource_cgroups; + static std::map _s_resource_cgroups; }; -} +} // namespace doris #endif diff --git a/be/src/agent/heartbeat_server.cpp b/be/src/agent/heartbeat_server.cpp index e17f2726a4e47f..47f12b53117231 100644 --- a/be/src/agent/heartbeat_server.cpp +++ b/be/src/agent/heartbeat_server.cpp @@ -16,22 +16,23 @@ // under the License. 
#include "agent/heartbeat_server.h" -#include -#include -#include #include +#include +#include +#include + #include "common/status.h" #include "gen_cpp/HeartbeatService.h" #include "gen_cpp/Status_types.h" #include "olap/storage_engine.h" #include "olap/utils.h" +#include "runtime/heartbeat_flags.h" #include "service/backend_options.h" #include "util/debug_util.h" #include "util/thrift_server.h" #include "util/time.h" -#include "runtime/heartbeat_flags.h" using std::fstream; using std::nothrow; @@ -41,9 +42,8 @@ using apache::thrift::transport::TProcessor; namespace doris { -HeartbeatServer::HeartbeatServer(TMasterInfo* master_info) : - _master_info(master_info), - _fe_epoch(0) { +HeartbeatServer::HeartbeatServer(TMasterInfo* master_info) + : _master_info(master_info), _fe_epoch(0) { _olap_engine = StorageEngine::instance(); _be_epoch = GetCurrentTimeMicros() / 1000; } @@ -52,16 +52,14 @@ void HeartbeatServer::init_cluster_id() { _master_info->cluster_id = _olap_engine->effective_cluster_id(); } -void HeartbeatServer::heartbeat( - THeartbeatResult& heartbeat_result, - const TMasterInfo& master_info) { - +void HeartbeatServer::heartbeat(THeartbeatResult& heartbeat_result, + const TMasterInfo& master_info) { //print heartbeat in every minute LOG_EVERY_N(INFO, 12) << "get heartbeat from FE." 
- << "host:" << master_info.network_address.hostname - << ", port:" << master_info.network_address.port - << ", cluster id:" << master_info.cluster_id - << ", counter:" << google::COUNTER; + << "host:" << master_info.network_address.hostname + << ", port:" << master_info.network_address.port + << ", cluster id:" << master_info.cluster_id + << ", counter:" << google::COUNTER; // do heartbeat Status st = _heartbeat(master_info); @@ -83,7 +81,7 @@ Status HeartbeatServer::_heartbeat(const TMasterInfo& master_info) { if (master_info.__isset.backend_ip) { if (master_info.backend_ip != BackendOptions::get_localhost()) { LOG(WARNING) << "backend ip saved in master does not equal to backend local ip" - << master_info.backend_ip << " vs. " << BackendOptions::get_localhost(); + << master_info.backend_ip << " vs. " << BackendOptions::get_localhost(); std::stringstream ss; ss << "actual backend local ip: " << BackendOptions::get_localhost(); return Status::InternalError(ss.str()); @@ -101,7 +99,8 @@ Status HeartbeatServer::_heartbeat(const TMasterInfo& master_info) { } else { _master_info->cluster_id = master_info.cluster_id; LOG(INFO) << "record cluster id. host: " << master_info.network_address.hostname - << ". port: " << master_info.network_address.port << ". cluster id: " << master_info.cluster_id; + << ". port: " << master_info.network_address.port + << ". 
cluster id: " << master_info.cluster_id; } } else { if (_master_info->cluster_id != master_info.cluster_id) { @@ -111,19 +110,21 @@ Status HeartbeatServer::_heartbeat(const TMasterInfo& master_info) { } bool need_report = false; - if (_master_info->network_address.hostname != master_info.network_address.hostname - || _master_info->network_address.port != master_info.network_address.port) { + if (_master_info->network_address.hostname != master_info.network_address.hostname || + _master_info->network_address.port != master_info.network_address.port) { if (master_info.epoch > _fe_epoch) { _master_info->network_address.hostname = master_info.network_address.hostname; _master_info->network_address.port = master_info.network_address.port; _fe_epoch = master_info.epoch; need_report = true; - LOG(INFO) << "master change. new master host: " << _master_info->network_address.hostname - << ". port: " << _master_info->network_address.port << ". epoch: " << _fe_epoch; + LOG(INFO) << "master change. new master host: " + << _master_info->network_address.hostname + << ". port: " << _master_info->network_address.port + << ". epoch: " << _fe_epoch; } else { LOG(WARNING) << "epoch is not greater than local. ignore heartbeat. host: " << _master_info->network_address.hostname - << " port: " << _master_info->network_address.port + << " port: " << _master_info->network_address.port << " local epoch: " << _fe_epoch << " received epoch: " << master_info.epoch; return Status::InternalError("epoch is not greater than local. 
ignore heartbeat."); @@ -169,12 +170,9 @@ Status HeartbeatServer::_heartbeat(const TMasterInfo& master_info) { return Status::OK(); } -AgentStatus create_heartbeat_server( - ExecEnv* exec_env, - uint32_t server_port, - ThriftServer** thrift_server, - uint32_t worker_thread_num, - TMasterInfo* local_master_info) { +AgentStatus create_heartbeat_server(ExecEnv* exec_env, uint32_t server_port, + ThriftServer** thrift_server, uint32_t worker_thread_num, + TMasterInfo* local_master_info) { HeartbeatServer* heartbeat_server = new (nothrow) HeartbeatServer(local_master_info); if (heartbeat_server == NULL) { return DORIS_ERROR; @@ -185,11 +183,8 @@ AgentStatus create_heartbeat_server( boost::shared_ptr handler(heartbeat_server); boost::shared_ptr server_processor(new HeartbeatServiceProcessor(handler)); string server_name("heartbeat"); - *thrift_server = new ThriftServer( - server_name, - server_processor, - server_port, - worker_thread_num); + *thrift_server = + new ThriftServer(server_name, server_processor, server_port, worker_thread_num); return DORIS_SUCCESS; } -} // namesapce doris +} // namespace doris diff --git a/be/src/agent/heartbeat_server.h b/be/src/agent/heartbeat_server.h index 67816e45c74467..5e845782eb1183 100644 --- a/be/src/agent/heartbeat_server.h +++ b/be/src/agent/heartbeat_server.h @@ -20,13 +20,12 @@ #include -#include "thrift/transport/TTransportUtils.h" - #include "agent/status.h" #include "gen_cpp/HeartbeatService.h" #include "gen_cpp/Status_types.h" #include "olap/olap_define.h" #include "runtime/exec_env.h" +#include "thrift/transport/TTransportUtils.h" namespace doris { @@ -38,7 +37,7 @@ class ThriftServer; class HeartbeatServer : public HeartbeatServiceIf { public: explicit HeartbeatServer(TMasterInfo* master_info); - virtual ~HeartbeatServer() {}; + virtual ~HeartbeatServer(){}; virtual void init_cluster_id(); @@ -64,13 +63,10 @@ class HeartbeatServer : public HeartbeatServiceIf { int64_t _fe_epoch; DISALLOW_COPY_AND_ASSIGN(HeartbeatServer); 
-}; // class HeartBeatServer +}; // class HeartBeatServer -AgentStatus create_heartbeat_server( - ExecEnv* exec_env, - uint32_t heartbeat_server_port, - ThriftServer** heart_beat_server, - uint32_t worker_thread_num, - TMasterInfo* local_master_info); -} // namespace doris -#endif // DORIS_BE_SRC_AGENT_HEARTBEAT_SERVER_H +AgentStatus create_heartbeat_server(ExecEnv* exec_env, uint32_t heartbeat_server_port, + ThriftServer** heart_beat_server, uint32_t worker_thread_num, + TMasterInfo* local_master_info); +} // namespace doris +#endif // DORIS_BE_SRC_AGENT_HEARTBEAT_SERVER_H diff --git a/be/src/agent/pusher.cpp b/be/src/agent/pusher.cpp index fa90b6391b77c3..8e3ea72353087d 100644 --- a/be/src/agent/pusher.cpp +++ b/be/src/agent/pusher.cpp @@ -16,16 +16,19 @@ // under the License. #include "agent/pusher.h" + #include + #include #include #include #include #include #include + +#include "agent/cgroups_mgr.h" #include "boost/filesystem.hpp" #include "boost/lexical_cast.hpp" -#include "agent/cgroups_mgr.h" #include "gen_cpp/AgentService_types.h" #include "http/http_client.h" #include "olap/olap_common.h" @@ -40,24 +43,20 @@ using std::vector; namespace doris { -Pusher::Pusher(OLAPEngine* engine, const TPushReq& push_req) : - _push_req(push_req), _engine(engine) { -} +Pusher::Pusher(OLAPEngine* engine, const TPushReq& push_req) + : _push_req(push_req), _engine(engine) {} -Pusher::~Pusher() { -} +Pusher::~Pusher() {} AgentStatus Pusher::init() { AgentStatus status = DORIS_SUCCESS; // Check replica exist OLAPTablePtr olap_table; - olap_table = _engine->get_table( - _push_req.tablet_id, - _push_req.schema_hash); + olap_table = _engine->get_table(_push_req.tablet_id, _push_req.schema_hash); if (olap_table.get() == NULL) { - OLAP_LOG_WARNING("get tables failed. tablet_id: %ld, schema_hash: %ld", - _push_req.tablet_id, _push_req.schema_hash); + OLAP_LOG_WARNING("get tables failed. 
tablet_id: %ld, schema_hash: %ld", _push_req.tablet_id, + _push_req.schema_hash); return DORIS_PUSH_INVALID_TABLE; } @@ -101,8 +100,8 @@ AgentStatus Pusher::_get_tmp_file_dir(const string& root_path, string* download_ if (0 != error_code) { status = DORIS_ERROR; - LOG(WARNING) << "create download dir failed.path: " - << *download_path << ", error code: " << error_code; + LOG(WARNING) << "create download dir failed.path: " << *download_path + << ", error code: " << error_code; } } @@ -130,13 +129,12 @@ AgentStatus Pusher::process(vector* tablet_infos) { estimate_time_out = config::download_low_speed_time; } bool is_timeout = false; - auto download_cb = [this, estimate_time_out, file_size, &is_timeout] (HttpClient* client) { + auto download_cb = [this, estimate_time_out, file_size, &is_timeout](HttpClient* client) { // Check timeout and set timeout time_t now = time(NULL); if (_push_req.timeout > 0 && _push_req.timeout < now) { // return status to break this callback - VLOG(3) << "check time out. time_out:" << _push_req.timeout - << ", now:" << now; + VLOG(3) << "check time out. time_out:" << _push_req.timeout << ", now:" << now; is_timeout = true; return Status::OK(); } @@ -159,7 +157,7 @@ AgentStatus Pusher::process(vector* tablet_infos) { uint64_t local_file_size = boost::filesystem::file_size(_local_file_path); if (file_size != local_file_size) { LOG(WARNING) << "download_file size error. 
file_size=" << file_size - << ", local_file_size=" << local_file_size; + << ", local_file_size=" << local_file_size; return Status::InternalError("downloaded file's size isn't right"); } } @@ -167,7 +165,7 @@ AgentStatus Pusher::process(vector* tablet_infos) { _push_req.http_file_path = _local_file_path; return Status::OK(); }; - + MonotonicStopWatch stopwatch; stopwatch.start(); auto st = HttpClient::execute_with_retry(MAX_RETRY, 1, download_cb); @@ -178,18 +176,16 @@ AgentStatus Pusher::process(vector* tablet_infos) { if (st.ok() && !is_timeout) { double rate = -1.0; if (_push_req.__isset.http_file_size) { - rate = (double) _push_req.http_file_size / (cost / 1000 / 1000 / 1000) / 1024; + rate = (double)_push_req.http_file_size / (cost / 1000 / 1000 / 1000) / 1024; } LOG(INFO) << "down load file success. local_file=" << _local_file_path - << ", remote_file=" << _remote_file_path - << ", tablet_id" << _push_req.tablet_id - << ", cost=" << cost / 1000 << "us, file_size" << _push_req.http_file_size - << ", download rage:" << rate << "KB/s"; + << ", remote_file=" << _remote_file_path << ", tablet_id" + << _push_req.tablet_id << ", cost=" << cost / 1000 << "us, file_size" + << _push_req.http_file_size << ", download rage:" << rate << "KB/s"; } else { LOG(WARNING) << "down load file failed. 
remote_file=" << _remote_file_path - << ", tablet=" << _push_req.tablet_id - << ", cost=" << cost / 1000 - << "us, errmsg=" << st.get_error_msg() << ", is_timeout=" << is_timeout; + << ", tablet=" << _push_req.tablet_id << ", cost=" << cost / 1000 + << "us, errmsg=" << st.get_error_msg() << ", is_timeout=" << is_timeout; status = DORIS_ERROR; } } diff --git a/be/src/agent/status.h b/be/src/agent/status.h index e5174aed8daa21..beea4c9fc18753 100644 --- a/be/src/agent/status.h +++ b/be/src/agent/status.h @@ -44,5 +44,5 @@ enum AgentStatus { DORIS_INTERNAL_ERROR = -902, DORIS_DISK_REACH_CAPACITY_LIMIT = -903 }; -} // namespace doris -#endif // DORIS_BE_SRC_AGENT_STATUS_H +} // namespace doris +#endif // DORIS_BE_SRC_AGENT_STATUS_H diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index fac22f01044d7b..e5c979cbc6f9ab 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -329,8 +329,7 @@ void TaskWorkerPool::_create_tablet_worker_thread_callback() { TStatus task_status; std::vector finish_tablet_infos; - OLAPStatus create_status = - _env->storage_engine()->create_tablet(create_tablet_req); + OLAPStatus create_status = _env->storage_engine()->create_tablet(create_tablet_req); if (create_status != OLAPStatus::OLAP_SUCCESS) { LOG(WARNING) << "create table failed. 
status: " << create_status << ", signature: " << agent_task_req.signature; @@ -450,8 +449,7 @@ void TaskWorkerPool::_alter_tablet_worker_thread_callback() { TTaskType::type task_type = agent_task_req.task_type; switch (task_type) { case TTaskType::ALTER: - _alter_tablet(agent_task_req, signature, - task_type, &finish_task_request); + _alter_tablet(agent_task_req, signature, task_type, &finish_task_request); break; default: // pass @@ -578,10 +576,9 @@ void TaskWorkerPool::_push_worker_thread_callback() { return; } - index = _get_next_task_index( - config::push_worker_count_normal_priority + - config::push_worker_count_high_priority, - _tasks, priority); + index = _get_next_task_index(config::push_worker_count_normal_priority + + config::push_worker_count_high_priority, + _tasks, priority); if (index < 0) { // there is no high priority task. notify other thread to handle normal task @@ -626,8 +623,8 @@ void TaskWorkerPool::_push_worker_thread_callback() { } if (status == DORIS_SUCCESS) { - VLOG(3) << "push ok. signature: " << agent_task_req.signature - << ", push_type: " << push_req.push_type; + VLOG(3) << "push ok. 
signature: " << agent_task_req.signature + << ", push_type: " << push_req.push_type; error_msgs.push_back("push success"); ++_s_report_version; @@ -867,8 +864,8 @@ void TaskWorkerPool::_clone_worker_thread_callback() { std::vector error_msgs; std::vector tablet_infos; - EngineCloneTask engine_task(clone_req, _master_info, - agent_task_req.signature, &error_msgs, &tablet_infos, &status); + EngineCloneTask engine_task(clone_req, _master_info, agent_task_req.signature, &error_msgs, + &tablet_infos, &status); _env->storage_engine()->execute_task(&engine_task); // Return result to fe TStatus task_status; @@ -919,18 +916,19 @@ void TaskWorkerPool::_storage_medium_migrate_worker_thread_callback() { // check request and get info TabletSharedPtr tablet; DataDir* dest_store; - if (_check_migrate_requset(storage_medium_migrate_req, tablet, &dest_store) != OLAP_SUCCESS) { - status_code = TStatusCode::RUNTIME_ERROR; + if (_check_migrate_requset(storage_medium_migrate_req, tablet, &dest_store) != + OLAP_SUCCESS) { + status_code = TStatusCode::RUNTIME_ERROR; } else { EngineStorageMigrationTask engine_task(tablet, dest_store); OLAPStatus res = _env->storage_engine()->execute_task(&engine_task); if (res != OLAP_SUCCESS) { LOG(WARNING) << "storage media migrate failed. status: " << res - << ", signature: " << agent_task_req.signature; + << ", signature: " << agent_task_req.signature; status_code = TStatusCode::RUNTIME_ERROR; } else { LOG(INFO) << "storage media migrate success. 
status:" << res << "," - << ", signature:" << agent_task_req.signature; + << ", signature:" << agent_task_req.signature; } } @@ -950,17 +948,14 @@ void TaskWorkerPool::_storage_medium_migrate_worker_thread_callback() { } } -OLAPStatus TaskWorkerPool::_check_migrate_requset( - const TStorageMediumMigrateReq& req, - TabletSharedPtr& tablet, - DataDir** dest_store) { - +OLAPStatus TaskWorkerPool::_check_migrate_requset(const TStorageMediumMigrateReq& req, + TabletSharedPtr& tablet, DataDir** dest_store) { int64_t tablet_id = req.tablet_id; int32_t schema_hash = req.schema_hash; tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); if (tablet == nullptr) { LOG(WARNING) << "can't find tablet. tablet_id= " << tablet_id - << " schema_hash=" << schema_hash; + << " schema_hash=" << schema_hash; return OLAP_ERR_TABLE_NOT_FOUND; } @@ -979,7 +974,7 @@ OLAPStatus TaskWorkerPool::_check_migrate_requset( uint32_t count = StorageEngine::instance()->available_storage_medium_type_count(); if (count <= 1) { LOG(INFO) << "available storage medium type count is less than 1, " - << "no need to migrate. count=" << count; + << "no need to migrate. count=" << count; return OLAP_REQUEST_FAILED; } // check current tablet storage medium @@ -987,7 +982,7 @@ OLAPStatus TaskWorkerPool::_check_migrate_requset( TStorageMedium::type src_storage_medium = tablet->data_dir()->storage_medium(); if (src_storage_medium == storage_medium) { LOG(INFO) << "tablet is already on specified storage medium. 
" - << "storage_medium=" << storage_medium; + << "storage_medium=" << storage_medium; return OLAP_REQUEST_FAILED; } // get a random store of specified storage medium @@ -1004,7 +999,7 @@ OLAPStatus TaskWorkerPool::_check_migrate_requset( int64_t tablet_size = tablet->tablet_footprint(); if ((*dest_store)->reach_capacity_limit(tablet_size)) { LOG(WARNING) << "reach the capacity limit of path: " << (*dest_store)->path() - << ", tablet size: " << tablet_size; + << ", tablet size: " << tablet_size; return OLAP_ERR_DISK_REACH_CAPACITY_LIMIT; } @@ -1080,15 +1075,16 @@ void TaskWorkerPool::_report_task_worker_thread_callback() { if (status != DORIS_SUCCESS) { DorisMetrics::instance()->report_task_requests_failed->increment(1); - LOG(WARNING) << "report task failed. status: " << status << ", master host: " - << _master_info.network_address.hostname + LOG(WARNING) << "report task failed. status: " << status + << ", master host: " << _master_info.network_address.hostname << "port: " << _master_info.network_address.port; } else { LOG(INFO) << "finish report task. master host: " - << _master_info.network_address.hostname - << " port: " << _master_info.network_address.port; + << _master_info.network_address.hostname + << " port: " << _master_info.network_address.port; } - } while (!_stop_background_threads_latch.wait_for(MonoDelta::FromSeconds(config::report_task_interval_seconds))); + } while (!_stop_background_threads_latch.wait_for( + MonoDelta::FromSeconds(config::report_task_interval_seconds))); } /// disk state report thread will report disk state at a configurable fix interval. 
@@ -1108,7 +1104,8 @@ void TaskWorkerPool::_report_disk_state_worker_thread_callback() { } // wait at most report_disk_state_interval_seconds, or being notified - _worker_thread_condition_variable.wait_for(MonoDelta::FromSeconds(config::report_disk_state_interval_seconds)); + _worker_thread_condition_variable.wait_for( + MonoDelta::FromSeconds(config::report_disk_state_interval_seconds)); if (!_is_work) { break; } @@ -1136,13 +1133,13 @@ void TaskWorkerPool::_report_disk_state_worker_thread_callback() { if (status != DORIS_SUCCESS) { DorisMetrics::instance()->report_disk_requests_failed->increment(1); - LOG(WARNING) << "report disk state failed. status: " << status << ", master host: " - << _master_info.network_address.hostname + LOG(WARNING) << "report disk state failed. status: " << status + << ", master host: " << _master_info.network_address.hostname << ", port: " << _master_info.network_address.port; } else { LOG(INFO) << "finish report disk state. master host: " - << _master_info.network_address.hostname - << ", port: " << _master_info.network_address.port; + << _master_info.network_address.hostname + << ", port: " << _master_info.network_address.port; } } StorageEngine::instance()->deregister_report_listener(this); @@ -1165,7 +1162,8 @@ void TaskWorkerPool::_report_tablet_worker_thread_callback() { } // wait at most report_tablet_interval_seconds, or being notified - _worker_thread_condition_variable.wait_for(MonoDelta::FromSeconds(config::report_tablet_interval_seconds)); + _worker_thread_condition_variable.wait_for( + MonoDelta::FromSeconds(config::report_tablet_interval_seconds)); if (!_is_work) { break; } @@ -1194,8 +1192,8 @@ void TaskWorkerPool::_report_tablet_worker_thread_callback() { << ", port:" << _master_info.network_address.port; } else { LOG(INFO) << "finish report tablets. 
master host: " - << _master_info.network_address.hostname - << ", port: " << _master_info.network_address.port; + << _master_info.network_address.hostname + << ", port: " << _master_info.network_address.port; } } StorageEngine::instance()->deregister_report_listener(this); @@ -1223,8 +1221,7 @@ void TaskWorkerPool::_upload_worker_thread_callback() { << ", job id:" << upload_request.job_id; std::map> tablet_files; - SnapshotLoader loader(_env, upload_request.job_id, - agent_task_req.signature); + SnapshotLoader loader(_env, upload_request.job_id, agent_task_req.signature); Status status = loader.upload(upload_request.src_dest_map, upload_request.broker_addr, upload_request.broker_prop, &tablet_files); @@ -1282,8 +1279,7 @@ void TaskWorkerPool::_download_worker_thread_callback() { // TODO: download std::vector downloaded_tablet_ids; - SnapshotLoader loader(_env, download_request.job_id, - agent_task_req.signature); + SnapshotLoader loader(_env, download_request.job_id, agent_task_req.signature); Status status = loader.download(download_request.src_dest_map, download_request.broker_addr, download_request.broker_prop, &downloaded_tablet_ids); @@ -1486,9 +1482,9 @@ void TaskWorkerPool::_move_dir_thread_callback() { TStatus task_status; // TODO: move dir - AgentStatus status = _move_dir( - move_dir_req.tablet_id, move_dir_req.schema_hash, move_dir_req.src, - move_dir_req.job_id, true /* TODO */, &error_msgs); + AgentStatus status = + _move_dir(move_dir_req.tablet_id, move_dir_req.schema_hash, move_dir_req.src, + move_dir_req.job_id, true /* TODO */, &error_msgs); if (status != DORIS_SUCCESS) { status_code = TStatusCode::RUNTIME_ERROR; diff --git a/be/src/agent/task_worker_pool.h b/be/src/agent/task_worker_pool.h index c40e6e173a56f4..fa62b066bea0f8 100644 --- a/be/src/agent/task_worker_pool.h +++ b/be/src/agent/task_worker_pool.h @@ -32,8 +32,8 @@ #include "olap/olap_define.h" #include "olap/storage_engine.h" #include "util/condition_variable.h" -#include "util/mutex.h" 
#include "util/countdown_latch.h" +#include "util/mutex.h" #include "util/thread.h" namespace doris { @@ -73,38 +73,60 @@ class TaskWorkerPool { }; inline const std::string TYPE_STRING(TaskWorkerType type) { - switch(type) { - case CREATE_TABLE: return "CREATE_TABLE"; - case DROP_TABLE: return "DROP_TABLE"; - case PUSH: return "PUSH"; - case REALTIME_PUSH: return "REALTIME_PUSH"; - case PUBLISH_VERSION: return "PUBLISH_VERSION"; - case CLEAR_ALTER_TASK: return "CLEAR_ALTER_TASK"; - case CLEAR_TRANSACTION_TASK: return "CLEAR_TRANSACTION_TASK"; - case DELETE: return "DELETE"; - case ALTER_TABLE: return "ALTER_TABLE"; - case QUERY_SPLIT_KEY: return "QUERY_SPLIT_KEY"; - case CLONE: return "CLONE"; - case STORAGE_MEDIUM_MIGRATE: return "STORAGE_MEDIUM_MIGRATE"; - case CHECK_CONSISTENCY: return "CHECK_CONSISTENCY"; - case REPORT_TASK: return "REPORT_TASK"; - case REPORT_DISK_STATE: return "REPORT_DISK_STATE"; - case REPORT_OLAP_TABLE: return "REPORT_OLAP_TABLE"; - case UPLOAD: return "UPLOAD"; - case DOWNLOAD: return "DOWNLOAD"; - case MAKE_SNAPSHOT: return "MAKE_SNAPSHOT"; - case RELEASE_SNAPSHOT: return "RELEASE_SNAPSHOT"; - case MOVE: return "MOVE"; - case RECOVER_TABLET: return "RECOVER_TABLET"; - case UPDATE_TABLET_META_INFO: return "UPDATE_TABLET_META_INFO"; - default: return "Unknown"; + switch (type) { + case CREATE_TABLE: + return "CREATE_TABLE"; + case DROP_TABLE: + return "DROP_TABLE"; + case PUSH: + return "PUSH"; + case REALTIME_PUSH: + return "REALTIME_PUSH"; + case PUBLISH_VERSION: + return "PUBLISH_VERSION"; + case CLEAR_ALTER_TASK: + return "CLEAR_ALTER_TASK"; + case CLEAR_TRANSACTION_TASK: + return "CLEAR_TRANSACTION_TASK"; + case DELETE: + return "DELETE"; + case ALTER_TABLE: + return "ALTER_TABLE"; + case QUERY_SPLIT_KEY: + return "QUERY_SPLIT_KEY"; + case CLONE: + return "CLONE"; + case STORAGE_MEDIUM_MIGRATE: + return "STORAGE_MEDIUM_MIGRATE"; + case CHECK_CONSISTENCY: + return "CHECK_CONSISTENCY"; + case REPORT_TASK: + return "REPORT_TASK"; + case 
REPORT_DISK_STATE: + return "REPORT_DISK_STATE"; + case REPORT_OLAP_TABLE: + return "REPORT_OLAP_TABLE"; + case UPLOAD: + return "UPLOAD"; + case DOWNLOAD: + return "DOWNLOAD"; + case MAKE_SNAPSHOT: + return "MAKE_SNAPSHOT"; + case RELEASE_SNAPSHOT: + return "RELEASE_SNAPSHOT"; + case MOVE: + return "MOVE"; + case RECOVER_TABLET: + return "RECOVER_TABLET"; + case UPDATE_TABLET_META_INFO: + return "UPDATE_TABLET_META_INFO"; + default: + return "Unknown"; } } - TaskWorkerPool( - const TaskWorkerType task_worker_type, - ExecEnv* env, - const TMasterInfo& master_info); + TaskWorkerPool(const TaskWorkerType task_worker_type, ExecEnv* env, + const TMasterInfo& master_info); virtual ~TaskWorkerPool(); // Start the task worker thread pool @@ -127,7 +149,7 @@ class TaskWorkerPool { void _remove_task_info(const TTaskType::type task_type, int64_t signature); void _finish_task(const TFinishTaskRequest& finish_task_request); uint32_t _get_next_task_index(int32_t thread_count, std::deque& tasks, - TPriority::type priority); + TPriority::type priority); void _create_tablet_worker_thread_callback(); void _drop_tablet_worker_thread_callback(); @@ -148,30 +170,18 @@ class TaskWorkerPool { void _move_dir_thread_callback(); void _update_tablet_meta_worker_thread_callback(); - void _alter_tablet( - const TAgentTaskRequest& alter_tablet_request, - int64_t signature, - const TTaskType::type task_type, - TFinishTaskRequest* finish_task_request); - - AgentStatus _get_tablet_info( - const TTabletId tablet_id, - const TSchemaHash schema_hash, - int64_t signature, - TTabletInfo* tablet_info); - - AgentStatus _move_dir( - const TTabletId tablet_id, - const TSchemaHash schema_hash, - const std::string& src, - int64_t job_id, - bool overwrite, - std::vector* error_msgs); - - OLAPStatus _check_migrate_requset( - const TStorageMediumMigrateReq& req, - TabletSharedPtr& tablet, - DataDir** dest_store); + void _alter_tablet(const TAgentTaskRequest& alter_tablet_request, int64_t signature, + const 
TTaskType::type task_type, TFinishTaskRequest* finish_task_request); + + AgentStatus _get_tablet_info(const TTabletId tablet_id, const TSchemaHash schema_hash, + int64_t signature, TTabletInfo* tablet_info); + + AgentStatus _move_dir(const TTabletId tablet_id, const TSchemaHash schema_hash, + const std::string& src, int64_t job_id, bool overwrite, + std::vector* error_msgs); + + OLAPStatus _check_migrate_requset(const TStorageMediumMigrateReq& req, TabletSharedPtr& tablet, + DataDir** dest_store); private: std::string _name; @@ -201,6 +211,6 @@ class TaskWorkerPool { static std::map> _s_task_signatures; DISALLOW_COPY_AND_ASSIGN(TaskWorkerPool); -}; // class TaskWorkerPool -} // namespace doris -#endif // DORIS_BE_SRC_TASK_WORKER_POOL_H +}; // class TaskWorkerPool +} // namespace doris +#endif // DORIS_BE_SRC_TASK_WORKER_POOL_H diff --git a/be/src/agent/topic_listener.h b/be/src/agent/topic_listener.h index 677be784fcdfb2..1d98c9834b32ce 100644 --- a/be/src/agent/topic_listener.h +++ b/be/src/agent/topic_listener.h @@ -21,19 +21,17 @@ #include "gen_cpp/AgentService_types.h" namespace doris { - -class TopicListener { +class TopicListener { public: - - virtual ~TopicListener(){} + virtual ~TopicListener() {} // Deal with a single update // // Input parameters: // protocol version: the version for the protocol, listeners should deal with the msg according to the protocol // topic_update: single update - virtual void handle_update(const TAgentServiceVersion::type& protocol_version, + virtual void handle_update(const TAgentServiceVersion::type& protocol_version, const TTopicUpdate& topic_update) = 0; }; -} +} // namespace doris #endif diff --git a/be/src/agent/topic_subscriber.cpp b/be/src/agent/topic_subscriber.cpp index 0e4c1fe11274b4..abc9b305ac1768 100644 --- a/be/src/agent/topic_subscriber.cpp +++ b/be/src/agent/topic_subscriber.cpp @@ -16,17 +16,17 @@ // under the License. 
#include "agent/topic_subscriber.h" + #include "common/logging.h" namespace doris { -TopicSubscriber::TopicSubscriber() { -} +TopicSubscriber::TopicSubscriber() {} TopicSubscriber::~TopicSubscriber() { // Delete all listeners in the register - std::map>::iterator it - = _registered_listeners.begin(); + std::map>::iterator it = + _registered_listeners.begin(); for (; it != _registered_listeners.end(); ++it) { std::vector& listeners = it->second; std::vector::iterator listener_it = listeners.begin(); @@ -53,9 +53,8 @@ void TopicSubscriber::handle_updates(const TAgentPublishRequest& agent_publish_r std::vector::iterator listener_it = listeners.begin(); // Send the update to all listeners with protocol version. for (; listener_it != listeners.end(); ++listener_it) { - (*listener_it)->handle_update(agent_publish_request.protocol_version, - *topic_update_it); - } + (*listener_it)->handle_update(agent_publish_request.protocol_version, *topic_update_it); + } } } } // namespace doris diff --git a/be/src/agent/topic_subscriber.h b/be/src/agent/topic_subscriber.h index cfde9a6013260f..be71cda3c4c03b 100644 --- a/be/src/agent/topic_subscriber.h +++ b/be/src/agent/topic_subscriber.h @@ -18,17 +18,16 @@ #ifndef DORIS_BE_SRC_AGENT_TOPIC_SUBSCRIBER_H #define DORIS_BE_SRC_AGENT_TOPIC_SUBSCRIBER_H -#include #include +#include + #include "agent/topic_listener.h" #include "gen_cpp/AgentService_types.h" namespace doris { class TopicSubscriber { - public: - TopicSubscriber(); ~TopicSubscriber(); // Put the topic type and listener to the map diff --git a/be/src/agent/user_resource_listener.cpp b/be/src/agent/user_resource_listener.cpp index 4c69f0b48d0fe2..26057720a00b9c 100644 --- a/be/src/agent/user_resource_listener.cpp +++ b/be/src/agent/user_resource_listener.cpp @@ -16,13 +16,16 @@ // under the License. 
#include "agent/user_resource_listener.h" -#include -#include + +#include #include -#include -#include #include -#include +#include +#include + +#include +#include + #include "common/logging.h" #include "gen_cpp/FrontendService.h" #include "runtime/client_cache.h" @@ -30,31 +33,27 @@ namespace doris { using std::string; -using apache::thrift::TException; +using apache::thrift::TException; using apache::thrift::transport::TTransportException; // Initialize the resource to cgroups file mapping // TRESOURCE_IOPS not mapped -UserResourceListener::UserResourceListener(ExecEnv* exec_env, - const TMasterInfo& master_info) - : _master_info(master_info), - _exec_env(exec_env), - _cgroups_mgr(*(exec_env->cgroups_mgr())) { -} +UserResourceListener::UserResourceListener(ExecEnv* exec_env, const TMasterInfo& master_info) + : _master_info(master_info), + _exec_env(exec_env), + _cgroups_mgr(*(exec_env->cgroups_mgr())) {} -UserResourceListener::~UserResourceListener() { -} +UserResourceListener::~UserResourceListener() {} -void UserResourceListener::handle_update(const TAgentServiceVersion::type& protocol_version, +void UserResourceListener::handle_update(const TAgentServiceVersion::type& protocol_version, const TTopicUpdate& topic_update) { - std::vector updates = topic_update.updates; + std::vector updates = topic_update.updates; if (updates.size() > 0) { int64_t new_version = updates[0].int_value; // Async call to update users resource method - std::async(std::launch::async, - &UserResourceListener::update_users_resource, - this, new_version); + std::async(std::launch::async, &UserResourceListener::update_users_resource, this, + new_version); } } @@ -64,16 +63,15 @@ void UserResourceListener::update_users_resource(int64_t new_version) { } // Call fe to get latest user resource Status master_status; - // using 500ms as default timeout value + // using 500ms as default timeout value FrontendServiceConnection client(_exec_env->frontend_client_cache(), - 
_master_info.network_address, - config::thrift_rpc_timeout_ms, - &master_status); + _master_info.network_address, config::thrift_rpc_timeout_ms, + &master_status); TFetchResourceResult new_fetched_resource; - if (!master_status.ok()) { - LOG(ERROR) << "Get frontend client failed, with address:" - << _master_info.network_address.hostname << ":" - << _master_info.network_address.port; + if (!master_status.ok()) { + LOG(ERROR) << "Get frontend client failed, with address:" + << _master_info.network_address.hostname << ":" + << _master_info.network_address.port; return; } try { @@ -83,24 +81,23 @@ void UserResourceListener::update_users_resource(int64_t new_version) { // reopen the client and set timeout to 500ms master_status = client.reopen(config::thrift_rpc_timeout_ms); - if (!master_status.ok()) { - LOG(WARNING) << "Reopen to get frontend client failed, with address:" - << _master_info.network_address.hostname << ":" - << _master_info.network_address.port; + if (!master_status.ok()) { + LOG(WARNING) << "Reopen to get frontend client failed, with address:" + << _master_info.network_address.hostname << ":" + << _master_info.network_address.port; return; } LOG(WARNING) << "fetchResource from frontend failed, retry!"; client->fetchResource(new_fetched_resource); } - } catch (TException& e) { + } catch (TException& e) { // Already try twice, log here client.reopen(config::thrift_rpc_timeout_ms); - LOG(WARNING) << "retry to fetchResource from " - << _master_info.network_address.hostname << ":" - << _master_info.network_address.port << " failed:\n" - << e.what(); + LOG(WARNING) << "retry to fetchResource from " << _master_info.network_address.hostname + << ":" << _master_info.network_address.port << " failed:\n" + << e.what(); return; } - _cgroups_mgr.update_local_cgroups(new_fetched_resource); -} + _cgroups_mgr.update_local_cgroups(new_fetched_resource); } +} // namespace doris diff --git a/be/src/agent/user_resource_listener.h b/be/src/agent/user_resource_listener.h 
index 2fb6d9713949c5..31cb019764fa39 100644 --- a/be/src/agent/user_resource_listener.h +++ b/be/src/agent/user_resource_listener.h @@ -15,15 +15,16 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_AGENT_USER_RESOURCE_LISTENER_H +#ifndef DORIS_BE_SRC_AGENT_USER_RESOURCE_LISTENER_H #define DORIS_BE_SRC_AGENT_USER_RESOURCE_LISTENER_H #include -#include "agent/topic_listener.h" + #include "agent/cgroups_mgr.h" +#include "agent/topic_listener.h" #include "gen_cpp/AgentService_types.h" -#include "gen_cpp/MasterService_types.h" #include "gen_cpp/HeartbeatService_types.h" +#include "gen_cpp/MasterService_types.h" #include "runtime/exec_env.h" namespace doris { @@ -31,22 +32,22 @@ namespace doris { class ExecEnv; class UserResourceListener : public TopicListener { - public: ~UserResourceListener(); // Input parameters: // root_cgroups_path: root cgroups allocated by admin to doris UserResourceListener(ExecEnv* exec_env, const TMasterInfo& master_info); // This method should be async - virtual void handle_update(const TAgentServiceVersion::type& protocol_version, + virtual void handle_update(const TAgentServiceVersion::type& protocol_version, const TTopicUpdate& topic_update); + private: const TMasterInfo& _master_info; ExecEnv* _exec_env; - CgroupsMgr& _cgroups_mgr; + CgroupsMgr& _cgroups_mgr; // Call cgroups mgr to update user's cgroups resource share // Also refresh local user resource's cache void update_users_resource(int64_t new_version); -}; -} +}; +} // namespace doris #endif diff --git a/be/src/agent/utils.cpp b/be/src/agent/utils.cpp index a24effadd9cd50..4ad4de25db94fd 100644 --- a/be/src/agent/utils.cpp +++ b/be/src/agent/utils.cpp @@ -17,15 +17,15 @@ #include "agent/utils.h" -#include -#include -#include - #include #include #include #include +#include +#include +#include + #include "common/status.h" using std::map; @@ -37,22 +37,15 @@ using apache::thrift::transport::TTransportException; namespace doris { 
-MasterServerClient::MasterServerClient( - const TMasterInfo& master_info, - FrontendServiceClientCache* client_cache) : - _master_info(master_info), - _client_cache(client_cache) { -} +MasterServerClient::MasterServerClient(const TMasterInfo& master_info, + FrontendServiceClientCache* client_cache) + : _master_info(master_info), _client_cache(client_cache) {} -AgentStatus MasterServerClient::finish_task( - const TFinishTaskRequest& request, - TMasterResult* result) { +AgentStatus MasterServerClient::finish_task(const TFinishTaskRequest& request, + TMasterResult* result) { Status client_status; - FrontendServiceConnection client( - _client_cache, - _master_info.network_address, - config::thrift_rpc_timeout_ms, - &client_status); + FrontendServiceConnection client(_client_cache, _master_info.network_address, + config::thrift_rpc_timeout_ms, &client_status); if (!client_status.ok()) { LOG(WARNING) << "fail to get master client from cache. " @@ -81,8 +74,7 @@ AgentStatus MasterServerClient::finish_task( client.reopen(config::thrift_rpc_timeout_ms); LOG(WARNING) << "fail to finish_task. " << "host=" << _master_info.network_address.hostname - << ", port=" << _master_info.network_address.port - << ", error=" << e.what(); + << ", port=" << _master_info.network_address.port << ", error=" << e.what(); return DORIS_ERROR; } @@ -91,11 +83,8 @@ AgentStatus MasterServerClient::finish_task( AgentStatus MasterServerClient::report(const TReportRequest& request, TMasterResult* result) { Status client_status; - FrontendServiceConnection client( - _client_cache, - _master_info.network_address, - config::thrift_rpc_timeout_ms, - &client_status); + FrontendServiceConnection client(_client_cache, _master_info.network_address, + config::thrift_rpc_timeout_ms, &client_status); if (!client_status.ok()) { LOG(WARNING) << "fail to get master client from cache. 
" @@ -143,13 +132,11 @@ AgentStatus MasterServerClient::report(const TReportRequest& request, TMasterRes return DORIS_SUCCESS; } -AgentStatus AgentUtils::rsync_from_remote( - const string& remote_host, - const string& remote_file_path, - const string& local_file_path, - const std::vector& exclude_file_patterns, - uint32_t transport_speed_limit_kbps, - uint32_t timeout_second) { +AgentStatus AgentUtils::rsync_from_remote(const string& remote_host, const string& remote_file_path, + const string& local_file_path, + const std::vector& exclude_file_patterns, + uint32_t transport_speed_limit_kbps, + uint32_t timeout_second) { int ret_code = 0; std::stringstream cmd_stream; cmd_stream << "rsync -r -q -e \"ssh -o StrictHostKeyChecking=no\""; @@ -238,7 +225,7 @@ bool AgentUtils::exec_cmd(const string& command, string* errmsg, bool redirect_s } // Execute command. - FILE *fp = popen(cmd.c_str(), "r"); + FILE* fp = popen(cmd.c_str(), "r"); if (fp == NULL) { std::stringstream err_stream; err_stream << "popen failed. " << strerror(errno) << ", with errno: " << errno << ".\n"; @@ -259,8 +246,8 @@ bool AgentUtils::exec_cmd(const string& command, string* errmsg, bool redirect_s *errmsg += "pclose cannot obtain the child status.\n"; } else { std::stringstream err_stream; - err_stream << "Close popen failed. " << strerror(errno) << ", with errno: " - << errno << "\n"; + err_stream << "Close popen failed. " << strerror(errno) << ", with errno: " << errno + << "\n"; *errmsg += err_stream.str(); } return false; @@ -269,7 +256,7 @@ bool AgentUtils::exec_cmd(const string& command, string* errmsg, bool redirect_s // Get return code of command. 
int32_t status_child = WEXITSTATUS(rc); if (status_child == 0) { - return true; + return true; } else { return false; } @@ -277,11 +264,10 @@ bool AgentUtils::exec_cmd(const string& command, string* errmsg, bool redirect_s bool AgentUtils::write_json_to_file(const map& info, const string& path) { rapidjson::Document json_info(rapidjson::kObjectType); - for (auto &it : info) { - json_info.AddMember( - rapidjson::Value(it.first.c_str(), json_info.GetAllocator()).Move(), - rapidjson::Value(it.second.c_str(), json_info.GetAllocator()).Move(), - json_info.GetAllocator()); + for (auto& it : info) { + json_info.AddMember(rapidjson::Value(it.first.c_str(), json_info.GetAllocator()).Move(), + rapidjson::Value(it.second.c_str(), json_info.GetAllocator()).Move(), + json_info.GetAllocator()); } rapidjson::StringBuffer json_info_str; rapidjson::Writer writer(json_info_str); @@ -296,4 +282,4 @@ bool AgentUtils::write_json_to_file(const map& info, const strin return true; } -} // namespace doris +} // namespace doris diff --git a/be/src/agent/utils.h b/be/src/agent/utils.h index c732e0304ab242..1f40ec3bfd3b76 100644 --- a/be/src/agent/utils.h +++ b/be/src/agent/utils.h @@ -29,7 +29,7 @@ namespace doris { class MasterServerClient { public: MasterServerClient(const TMasterInfo& master_info, FrontendServiceClientCache* client_cache); - virtual ~MasterServerClient() {}; + virtual ~MasterServerClient(){}; // Report finished task to the master server // @@ -59,8 +59,8 @@ class MasterServerClient { class AgentUtils { public: - AgentUtils() {}; - virtual ~AgentUtils() {}; + AgentUtils(){}; + virtual ~AgentUtils(){}; // Use rsync synchronize folder from remote agent to local folder // @@ -71,28 +71,27 @@ class AgentUtils { // * exclude_file_patterns: the patterns of the exclude file // * transport_speed_limit_kbps: speed limit of transport(kb/s) // * timeout_second: timeout of synchronize - virtual AgentStatus rsync_from_remote( - const std::string& remote_host, - const std::string& 
remote_file_path, - const std::string& local_file_path, - const std::vector& exclude_file_patterns, - const uint32_t transport_speed_limit_kbps, - const uint32_t timeout_second); + virtual AgentStatus rsync_from_remote(const std::string& remote_host, + const std::string& remote_file_path, + const std::string& local_file_path, + const std::vector& exclude_file_patterns, + const uint32_t transport_speed_limit_kbps, + const uint32_t timeout_second); // Print AgentStatus as string virtual std::string print_agent_status(AgentStatus status); // Execute shell cmd - virtual bool exec_cmd(const std::string& command, std::string* errmsg, bool redirect_stderr = true); + virtual bool exec_cmd(const std::string& command, std::string* errmsg, + bool redirect_stderr = true); // Write a map to file by json format - virtual bool write_json_to_file( - const std::map& info, - const std::string& path); + virtual bool write_json_to_file(const std::map& info, + const std::string& path); private: DISALLOW_COPY_AND_ASSIGN(AgentUtils); -}; // class AgentUtils +}; // class AgentUtils -} // namespace doris -#endif // DORIS_BE_SRC_AGENT_UTILS_H +} // namespace doris +#endif // DORIS_BE_SRC_AGENT_UTILS_H diff --git a/be/src/common/atomic.h b/be/src/common/atomic.h index c270411c4469bf..f9a6416c44e69f 100644 --- a/be/src/common/atomic.h +++ b/be/src/common/atomic.h @@ -36,29 +36,25 @@ class AtomicUtil { // while (1) CpuWait(); static ALWAYS_INLINE void cpu_wait() { #if (defined(__i386) || defined(__x86_64__)) - asm volatile("pause\n": : :"memory"); + asm volatile("pause\n" : : : "memory"); #elif defined(__aarch64__) asm volatile("yield\n" ::: "memory"); #endif } /// Provides "barrier" semantics (see below) without a memory access. - static ALWAYS_INLINE void memory_barrier() { - __sync_synchronize(); - } + static ALWAYS_INLINE void memory_barrier() { __sync_synchronize(); } /// Provides a compiler barrier. The compiler is not allowed to reorder memory /// accesses across this (but the CPU can). 
This generates no instructions. - static ALWAYS_INLINE void compiler_barrier() { - __asm__ __volatile__("" : : : "memory"); - } + static ALWAYS_INLINE void compiler_barrier() { __asm__ __volatile__("" : : : "memory"); } }; // Wrapper for atomic integers. This should be switched to c++ 11 when // we can switch. // This class overloads operators to behave like a regular integer type // but all operators and functions are thread safe. -template +template class AtomicInt { public: AtomicInt(T initial) : _value(initial) {} @@ -114,34 +110,22 @@ class AtomicInt { } // Safe read of the value - T read() { - return __sync_fetch_and_add(&_value, 0); - } + T read() { return __sync_fetch_and_add(&_value, 0); } /// Atomic load with "acquire" memory-ordering semantic. - ALWAYS_INLINE T load() const { - return base::subtle::Acquire_Load(&_value); - } + ALWAYS_INLINE T load() const { return base::subtle::Acquire_Load(&_value); } /// Atomic store with "release" memory-ordering semantic. - ALWAYS_INLINE void store(T x) { - base::subtle::Release_Store(&_value, x); - } + ALWAYS_INLINE void store(T x) { base::subtle::Release_Store(&_value, x); } /// Atomic add with "barrier" memory-ordering semantic. Returns the new value. - ALWAYS_INLINE T add(T x) { - return base::subtle::Barrier_AtomicIncrement(&_value, x); - } + ALWAYS_INLINE T add(T x) { return base::subtle::Barrier_AtomicIncrement(&_value, x); } // Increments by delta (i.e. += delta) and returns the new val - T update_and_fetch(T delta) { - return __sync_add_and_fetch(&_value, delta); - } + T update_and_fetch(T delta) { return __sync_add_and_fetch(&_value, delta); } // Increment by delta and returns the old val - T fetch_and_update(T delta) { - return __sync_fetch_and_add(&_value, delta); - } + T fetch_and_update(T delta) { return __sync_fetch_and_add(&_value, delta); } // Updates the int to 'value' if value is larger void update_max(T value) { @@ -176,9 +160,7 @@ class AtomicInt { } // Atomically updates _value with new_val. 
Returns the old _value. - T swap(const T& new_val) { - return __sync_lock_test_and_set(&_value, new_val); - } + T swap(const T& new_val) { return __sync_lock_test_and_set(&_value, new_val); } private: T _value; @@ -190,7 +172,7 @@ typedef AtomicInt AtomicInt32; typedef AtomicInt AtomicInt64; /// Atomic pointer. Operations have the same semantics as AtomicInt. -template +template class AtomicPtr { public: AtomicPtr(T* initial = nullptr) : _ptr(reinterpret_cast(initial)) {} @@ -204,7 +186,7 @@ class AtomicPtr { /// Store 'new_val' and return the previous value. Implies a Release memory barrier /// (i.e. the same as Store()). inline T* swap(T* val) { - return reinterpret_cast(_ptr.swap(reinterpret_cast(val))); + return reinterpret_cast(_ptr.swap(reinterpret_cast(val))); } private: diff --git a/be/src/common/compiler_util.h b/be/src/common/compiler_util.h index e9e9e90486f509..accc70c3844b5a 100644 --- a/be/src/common/compiler_util.h +++ b/be/src/common/compiler_util.h @@ -44,7 +44,6 @@ /// decision, e.g. not inlining a small function on a hot path. #define ALWAYS_INLINE __attribute__((always_inline)) -#define ALIGN_CACHE_LINE __attribute__ ((aligned (CACHE_LINE_SIZE))) +#define ALIGN_CACHE_LINE __attribute__((aligned(CACHE_LINE_SIZE))) #endif - diff --git a/be/src/common/config.h b/be/src/common/config.h index e1fc5357cc105d..75ed1eb79f18c2 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -22,595 +22,594 @@ namespace doris { namespace config { - // Dir of custom config file - CONF_String(custom_config_dir, "${DORIS_HOME}/conf"); - - // cluster id - CONF_Int32(cluster_id, "-1"); - // port on which BackendService is exported - CONF_Int32(be_port, "9060"); - - // port for brpc - CONF_Int32(brpc_port, "8060"); - - // the number of bthreads for brpc, the default value is set to -1, which means the number of bthreads is #cpu-cores - CONF_Int32(brpc_num_threads, "-1") - - // Declare a selection strategy for those servers have many ips. 
- // Note that there should at most one ip match this list. - // this is a list in semicolon-delimited format, in CIDR notation, e.g. 10.10.10.0/24 - // If no ip match this rule, will choose one randomly. - CONF_String(priority_networks, ""); - - //// - //// tcmalloc gc parameter - //// - // min memory for TCmalloc, when used memory is smaller than this, do not returned to OS - CONF_mInt64(tc_use_memory_min, "10737418240"); - // free memory rate.[0-100] - CONF_mInt64(tc_free_memory_rate, "20"); - - // Bound on the total amount of bytes allocated to thread caches. - // This bound is not strict, so it is possible for the cache to go over this bound - // in certain circumstances. This value defaults to 1GB - // If you suspect your application is not scaling to many threads due to lock contention in TCMalloc, - // you can try increasing this value. This may improve performance, at a cost of extra memory - // use by TCMalloc. - // reference: https://gperftools.github.io/gperftools/tcmalloc.html: TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES - // https://github.com/gperftools/gperftools/issues/1111 - CONF_Int64(tc_max_total_thread_cache_bytes, "1073741824"); - - // process memory limit specified as number of bytes - // ('[bB]?'), megabytes ('[mM]'), gigabytes ('[gG]'), - // or percentage of the physical memory ('%'). - // defaults to bytes if no unit is given" - // must larger than 0. and if larger than physical memory size, - // it will be set to physical memory size. 
- CONF_String(mem_limit, "80%"); - - // the port heartbeat service used - CONF_Int32(heartbeat_service_port, "9050"); - // the count of heart beat service - CONF_Int32(heartbeat_service_thread_count, "1"); - // the count of thread to create table - CONF_Int32(create_tablet_worker_count, "3"); - // the count of thread to drop table - CONF_Int32(drop_tablet_worker_count, "3"); - // the count of thread to batch load - CONF_Int32(push_worker_count_normal_priority, "3"); - // the count of thread to high priority batch load - CONF_Int32(push_worker_count_high_priority, "3"); - // the count of thread to publish version - CONF_Int32(publish_version_worker_count, "8"); - // the count of thread to clear transaction task - CONF_Int32(clear_transaction_task_worker_count, "1"); - // the count of thread to delete - CONF_Int32(delete_worker_count, "3"); - // the count of thread to alter table - CONF_Int32(alter_tablet_worker_count, "3"); - // the count of thread to clone - CONF_Int32(clone_worker_count, "3"); - // the count of thread to clone - CONF_Int32(storage_medium_migrate_count, "1"); - // the count of thread to check consistency - CONF_Int32(check_consistency_worker_count, "1"); - // the count of thread to upload - CONF_Int32(upload_worker_count, "1"); - // the count of thread to download - CONF_Int32(download_worker_count, "1"); - // the count of thread to make snapshot - CONF_Int32(make_snapshot_worker_count, "5"); - // the count of thread to release snapshot - CONF_Int32(release_snapshot_worker_count, "5"); - // the interval time(seconds) for agent report tasks signatrue to FE - CONF_mInt32(report_task_interval_seconds, "10"); - // the interval time(seconds) for agent report disk state to FE - CONF_mInt32(report_disk_state_interval_seconds, "60"); - // the interval time(seconds) for agent report olap table to FE - CONF_mInt32(report_tablet_interval_seconds, "60"); - // the interval time(seconds) for agent report plugin status to FE - // 
CONF_Int32(report_plugin_interval_seconds, "120"); - // the timeout(seconds) for alter table - // CONF_Int32(alter_tablet_timeout_seconds, "86400"); - // the timeout(seconds) for make snapshot - // CONF_Int32(make_snapshot_timeout_seconds, "600"); - // the timeout(seconds) for release snapshot - // CONF_Int32(release_snapshot_timeout_seconds, "600"); - // the max download speed(KB/s) - CONF_mInt32(max_download_speed_kbps, "50000"); - // download low speed limit(KB/s) - CONF_mInt32(download_low_speed_limit_kbps, "50"); - // download low speed time(seconds) - CONF_mInt32(download_low_speed_time, "300"); - // curl verbose mode - // CONF_Int64(curl_verbose_mode, "1"); - // seconds to sleep for each time check table status - // CONF_Int32(check_status_sleep_time_seconds, "10"); - // sleep time for one second - CONF_Int32(sleep_one_second, "1"); - // sleep time for five seconds - CONF_Int32(sleep_five_seconds, "5"); - - // log dir - CONF_String(sys_log_dir, "${DORIS_HOME}/log"); - CONF_String(user_function_dir, "${DORIS_HOME}/lib/udf"); - // INFO, WARNING, ERROR, FATAL - CONF_String(sys_log_level, "INFO"); - // TIME-DAY, TIME-HOUR, SIZE-MB-nnn - CONF_String(sys_log_roll_mode, "SIZE-MB-1024"); - // log roll num - CONF_Int32(sys_log_roll_num, "10"); - // verbose log - CONF_Strings(sys_log_verbose_modules, ""); - // verbose log level - CONF_Int32(sys_log_verbose_level, "10"); - // log buffer level - CONF_String(log_buffer_level, ""); - - // Pull load task dir - CONF_String(pull_load_task_dir, "${DORIS_HOME}/var/pull_load"); - - // the maximum number of bytes to display on the debug webserver's log page - CONF_Int64(web_log_bytes, "1048576"); - // number of threads available to serve backend execution requests - CONF_Int32(be_service_threads, "64"); - // key=value pair of default query options for Doris, separated by ',' - CONF_String(default_query_options, ""); - - // If non-zero, Doris will output memory usage every log_mem_usage_interval'th fragment completion. 
- // CONF_Int32(log_mem_usage_interval, "0"); - - // cgroups allocated for doris - CONF_String(doris_cgroups, ""); - - // Controls the number of threads to run work per core. It's common to pick 2x - // or 3x the number of cores. This keeps the cores busy without causing excessive - // thrashing. - CONF_Int32(num_threads_per_core, "3"); - // if true, compresses tuple data in Serialize - CONF_Bool(compress_rowbatches, "true"); - // serialize and deserialize each returned row batch - CONF_Bool(serialize_batch, "false"); - // interval between profile reports; in seconds - CONF_mInt32(status_report_interval, "5"); - // Local directory to copy UDF libraries from HDFS into - CONF_String(local_library_dir, "${UDF_RUNTIME_DIR}"); - // number of olap scanner thread pool size - CONF_Int32(doris_scanner_thread_pool_thread_num, "48"); - // number of olap scanner thread pool queue size - CONF_Int32(doris_scanner_thread_pool_queue_size, "102400"); - // number of etl thread pool size - CONF_Int32(etl_thread_pool_size, "8"); - // number of etl thread pool size - CONF_Int32(etl_thread_pool_queue_size, "256"); - // port on which to run Doris test backend - CONF_Int32(port, "20001"); - // default thrift client connect timeout(in seconds) - CONF_Int32(thrift_connect_timeout_seconds, "3"); - // default thrift client retry interval (in milliseconds) - CONF_mInt64(thrift_client_retry_interval_ms, "1000"); - // max row count number for single scan range - CONF_mInt32(doris_scan_range_row_count, "524288"); - // size of scanner queue between scanner thread and compute thread - CONF_mInt32(doris_scanner_queue_size, "1024"); - // single read execute fragment row size - CONF_mInt32(doris_scanner_row_num, "16384"); - // number of max scan keys - CONF_mInt32(doris_max_scan_key_num, "1024"); - // the max number of push down values of a single column. - // if exceed, no conditions will be pushed down for that column. 
- CONF_mInt32(max_pushdown_conditions_per_column, "1024"); - // return_row / total_row - CONF_mInt32(doris_max_pushdown_conjuncts_return_rate, "90"); - // (Advanced) Maximum size of per-query receive-side buffer - CONF_mInt32(exchg_node_buffer_size_bytes, "10485760"); - // insert sort threshold for sorter - // CONF_Int32(insertion_threshold, "16"); - // the block_size every block allocate for sorter - CONF_Int32(sorter_block_size, "8388608"); - // push_write_mbytes_per_sec - CONF_Int32(push_write_mbytes_per_sec, "10"); - - CONF_mInt64(column_dictionary_key_ratio_threshold, "0"); - CONF_mInt64(column_dictionary_key_size_threshold, "0"); - // if true, output IR after optimization passes - // CONF_Bool(dump_ir, "false"); - // if set, saves the generated IR to the output file. - //CONF_String(module_output, ""); - // memory_limitation_per_thread_for_schema_change unit GB - CONF_mInt32(memory_limitation_per_thread_for_schema_change, "2"); - - // CONF_Int64(max_unpacked_row_block_size, "104857600"); - - CONF_mInt32(file_descriptor_cache_clean_interval, "3600"); - CONF_mInt32(disk_stat_monitor_interval, "5"); - CONF_mInt32(unused_rowset_monitor_interval, "30"); - CONF_String(storage_root_path, "${DORIS_HOME}/storage"); - - // Config is used to check incompatible old format hdr_ format - // whether doris uses strict way. When config is true, process will log fatal - // and exit. When config is false, process will only log warning. - CONF_Bool(storage_strict_check_incompatible_old_format, "true"); - - // BE process will exit if the percentage of error disk reach this value. 
- CONF_mInt32(max_percentage_of_error_disk, "0"); - // CONF_Int32(default_num_rows_per_data_block, "1024"); - CONF_mInt32(default_num_rows_per_column_file_block, "1024"); - // pending data policy - CONF_mInt32(pending_data_expire_time_sec, "1800"); - // inc_rowset expired interval - CONF_mInt32(inc_rowset_expired_sec, "1800"); - // inc_rowset snapshot rs sweep time interval - CONF_mInt32(tablet_rowset_stale_sweep_time_sec, "1800"); - // garbage sweep policy - CONF_Int32(max_garbage_sweep_interval, "3600"); - CONF_Int32(min_garbage_sweep_interval, "180"); - CONF_mInt32(snapshot_expire_time_sec, "172800"); - // 仅仅是建议值,当磁盘空间不足时,trash下的文件保存期可不遵守这个参数 - CONF_mInt32(trash_file_expire_time_sec, "259200"); - // check row nums for BE/CE and schema change. true is open, false is closed. - CONF_mBool(row_nums_check, "true"); - //file descriptors cache, by default, cache 32768 descriptors - CONF_Int32(file_descriptor_cache_capacity, "32768"); - // minimum file descriptor number - // modify them upon necessity - CONF_Int32(min_file_descriptor_number, "60000"); - CONF_Int64(index_stream_cache_capacity, "10737418240"); - // CONF_Int64(max_packed_row_block_size, "20971520"); - - // Cache for storage page size - CONF_String(storage_page_cache_limit, "20G"); - // whether to disable page cache feature in storage - CONF_Bool(disable_storage_page_cache, "false"); - - // be policy - // whether disable automatic compaction task - CONF_mBool(disable_auto_compaction, "false"); - // check the configuration of auto compaction in seconds when auto compaction disabled - CONF_mInt32(check_auto_compaction_interval_seconds, "5"); - - // CONF_Int64(base_compaction_start_hour, "20"); - // CONF_Int64(base_compaction_end_hour, "7"); - CONF_mInt64(base_compaction_num_cumulative_deltas, "5"); - CONF_mDouble(base_cumulative_delta_ratio, "0.3"); - CONF_mInt64(base_compaction_interval_seconds_since_last_operation, "86400"); - CONF_mInt32(base_compaction_write_mbytes_per_sec, "5"); - - // config the 
cumulative compaction policy - // Valid configs: num_base, size_based - // num_based policy, the original version of cumulative compaction, cumulative version compaction once. - // size_based policy, a optimization version of cumulative compaction, targeting the use cases requiring - // lower write amplification, trading off read amplification and space amplification. - CONF_String(cumulative_compaction_policy, "size_based"); - - // In size_based policy, output rowset of cumulative compaction total disk size exceed this config size, - // this rowset will be given to base compaction, unit is m byte. - CONF_mInt64(cumulative_size_based_promotion_size_mbytes, "1024"); - // In size_based policy, output rowset of cumulative compaction total disk size exceed this config ratio of - // base rowset's total disk size, this rowset will be given to base compaction. The value must be between - // 0 and 1. - CONF_mDouble(cumulative_size_based_promotion_ratio, "0.05"); - // In size_based policy, the smallest size of rowset promotion. When the rowset is less than this config, this - // rowset will be not given to base compaction. The unit is m byte. - CONF_mInt64(cumulative_size_based_promotion_min_size_mbytes, "64"); - // The lower bound size to do cumulative compaction. When total disk size of candidate rowsets is less than - // this size, size_based policy may not do to cumulative compaction. The unit is m byte. 
- CONF_mInt64(cumulative_size_based_compaction_lower_size_mbytes, "64"); - - // cumulative compaction policy: min and max delta file's number - CONF_mInt64(min_cumulative_compaction_num_singleton_deltas, "5"); - CONF_mInt64(max_cumulative_compaction_num_singleton_deltas, "1000"); - CONF_mInt64(cumulative_compaction_budgeted_bytes, "104857600"); - // CONF_Int32(cumulative_compaction_write_mbytes_per_sec, "100"); - // cumulative compaction skips recently published deltas in order to prevent - // compacting a version that might be queried (in case the query planning phase took some time). - // the following config set the window size - CONF_mInt32(cumulative_compaction_skip_window_seconds, "30"); - - // if compaction of a tablet failed, this tablet should not be chosen to - // compaction until this interval passes. - CONF_mInt64(min_compaction_failure_interval_sec, "600"); // 10 min - - // This config can be set to limit thread number in compaction thread pool. - CONF_mInt32(min_compaction_threads, "10"); - CONF_mInt32(max_compaction_threads, "10"); - - // The upper limit of "permits" held by all compaction tasks. This config can be set to limit memory consumption for compaction. - CONF_mInt64(total_permits_for_compaction_score, "10000"); - - // Compaction task number per disk. - CONF_mInt32(compaction_task_num_per_disk, "2"); - - // How many rounds of cumulative compaction for each round of base compaction when compaction tasks generation. - CONF_mInt32(cumulative_compaction_rounds_for_each_base_compaction_round, "9"); - - // Merge log will be printed for each "row_step_for_compaction_merge_log" rows merged during compaction - CONF_mInt64(row_step_for_compaction_merge_log, "0"); - - // Threshold to logging compaction trace, in seconds. 
- CONF_mInt32(base_compaction_trace_threshold, "10"); - CONF_mInt32(cumulative_compaction_trace_threshold, "2"); - - // time interval to record tablet scan count in second for the purpose of calculating tablet scan frequency - CONF_mInt64(tablet_scan_frequency_time_node_interval_second, "300"); - // coefficient for tablet scan frequency and compaction score when finding a tablet for compaction - CONF_mInt32(compaction_tablet_scan_frequency_factor, "0"); - CONF_mInt32(compaction_tablet_compaction_score_factor, "1"); - - - // Port to start debug webserver on - CONF_Int32(webserver_port, "8040"); - // Number of webserver workers - CONF_Int32(webserver_num_workers, "48"); - // Period to update rate counters and sampling counters in ms. - CONF_mInt32(periodic_counter_update_period_ms, "500"); - - // Used for mini Load. mini load data file will be removed after this time. - CONF_Int64(load_data_reserve_hours, "4"); - // log error log will be removed after this time - CONF_mInt64(load_error_log_reserve_hours, "48"); - CONF_Int32(number_tablet_writer_threads, "16"); - - // The maximum amount of data that can be processed by a stream load - CONF_mInt64(streaming_load_max_mb, "10240"); - // Some data formats, such as JSON, cannot be streamed. - // Therefore, it is necessary to limit the maximum number of - // such data when using stream load to prevent excessive memory consumption. - CONF_mInt64(streaming_load_json_max_mb, "100"); - // the alive time of a TabletsChannel. - // If the channel does not receive any data till this time, - // the channel will be removed. - CONF_Int32(streaming_load_rpc_max_alive_time_sec, "1200"); - // the timeout of a rpc to open the tablet writer in remote BE. - // short operation time, can set a short timeout - CONF_Int32(tablet_writer_open_rpc_timeout_sec, "60"); - // Deprecated, use query_timeout instead - // the timeout of a rpc to process one batch in tablet writer. 
- // you may need to increase this timeout if using larger 'streaming_load_max_mb', - // or encounter 'tablet writer write failed' error when loading. - // CONF_Int32(tablet_writer_rpc_timeout_sec, "600"); - // OlapTableSink sender's send interval, should be less than the real response time of a tablet writer rpc. - CONF_mInt32(olap_table_sink_send_interval_ms, "10"); - - // Fragment thread pool - CONF_Int32(fragment_pool_thread_num_min, "64"); - CONF_Int32(fragment_pool_thread_num_max, "512"); - CONF_Int32(fragment_pool_queue_size, "2048"); - - //for cast - // CONF_Bool(cast, "true"); - - // Spill to disk when query - // Writable scratch directories, split by ";" - CONF_String(query_scratch_dirs, "${DORIS_HOME}"); - - // Control the number of disks on the machine. If 0, this comes from the system settings. - CONF_Int32(num_disks, "0"); - // The maximum number of the threads per disk is also the max queue depth per disk. - CONF_Int32(num_threads_per_disk, "0"); - // The read size is the size of the reads sent to os. - // There is a trade off of latency and throughout, trying to keep disks busy but - // not introduce seeks. The literature seems to agree that with 8 MB reads, random - // io and sequential io perform similarly. - CONF_Int32(read_size, "8388608"); // 8 * 1024 * 1024, Read Size (in bytes) - CONF_Int32(min_buffer_size, "1024"); // 1024, The minimum read buffer size (in bytes) - - // For each io buffer size, the maximum number of buffers the IoMgr will hold onto - // With 1024B through 8MB buffers, this is up to ~2GB of buffers. - CONF_Int32(max_free_io_buffers, "128"); - - CONF_Bool(disable_mem_pools, "false"); - - // Whether to allocate chunk using mmap. If you enable this, you'd better to - // increase vm.max_map_count's value whose default value is 65530. 
- // you can do it as root via "sysctl -w vm.max_map_count=262144" or - // "echo 262144 > /proc/sys/vm/max_map_count" - // NOTE: When this is set to true, you must set chunk_reserved_bytes_limit - // to a relative large number or the performance is very very bad. - CONF_Bool(use_mmap_allocate_chunk, "false"); - - // Chunk Allocator's reserved bytes limit, - // Default value is 2GB, increase this variable can improve performance, but will - // acquire more free memory which can not be used by other modules - CONF_Int64(chunk_reserved_bytes_limit, "2147483648"); - - // The probing algorithm of partitioned hash table. - // Enable quadratic probing hash table - CONF_Bool(enable_quadratic_probing, "false"); - - // for pprof - CONF_String(pprof_profile_dir, "${DORIS_HOME}/log"); - - // for partition - // CONF_Bool(enable_partitioned_hash_join, "false") - CONF_Bool(enable_partitioned_aggregation, "true"); - - // to forward compatibility, will be removed later - CONF_mBool(enable_token_check, "true"); - - // to open/close system metrics - CONF_Bool(enable_system_metrics, "true"); - - CONF_mBool(enable_prefetch, "true"); - - // Number of cores Doris will used, this will effect only when it's greater than 0. - // Otherwise, Doris will use all cores returned from "/proc/cpuinfo". - CONF_Int32(num_cores, "0"); - - // CONF_Bool(thread_creation_fault_injection, "false"); - - // Set this to encrypt and perform an integrity - // check on all data spilled to disk during a query - // CONF_Bool(disk_spill_encryption, "false"); - - // When BE start, If there is a broken disk, BE process will exit by default. 
- // Otherwise, we will ignore the broken disk, - CONF_Bool(ignore_broken_disk, "false"); - - // Writable scratch directories - CONF_String(scratch_dirs, "/tmp"); - - // If false and --scratch_dirs contains multiple directories on the same device, - // then only the first writable directory is used - // CONF_Bool(allow_multiple_scratch_dirs_per_device, "false"); - - // linux transparent huge page - CONF_Bool(madvise_huge_pages, "false"); - - // whether use mmap to allocate memory - CONF_Bool(mmap_buffers, "false"); - - // max memory can be allocated by buffer pool - CONF_String(buffer_pool_limit, "80G"); - - // clean page can be hold by buffer pool - CONF_String(buffer_pool_clean_pages_limit, "20G"); - - // Sleep time in seconds between memory maintenance iterations - CONF_mInt64(memory_maintenance_sleep_time_s, "10"); - - // Alignment - CONF_Int32(memory_max_alignment, "16"); +// Dir of custom config file +CONF_String(custom_config_dir, "${DORIS_HOME}/conf"); + +// cluster id +CONF_Int32(cluster_id, "-1"); +// port on which BackendService is exported +CONF_Int32(be_port, "9060"); + +// port for brpc +CONF_Int32(brpc_port, "8060"); + +// the number of bthreads for brpc, the default value is set to -1, which means the number of bthreads is #cpu-cores +CONF_Int32(brpc_num_threads, "-1"); + +// Declare a selection strategy for servers that have many ips. +// Note that at most one ip should match this list. +// this is a list in semicolon-delimited format, in CIDR notation, e.g. 10.10.10.0/24 +// If no ip matches this rule, one will be chosen randomly. +CONF_String(priority_networks, ""); + +//// +//// tcmalloc gc parameter +//// +// min memory for TCmalloc, when used memory is smaller than this, do not returned to OS +CONF_mInt64(tc_use_memory_min, "10737418240"); +// free memory rate.[0-100] +CONF_mInt64(tc_free_memory_rate, "20"); + +// Bound on the total amount of bytes allocated to thread caches. 
+// This bound is not strict, so it is possible for the cache to go over this bound +// in certain circumstances. This value defaults to 1GB +// If you suspect your application is not scaling to many threads due to lock contention in TCMalloc, +// you can try increasing this value. This may improve performance, at a cost of extra memory +// use by TCMalloc. +// reference: https://gperftools.github.io/gperftools/tcmalloc.html: TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES +// https://github.com/gperftools/gperftools/issues/1111 +CONF_Int64(tc_max_total_thread_cache_bytes, "1073741824"); + +// process memory limit specified as number of bytes +// ('[bB]?'), megabytes ('[mM]'), gigabytes ('[gG]'), +// or percentage of the physical memory ('%'). +// defaults to bytes if no unit is given. +// must be larger than 0, and if larger than physical memory size, +// it will be set to physical memory size. +CONF_String(mem_limit, "80%"); + +// the port heartbeat service used +CONF_Int32(heartbeat_service_port, "9050"); +// the count of heart beat service +CONF_Int32(heartbeat_service_thread_count, "1"); +// the count of thread to create table +CONF_Int32(create_tablet_worker_count, "3"); +// the count of thread to drop table +CONF_Int32(drop_tablet_worker_count, "3"); +// the count of thread to batch load +CONF_Int32(push_worker_count_normal_priority, "3"); +// the count of thread to high priority batch load +CONF_Int32(push_worker_count_high_priority, "3"); +// the count of thread to publish version +CONF_Int32(publish_version_worker_count, "8"); +// the count of thread to clear transaction task +CONF_Int32(clear_transaction_task_worker_count, "1"); +// the count of thread to delete +CONF_Int32(delete_worker_count, "3"); +// the count of thread to alter table +CONF_Int32(alter_tablet_worker_count, "3"); +// the count of thread to clone +CONF_Int32(clone_worker_count, "3"); +// the count of thread to migrate storage medium +CONF_Int32(storage_medium_migrate_count, "1"); +// the count of thread to check 
consistency +CONF_Int32(check_consistency_worker_count, "1"); +// the count of thread to upload +CONF_Int32(upload_worker_count, "1"); +// the count of thread to download +CONF_Int32(download_worker_count, "1"); +// the count of thread to make snapshot +CONF_Int32(make_snapshot_worker_count, "5"); +// the count of thread to release snapshot +CONF_Int32(release_snapshot_worker_count, "5"); +// the interval time(seconds) for agent report tasks signature to FE +CONF_mInt32(report_task_interval_seconds, "10"); +// the interval time(seconds) for agent report disk state to FE +CONF_mInt32(report_disk_state_interval_seconds, "60"); +// the interval time(seconds) for agent report olap table to FE +CONF_mInt32(report_tablet_interval_seconds, "60"); +// the interval time(seconds) for agent report plugin status to FE +// CONF_Int32(report_plugin_interval_seconds, "120"); +// the timeout(seconds) for alter table +// CONF_Int32(alter_tablet_timeout_seconds, "86400"); +// the timeout(seconds) for make snapshot +// CONF_Int32(make_snapshot_timeout_seconds, "600"); +// the timeout(seconds) for release snapshot +// CONF_Int32(release_snapshot_timeout_seconds, "600"); +// the max download speed(KB/s) +CONF_mInt32(max_download_speed_kbps, "50000"); +// download low speed limit(KB/s) +CONF_mInt32(download_low_speed_limit_kbps, "50"); +// download low speed time(seconds) +CONF_mInt32(download_low_speed_time, "300"); +// curl verbose mode +// CONF_Int64(curl_verbose_mode, "1"); +// seconds to sleep for each time check table status +// CONF_Int32(check_status_sleep_time_seconds, "10"); +// sleep time for one second +CONF_Int32(sleep_one_second, "1"); +// sleep time for five seconds +CONF_Int32(sleep_five_seconds, "5"); + +// log dir +CONF_String(sys_log_dir, "${DORIS_HOME}/log"); +CONF_String(user_function_dir, "${DORIS_HOME}/lib/udf"); +// INFO, WARNING, ERROR, FATAL +CONF_String(sys_log_level, "INFO"); +// TIME-DAY, TIME-HOUR, SIZE-MB-nnn +CONF_String(sys_log_roll_mode, 
"SIZE-MB-1024"); +// log roll num +CONF_Int32(sys_log_roll_num, "10"); +// verbose log +CONF_Strings(sys_log_verbose_modules, ""); +// verbose log level +CONF_Int32(sys_log_verbose_level, "10"); +// log buffer level +CONF_String(log_buffer_level, ""); + +// Pull load task dir +CONF_String(pull_load_task_dir, "${DORIS_HOME}/var/pull_load"); + +// the maximum number of bytes to display on the debug webserver's log page +CONF_Int64(web_log_bytes, "1048576"); +// number of threads available to serve backend execution requests +CONF_Int32(be_service_threads, "64"); +// key=value pair of default query options for Doris, separated by ',' +CONF_String(default_query_options, ""); + +// If non-zero, Doris will output memory usage every log_mem_usage_interval'th fragment completion. +// CONF_Int32(log_mem_usage_interval, "0"); + +// cgroups allocated for doris +CONF_String(doris_cgroups, ""); + +// Controls the number of threads to run work per core. It's common to pick 2x +// or 3x the number of cores. This keeps the cores busy without causing excessive +// thrashing. 
+CONF_Int32(num_threads_per_core, "3"); +// if true, compresses tuple data in Serialize +CONF_Bool(compress_rowbatches, "true"); +// serialize and deserialize each returned row batch +CONF_Bool(serialize_batch, "false"); +// interval between profile reports; in seconds +CONF_mInt32(status_report_interval, "5"); +// Local directory to copy UDF libraries from HDFS into +CONF_String(local_library_dir, "${UDF_RUNTIME_DIR}"); +// number of olap scanner thread pool size +CONF_Int32(doris_scanner_thread_pool_thread_num, "48"); +// number of olap scanner thread pool queue size +CONF_Int32(doris_scanner_thread_pool_queue_size, "102400"); +// number of etl thread pool size +CONF_Int32(etl_thread_pool_size, "8"); +// number of etl thread pool size +CONF_Int32(etl_thread_pool_queue_size, "256"); +// port on which to run Doris test backend +CONF_Int32(port, "20001"); +// default thrift client connect timeout(in seconds) +CONF_Int32(thrift_connect_timeout_seconds, "3"); +// default thrift client retry interval (in milliseconds) +CONF_mInt64(thrift_client_retry_interval_ms, "1000"); +// max row count number for single scan range +CONF_mInt32(doris_scan_range_row_count, "524288"); +// size of scanner queue between scanner thread and compute thread +CONF_mInt32(doris_scanner_queue_size, "1024"); +// single read execute fragment row size +CONF_mInt32(doris_scanner_row_num, "16384"); +// number of max scan keys +CONF_mInt32(doris_max_scan_key_num, "1024"); +// the max number of push down values of a single column. +// if exceed, no conditions will be pushed down for that column. 
+CONF_mInt32(max_pushdown_conditions_per_column, "1024"); +// return_row / total_row +CONF_mInt32(doris_max_pushdown_conjuncts_return_rate, "90"); +// (Advanced) Maximum size of per-query receive-side buffer +CONF_mInt32(exchg_node_buffer_size_bytes, "10485760"); +// insert sort threshold for sorter +// CONF_Int32(insertion_threshold, "16"); +// the block_size every block allocate for sorter +CONF_Int32(sorter_block_size, "8388608"); +// push_write_mbytes_per_sec +CONF_Int32(push_write_mbytes_per_sec, "10"); + +CONF_mInt64(column_dictionary_key_ratio_threshold, "0"); +CONF_mInt64(column_dictionary_key_size_threshold, "0"); +// if true, output IR after optimization passes +// CONF_Bool(dump_ir, "false"); +// if set, saves the generated IR to the output file. +//CONF_String(module_output, ""); +// memory_limitation_per_thread_for_schema_change unit GB +CONF_mInt32(memory_limitation_per_thread_for_schema_change, "2"); + +// CONF_Int64(max_unpacked_row_block_size, "104857600"); + +CONF_mInt32(file_descriptor_cache_clean_interval, "3600"); +CONF_mInt32(disk_stat_monitor_interval, "5"); +CONF_mInt32(unused_rowset_monitor_interval, "30"); +CONF_String(storage_root_path, "${DORIS_HOME}/storage"); + +// Config is used to check incompatible old format hdr_ format +// whether doris uses strict way. When config is true, process will log fatal +// and exit. When config is false, process will only log warning. +CONF_Bool(storage_strict_check_incompatible_old_format, "true"); + +// BE process will exit if the percentage of error disk reach this value. 
+CONF_mInt32(max_percentage_of_error_disk, "0"); +// CONF_Int32(default_num_rows_per_data_block, "1024"); +CONF_mInt32(default_num_rows_per_column_file_block, "1024"); +// pending data policy +CONF_mInt32(pending_data_expire_time_sec, "1800"); +// inc_rowset expired interval +CONF_mInt32(inc_rowset_expired_sec, "1800"); +// inc_rowset snapshot rs sweep time interval +CONF_mInt32(tablet_rowset_stale_sweep_time_sec, "1800"); +// garbage sweep policy +CONF_Int32(max_garbage_sweep_interval, "3600"); +CONF_Int32(min_garbage_sweep_interval, "180"); +CONF_mInt32(snapshot_expire_time_sec, "172800"); +// This is only a suggested value: when disk space is insufficient, the retention period of files under trash may not honor this parameter +CONF_mInt32(trash_file_expire_time_sec, "259200"); +// check row nums for BE/CE and schema change. true is open, false is closed. +CONF_mBool(row_nums_check, "true"); +// file descriptors cache, by default, cache 32768 descriptors +CONF_Int32(file_descriptor_cache_capacity, "32768"); +// minimum file descriptor number +// modify them upon necessity +CONF_Int32(min_file_descriptor_number, "60000"); +CONF_Int64(index_stream_cache_capacity, "10737418240"); +// CONF_Int64(max_packed_row_block_size, "20971520"); + +// Cache for storage page size +CONF_String(storage_page_cache_limit, "20G"); +// whether to disable page cache feature in storage +CONF_Bool(disable_storage_page_cache, "false"); + +// be policy +// whether disable automatic compaction task +CONF_mBool(disable_auto_compaction, "false"); +// check the configuration of auto compaction in seconds when auto compaction disabled +CONF_mInt32(check_auto_compaction_interval_seconds, "5"); + +// CONF_Int64(base_compaction_start_hour, "20"); +// CONF_Int64(base_compaction_end_hour, "7"); +CONF_mInt64(base_compaction_num_cumulative_deltas, "5"); +CONF_mDouble(base_cumulative_delta_ratio, "0.3"); +CONF_mInt64(base_compaction_interval_seconds_since_last_operation, "86400"); +CONF_mInt32(base_compaction_write_mbytes_per_sec, "5"); + +// config the cumulative compaction policy +// Valid configs: 
num_based, size_based +// num_based policy, the original version of cumulative compaction, cumulative version compaction once. +// size_based policy, an optimized version of cumulative compaction, targeting the use cases requiring +// lower write amplification, trading off read amplification and space amplification. +CONF_String(cumulative_compaction_policy, "size_based"); + +// In size_based policy, output rowset of cumulative compaction total disk size exceed this config size, +// this rowset will be given to base compaction, unit is m byte. +CONF_mInt64(cumulative_size_based_promotion_size_mbytes, "1024"); +// In size_based policy, output rowset of cumulative compaction total disk size exceed this config ratio of +// base rowset's total disk size, this rowset will be given to base compaction. The value must be between +// 0 and 1. +CONF_mDouble(cumulative_size_based_promotion_ratio, "0.05"); +// In size_based policy, the smallest size of rowset promotion. When the rowset is less than this config, this +// rowset will not be given to base compaction. The unit is m byte. +CONF_mInt64(cumulative_size_based_promotion_min_size_mbytes, "64"); +// The lower bound size to do cumulative compaction. When total disk size of candidate rowsets is less than +// this size, size_based policy may not do cumulative compaction. The unit is m byte. +CONF_mInt64(cumulative_size_based_compaction_lower_size_mbytes, "64"); + +// cumulative compaction policy: min and max delta file's number +CONF_mInt64(min_cumulative_compaction_num_singleton_deltas, "5"); +CONF_mInt64(max_cumulative_compaction_num_singleton_deltas, "1000"); +CONF_mInt64(cumulative_compaction_budgeted_bytes, "104857600"); +// CONF_Int32(cumulative_compaction_write_mbytes_per_sec, "100"); +// cumulative compaction skips recently published deltas in order to prevent +// compacting a version that might be queried (in case the query planning phase took some time). 
+// the following config set the window size +CONF_mInt32(cumulative_compaction_skip_window_seconds, "30"); + +// if compaction of a tablet failed, this tablet should not be chosen to +// compaction until this interval passes. +CONF_mInt64(min_compaction_failure_interval_sec, "600"); // 10 min + +// This config can be set to limit thread number in compaction thread pool. +CONF_mInt32(min_compaction_threads, "10"); +CONF_mInt32(max_compaction_threads, "10"); + +// The upper limit of "permits" held by all compaction tasks. This config can be set to limit memory consumption for compaction. +CONF_mInt64(total_permits_for_compaction_score, "10000"); + +// Compaction task number per disk. +CONF_mInt32(compaction_task_num_per_disk, "2"); + +// How many rounds of cumulative compaction for each round of base compaction when compaction tasks generation. +CONF_mInt32(cumulative_compaction_rounds_for_each_base_compaction_round, "9"); + +// Merge log will be printed for each "row_step_for_compaction_merge_log" rows merged during compaction +CONF_mInt64(row_step_for_compaction_merge_log, "0"); + +// Threshold to logging compaction trace, in seconds. +CONF_mInt32(base_compaction_trace_threshold, "10"); +CONF_mInt32(cumulative_compaction_trace_threshold, "2"); + +// time interval to record tablet scan count in second for the purpose of calculating tablet scan frequency +CONF_mInt64(tablet_scan_frequency_time_node_interval_second, "300"); +// coefficient for tablet scan frequency and compaction score when finding a tablet for compaction +CONF_mInt32(compaction_tablet_scan_frequency_factor, "0"); +CONF_mInt32(compaction_tablet_compaction_score_factor, "1"); + +// Port to start debug webserver on +CONF_Int32(webserver_port, "8040"); +// Number of webserver workers +CONF_Int32(webserver_num_workers, "48"); +// Period to update rate counters and sampling counters in ms. +CONF_mInt32(periodic_counter_update_period_ms, "500"); + +// Used for mini Load. 
mini load data file will be removed after this time. +CONF_Int64(load_data_reserve_hours, "4"); +// log error log will be removed after this time +CONF_mInt64(load_error_log_reserve_hours, "48"); +CONF_Int32(number_tablet_writer_threads, "16"); + +// The maximum amount of data that can be processed by a stream load +CONF_mInt64(streaming_load_max_mb, "10240"); +// Some data formats, such as JSON, cannot be streamed. +// Therefore, it is necessary to limit the maximum number of +// such data when using stream load to prevent excessive memory consumption. +CONF_mInt64(streaming_load_json_max_mb, "100"); +// the alive time of a TabletsChannel. +// If the channel does not receive any data till this time, +// the channel will be removed. +CONF_Int32(streaming_load_rpc_max_alive_time_sec, "1200"); +// the timeout of a rpc to open the tablet writer in remote BE. +// short operation time, can set a short timeout +CONF_Int32(tablet_writer_open_rpc_timeout_sec, "60"); +// Deprecated, use query_timeout instead +// the timeout of a rpc to process one batch in tablet writer. +// you may need to increase this timeout if using larger 'streaming_load_max_mb', +// or encounter 'tablet writer write failed' error when loading. +// CONF_Int32(tablet_writer_rpc_timeout_sec, "600"); +// OlapTableSink sender's send interval, should be less than the real response time of a tablet writer rpc. +CONF_mInt32(olap_table_sink_send_interval_ms, "10"); + +// Fragment thread pool +CONF_Int32(fragment_pool_thread_num_min, "64"); +CONF_Int32(fragment_pool_thread_num_max, "512"); +CONF_Int32(fragment_pool_queue_size, "2048"); + +//for cast +// CONF_Bool(cast, "true"); + +// Spill to disk when query +// Writable scratch directories, split by ";" +CONF_String(query_scratch_dirs, "${DORIS_HOME}"); + +// Control the number of disks on the machine. If 0, this comes from the system settings. +CONF_Int32(num_disks, "0"); +// The maximum number of the threads per disk is also the max queue depth per disk. 
+CONF_Int32(num_threads_per_disk, "0"); +// The read size is the size of the reads sent to os. +// There is a trade off of latency and throughout, trying to keep disks busy but +// not introduce seeks. The literature seems to agree that with 8 MB reads, random +// io and sequential io perform similarly. +CONF_Int32(read_size, "8388608"); // 8 * 1024 * 1024, Read Size (in bytes) +CONF_Int32(min_buffer_size, "1024"); // 1024, The minimum read buffer size (in bytes) + +// For each io buffer size, the maximum number of buffers the IoMgr will hold onto +// With 1024B through 8MB buffers, this is up to ~2GB of buffers. +CONF_Int32(max_free_io_buffers, "128"); + +CONF_Bool(disable_mem_pools, "false"); + +// Whether to allocate chunk using mmap. If you enable this, you'd better to +// increase vm.max_map_count's value whose default value is 65530. +// you can do it as root via "sysctl -w vm.max_map_count=262144" or +// "echo 262144 > /proc/sys/vm/max_map_count" +// NOTE: When this is set to true, you must set chunk_reserved_bytes_limit +// to a relative large number or the performance is very very bad. +CONF_Bool(use_mmap_allocate_chunk, "false"); + +// Chunk Allocator's reserved bytes limit, +// Default value is 2GB, increase this variable can improve performance, but will +// acquire more free memory which can not be used by other modules +CONF_Int64(chunk_reserved_bytes_limit, "2147483648"); + +// The probing algorithm of partitioned hash table. 
+// Enable quadratic probing hash table +CONF_Bool(enable_quadratic_probing, "false"); + +// for pprof +CONF_String(pprof_profile_dir, "${DORIS_HOME}/log"); + +// for partition +// CONF_Bool(enable_partitioned_hash_join, "false") +CONF_Bool(enable_partitioned_aggregation, "true"); + +// to forward compatibility, will be removed later +CONF_mBool(enable_token_check, "true"); + +// to open/close system metrics +CONF_Bool(enable_system_metrics, "true"); + +CONF_mBool(enable_prefetch, "true"); + +// Number of cores Doris will used, this will effect only when it's greater than 0. +// Otherwise, Doris will use all cores returned from "/proc/cpuinfo". +CONF_Int32(num_cores, "0"); + +// CONF_Bool(thread_creation_fault_injection, "false"); + +// Set this to encrypt and perform an integrity +// check on all data spilled to disk during a query +// CONF_Bool(disk_spill_encryption, "false"); + +// When BE start, If there is a broken disk, BE process will exit by default. +// Otherwise, we will ignore the broken disk, +CONF_Bool(ignore_broken_disk, "false"); + +// Writable scratch directories +CONF_String(scratch_dirs, "/tmp"); + +// If false and --scratch_dirs contains multiple directories on the same device, +// then only the first writable directory is used +// CONF_Bool(allow_multiple_scratch_dirs_per_device, "false"); + +// linux transparent huge page +CONF_Bool(madvise_huge_pages, "false"); + +// whether use mmap to allocate memory +CONF_Bool(mmap_buffers, "false"); + +// max memory can be allocated by buffer pool +CONF_String(buffer_pool_limit, "80G"); + +// clean page can be hold by buffer pool +CONF_String(buffer_pool_clean_pages_limit, "20G"); + +// Sleep time in seconds between memory maintenance iterations +CONF_mInt64(memory_maintenance_sleep_time_s, "10"); + +// Alignment +CONF_Int32(memory_max_alignment, "16"); - // write buffer size before flush - CONF_mInt64(write_buffer_size, "104857600"); +// write buffer size before flush +CONF_mInt64(write_buffer_size, 
"104857600"); - // following 2 configs limit the memory consumption of load process on a Backend. - // eg: memory limit to 80% of mem limit config but up to 100GB(default) - // NOTICE(cmy): set these default values very large because we don't want to - // impact the load performance when user upgrading Doris. - // user should set these configs properly if necessary. - CONF_Int64(load_process_max_memory_limit_bytes, "107374182400"); // 100GB - CONF_Int32(load_process_max_memory_limit_percent, "80"); // 80% +// following 2 configs limit the memory consumption of load process on a Backend. +// eg: memory limit to 80% of mem limit config but up to 100GB(default) +// NOTICE(cmy): set these default values very large because we don't want to +// impact the load performance when user upgrading Doris. +// user should set these configs properly if necessary. +CONF_Int64(load_process_max_memory_limit_bytes, "107374182400"); // 100GB +CONF_Int32(load_process_max_memory_limit_percent, "80"); // 80% - // update interval of tablet stat cache - CONF_mInt32(tablet_stat_cache_update_interval_second, "300"); +// update interval of tablet stat cache +CONF_mInt32(tablet_stat_cache_update_interval_second, "300"); - // result buffer cancelled time (unit: second) - CONF_mInt32(result_buffer_cancelled_interval_time, "300"); +// result buffer cancelled time (unit: second) +CONF_mInt32(result_buffer_cancelled_interval_time, "300"); - // the increased frequency of priority for remaining tasks in BlockingPriorityQueue - CONF_mInt32(priority_queue_remaining_tasks_increased_frequency, "512"); +// the increased frequency of priority for remaining tasks in BlockingPriorityQueue +CONF_mInt32(priority_queue_remaining_tasks_increased_frequency, "512"); - // sync tablet_meta when modifying meta - CONF_mBool(sync_tablet_meta, "false"); +// sync tablet_meta when modifying meta +CONF_mBool(sync_tablet_meta, "false"); - // default thrift rpc timeout ms - CONF_mInt32(thrift_rpc_timeout_ms, "5000"); +// 
default thrift rpc timeout ms +CONF_mInt32(thrift_rpc_timeout_ms, "5000"); - // txn commit rpc timeout - CONF_mInt32(txn_commit_rpc_timeout_ms, "10000"); +// txn commit rpc timeout +CONF_mInt32(txn_commit_rpc_timeout_ms, "10000"); - // If set to true, metric calculator will run - CONF_Bool(enable_metric_calculator, "true"); +// If set to true, metric calculator will run +CONF_Bool(enable_metric_calculator, "true"); - // max consumer num in one data consumer group, for routine load - CONF_mInt32(max_consumer_num_per_group, "3"); +// max consumer num in one data consumer group, for routine load +CONF_mInt32(max_consumer_num_per_group, "3"); - // the size of thread pool for routine load task. - // this should be larger than FE config 'max_concurrent_task_num_per_be' (default 5) - CONF_Int32(routine_load_thread_pool_size, "10"); +// the size of thread pool for routine load task. +// this should be larger than FE config 'max_concurrent_task_num_per_be' (default 5) +CONF_Int32(routine_load_thread_pool_size, "10"); - // Is set to true, index loading failure will not causing BE exit, - // and the tablet will be marked as bad, so that FE will try to repair it. - // CONF_Bool(auto_recover_index_loading_failure, "false"); +// Is set to true, index loading failure will not causing BE exit, +// and the tablet will be marked as bad, so that FE will try to repair it. 
+// CONF_Bool(auto_recover_index_loading_failure, "false"); - // max external scan cache batch count, means cache max_memory_cache_batch_count * batch_size row - // default is 20, batch_size's default value is 1024 means 20 * 1024 rows will be cached - CONF_mInt32(max_memory_sink_batch_count, "20"); +// max external scan cache batch count, means cache max_memory_cache_batch_count * batch_size row +// default is 20, batch_size's default value is 1024 means 20 * 1024 rows will be cached +CONF_mInt32(max_memory_sink_batch_count, "20"); - // This configuration is used for the context gc thread schedule period - // note: unit is minute, default is 5min - CONF_mInt32(scan_context_gc_interval_min, "5"); +// This configuration is used for the context gc thread schedule period +// note: unit is minute, default is 5min +CONF_mInt32(scan_context_gc_interval_min, "5"); - // es scroll keep-alive - CONF_String(es_scroll_keepalive, "5m"); +// es scroll keep-alive +CONF_String(es_scroll_keepalive, "5m"); - // HTTP connection timeout for es - CONF_Int32(es_http_timeout_ms, "5000"); +// HTTP connection timeout for es +CONF_Int32(es_http_timeout_ms, "5000"); - // the max client cache number per each host - // There are variety of client cache in BE, but currently we use the - // same cache size configuration. - // TODO(cmy): use different config to set different client cache if necessary. - CONF_Int32(max_client_cache_size_per_host, "10"); +// the max client cache number per each host +// There are variety of client cache in BE, but currently we use the +// same cache size configuration. +// TODO(cmy): use different config to set different client cache if necessary. 
+CONF_Int32(max_client_cache_size_per_host, "10"); - // Dir to save files downloaded by SmallFileMgr - CONF_String(small_file_dir, "${DORIS_HOME}/lib/small_file/"); - // path gc - CONF_Bool(path_gc_check, "true"); - CONF_Int32(path_gc_check_interval_second, "86400"); - CONF_mInt32(path_gc_check_step, "1000"); - CONF_mInt32(path_gc_check_step_interval_ms, "10"); - CONF_mInt32(path_scan_interval_second, "86400"); +// Dir to save files downloaded by SmallFileMgr +CONF_String(small_file_dir, "${DORIS_HOME}/lib/small_file/"); +// path gc +CONF_Bool(path_gc_check, "true"); +CONF_Int32(path_gc_check_interval_second, "86400"); +CONF_mInt32(path_gc_check_step, "1000"); +CONF_mInt32(path_gc_check_step_interval_ms, "10"); +CONF_mInt32(path_scan_interval_second, "86400"); - // The following 2 configs limit the max usage of disk capacity of a data dir. - // If both of these 2 threshold reached, no more data can be writen into that data dir. - // The percent of max used capacity of a data dir - CONF_mInt32(storage_flood_stage_usage_percent, "95"); // 95% - // The min bytes that should be left of a data dir - CONF_mInt64(storage_flood_stage_left_capacity_bytes, "1073741824"); // 1GB - // number of thread for flushing memtable per store - CONF_Int32(flush_thread_num_per_store, "2"); +// The following 2 configs limit the max usage of disk capacity of a data dir. +// If both of these 2 threshold reached, no more data can be writen into that data dir. 
+// The percent of max used capacity of a data dir +CONF_mInt32(storage_flood_stage_usage_percent, "95"); // 95% +// The min bytes that should be left of a data dir +CONF_mInt64(storage_flood_stage_left_capacity_bytes, "1073741824"); // 1GB +// number of thread for flushing memtable per store +CONF_Int32(flush_thread_num_per_store, "2"); - // config for tablet meta checkpoint - CONF_mInt32(tablet_meta_checkpoint_min_new_rowsets_num, "10"); - CONF_mInt32(tablet_meta_checkpoint_min_interval_secs, "600"); +// config for tablet meta checkpoint +CONF_mInt32(tablet_meta_checkpoint_min_new_rowsets_num, "10"); +CONF_mInt32(tablet_meta_checkpoint_min_interval_secs, "600"); - // config for default rowset type - // Valid configs: ALPHA, BETA - CONF_String(default_rowset_type, "BETA"); +// config for default rowset type +// Valid configs: ALPHA, BETA +CONF_String(default_rowset_type, "BETA"); - // Maximum size of a single message body in all protocols - CONF_Int64(brpc_max_body_size, "209715200"); - // Max unwritten bytes in each socket, if the limit is reached, Socket.Write fails with EOVERCROWDED - CONF_Int64(brpc_socket_max_unwritten_bytes, "67108864"); +// Maximum size of a single message body in all protocols +CONF_Int64(brpc_max_body_size, "209715200"); +// Max unwritten bytes in each socket, if the limit is reached, Socket.Write fails with EOVERCROWDED +CONF_Int64(brpc_socket_max_unwritten_bytes, "67108864"); + +// max number of txns for every txn_partition_map in txn manager +// this is a self protection to avoid too many txns saving in manager +CONF_mInt64(max_runnings_transactions_per_txn_map, "100"); + +// tablet_map_lock shard size, the value is 2^n, n=0,1,2,3,4 +// this is a an enhancement for better performance to manage tablet +CONF_Int32(tablet_map_shard_size, "1"); + +CONF_String(plugin_path, "${DORIS_HOME}/plugin"); + +// txn_map_lock shard size, the value is 2^n, n=0,1,2,3,4 +// this is a an enhancement for better performance to manage txn 
+CONF_Int32(txn_map_shard_size, "128"); + +// txn_lock shard size, the value is 2^n, n=0,1,2,3,4 +// this is a an enhancement for better performance to commit and publish txn +CONF_Int32(txn_shard_size, "1024"); + +// Whether to continue to start be when load tablet from header failed. +CONF_Bool(ignore_load_tablet_failure, "false"); + +// Whether to continue to start be when load tablet from header failed. +CONF_Bool(ignore_rowset_stale_unconsistent_delete, "false"); + +// Soft memory limit as a fraction of hard memory limit. +CONF_Double(soft_mem_limit_frac, "0.9"); + +// Set max cache's size of query results, the unit is M byte +CONF_Int32(query_cache_max_size_mb, "256"); + +// Cache memory is pruned when reach query_cache_max_size_mb + query_cache_elasticity_size_mb +CONF_Int32(query_cache_elasticity_size_mb, "128"); + +// Maximum number of cache partitions corresponding to a SQL +CONF_Int32(query_cache_max_partition_count, "1024"); + +// Maximum number of version of a tablet. If the version num of a tablet exceed limit, +// the load process will reject new incoming load job of this tablet. +// This is to avoid too many version num. +CONF_mInt32(max_tablet_version_num, "500"); + +// Frontend mainly use two thrift sever type: THREAD_POOL, THREADED. 
if fe use THREADED model for thrift server, +// the thrift_server_type_of_fe should be set THREADED to make be thrift client to fe constructed with TFramedTransport +CONF_String(thrift_server_type_of_fe, "THREAD_POOL"); - // max number of txns for every txn_partition_map in txn manager - // this is a self protection to avoid too many txns saving in manager - CONF_mInt64(max_runnings_transactions_per_txn_map, "100"); - - // tablet_map_lock shard size, the value is 2^n, n=0,1,2,3,4 - // this is a an enhancement for better performance to manage tablet - CONF_Int32(tablet_map_shard_size, "1"); - - CONF_String(plugin_path, "${DORIS_HOME}/plugin"); - - // txn_map_lock shard size, the value is 2^n, n=0,1,2,3,4 - // this is a an enhancement for better performance to manage txn - CONF_Int32(txn_map_shard_size, "128"); - - // txn_lock shard size, the value is 2^n, n=0,1,2,3,4 - // this is a an enhancement for better performance to commit and publish txn - CONF_Int32(txn_shard_size, "1024"); - - // Whether to continue to start be when load tablet from header failed. - CONF_Bool(ignore_load_tablet_failure, "false"); - - // Whether to continue to start be when load tablet from header failed. - CONF_Bool(ignore_rowset_stale_unconsistent_delete, "false"); - - // Soft memory limit as a fraction of hard memory limit. - CONF_Double(soft_mem_limit_frac, "0.9"); - - // Set max cache's size of query results, the unit is M byte - CONF_Int32(query_cache_max_size_mb, "256"); - - // Cache memory is pruned when reach query_cache_max_size_mb + query_cache_elasticity_size_mb - CONF_Int32(query_cache_elasticity_size_mb, "128"); - - // Maximum number of cache partitions corresponding to a SQL - CONF_Int32(query_cache_max_partition_count, "1024"); - - // Maximum number of version of a tablet. If the version num of a tablet exceed limit, - // the load process will reject new incoming load job of this tablet. - // This is to avoid too many version num. 
- CONF_mInt32(max_tablet_version_num, "500"); - - // Frontend mainly use two thrift sever type: THREAD_POOL, THREADED. if fe use THREADED model for thrift server, - // the thrift_server_type_of_fe should be set THREADED to make be thrift client to fe constructed with TFramedTransport - CONF_String(thrift_server_type_of_fe, "THREAD_POOL"); - } // namespace config } // namespace doris diff --git a/be/src/common/configbase.cpp b/be/src/common/configbase.cpp index c46c34b2cd5e4d..1a8f97ad1797c3 100644 --- a/be/src/common/configbase.cpp +++ b/be/src/common/configbase.cpp @@ -246,8 +246,8 @@ void Properties::set(const std::string& key, const std::string& val) { } bool Properties::dump(const std::string& conffile) { - std::vector files = { conffile }; - Status st = FileSystemUtil::remove_paths(files); + std::vector files = {conffile}; + Status st = FileSystemUtil::remove_paths(files); if (!st.ok()) { return false; } @@ -261,7 +261,7 @@ bool Properties::dump(const std::string& conffile) { out << "# You can modify this file manually, and the configurations in this file\n"; out << "# will overwrite the configurations in be.conf\n"; out << "\n"; - + for (auto const& iter : file_conf_map) { out << iter.first << " = " << iter.second << "\n"; } @@ -291,20 +291,19 @@ std::ostream& operator<<(std::ostream& out, const std::vector& v) { return out; } -#define SET_FIELD(FIELD, TYPE, FILL_CONFMAP, SET_TO_DEFAULT) \ - if (strcmp((FIELD).type, #TYPE) == 0) { \ - if (!props.get_or_default( \ - (FIELD).name, ((SET_TO_DEFAULT) ? 
(FIELD).defval : nullptr), \ - *reinterpret_cast((FIELD).storage))) { \ - std::cerr << "config field error: " << (FIELD).name << std::endl; \ - return false; \ - } \ - if (FILL_CONFMAP) { \ - std::ostringstream oss; \ - oss << (*reinterpret_cast((FIELD).storage)); \ - (*full_conf_map)[(FIELD).name] = oss.str(); \ - } \ - continue; \ +#define SET_FIELD(FIELD, TYPE, FILL_CONFMAP, SET_TO_DEFAULT) \ + if (strcmp((FIELD).type, #TYPE) == 0) { \ + if (!props.get_or_default((FIELD).name, ((SET_TO_DEFAULT) ? (FIELD).defval : nullptr), \ + *reinterpret_cast((FIELD).storage))) { \ + std::cerr << "config field error: " << (FIELD).name << std::endl; \ + return false; \ + } \ + if (FILL_CONFMAP) { \ + std::ostringstream oss; \ + oss << (*reinterpret_cast((FIELD).storage)); \ + (*full_conf_map)[(FIELD).name] = oss.str(); \ + } \ + continue; \ } // init conf fields @@ -355,12 +354,11 @@ bool init(const char* conf_file, bool fillconfmap, bool must_exist, bool set_to_ return Status::OK(); \ } - // write config to be_custom.conf // the caller need to make sure that the given config is valid bool persist_config(const std::string& field, const std::string& value) { // lock to make sure only one thread can modify the be_custom.conf - std::lock_guard l(custom_conf_lock); + std::lock_guard l(custom_conf_lock); static const string conffile = string(getenv("DORIS_HOME")) + "/conf/be_custom.conf"; Status st = FileSystemUtil::create_file(conffile); diff --git a/be/src/common/configbase.h b/be/src/common/configbase.h index 93c06ef31f06de..80b690c47defae 100644 --- a/be/src/common/configbase.h +++ b/be/src/common/configbase.h @@ -137,7 +137,8 @@ extern std::mutex custom_conf_lock; // If fillconfmap is true, the updated config will also update the `full_conf_map`. // If must_exist is true and `conf_file` does not exist, this function will return false. // If set_to_default is true, the config value will be set to default value if not found in `conf_file`. 
-bool init(const char* conf_file, bool fillconfmap = false, bool must_exist = true, bool set_to_default = true); +bool init(const char* conf_file, bool fillconfmap = false, bool must_exist = true, + bool set_to_default = true); Status set_config(const std::string& field, const std::string& value, bool need_persist = false); diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 5b045c06860f22..c05ef99146122f 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -21,44 +21,44 @@ #include #include "common/config.h" -#include "util/cpu_info.h" -#include "util/debug_util.h" -#include "util/disk_info.h" -#include "util/logging.h" -#include "util/mem_info.h" -#include "util/network_util.h" -#include "util/thrift_util.h" -#include "util/doris_metrics.h" -#include "runtime/bufferpool/buffer_pool.h" -#include "runtime/exec_env.h" -#include "runtime/memory/chunk_allocator.h" -#include "runtime/mem_tracker.h" -#include "runtime/user_function_cache.h" -#include "exprs/operators.h" -#include "exprs/is_null_predicate.h" -#include "exprs/like_predicate.h" -#include "exprs/compound_predicate.h" -#include "exprs/new_in_predicate.h" -#include "exprs/string_functions.h" +#include "exprs/bitmap_function.h" #include "exprs/cast_functions.h" -#include "exprs/math_functions.h" +#include "exprs/compound_predicate.h" +#include "exprs/decimal_operators.h" +#include "exprs/decimalv2_operators.h" #include "exprs/encryption_functions.h" #include "exprs/es_functions.h" +#include "exprs/grouping_sets_functions.h" #include "exprs/hash_functions.h" -#include "exprs/timestamp_functions.h" -#include "exprs/decimal_operators.h" -#include "exprs/decimalv2_operators.h" +#include "exprs/hll_function.h" +#include "exprs/hll_hash_function.h" +#include "exprs/is_null_predicate.h" +#include "exprs/json_functions.h" +#include "exprs/like_predicate.h" +#include "exprs/math_functions.h" +#include "exprs/new_in_predicate.h" +#include "exprs/operators.h" +#include 
"exprs/string_functions.h" #include "exprs/time_operators.h" +#include "exprs/timestamp_functions.h" #include "exprs/utility_functions.h" -#include "exprs/json_functions.h" -#include "exprs/hll_hash_function.h" -#include "exprs/grouping_sets_functions.h" -#include "exprs/bitmap_function.h" -#include "exprs/hll_function.h" #include "geo/geo_functions.h" #include "olap/options.h" -#include "util/time.h" +#include "runtime/bufferpool/buffer_pool.h" +#include "runtime/exec_env.h" +#include "runtime/mem_tracker.h" +#include "runtime/memory/chunk_allocator.h" +#include "runtime/user_function_cache.h" +#include "util/cpu_info.h" +#include "util/debug_util.h" +#include "util/disk_info.h" +#include "util/doris_metrics.h" +#include "util/logging.h" +#include "util/mem_info.h" +#include "util/network_util.h" #include "util/system_metrics.h" +#include "util/thrift_util.h" +#include "util/time.h" namespace doris { @@ -69,7 +69,8 @@ void Daemon::tcmalloc_gc_thread() { size_t used_size = 0; size_t free_size = 0; - MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes", &used_size); + MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes", + &used_size); MallocExtension::instance()->GetNumericProperty("tcmalloc.pageheap_free_bytes", &free_size); size_t alloc_size = used_size + free_size; @@ -83,7 +84,8 @@ void Daemon::tcmalloc_gc_thread() { } void Daemon::memory_maintenance_thread() { - while (!_stop_background_threads_latch.wait_for(MonoDelta::FromSeconds(config::memory_maintenance_sleep_time_s))) { + while (!_stop_background_threads_latch.wait_for( + MonoDelta::FromSeconds(config::memory_maintenance_sleep_time_s))) { ExecEnv* env = ExecEnv::GetInstance(); // ExecEnv may not have been created yet or this may be the catalogd or statestored, // which don't have ExecEnvs. 
@@ -98,7 +100,7 @@ void Daemon::memory_maintenance_thread() { // if the system is idle, we need to refresh the tracker occasionally since // untracked memory may be allocated or freed, e.g. by background threads. if (env->process_mem_tracker() != nullptr && - !env->process_mem_tracker()->is_consumption_metric_null()) { + !env->process_mem_tracker()->is_consumption_metric_null()) { env->process_mem_tracker()->RefreshConsumptionFromMetric(); } } @@ -130,29 +132,31 @@ void Daemon::calculate_metrics_thread() { lst_push_bytes = DorisMetrics::instance()->push_request_write_bytes->value(); lst_query_bytes = DorisMetrics::instance()->query_scan_bytes->value(); DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time); - DorisMetrics::instance()->system_metrics()->get_network_traffic(&lst_net_send_bytes, &lst_net_receive_bytes); + DorisMetrics::instance()->system_metrics()->get_network_traffic(&lst_net_send_bytes, + &lst_net_receive_bytes); } else { int64_t current_ts = GetCurrentTimeMicros() / 1000; long interval = (current_ts - last_ts) / 1000; last_ts = current_ts; // 1. push bytes per second - int64_t current_push_bytes = DorisMetrics::instance()->push_request_write_bytes->value(); + int64_t current_push_bytes = + DorisMetrics::instance()->push_request_write_bytes->value(); int64_t pps = (current_push_bytes - lst_push_bytes) / (interval + 1); - DorisMetrics::instance()->push_request_write_bytes_per_second->set_value( - pps < 0 ? 0 : pps); + DorisMetrics::instance()->push_request_write_bytes_per_second->set_value(pps < 0 ? 0 + : pps); lst_push_bytes = current_push_bytes; // 2. query bytes per second int64_t current_query_bytes = DorisMetrics::instance()->query_scan_bytes->value(); int64_t qps = (current_query_bytes - lst_query_bytes) / (interval + 1); - DorisMetrics::instance()->query_scan_bytes_per_second->set_value( - qps < 0 ? 0 : qps); + DorisMetrics::instance()->query_scan_bytes_per_second->set_value(qps < 0 ? 
0 : qps); lst_query_bytes = current_query_bytes; // 3. max disk io util DorisMetrics::instance()->max_disk_io_util_percent->set_value( - DorisMetrics::instance()->system_metrics()->get_max_io_util(lst_disks_io_time, 15)); + DorisMetrics::instance()->system_metrics()->get_max_io_util(lst_disks_io_time, + 15)); // update lst map DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time); @@ -160,11 +164,12 @@ void Daemon::calculate_metrics_thread() { int64_t max_send = 0; int64_t max_receive = 0; DorisMetrics::instance()->system_metrics()->get_max_net_traffic( - lst_net_send_bytes, lst_net_receive_bytes, 15, &max_send, &max_receive); + lst_net_send_bytes, lst_net_receive_bytes, 15, &max_send, &max_receive); DorisMetrics::instance()->max_network_send_bytes_rate->set_value(max_send); DorisMetrics::instance()->max_network_receive_bytes_rate->set_value(max_receive); // update lst map - DorisMetrics::instance()->system_metrics()->get_network_traffic(&lst_net_send_bytes, &lst_net_receive_bytes); + DorisMetrics::instance()->system_metrics()->get_network_traffic(&lst_net_send_bytes, + &lst_net_receive_bytes); } } while (!_stop_background_threads_latch.wait_for(MonoDelta::FromSeconds(15))); } @@ -189,15 +194,14 @@ static void init_doris_metrics(const std::vector& store_paths) { return; } } - DorisMetrics::instance()->initialize( - init_system_metrics, disk_devices, network_interfaces); + DorisMetrics::instance()->initialize(init_system_metrics, disk_devices, network_interfaces); } void sigterm_handler(int signo) { k_doris_exit = true; } -int install_signal(int signo, void(*handler)(int)) { +int install_signal(int signo, void (*handler)(int)) { struct sigaction sa; memset(&sa, 0, sizeof(struct sigaction)); sa.sa_handler = handler; @@ -205,9 +209,8 @@ int install_signal(int signo, void(*handler)(int)) { auto ret = sigaction(signo, &sa, nullptr); if (ret != 0) { char buf[64]; - LOG(ERROR) << "install signal failed, signo=" << signo - << ", errno=" << errno 
- << ", errmsg=" << strerror_r(errno, buf, sizeof(buf)); + LOG(ERROR) << "install signal failed, signo=" << signo << ", errno=" << errno + << ", errmsg=" << strerror_r(errno, buf, sizeof(buf)); } return ret; } @@ -272,20 +275,20 @@ void Daemon::init(int argc, char** argv, const std::vector& paths) { void Daemon::start() { Status st; #if !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER) - st = Thread::create("Daemon", "tcmalloc_gc_thread", - [this]() { this->tcmalloc_gc_thread(); }, - &_tcmalloc_gc_thread); + st = Thread::create( + "Daemon", "tcmalloc_gc_thread", [this]() { this->tcmalloc_gc_thread(); }, + &_tcmalloc_gc_thread); CHECK(st.ok()) << st.to_string(); #endif - st = Thread::create("Daemon", "memory_maintenance_thread", - [this]() { this->memory_maintenance_thread(); }, - &_memory_maintenance_thread); + st = Thread::create( + "Daemon", "memory_maintenance_thread", [this]() { this->memory_maintenance_thread(); }, + &_memory_maintenance_thread); CHECK(st.ok()) << st.to_string(); if (config::enable_metric_calculator) { - st = Thread::create("Daemon", "calculate_metrics_thread", - [this]() { this->calculate_metrics_thread(); }, - &_calculate_metrics_thread); + st = Thread::create( + "Daemon", "calculate_metrics_thread", + [this]() { this->calculate_metrics_thread(); }, &_calculate_metrics_thread); CHECK(st.ok()) << st.to_string(); } } @@ -304,4 +307,4 @@ void Daemon::stop() { } } -} // namespace doris +} // namespace doris diff --git a/be/src/common/daemon.h b/be/src/common/daemon.h index c11d99f441d584..4544c1a9a151a6 100644 --- a/be/src/common/daemon.h +++ b/be/src/common/daemon.h @@ -54,4 +54,4 @@ class Daemon { scoped_refptr _memory_maintenance_thread; scoped_refptr _calculate_metrics_thread; }; -} // namespace doris +} // namespace doris diff --git a/be/src/common/global_types.h b/be/src/common/global_types.h index ebe2815d3d3048..014c40218c4b68 100644 --- a/be/src/common/global_types.h +++ b/be/src/common/global_types.h 
@@ -28,6 +28,6 @@ typedef int SlotId; typedef int TableId; typedef int PlanNodeId; -}; +}; // namespace doris #endif diff --git a/be/src/common/hdfs.h b/be/src/common/hdfs.h index f87d0be62dc760..f41fc4a1d6cccd 100644 --- a/be/src/common/hdfs.h +++ b/be/src/common/hdfs.h @@ -29,4 +29,3 @@ typedef void* hdfsFile; #endif #endif - diff --git a/be/src/common/logconfig.cpp b/be/src/common/logconfig.cpp index 72d3b7bc0cea85..e3463f128d4e23 100644 --- a/be/src/common/logconfig.cpp +++ b/be/src/common/logconfig.cpp @@ -15,19 +15,18 @@ // specific language governing permissions and limitations // under the License. -#include "util/logging.h" +#include +#include -#include #include -#include #include +#include +#include #include -#include -#include - #include "common/config.h" #include "gutil/stringprintf.h" +#include "util/logging.h" namespace doris { @@ -35,8 +34,7 @@ static bool logging_initialized = false; static std::mutex logging_mutex; -static bool iequals(const std::string& a, const std::string& b) -{ +static bool iequals(const std::string& a, const std::string& b) { unsigned int sz = a.size(); if (b.size() != sz) { return false; @@ -47,10 +45,9 @@ static bool iequals(const std::string& a, const std::string& b) } } return true; -} +} bool init_glog(const char* basename, bool install_signal_handler) { - std::lock_guard logging_lock(logging_mutex); if (logging_initialized) { @@ -68,10 +65,10 @@ bool init_glog(const char* basename, bool install_signal_handler) { // 0 means buffer INFO only FLAGS_logbuflevel = 0; // buffer log messages for at most this many seconds - FLAGS_logbufsecs = 30; + FLAGS_logbufsecs = 30; // set roll num FLAGS_log_filenum_quota = config::sys_log_roll_num; - + // set log level std::string& loglevel = config::sys_log_level; if (iequals(loglevel, "INFO")) { @@ -109,7 +106,7 @@ bool init_glog(const char* basename, bool install_signal_handler) { } else if (rollmode.substr(0, sizeflag.length()).compare(sizeflag) == 0) { FLAGS_log_split_method = "size"; 
std::string sizestr = rollmode.substr(sizeflag.size(), rollmode.size() - sizeflag.size()); - if (sizestr.size() != 0) { + if (sizestr.size() != 0) { char* end = NULL; errno = 0; const char* sizecstr = sizestr.c_str(); @@ -143,9 +140,8 @@ bool init_glog(const char* basename, bool install_signal_handler) { google::InitGoogleLogging(basename); logging_initialized = true; - - return true; + return true; } void shutdown_logging() { @@ -159,13 +155,8 @@ std::string FormatTimestampForLog(MicrosecondsInt64 micros_since_epoch) { struct tm tm_time; localtime_r(&secs_since_epoch, &tm_time); - return StringPrintf("%02d%02d %02d:%02d:%02d.%06d", - 1 + tm_time.tm_mon, - tm_time.tm_mday, - tm_time.tm_hour, - tm_time.tm_min, - tm_time.tm_sec, - usecs); + return StringPrintf("%02d%02d %02d:%02d:%02d.%06d", 1 + tm_time.tm_mon, tm_time.tm_mday, + tm_time.tm_hour, tm_time.tm_min, tm_time.tm_sec, usecs); } } // namespace doris diff --git a/be/src/common/logging.h b/be/src/common/logging.h index 14545bc78940b7..e3c3130039837c 100644 --- a/be/src/common/logging.h +++ b/be/src/common/logging.h @@ -23,16 +23,25 @@ // issues when we try to dynamically link the codegen'd functions. #ifdef IR_COMPILE #include -#define DCHECK(condition) while (false) std::cout -#define DCHECK_EQ(a, b) while(false) std::cout -#define DCHECK_NE(a, b) while(false) std::cout -#define DCHECK_GT(a, b) while(false) std::cout -#define DCHECK_LT(a, b) while(false) std::cout -#define DCHECK_GE(a, b) while(false) std::cout -#define DCHECK_LE(a, b) while(false) std::cout +#define DCHECK(condition) \ + while (false) std::cout +#define DCHECK_EQ(a, b) \ + while (false) std::cout +#define DCHECK_NE(a, b) \ + while (false) std::cout +#define DCHECK_GT(a, b) \ + while (false) std::cout +#define DCHECK_LT(a, b) \ + while (false) std::cout +#define DCHECK_GE(a, b) \ + while (false) std::cout +#define DCHECK_LE(a, b) \ + while (false) std::cout // Similar to how glog defines DCHECK for release. 
-#define LOG(level) while (false) std::cout -#define VLOG(level) while (false) std::cout +#define LOG(level) \ + while (false) std::cout +#define VLOG(level) \ + while (false) std::cout #else // GLOG defines this based on the system but doesn't check if it's already // been defined. undef it first to avoid warnings. @@ -48,11 +57,11 @@ // Define VLOG levels. We want display per-row info less than per-file which // is less than per-query. For now per-connection is the same as per-query. #define VLOG_CONNECTION VLOG(1) -#define VLOG_RPC VLOG(8) -#define VLOG_QUERY VLOG(1) -#define VLOG_FILE VLOG(2) -#define VLOG_ROW VLOG(10) -#define VLOG_PROGRESS VLOG(2) +#define VLOG_RPC VLOG(8) +#define VLOG_QUERY VLOG(1) +#define VLOG_FILE VLOG(2) +#define VLOG_ROW VLOG(10) +#define VLOG_PROGRESS VLOG(2) #define VLOG_CONNECTION_IS_ON VLOG_IS_ON(1) #define VLOG_RPC_IS_ON VLOG_IS_ON(2) @@ -63,8 +72,8 @@ /// Define a wrapper around DCHECK for strongly typed enums that print a useful error /// message on failure. -#define DCHECK_ENUM_EQ(a, b) \ - DCHECK(a == b) << "[ " #a " = " << static_cast(a) << " , " #b " = " \ - << static_cast(b) << " ]" +#define DCHECK_ENUM_EQ(a, b) \ + DCHECK(a == b) << "[ " #a " = " << static_cast(a) << " , " #b " = " \ + << static_cast(b) << " ]" #endif diff --git a/be/src/common/names.h b/be/src/common/names.h old mode 100755 new mode 100644 index c0acdcdef267cc..890b562e5b4e8b --- a/be/src/common/names.h +++ b/be/src/common/names.h @@ -77,14 +77,12 @@ using std::setfill; using std::setw; #endif - #ifdef _GLIBCXX_FSTREAM using std::fstream; using std::ifstream; using std::ofstream; #endif - #ifdef _GLIBCXX_SSTREAM using std::stringstream; using std::istringstream; @@ -133,14 +131,14 @@ using boost::lexical_cast; using boost::shared_mutex; #endif - /// In older versions of boost, when including mutex.hpp, it would include locks.hpp that /// would in turn provide lock_guard<>. 
In more recent versions, including mutex.hpp would /// include lock_types.hpp that does not provide lock_guard<>. This check verifies if boost /// locks have been included and makes sure to only include lock_guard if the provided lock /// implementations were not included using lock_types.hpp (for older boost versions) or if /// lock_guard.hpp was explicitly included. -#if (defined(BOOST_THREAD_LOCKS_HPP) && BOOST_VERSION < 105300) || defined(BOOST_THREAD_LOCK_GUARD_HPP) +#if (defined(BOOST_THREAD_LOCKS_HPP) && BOOST_VERSION < 105300) || \ + defined(BOOST_THREAD_LOCK_GUARD_HPP) using boost::lock_guard; #endif diff --git a/be/src/common/object_pool.h b/be/src/common/object_pool.h index 70d52e8c3f42f6..054a4108b14495 100644 --- a/be/src/common/object_pool.h +++ b/be/src/common/object_pool.h @@ -18,9 +18,9 @@ #ifndef DORIS_BE_SRC_COMMON_COMMON_OBJECT_POOL_H #define DORIS_BE_SRC_COMMON_COMMON_OBJECT_POOL_H -#include -#include #include +#include +#include #include "util/spinlock.h" @@ -31,11 +31,9 @@ namespace doris { // Thread-safe. 
class ObjectPool { public: - ObjectPool(): _objects() {} + ObjectPool() : _objects() {} - ~ObjectPool() { - clear(); - } + ~ObjectPool() { clear(); } template T* add(T* t) { @@ -70,10 +68,8 @@ class ObjectPool { template struct SpecificElement : GenericElement { - SpecificElement(T* t): t(t) {} - ~SpecificElement() { - delete t; - } + SpecificElement(T* t) : t(t) {} + ~SpecificElement() { delete t; } T* t; }; @@ -83,6 +79,6 @@ class ObjectPool { SpinLock _lock; }; -} +} // namespace doris #endif diff --git a/be/src/common/resource_tls.cpp b/be/src/common/resource_tls.cpp index ddf916df324b46..b5fd7af78686dc 100644 --- a/be/src/common/resource_tls.cpp +++ b/be/src/common/resource_tls.cpp @@ -64,4 +64,4 @@ int ResourceTls::set_resource_tls(TResourceInfo* info) { return ret; } -} +} // namespace doris diff --git a/be/src/common/resource_tls.h b/be/src/common/resource_tls.h index 0a51f4afd0ac32..ed720235995082 100644 --- a/be/src/common/resource_tls.h +++ b/be/src/common/resource_tls.h @@ -28,6 +28,6 @@ class ResourceTls { static int set_resource_tls(TResourceInfo*); }; -} +} // namespace doris #endif diff --git a/be/src/common/status.cpp b/be/src/common/status.cpp index 446ae5a0109cdd..75eab874f50643 100644 --- a/be/src/common/status.cpp +++ b/be/src/common/status.cpp @@ -8,8 +8,8 @@ namespace doris { -inline const char* assemble_state( - TStatusCode::type code, const Slice& msg, int16_t precise_code, const Slice& msg2) { +inline const char* assemble_state(TStatusCode::type code, const Slice& msg, int16_t precise_code, + const Slice& msg2) { DCHECK(code != TStatusCode::OK); const uint32_t len1 = msg.size; @@ -58,8 +58,7 @@ Status::Status(const PStatus& s) : _state(nullptr) { } Status::Status(TStatusCode::type code, const Slice& msg, int16_t precise_code, const Slice& msg2) - : _state(assemble_state(code, msg, precise_code, msg2)) { -} + : _state(assemble_state(code, msg, precise_code, msg2)) {} void Status::to_thrift(TStatus* s) const { s->error_msgs.clear(); @@ 
-196,4 +195,4 @@ Status Status::clone_and_append(const Slice& msg) const { return Status(code(), message(), precise_code(), msg); } -} +} // namespace doris diff --git a/be/src/common/status.h b/be/src/common/status.h index 8fc755fdb3ad31..9f9547237f5b6c 100644 --- a/be/src/common/status.h +++ b/be/src/common/status.h @@ -7,23 +7,21 @@ #include #include -#include "common/logging.h" #include "common/compiler_util.h" -#include "gen_cpp/Status_types.h" // for TStatus -#include "gen_cpp/status.pb.h" // for PStatus -#include "util/slice.h" // for Slice +#include "common/logging.h" +#include "gen_cpp/Status_types.h" // for TStatus +#include "gen_cpp/status.pb.h" // for PStatus +#include "util/slice.h" // for Slice namespace doris { class Status { public: - Status(): _state(nullptr) {} + Status() : _state(nullptr) {} ~Status() noexcept { delete[] _state; } // copy c'tor makes copy of error detail so Status can be returned by value - Status(const Status& s) - : _state(s._state == nullptr ? nullptr : copy_state(s._state)) { - } + Status(const Status& s) : _state(s._state == nullptr ? 
nullptr : copy_state(s._state)) {} // same as copy c'tor Status& operator=(const Status& s) { @@ -37,9 +35,7 @@ class Status { } // move c'tor - Status(Status&& s) noexcept : _state(s._state) { - s._state = nullptr; - } + Status(Status&& s) noexcept : _state(s._state) { s._state = nullptr; } // move assign Status& operator=(Status&& s) noexcept { @@ -54,97 +50,96 @@ class Status { static Status OK() { return Status(); } - static Status PublishTimeout(const Slice& msg, int16_t precise_code = 1, const Slice& msg2 = Slice()) { + static Status PublishTimeout(const Slice& msg, int16_t precise_code = 1, + const Slice& msg2 = Slice()) { return Status(TStatusCode::PUBLISH_TIMEOUT, msg, precise_code, msg2); } - static Status MemoryAllocFailed(const Slice& msg, int16_t precise_code = 1, const Slice& msg2 = Slice()) { + static Status MemoryAllocFailed(const Slice& msg, int16_t precise_code = 1, + const Slice& msg2 = Slice()) { return Status(TStatusCode::MEM_ALLOC_FAILED, msg, precise_code, msg2); } - static Status BufferAllocFailed(const Slice& msg, int16_t precise_code = 1, const Slice& msg2 = Slice()) { + static Status BufferAllocFailed(const Slice& msg, int16_t precise_code = 1, + const Slice& msg2 = Slice()) { return Status(TStatusCode::BUFFER_ALLOCATION_FAILED, msg, precise_code, msg2); } - static Status InvalidArgument(const Slice& msg, int16_t precise_code = 1, const Slice& msg2 = Slice()) { + static Status InvalidArgument(const Slice& msg, int16_t precise_code = 1, + const Slice& msg2 = Slice()) { return Status(TStatusCode::INVALID_ARGUMENT, msg, precise_code, msg2); } - static Status MinimumReservationUnavailable(const Slice& msg, int16_t precise_code = 1, const Slice& msg2 = Slice()) { + static Status MinimumReservationUnavailable(const Slice& msg, int16_t precise_code = 1, + const Slice& msg2 = Slice()) { return Status(TStatusCode::MINIMUM_RESERVATION_UNAVAILABLE, msg, precise_code, msg2); } - static Status Corruption(const Slice& msg, int16_t precise_code = 1, 
const Slice& msg2 = Slice()) { + static Status Corruption(const Slice& msg, int16_t precise_code = 1, + const Slice& msg2 = Slice()) { return Status(TStatusCode::CORRUPTION, msg, precise_code, msg2); } - static Status IOError(const Slice& msg, - int16_t precise_code = 1, - const Slice& msg2 = Slice()) { + static Status IOError(const Slice& msg, int16_t precise_code = 1, const Slice& msg2 = Slice()) { return Status(TStatusCode::IO_ERROR, msg, precise_code, msg2); } - static Status NotFound(const Slice& msg, - int16_t precise_code = 1, - const Slice& msg2 = Slice()) { + static Status NotFound(const Slice& msg, int16_t precise_code = 1, + const Slice& msg2 = Slice()) { return Status(TStatusCode::NOT_FOUND, msg, precise_code, msg2); } - static Status AlreadyExist(const Slice& msg, - int16_t precise_code = 1, + static Status AlreadyExist(const Slice& msg, int16_t precise_code = 1, const Slice& msg2 = Slice()) { return Status(TStatusCode::ALREADY_EXIST, msg, precise_code, msg2); } - static Status NotSupported(const Slice& msg, - int16_t precise_code = 1, + static Status NotSupported(const Slice& msg, int16_t precise_code = 1, const Slice& msg2 = Slice()) { return Status(TStatusCode::NOT_IMPLEMENTED_ERROR, msg, precise_code, msg2); } - static Status EndOfFile(const Slice& msg, - int16_t precise_code = 1, + static Status EndOfFile(const Slice& msg, int16_t precise_code = 1, const Slice& msg2 = Slice()) { return Status(TStatusCode::END_OF_FILE, msg, precise_code, msg2); } - static Status InternalError(const Slice& msg, - int16_t precise_code = 1, - const Slice& msg2 = Slice()) { + static Status InternalError(const Slice& msg, int16_t precise_code = 1, + const Slice& msg2 = Slice()) { return Status(TStatusCode::INTERNAL_ERROR, msg, precise_code, msg2); } - static Status RuntimeError(const Slice& msg, - int16_t precise_code = 1, + static Status RuntimeError(const Slice& msg, int16_t precise_code = 1, const Slice& msg2 = Slice()) { return Status(TStatusCode::RUNTIME_ERROR, 
msg, precise_code, msg2); } - static Status Cancelled(const Slice& msg, int16_t precise_code = 1, const Slice& msg2 = Slice()) { + static Status Cancelled(const Slice& msg, int16_t precise_code = 1, + const Slice& msg2 = Slice()) { return Status(TStatusCode::CANCELLED, msg, precise_code, msg2); } - static Status MemoryLimitExceeded(const Slice& msg, int16_t precise_code = 1, const Slice& msg2 = Slice()) { + static Status MemoryLimitExceeded(const Slice& msg, int16_t precise_code = 1, + const Slice& msg2 = Slice()) { return Status(TStatusCode::MEM_LIMIT_EXCEEDED, msg, precise_code, msg2); } - static Status ThriftRpcError(const Slice& msg, int16_t precise_code = 1, const Slice& msg2 = Slice()) { + static Status ThriftRpcError(const Slice& msg, int16_t precise_code = 1, + const Slice& msg2 = Slice()) { return Status(TStatusCode::THRIFT_RPC_ERROR, msg, precise_code, msg2); } - static Status TimedOut(const Slice& msg, int16_t precise_code = 1, const Slice& msg2 = Slice()) { + static Status TimedOut(const Slice& msg, int16_t precise_code = 1, + const Slice& msg2 = Slice()) { return Status(TStatusCode::TIMEOUT, msg, precise_code, msg2); } - static Status TooManyTasks(const Slice& msg, int16_t precise_code = 1, const Slice& msg2 = Slice()) { + static Status TooManyTasks(const Slice& msg, int16_t precise_code = 1, + const Slice& msg2 = Slice()) { return Status(TStatusCode::TOO_MANY_TASKS, msg, precise_code, msg2); } - static Status ServiceUnavailable(const Slice& msg, - int16_t precise_code = -1, + static Status ServiceUnavailable(const Slice& msg, int16_t precise_code = -1, const Slice& msg2 = Slice()) { return Status(TStatusCode::SERVICE_UNAVAILABLE, msg, precise_code, msg2); } - static Status Uninitialized(const Slice& msg, - int16_t precise_code = -1, + static Status Uninitialized(const Slice& msg, int16_t precise_code = -1, const Slice& msg2 = Slice()) { return Status(TStatusCode::UNINITIALIZED, msg, precise_code, msg2); } - static Status Aborted(const Slice& msg, - 
int16_t precise_code = -1, + static Status Aborted(const Slice& msg, int16_t precise_code = -1, const Slice& msg2 = Slice()) { return Status(TStatusCode::ABORTED, msg, precise_code, msg2); } - static Status DataQualityError(const Slice& msg, - int16_t precise_code = -1, - const Slice& msg2 = Slice()) { + static Status DataQualityError(const Slice& msg, int16_t precise_code = -1, + const Slice& msg2 = Slice()) { return Status(TStatusCode::DATA_QUALITY_ERROR, msg, precise_code, msg2); } @@ -156,7 +151,7 @@ class Status { bool is_end_of_file() const { return code() == TStatusCode::END_OF_FILE; } bool is_not_found() const { return code() == TStatusCode::NOT_FOUND; } bool is_already_exist() const { return code() == TStatusCode::ALREADY_EXIST; } - bool is_io_error() const {return code() == TStatusCode::IO_ERROR; } + bool is_io_error() const { return code() == TStatusCode::IO_ERROR; } /// @return @c true iff the status indicates Uninitialized. bool is_uninitialized() const { return code() == TStatusCode::UNINITIALIZED; } @@ -256,58 +251,58 @@ class Status { }; // some generally useful macros -#define RETURN_IF_ERROR(stmt) \ - do { \ +#define RETURN_IF_ERROR(stmt) \ + do { \ const Status& _status_ = (stmt); \ - if (UNLIKELY(!_status_.ok())) { \ - return _status_; \ - } \ + if (UNLIKELY(!_status_.ok())) { \ + return _status_; \ + } \ } while (false) #define RETURN_IF_STATUS_ERROR(status, stmt) \ - do { \ - status = (stmt); \ - if (UNLIKELY(!status.ok())) { \ - return; \ - } \ + do { \ + status = (stmt); \ + if (UNLIKELY(!status.ok())) { \ + return; \ + } \ } while (false) -#define EXIT_IF_ERROR(stmt) \ - do { \ - const Status& _status_ = (stmt); \ - if (UNLIKELY(!_status_.ok())) { \ +#define EXIT_IF_ERROR(stmt) \ + do { \ + const Status& _status_ = (stmt); \ + if (UNLIKELY(!_status_.ok())) { \ string msg = _status_.get_error_msg(); \ - LOG(ERROR) << msg; \ - exit(1); \ - } \ + LOG(ERROR) << msg; \ + exit(1); \ + } \ } while (false) /// @brief Emit a warning if @c to_call 
returns a bad status. -#define WARN_IF_ERROR(to_call, warning_prefix) \ - do { \ - const Status& _s = (to_call); \ - if (UNLIKELY(!_s.ok())) { \ - LOG(WARNING) << (warning_prefix) << ": " << _s.to_string(); \ - } \ +#define WARN_IF_ERROR(to_call, warning_prefix) \ + do { \ + const Status& _s = (to_call); \ + if (UNLIKELY(!_s.ok())) { \ + LOG(WARNING) << (warning_prefix) << ": " << _s.to_string(); \ + } \ } while (0); -#define RETURN_WITH_WARN_IF_ERROR(stmt, ret_code, warning_prefix) \ - do { \ - const Status& _s = (stmt); \ - if (UNLIKELY(!_s.ok())) { \ +#define RETURN_WITH_WARN_IF_ERROR(stmt, ret_code, warning_prefix) \ + do { \ + const Status& _s = (stmt); \ + if (UNLIKELY(!_s.ok())) { \ LOG(WARNING) << (warning_prefix) << ", error: " << _s.to_string(); \ - return ret_code; \ - } \ + return ret_code; \ + } \ } while (0); -#define RETURN_NOT_OK_STATUS_WITH_WARN(stmt, warning_prefix) \ - do { \ - const Status& _s = (stmt); \ - if (UNLIKELY(!_s.ok())) { \ - LOG(WARNING) << (warning_prefix) << ", error: " << _s.to_string(); \ - return _s; \ - } \ +#define RETURN_NOT_OK_STATUS_WITH_WARN(stmt, warning_prefix) \ + do { \ + const Status& _s = (stmt); \ + if (UNLIKELY(!_s.ok())) { \ + LOG(WARNING) << (warning_prefix) << ", error: " << _s.to_string(); \ + return _s; \ + } \ } while (0); -} // namespace doris +} // namespace doris #define WARN_UNUSED_RESULT __attribute__((warn_unused_result)) diff --git a/be/src/common/utils.h b/be/src/common/utils.h index 8376fe38a68e67..479848317e4a9e 100644 --- a/be/src/common/utils.h +++ b/be/src/common/utils.h @@ -30,7 +30,7 @@ struct AuthInfo { int64_t auth_code = -1; }; -template +template void set_request_auth(T* req, const AuthInfo& auth) { if (auth.auth_code != -1) { // if auth_code is set, no need to set other info @@ -49,4 +49,4 @@ void set_request_auth(T* req, const AuthInfo& auth) { } } -} +} // namespace doris diff --git a/be/src/env/env.h b/be/src/env/env.h index 18a1cc4d98b46c..d0215739df0b97 100644 --- a/be/src/env/env.h 
+++ b/be/src/env/env.h @@ -35,12 +35,7 @@ class Env { // CREATE_OR_OPEN | opens | creates // MUST_CREATE | fails | creates // MUST_EXIST | opens | fails - enum OpenMode { - CREATE_OR_OPEN_WITH_TRUNCATE, - CREATE_OR_OPEN, - MUST_CREATE, - MUST_EXIST - }; + enum OpenMode { CREATE_OR_OPEN_WITH_TRUNCATE, CREATE_OR_OPEN, MUST_CREATE, MUST_EXIST }; Env() {} virtual ~Env() {} @@ -265,10 +260,7 @@ class RandomAccessFile { // one of Append or PositionedAppend. We support only Append here. class WritableFile { public: - enum FlushMode { - FLUSH_SYNC, - FLUSH_ASYNC - }; + enum FlushMode { FLUSH_SYNC, FLUSH_ASYNC }; WritableFile() {} virtual ~WritableFile() {} diff --git a/be/src/env/env_posix.cpp b/be/src/env/env_posix.cpp index dd9bdc2bcda2ec..0ccdcb3964f7a0 100644 --- a/be/src/env/env_posix.cpp +++ b/be/src/env/env_posix.cpp @@ -7,25 +7,25 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors -#include "env/env.h" - #include #include #include #include #include -#include #include +#include + #include #include "common/logging.h" +#include "env/env.h" +#include "gutil/gscoped_ptr.h" #include "gutil/macros.h" #include "gutil/port.h" -#include "gutil/gscoped_ptr.h" #include "gutil/strings/substitute.h" #include "util/errno.h" -#include "util/slice.h" #include "util/file_cache.h" +#include "util/slice.h" namespace doris { @@ -101,8 +101,8 @@ static Status do_open(const string& filename, Env::OpenMode mode, int* fd) { return Status::OK(); } -static Status do_readv_at(int fd, const std::string& filename, uint64_t offset, - const Slice* res, size_t res_cnt) { +static Status do_readv_at(int fd, const std::string& filename, uint64_t offset, const Slice* res, + size_t res_cnt) { // Convert the results into the iovec vector to request // and calculate the total bytes requested size_t bytes_req = 0; @@ -110,7 +110,7 @@ static Status do_readv_at(int fd, const std::string& filename, uint64_t 
offset, for (size_t i = 0; i < res_cnt; i++) { const Slice& result = res[i]; bytes_req += result.size; - iov[i] = { result.data, result.size }; + iov[i] = {result.data, result.size}; } uint64_t cur_offset = offset; @@ -127,8 +127,8 @@ static Status do_readv_at(int fd, const std::string& filename, uint64_t offset, } if (PREDICT_FALSE(r == 0)) { - return Status::EndOfFile( - strings::Substitute("EOF trying to read $0 bytes at offset $1", bytes_req, offset)); + return Status::EndOfFile(strings::Substitute("EOF trying to read $0 bytes at offset $1", + bytes_req, offset)); } if (PREDICT_TRUE(r == rem)) { @@ -146,7 +146,7 @@ static Status do_readv_at(int fd, const std::string& filename, uint64_t offset, } else { // Partially read this result. // Adjust the iov_len and iov_base to request only the missing data. - iov[i].iov_base = static_cast(iov[i].iov_base) + bytes_rem; + iov[i].iov_base = static_cast(iov[i].iov_base) + bytes_rem; iov[i].iov_len -= bytes_rem; break; // Don't need to adjust remaining iovec's } @@ -158,8 +158,8 @@ static Status do_readv_at(int fd, const std::string& filename, uint64_t offset, return Status::OK(); } -static Status do_writev_at(int fd, const string& filename, uint64_t offset, - const Slice* data, size_t data_cnt, size_t* bytes_written) { +static Status do_writev_at(int fd, const string& filename, uint64_t offset, const Slice* data, + size_t data_cnt, size_t* bytes_written) { // Convert the results into the iovec vector to request // and calculate the total bytes requested. size_t bytes_req = 0; @@ -167,7 +167,7 @@ static Status do_writev_at(int fd, const string& filename, uint64_t offset, for (size_t i = 0; i < data_cnt; i++) { const Slice& result = data[i]; bytes_req += result.size; - iov[i] = { result.data, result.size }; + iov[i] = {result.data, result.size}; } uint64_t cur_offset = offset; @@ -198,7 +198,7 @@ static Status do_writev_at(int fd, const string& filename, uint64_t offset, } else { // Partially wrote this result. 
// Adjust the iov_len and iov_base to write only the missing data. - iov[i].iov_base = static_cast(iov[i].iov_base) + bytes_rem; + iov[i].iov_base = static_cast(iov[i].iov_base) + bytes_rem; iov[i].iov_len -= bytes_rem; break; // Don't need to adjust remaining iovec's. } @@ -211,24 +211,22 @@ static Status do_writev_at(int fd, const string& filename, uint64_t offset, return Status::OK(); } -class PosixSequentialFile: public SequentialFile { +class PosixSequentialFile : public SequentialFile { public: - PosixSequentialFile(string fname, FILE* f) - : _filename(std::move(fname)), _file(f) {} + PosixSequentialFile(string fname, FILE* f) : _filename(std::move(fname)), _file(f) {} ~PosixSequentialFile() override { int err; RETRY_ON_EINTR(err, fclose(_file)); if (PREDICT_FALSE(err != 0)) { LOG(WARNING) << "Failed to close " << _filename - << ", msg=" << errno_to_string(ferror(_file)); + << ", msg=" << errno_to_string(ferror(_file)); } } Status read(Slice* result) override { size_t r; - STREAM_RETRY_ON_EINTR(r, _file, fread_unlocked(result->data, 1, - result->size, _file)); + STREAM_RETRY_ON_EINTR(r, _file, fread_unlocked(result->data, 1, result->size, _file)); if (r < result->size) { if (feof(_file)) { // We leave status as ok if we hit the end of the file. 
@@ -258,14 +256,13 @@ class PosixSequentialFile: public SequentialFile { class PosixRandomAccessFile : public RandomAccessFile { public: - PosixRandomAccessFile(std::string filename, int fd) : _filename(std::move(filename)), _fd(fd) { - } + PosixRandomAccessFile(std::string filename, int fd) : _filename(std::move(filename)), _fd(fd) {} ~PosixRandomAccessFile() override { int res; RETRY_ON_EINTR(res, close(_fd)); if (res != 0) { LOG(WARNING) << "close file failed, name=" << _filename - << ", msg=" << errno_to_string(errno); + << ", msg=" << errno_to_string(errno); } } @@ -287,6 +284,7 @@ class PosixRandomAccessFile : public RandomAccessFile { } const std::string& file_name() const override { return _filename; } + private: std::string _filename; int _fd; @@ -295,15 +293,16 @@ class PosixRandomAccessFile : public RandomAccessFile { class PosixWritableFile : public WritableFile { public: PosixWritableFile(std::string filename, int fd, uint64_t filesize, bool sync_on_close) - : _filename(std::move(filename)), _fd(fd), _sync_on_close(sync_on_close), _filesize(filesize) { } + : _filename(std::move(filename)), + _fd(fd), + _sync_on_close(sync_on_close), + _filesize(filesize) {} ~PosixWritableFile() override { WARN_IF_ERROR(close(), "Failed to close file, file=" + _filename); } - Status append(const Slice& data) override { - return appendv(&data, 1); - } + Status append(const Slice& data) override { return appendv(&data, 1); } Status appendv(const Slice* data, size_t cnt) override { size_t bytes_written = 0; @@ -396,6 +395,7 @@ class PosixWritableFile : public WritableFile { uint64_t size() const override { return _filesize; } const string& filename() const override { return _filename; } + private: std::string _filename; int _fd; @@ -409,14 +409,9 @@ class PosixWritableFile : public WritableFile { class PosixRandomRWFile : public RandomRWFile { public: PosixRandomRWFile(string fname, int fd, bool sync_on_close) - : _filename(std::move(fname)), - _fd(fd), - 
_sync_on_close(sync_on_close), - _closed(false) {} + : _filename(std::move(fname)), _fd(fd), _sync_on_close(sync_on_close), _closed(false) {} - ~PosixRandomRWFile() { - WARN_IF_ERROR(close(), "Failed to close " + _filename); - } + ~PosixRandomRWFile() { WARN_IF_ERROR(close(), "Failed to close " + _filename); } virtual Status read_at(uint64_t offset, const Slice& result) const override { return do_readv_at(_fd, _filename, offset, &result, 1); @@ -452,9 +447,7 @@ class PosixRandomRWFile : public RandomRWFile { return Status::OK(); } - Status sync() override { - return do_sync(_fd, _filename); - } + Status sync() override { return do_sync(_fd, _filename); } Status close() override { if (_closed) { @@ -489,9 +482,7 @@ class PosixRandomRWFile : public RandomRWFile { return Status::OK(); } - const string& filename() const override { - return _filename; - } + const string& filename() const override { return _filename; } private: const std::string _filename; @@ -502,10 +493,10 @@ class PosixRandomRWFile : public RandomRWFile { class PosixEnv : public Env { public: - ~PosixEnv() override { } + ~PosixEnv() override {} - Status new_sequential_file( - const string& fname, std::unique_ptr* result) override { + Status new_sequential_file(const string& fname, + std::unique_ptr* result) override { FILE* f; POINTER_RETRY_ON_EINTR(f, fopen(fname.c_str(), "r")); if (f == nullptr) { @@ -517,13 +508,12 @@ class PosixEnv : public Env { // get a RandomAccessFile pointer without file cache Status new_random_access_file(const std::string& fname, - std::unique_ptr* result) override { + std::unique_ptr* result) override { return new_random_access_file(RandomAccessFileOptions(), fname, result); } - Status new_random_access_file(const RandomAccessFileOptions& opts, - const std::string& fname, - std::unique_ptr* result) override { + Status new_random_access_file(const RandomAccessFileOptions& opts, const std::string& fname, + std::unique_ptr* result) override { int fd; RETRY_ON_EINTR(fd, 
open(fname.c_str(), O_RDONLY)); if (fd < 0) { @@ -533,13 +523,11 @@ class PosixEnv : public Env { return Status::OK(); } - Status new_writable_file(const string& fname, - std::unique_ptr* result) override { + Status new_writable_file(const string& fname, std::unique_ptr* result) override { return new_writable_file(WritableFileOptions(), fname, result); } - Status new_writable_file(const WritableFileOptions& opts, - const string& fname, + Status new_writable_file(const WritableFileOptions& opts, const string& fname, std::unique_ptr* result) override { int fd; RETURN_IF_ERROR(do_open(fname, opts.mode, &fd)); @@ -552,13 +540,11 @@ class PosixEnv : public Env { return Status::OK(); } - Status new_random_rw_file(const string& fname, - std::unique_ptr* result) override { + Status new_random_rw_file(const string& fname, std::unique_ptr* result) override { return new_random_rw_file(RandomRWFileOptions(), fname, result); } - Status new_random_rw_file(const RandomRWFileOptions& opts, - const string& fname, + Status new_random_rw_file(const RandomRWFileOptions& opts, const string& fname, std::unique_ptr* result) override { int fd; RETURN_IF_ERROR(do_open(fname, opts.mode, &fd)); @@ -573,8 +559,7 @@ class PosixEnv : public Env { return Status::OK(); } - Status get_children(const std::string& dir, - std::vector* result) override { + Status get_children(const std::string& dir, std::vector* result) override { result->clear(); DIR* d = opendir(dir.c_str()); if (d == nullptr) { @@ -648,7 +633,7 @@ class PosixEnv : public Env { Status sync_dir(const string& dirname) override { int dir_fd; - RETRY_ON_EINTR(dir_fd, open(dirname.c_str(), O_DIRECTORY|O_RDONLY)); + RETRY_ON_EINTR(dir_fd, open(dirname.c_str(), O_DIRECTORY | O_RDONLY)); if (dir_fd < 0) { return io_error(dirname, errno); } @@ -693,7 +678,7 @@ class PosixEnv : public Env { Status get_file_modified_time(const std::string& fname, uint64_t* file_mtime) override { struct stat s; - if (stat(fname.c_str(), &s) !=0) { + if 
(stat(fname.c_str(), &s) != 0) { return io_error(fname, errno); } *file_mtime = static_cast(s.st_mtime); diff --git a/be/src/env/env_util.cpp b/be/src/env/env_util.cpp index 2c994e94c3e7d0..3494b2ed10d8b4 100644 --- a/be/src/env/env_util.cpp +++ b/be/src/env/env_util.cpp @@ -23,7 +23,6 @@ using std::shared_ptr; using std::string; - namespace doris { namespace env_util { diff --git a/be/src/exec/aggregation_node.cpp b/be/src/exec/aggregation_node.cpp index 53e33268f9dcfc..3edb36e2a58a54 100644 --- a/be/src/exec/aggregation_node.cpp +++ b/be/src/exec/aggregation_node.cpp @@ -17,11 +17,12 @@ #include "exec/aggregation_node.h" +#include #include -#include -#include #include -#include + +#include +#include #include "exec/hash_table.hpp" #include "exprs/agg_fn_evaluator.h" @@ -44,36 +45,33 @@ namespace doris { // TODO: pass in maximum size; enforce by setting limit in mempool // TODO: have a Status ExecNode::init(const TPlanNode&) member function // that does initialization outside of c'tor, so we can indicate errors -AggregationNode::AggregationNode( - ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : - ExecNode(pool, tnode, descs), - _intermediate_tuple_id(tnode.agg_node.intermediate_tuple_id), - _intermediate_tuple_desc(NULL), - _output_tuple_id(tnode.agg_node.output_tuple_id), - _output_tuple_desc(NULL), - _singleton_output_tuple(NULL), - //_tuple_pool(new MemPool()), - // - _process_row_batch_fn(NULL), - _needs_finalize(tnode.agg_node.need_finalize), - _build_timer(NULL), - _get_results_timer(NULL), - _hash_table_buckets_counter(NULL) { -} - -AggregationNode::~AggregationNode() { -} +AggregationNode::AggregationNode(ObjectPool* pool, const TPlanNode& tnode, + const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), + _intermediate_tuple_id(tnode.agg_node.intermediate_tuple_id), + _intermediate_tuple_desc(NULL), + _output_tuple_id(tnode.agg_node.output_tuple_id), + _output_tuple_desc(NULL), + _singleton_output_tuple(NULL), + 
//_tuple_pool(new MemPool()), + // + _process_row_batch_fn(NULL), + _needs_finalize(tnode.agg_node.need_finalize), + _build_timer(NULL), + _get_results_timer(NULL), + _hash_table_buckets_counter(NULL) {} + +AggregationNode::~AggregationNode() {} Status AggregationNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(ExecNode::init(tnode, state)); // ignore return status for now , so we need to introduce ExecNode::init() - RETURN_IF_ERROR(Expr::create_expr_trees( - _pool, tnode.agg_node.grouping_exprs, &_probe_expr_ctxs)); + RETURN_IF_ERROR( + Expr::create_expr_trees(_pool, tnode.agg_node.grouping_exprs, &_probe_expr_ctxs)); for (int i = 0; i < tnode.agg_node.aggregate_functions.size(); ++i) { AggFnEvaluator* evaluator = NULL; - AggFnEvaluator::create( - _pool, tnode.agg_node.aggregate_functions[i], &evaluator); + AggFnEvaluator::create(_pool, tnode.agg_node.aggregate_functions[i], &evaluator); _aggregate_evaluators.push_back(evaluator); } return Status::OK(); @@ -83,19 +81,17 @@ Status AggregationNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); _build_timer = ADD_TIMER(runtime_profile(), "BuildTime"); _get_results_timer = ADD_TIMER(runtime_profile(), "GetResultsTime"); - _hash_table_buckets_counter = - ADD_COUNTER(runtime_profile(), "BuildBuckets", TUnit::UNIT); + _hash_table_buckets_counter = ADD_COUNTER(runtime_profile(), "BuildBuckets", TUnit::UNIT); _hash_table_load_factor_counter = - ADD_COUNTER(runtime_profile(), "LoadFactor", TUnit::DOUBLE_VALUE); + ADD_COUNTER(runtime_profile(), "LoadFactor", TUnit::DOUBLE_VALUE); SCOPED_TIMER(_runtime_profile->total_time_counter()); - _intermediate_tuple_desc = - state->desc_tbl().get_tuple_descriptor(_intermediate_tuple_id); + _intermediate_tuple_desc = state->desc_tbl().get_tuple_descriptor(_intermediate_tuple_id); _output_tuple_desc = state->desc_tbl().get_tuple_descriptor(_output_tuple_id); DCHECK_EQ(_intermediate_tuple_desc->slots().size(), 
_output_tuple_desc->slots().size()); - RETURN_IF_ERROR(Expr::prepare( - _probe_expr_ctxs, state, child(0)->row_desc(), expr_mem_tracker())); + RETURN_IF_ERROR( + Expr::prepare(_probe_expr_ctxs, state, child(0)->row_desc(), expr_mem_tracker())); // Construct build exprs from _agg_tuple_desc for (int i = 0; i < _probe_expr_ctxs.size(); ++i) { @@ -110,8 +106,7 @@ Status AggregationNode::prepare(RuntimeState* state) { // nor this node's output row desc may contain the intermediate tuple, e.g., // in a single-node plan with an intermediate tuple different from the output tuple. RowDescriptor build_row_desc(_intermediate_tuple_desc, false); - RETURN_IF_ERROR(Expr::prepare( - _build_expr_ctxs, state, build_row_desc, expr_mem_tracker())); + RETURN_IF_ERROR(Expr::prepare(_build_expr_ctxs, state, build_row_desc, expr_mem_tracker())); _tuple_pool.reset(new MemPool(mem_tracker().get())); @@ -134,9 +129,9 @@ Status AggregationNode::prepare(RuntimeState* state) { } // TODO: how many buckets? - _hash_tbl.reset(new HashTable( - _build_expr_ctxs, _probe_expr_ctxs, 1, true, - std::vector(_build_expr_ctxs.size(), false), id(), mem_tracker(), 1024)); + _hash_tbl.reset(new HashTable(_build_expr_ctxs, _probe_expr_ctxs, 1, true, + std::vector(_build_expr_ctxs.size(), false), id(), + mem_tracker(), 1024)); if (_probe_expr_ctxs.empty()) { // create single output tuple now; we need to output something @@ -177,8 +172,8 @@ Status AggregationNode::open(RuntimeState* state) { if (VLOG_ROW_IS_ON) { for (int i = 0; i < batch.num_rows(); ++i) { TupleRow* row = batch.get_row(i); - VLOG_ROW << "id=" << id() << " input row: " - << row->to_string(_children[0]->row_desc()); + VLOG_ROW << "id=" << id() + << " input row: " << row->to_string(_children[0]->row_desc()); } } @@ -223,7 +218,7 @@ Status AggregationNode::open(RuntimeState* state) { } VLOG_ROW << "id=" << id() << " aggregated " << num_input_rows << " input rows into " - << num_agg_rows << " output rows"; + << num_agg_rows << " output rows"; 
_output_iterator = _hash_tbl->begin(); return Status::OK(); } @@ -255,8 +250,7 @@ Status AggregationNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* int row_idx = row_batch->add_row(); TupleRow* row = row_batch->get_row(row_idx); Tuple* intermediate_tuple = _output_iterator.get_row()->get_tuple(0); - Tuple* output_tuple = - finalize_tuple(intermediate_tuple, row_batch->tuple_data_pool()); + Tuple* output_tuple = finalize_tuple(intermediate_tuple, row_batch->tuple_data_pool()); row->set_tuple(0, output_tuple); if (ExecNode::eval_conjuncts(ctxs, num_ctxs, row)) { @@ -274,8 +268,7 @@ Status AggregationNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* *eos = _output_iterator.at_end() || reached_limit(); if (*eos) { - if (_hash_tbl.get() != NULL && - _hash_table_buckets_counter != NULL) { + if (_hash_tbl.get() != NULL && _hash_table_buckets_counter != NULL) { COUNTER_SET(_hash_table_buckets_counter, _hash_tbl->num_buckets()); } } @@ -328,7 +321,8 @@ Status AggregationNode::close(RuntimeState* state) { Tuple* AggregationNode::construct_intermediate_tuple() { Tuple* agg_tuple = Tuple::create(_intermediate_tuple_desc->byte_size(), _tuple_pool.get()); - std::vector::const_iterator slot_desc = _intermediate_tuple_desc->slots().begin(); + std::vector::const_iterator slot_desc = + _intermediate_tuple_desc->slots().begin(); // copy grouping values for (int i = 0; i < _probe_expr_ctxs.size(); ++i, ++slot_desc) { @@ -359,8 +353,7 @@ Tuple* AggregationNode::construct_intermediate_tuple() { // - max: min_value // TODO: remove when we don't use the irbuilder for codegen here. 
// This optimization no longer applies with AnyVal - if (!(*slot_desc)->type().is_string_type() && - !(*slot_desc)->type().is_date_type()) { + if (!(*slot_desc)->type().is_string_type() && !(*slot_desc)->type().is_date_type()) { ExprValue default_value; void* default_value_ptr = NULL; @@ -434,20 +427,18 @@ Tuple* AggregationNode::finalize_tuple(Tuple* tuple, MemPool* pool) { void AggregationNode::debug_string(int indentation_level, std::stringstream* out) const { *out << std::string(indentation_level * 2, ' '); *out << "AggregationNode(intermediate_tuple_id=" << _intermediate_tuple_id - << " output_tuple_id=" << _output_tuple_id - << " needs_finalize=" << _needs_finalize + << " output_tuple_id=" << _output_tuple_id << " needs_finalize=" + << _needs_finalize // << " probe_exprs=" << Expr::debug_string(_probe_exprs) << " agg_exprs=" << AggFnEvaluator::debug_string(_aggregate_evaluators); ExecNode::debug_string(indentation_level, out); *out << ")"; } -void AggregationNode::push_down_predicate(RuntimeState *state, - std::list *expr_ctxs) { +void AggregationNode::push_down_predicate(RuntimeState* state, std::list* expr_ctxs) { // groupby can pushdown, agg can't pushdown // Now we doesn't pushdown for easy. return; } -} - +} // namespace doris diff --git a/be/src/exec/aggregation_node.h b/be/src/exec/aggregation_node.h index 72e9f0ad0bb135..464d3647d288b1 100644 --- a/be/src/exec/aggregation_node.h +++ b/be/src/exec/aggregation_node.h @@ -61,8 +61,7 @@ class AggregationNode : public ExecNode { virtual Status close(RuntimeState* state); virtual void debug_string(int indentation_level, std::stringstream* out) const; - virtual void push_down_predicate( - RuntimeState *state, std::list *expr_ctxs); + virtual void push_down_predicate(RuntimeState* state, std::list* expr_ctxs); private: boost::scoped_ptr _hash_tbl; @@ -73,7 +72,7 @@ class AggregationNode : public ExecNode { /// FunctionContext for each agg fn and backing pool. 
std::vector _agg_fn_ctxs; boost::scoped_ptr _agg_fn_pool; - + // Exprs used to evaluate input rows std::vector _probe_expr_ctxs; // Exprs used to insert constructed aggregation tuple into the hash table. @@ -88,8 +87,8 @@ class AggregationNode : public ExecNode { /// the intermediate tuple. TupleId _output_tuple_id; TupleDescriptor* _output_tuple_desc; - - Tuple* _singleton_output_tuple; // result of aggregation w/o GROUP BY + + Tuple* _singleton_output_tuple; // result of aggregation w/o GROUP BY boost::scoped_ptr _tuple_pool; typedef void (*ProcessRowBatchFn)(AggregationNode*, RowBatch*); @@ -129,6 +128,6 @@ class AggregationNode : public ExecNode { void process_row_batch_with_grouping(RowBatch* batch, MemPool* pool); }; -} +} // namespace doris #endif diff --git a/be/src/exec/aggregation_node_ir.cpp b/be/src/exec/aggregation_node_ir.cpp index 4ce5cf37e40b85..0d174411a6b5b1 100644 --- a/be/src/exec/aggregation_node_ir.cpp +++ b/be/src/exec/aggregation_node_ir.cpp @@ -16,7 +16,6 @@ // under the License. 
#include "exec/aggregation_node.h" - #include "exec/hash_table.hpp" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" @@ -48,5 +47,4 @@ void AggregationNode::process_row_batch_with_grouping(RowBatch* batch, MemPool* } } -} - +} // namespace doris diff --git a/be/src/exec/analytic_eval_node.cpp b/be/src/exec/analytic_eval_node.cpp index 6aae4527777290..bb069a17c82d7f 100644 --- a/be/src/exec/analytic_eval_node.cpp +++ b/be/src/exec/analytic_eval_node.cpp @@ -19,7 +19,6 @@ #include "exprs/agg_fn_evaluator.h" #include "exprs/anyval_util.h" - #include "runtime/descriptors.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" @@ -30,41 +29,38 @@ namespace doris { using doris_udf::BigIntVal; AnalyticEvalNode::AnalyticEvalNode(ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs) : - ExecNode(pool, tnode, descs), - _window(tnode.analytic_node.window), - _intermediate_tuple_desc( - descs.get_tuple_descriptor(tnode.analytic_node.intermediate_tuple_id)), - _result_tuple_desc( - descs.get_tuple_descriptor(tnode.analytic_node.output_tuple_id)), - _buffered_tuple_desc(NULL), - _partition_by_eq_expr_ctx(NULL), - _order_by_eq_expr_ctx(NULL), - _rows_start_offset(0), - _rows_end_offset(0), - _has_first_val_null_offset(false), - _first_val_null_offset(0), - _last_result_idx(-1), - _prev_pool_last_result_idx(-1), - _prev_pool_last_window_idx(-1), - _curr_tuple(NULL), - _dummy_result_tuple(NULL), - _curr_partition_idx(-1), - _prev_input_row(NULL), - _block_mgr_client(nullptr), - _input_eos(false), - _evaluation_timer(NULL) { + const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), + _window(tnode.analytic_node.window), + _intermediate_tuple_desc( + descs.get_tuple_descriptor(tnode.analytic_node.intermediate_tuple_id)), + _result_tuple_desc(descs.get_tuple_descriptor(tnode.analytic_node.output_tuple_id)), + _buffered_tuple_desc(NULL), + _partition_by_eq_expr_ctx(NULL), + _order_by_eq_expr_ctx(NULL), + _rows_start_offset(0), + 
_rows_end_offset(0), + _has_first_val_null_offset(false), + _first_val_null_offset(0), + _last_result_idx(-1), + _prev_pool_last_result_idx(-1), + _prev_pool_last_window_idx(-1), + _curr_tuple(NULL), + _dummy_result_tuple(NULL), + _curr_partition_idx(-1), + _prev_input_row(NULL), + _block_mgr_client(nullptr), + _input_eos(false), + _evaluation_timer(NULL) { if (tnode.analytic_node.__isset.buffered_tuple_id) { - _buffered_tuple_desc = descs.get_tuple_descriptor( - tnode.analytic_node.buffered_tuple_id); + _buffered_tuple_desc = descs.get_tuple_descriptor(tnode.analytic_node.buffered_tuple_id); } if (!tnode.analytic_node.__isset.window) { _fn_scope = AnalyticEvalNode::PARTITION; } else if (tnode.analytic_node.window.type == TAnalyticWindowType::RANGE) { _fn_scope = AnalyticEvalNode::RANGE; - DCHECK(!_window.__isset.window_start) - << "RANGE windows must have UNBOUNDED PRECEDING"; + DCHECK(!_window.__isset.window_start) << "RANGE windows must have UNBOUNDED PRECEDING"; DCHECK(!_window.__isset.window_end || _window.window_end.type == TAnalyticWindowBoundaryType::CURRENT_ROW) << "RANGE window end bound must be CURRENT ROW or UNBOUNDED FOLLOWING"; @@ -103,7 +99,7 @@ AnalyticEvalNode::AnalyticEvalNode(ObjectPool* pool, const TPlanNode& tnode, } } - VLOG_ROW << "tnode=" << apache::thrift::ThriftDebugString(tnode); + VLOG_ROW << "tnode=" << apache::thrift::ThriftDebugString(tnode); } Status AnalyticEvalNode::init(const TPlanNode& tnode, RuntimeState* state) { @@ -113,8 +109,8 @@ Status AnalyticEvalNode::init(const TPlanNode& tnode, RuntimeState* state) { for (int i = 0; i < analytic_node.analytic_functions.size(); ++i) { AggFnEvaluator* evaluator = NULL; - RETURN_IF_ERROR(AggFnEvaluator::create( - _pool, analytic_node.analytic_functions[i], true, &evaluator)); + RETURN_IF_ERROR(AggFnEvaluator::create(_pool, analytic_node.analytic_functions[i], true, + &evaluator)); _evaluators.push_back(evaluator); const TFunction& fn = analytic_node.analytic_functions[i].nodes[0].fn; 
_is_lead_fn.push_back("lead" == fn.name.function_name); @@ -128,14 +124,14 @@ Status AnalyticEvalNode::init(const TPlanNode& tnode, RuntimeState* state) { if (analytic_node.__isset.partition_by_eq) { DCHECK(analytic_node.__isset.buffered_tuple_id); - RETURN_IF_ERROR(Expr::create_expr_tree( - _pool, analytic_node.partition_by_eq, &_partition_by_eq_expr_ctx)); + RETURN_IF_ERROR(Expr::create_expr_tree(_pool, analytic_node.partition_by_eq, + &_partition_by_eq_expr_ctx)); } if (analytic_node.__isset.order_by_eq) { DCHECK(analytic_node.__isset.buffered_tuple_id); - RETURN_IF_ERROR(Expr::create_expr_tree( - _pool, analytic_node.order_by_eq, &_order_by_eq_expr_ctx)); + RETURN_IF_ERROR( + Expr::create_expr_tree(_pool, analytic_node.order_by_eq, &_order_by_eq_expr_ctx)); } return Status::OK(); @@ -182,8 +178,7 @@ Status AnalyticEvalNode::prepare(RuntimeState* state) { } } - _child_tuple_cmp_row = reinterpret_cast( - _mem_pool->allocate(sizeof(Tuple*) * 2)); + _child_tuple_cmp_row = reinterpret_cast(_mem_pool->allocate(sizeof(Tuple*) * 2)); return Status::OK(); } @@ -193,16 +188,19 @@ Status AnalyticEvalNode::open(RuntimeState* state) { RETURN_IF_CANCELLED(state); //RETURN_IF_ERROR(QueryMaintenance(state)); RETURN_IF_ERROR(child(0)->open(state)); - RETURN_IF_ERROR(state->block_mgr2()->register_client(2, mem_tracker(), state, &_block_mgr_client)); - _input_stream.reset(new BufferedTupleStream2(state, child(0)->row_desc(), state->block_mgr2(), _block_mgr_client, false, true)); + RETURN_IF_ERROR( + state->block_mgr2()->register_client(2, mem_tracker(), state, &_block_mgr_client)); + _input_stream.reset(new BufferedTupleStream2(state, child(0)->row_desc(), state->block_mgr2(), + _block_mgr_client, false, true)); RETURN_IF_ERROR(_input_stream->init(id(), runtime_profile(), true)); bool got_read_buffer; RETURN_IF_ERROR(_input_stream->prepare_for_read(true, &got_read_buffer)); if (!got_read_buffer) { - std::string msg("Failed to acquire initial read buffer for analytic function " - 
"evaluation. Reducing query concurrency or increasing the memory limit may " - "help this query to complete successfully."); + std::string msg( + "Failed to acquire initial read buffer for analytic function " + "evaluation. Reducing query concurrency or increasing the memory limit may " + "help this query to complete successfully."); return mem_tracker()->MemLimitExceeded(state, msg, -1); } @@ -212,10 +210,10 @@ Status AnalyticEvalNode::open(RuntimeState* state) { RETURN_IF_ERROR(_evaluators[i]->open(state, _fn_ctxs[i])); if ("first_value_rewrite" == _evaluators[i]->fn_name() && - _fn_ctxs[i]->get_num_args() == 2) { + _fn_ctxs[i]->get_num_args() == 2) { DCHECK(!_has_first_val_null_offset); _first_val_null_offset = - reinterpret_cast(_fn_ctxs[i]->get_constant_arg(1))->val; + reinterpret_cast(_fn_ctxs[i]->get_constant_arg(1))->val; VLOG_FILE << id() << " FIRST_VAL rewrite null offset: " << _first_val_null_offset; _has_first_val_null_offset = true; } @@ -238,8 +236,10 @@ Status AnalyticEvalNode::open(RuntimeState* state) { // Fetch the first input batch so that some _prev_input_row can be set here to avoid // special casing in GetNext(). 
- _prev_child_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); - _curr_child_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); + _prev_child_batch.reset( + new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); + _curr_child_batch.reset( + new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); while (!_input_eos && _prev_input_row == NULL) { RETURN_IF_ERROR(child(0)->get_next(state, _curr_child_batch.get(), &_input_eos)); @@ -324,14 +324,13 @@ std::string AnalyticEvalNode::debug_state_string(bool detailed) const { std::stringstream ss; ss << "num_returned=" << _input_stream->rows_returned() << " num_rows=" << _input_stream->num_rows() - << " _curr_partition_idx=" << _curr_partition_idx - << " last_result_idx=" << _last_result_idx; + << " _curr_partition_idx=" << _curr_partition_idx << " last_result_idx=" << _last_result_idx; if (detailed) { ss << " result_tuples idx: ["; for (std::list>::const_iterator it = _result_tuples.begin(); - it != _result_tuples.end(); ++it) { + it != _result_tuples.end(); ++it) { ss << it->first; if (*it != _result_tuples.back()) { @@ -345,7 +344,7 @@ std::string AnalyticEvalNode::debug_state_string(bool detailed) const { ss << " window_tuples idx: ["; for (std::list>::const_iterator it = _window_tuples.begin(); - it != _window_tuples.end(); ++it) { + it != _window_tuples.end(); ++it) { ss << it->first; if (*it != _window_tuples.back()) { @@ -379,8 +378,7 @@ std::string AnalyticEvalNode::debug_state_string(bool detailed) const { void AnalyticEvalNode::add_result_tuple(int64_t stream_idx) { VLOG_ROW << id() << " add_result_tuple idx=" << stream_idx; DCHECK(_curr_tuple != NULL); - Tuple* result_tuple = Tuple::create(_result_tuple_desc->byte_size(), - _curr_tuple_pool.get()); + Tuple* result_tuple = Tuple::create(_result_tuple_desc->byte_size(), _curr_tuple_pool.get()); AggFnEvaluator::get_value(_evaluators, 
_fn_ctxs, _curr_tuple, result_tuple); DCHECK_GT(stream_idx, _last_result_idx); @@ -390,7 +388,7 @@ void AnalyticEvalNode::add_result_tuple(int64_t stream_idx) { } inline void AnalyticEvalNode::try_add_result_tuple_for_prev_row(bool next_partition, - int64_t stream_idx, TupleRow* row) { + int64_t stream_idx, TupleRow* row) { // The analytic fns are finalized after the previous row if we found a new partition // or the window is a RANGE and the order by exprs changed. For ROWS windows we do not // need to compare the current row to the previous row. @@ -405,8 +403,7 @@ inline void AnalyticEvalNode::try_add_result_tuple_for_prev_row(bool next_partit } } -inline void AnalyticEvalNode::try_add_result_tuple_for_curr_row(int64_t stream_idx, - TupleRow* row) { +inline void AnalyticEvalNode::try_add_result_tuple_for_curr_row(int64_t stream_idx, TupleRow* row) { VLOG_ROW << id() << " try_add_result_tuple_for_curr_row idx=" << stream_idx; // We only add results at this point for ROWS windows (unless unbounded following) @@ -446,7 +443,7 @@ inline void AnalyticEvalNode::try_remove_rows_before_window(int64_t stream_idx) } inline void AnalyticEvalNode::try_add_remaining_results(int64_t partition_idx, - int64_t prev_partition_idx) { + int64_t prev_partition_idx) { DCHECK_LT(prev_partition_idx, partition_idx); // For PARTITION, RANGE, or ROWS with UNBOUNDED PRECEDING: add a result tuple for the @@ -475,7 +472,7 @@ inline void AnalyticEvalNode::try_add_remaining_results(int64_t partition_idx, << " " << debug_state_string(true); for (int64_t next_result_idx = _last_result_idx + 1; next_result_idx < partition_idx; - ++next_result_idx) { + ++next_result_idx) { if (_window_tuples.empty()) { break; } @@ -520,32 +517,30 @@ inline void AnalyticEvalNode::init_next_partition(int64_t stream_idx) { removed_results_past_partition = true; DCHECK(_window.__isset.window_end && _window.window_end.type == TAnalyticWindowBoundaryType::PRECEDING); - VLOG_ROW << id() << " Removing result past 
partition idx: " - << _result_tuples.back().first; + VLOG_ROW << id() << " Removing result past partition idx: " << _result_tuples.back().first; Tuple* prev_result_tuple = _result_tuples.back().second; _result_tuples.pop_back(); - if (_result_tuples.empty() || - _result_tuples.back().first < prev_partition_stream_idx) { + if (_result_tuples.empty() || _result_tuples.back().first < prev_partition_stream_idx) { // prev_result_tuple was the last result tuple in the partition, add it back with // the index of the last row in the partition so that all output rows in this // partition get the default result tuple. _result_tuples.push_back( - std::pair(_curr_partition_idx - 1, prev_result_tuple)); + std::pair(_curr_partition_idx - 1, prev_result_tuple)); } _last_result_idx = _result_tuples.back().first; } if (removed_results_past_partition) { - VLOG_ROW << id() << " After removing results past partition: " - << debug_state_string(true); + VLOG_ROW << id() << " After removing results past partition: " << debug_state_string(true); DCHECK_EQ(_last_result_idx, _curr_partition_idx - 1); DCHECK_LE(_input_stream->rows_returned(), _last_result_idx); } - if (_fn_scope == ROWS && stream_idx > 0 && (!_window.__isset.window_end || - _window.window_end.type == TAnalyticWindowBoundaryType::FOLLOWING)) { + if (_fn_scope == ROWS && stream_idx > 0 && + (!_window.__isset.window_end || + _window.window_end.type == TAnalyticWindowBoundaryType::FOLLOWING)) { try_add_remaining_results(stream_idx, prev_partition_stream_idx); } @@ -563,7 +558,7 @@ inline void AnalyticEvalNode::init_next_partition(int64_t stream_idx) { // count()) for output rows that have no input rows in the window. We need to add this // result tuple before any input rows are consumed and the evaluators are updated. 
if (_fn_scope == ROWS && _window.__isset.window_end && - _window.window_end.type == TAnalyticWindowBoundaryType::PRECEDING) { + _window.window_end.type == TAnalyticWindowBoundaryType::PRECEDING) { if (_has_first_val_null_offset) { // Special handling for FIRST_VALUE which has the window rewritten in the FE // in order to evaluate the fn efficiently with a trivial agg fn implementation. @@ -599,8 +594,7 @@ Status AnalyticEvalNode::process_child_batches(RuntimeState* state) { // Consume child batches until eos or there are enough rows to return more than an // output batch. Ensuring there is at least one more row left after returning results // allows us to simplify the logic dealing with _last_result_idx and _result_tuples. - while (_curr_child_batch.get() != NULL && - num_output_rows_ready() < state->batch_size() + 1) { + while (_curr_child_batch.get() != NULL && num_output_rows_ready() < state->batch_size() + 1) { RETURN_IF_CANCELLED(state); //RETURN_IF_ERROR(QueryMaintenance(state)); RETURN_IF_ERROR(process_child_batch(state)); @@ -666,14 +660,14 @@ Status AnalyticEvalNode::process_child_batch(RuntimeState* state) { // The _evaluators are updated with the current row. 
if (_fn_scope != ROWS || !_window.__isset.window_start || - stream_idx - _rows_start_offset >= _curr_partition_idx) { + stream_idx - _rows_start_offset >= _curr_partition_idx) { VLOG_ROW << id() << " Update idx=" << stream_idx; AggFnEvaluator::add(_evaluators, _fn_ctxs, row, _curr_tuple); if (_window.__isset.window_start) { VLOG_ROW << id() << " Adding tuple to window at idx=" << stream_idx; - Tuple* tuple = row->get_tuple(0)->deep_copy(*_child_tuple_desc, - _curr_tuple_pool.get()); + Tuple* tuple = + row->get_tuple(0)->deep_copy(*_child_tuple_desc, _curr_tuple_pool.get()); _window_tuples.push_back(std::pair(stream_idx, tuple)); last_window_tuple_idx = stream_idx; } @@ -711,26 +705,25 @@ Status AnalyticEvalNode::process_child_batch(RuntimeState* state) { // Transfer resources to _prev_tuple_pool when enough resources have accumulated // and the _prev_tuple_pool has already been transferred to an output batch. - // The memory limit of _curr_tuple_pool is set by the fixed value + // The memory limit of _curr_tuple_pool is set by the fixed value // The size is specified as 8MB, which is used in the extremely strict memory limit. // Eg: exec_mem_limit < 100MB may cause memory exceeded limit problem. So change it to half of max block size to prevent the problem. // TODO: Should we keep the buffer of _curr_tuple_pool or release the memory occupied ASAP? 
if (_curr_tuple_pool->total_allocated_bytes() > state->block_mgr2()->max_block_size() / 2 && - (_prev_pool_last_result_idx == -1 || _prev_pool_last_window_idx == -1)) { + (_prev_pool_last_result_idx == -1 || _prev_pool_last_window_idx == -1)) { _prev_tuple_pool->acquire_data(_curr_tuple_pool.get(), false); _prev_pool_last_result_idx = _last_result_idx; _prev_pool_last_window_idx = last_window_tuple_idx; - VLOG_FILE << id() << " Transfer resources from curr to prev pool at idx: " - << stream_idx << ", stores tuples with last result idx: " - << _prev_pool_last_result_idx << " last window idx: " - << _prev_pool_last_window_idx; + VLOG_FILE << id() << " Transfer resources from curr to prev pool at idx: " << stream_idx + << ", stores tuples with last result idx: " << _prev_pool_last_result_idx + << " last window idx: " << _prev_pool_last_window_idx; } return Status::OK(); } Status AnalyticEvalNode::get_next_output_batch(RuntimeState* state, RowBatch* output_batch, - bool* eos) { + bool* eos) { SCOPED_TIMER(_evaluation_timer); VLOG_FILE << id() << " get_next_output_batch: " << debug_state_string(false) << " tuple pool size:" << _curr_tuple_pool->total_allocated_bytes(); @@ -837,8 +830,8 @@ Status AnalyticEvalNode::get_next(RuntimeState* state, RowBatch* row_batch, bool // Transfer resources to the output row batch if enough have accumulated and they're // no longer needed by output rows to be returned later. 
if (_prev_pool_last_result_idx != -1 && - _prev_pool_last_result_idx < _input_stream->rows_returned() && - _prev_pool_last_window_idx < _window_tuples.front().first) { + _prev_pool_last_result_idx < _input_stream->rows_returned() && + _prev_pool_last_window_idx < _window_tuples.front().first) { VLOG_FILE << id() << " Transfer prev pool to output batch, " << " pool size: " << _prev_tuple_pool->total_allocated_bytes() << " last result idx: " << _prev_pool_last_result_idx @@ -935,4 +928,4 @@ void AnalyticEvalNode::debug_string(int indentation_level, std::stringstream* ou // return ExecNode::QueryMaintenance(state); //} -} +} // namespace doris diff --git a/be/src/exec/analytic_eval_node.h b/be/src/exec/analytic_eval_node.h index 8c0f9f43b94859..8c6d7ad2e59439 100644 --- a/be/src/exec/analytic_eval_node.h +++ b/be/src/exec/analytic_eval_node.h @@ -22,8 +22,8 @@ #include "exprs/expr.h" //#include "exprs/expr_context.h" #include "runtime/buffered_block_mgr2.h" -#include "runtime/buffered_tuple_stream2.inline.h" #include "runtime/buffered_tuple_stream2.h" +#include "runtime/buffered_tuple_stream2.inline.h" #include "runtime/tuple.h" #include "thrift/protocol/TDebugProtocol.h" @@ -133,8 +133,7 @@ class AnalyticEvalNode : public ExecNode { // add_result_tuple() with the index of the previous row in _input_stream. next_partition // indicates if the current row is the start of a new partition. stream_idx is the // index of the current input row from _input_stream. - void try_add_result_tuple_for_prev_row(bool next_partition, int64_t stream_idx, - TupleRow* row); + void try_add_result_tuple_for_prev_row(bool next_partition, int64_t stream_idx, TupleRow* row); // Determines if there is a window ending at the current row, and if so, calls // add_result_tuple() with the index of the current row in _input_stream. 
stream_idx is @@ -330,6 +329,6 @@ class AnalyticEvalNode : public ExecNode { RuntimeProfile::Counter* _evaluation_timer; }; -} +} // namespace doris #endif diff --git a/be/src/exec/base_scanner.cpp b/be/src/exec/base_scanner.cpp index 1ed73e249f8f57..8ec57d41d2285c 100644 --- a/be/src/exec/base_scanner.cpp +++ b/be/src/exec/base_scanner.cpp @@ -15,15 +15,14 @@ // specific language governing permissions and limitations // under the License. - #include "base_scanner.h" #include "common/logging.h" #include "runtime/descriptors.h" #include "runtime/mem_tracker.h" #include "runtime/raw_value.h" -#include "runtime/tuple.h" #include "runtime/runtime_state.h" +#include "runtime/tuple.h" namespace doris { @@ -37,7 +36,8 @@ BaseScanner::BaseScanner(RuntimeState* state, RuntimeProfile* profile, #if BE_TEST _mem_tracker(new MemTracker()), #else - _mem_tracker(MemTracker::CreateTracker(-1, "Broker Scanner", state->instance_mem_tracker())), + _mem_tracker( + MemTracker::CreateTracker(-1, "Broker Scanner", state->instance_mem_tracker())), #endif _mem_pool(_mem_tracker.get()), _dest_tuple_desc(nullptr), @@ -65,7 +65,7 @@ Status BaseScanner::open() { Status BaseScanner::init_expr_ctxes() { // Construct _src_slot_descs const TupleDescriptor* src_tuple_desc = - _state->desc_tbl().get_tuple_descriptor(_params.src_tuple_id); + _state->desc_tbl().get_tuple_descriptor(_params.src_tuple_id); if (src_tuple_desc == nullptr) { std::stringstream ss; ss << "Unknown source tuple descriptor, tuple_id=" << _params.src_tuple_id; @@ -86,8 +86,8 @@ Status BaseScanner::init_expr_ctxes() { _src_slot_descs.emplace_back(it->second); } // Construct source tuple and tuple row - _src_tuple = (Tuple*) _mem_pool.allocate(src_tuple_desc->byte_size()); - _src_tuple_row = (TupleRow*) _mem_pool.allocate(sizeof(Tuple*)); + _src_tuple = (Tuple*)_mem_pool.allocate(src_tuple_desc->byte_size()); + _src_tuple_row = (TupleRow*)_mem_pool.allocate(sizeof(Tuple*)); _src_tuple_row->set_tuple(0, _src_tuple); 
_row_desc.reset(new RowDescriptor(_state->desc_tbl(), std::vector({_params.src_tuple_id}), @@ -110,7 +110,7 @@ Status BaseScanner::init_expr_ctxes() { if (it == std::end(_params.expr_of_dest_slot)) { std::stringstream ss; ss << "No expr for dest slot, id=" << slot_desc->id() - << ", name=" << slot_desc->col_name(); + << ", name=" << slot_desc->col_name(); return Status::InternalError(ss.str()); } ExprContext* ctx = nullptr; @@ -125,9 +125,9 @@ Status BaseScanner::init_expr_ctxes() { } else { auto _src_slot_it = src_slot_desc_map.find(it->second); if (_src_slot_it == std::end(src_slot_desc_map)) { - std::stringstream ss; - ss << "No src slot " << it->second << " in src slot descs"; - return Status::InternalError(ss.str()); + std::stringstream ss; + ss << "No src slot " << it->second << " in src slot descs"; + return Status::InternalError(ss.str()); } _src_slot_descs_order_by_dest.emplace_back(_src_slot_it->second); } @@ -150,7 +150,8 @@ bool BaseScanner::fill_dest_tuple(Tuple* dest_tuple, MemPool* mem_pool) { // Only when the expr return value is null, we will check the error message. std::string expr_error = ctx->get_error_msg(); if (!expr_error.empty()) { - _state->append_error_msg_to_file(_src_tuple_row->to_string(*(_row_desc.get())), expr_error); + _state->append_error_msg_to_file(_src_tuple_row->to_string(*(_row_desc.get())), + expr_error); _counter->num_rows_filtered++; // The ctx is reused, so must clear the error state and message. ctx->clear_error_msg(); @@ -158,18 +159,21 @@ bool BaseScanner::fill_dest_tuple(Tuple* dest_tuple, MemPool* mem_pool) { } // If _strict_mode is false, _src_slot_descs_order_by_dest size could be zero if (_strict_mode && (_src_slot_descs_order_by_dest[dest_index] != nullptr) && - !_src_tuple->is_null(_src_slot_descs_order_by_dest[dest_index]->null_indicator_offset())) { + !_src_tuple->is_null( + _src_slot_descs_order_by_dest[dest_index]->null_indicator_offset())) { //Type of the slot is must be Varchar in _src_tuple. 
- StringValue* raw_value = _src_tuple->get_string_slot(_src_slot_descs_order_by_dest[dest_index]->tuple_offset()); + StringValue* raw_value = _src_tuple->get_string_slot( + _src_slot_descs_order_by_dest[dest_index]->tuple_offset()); std::string raw_string; - if (raw_value != nullptr) {//is not null then get raw value + if (raw_value != nullptr) { //is not null then get raw value raw_string = raw_value->to_string(); } std::stringstream error_msg; error_msg << "column(" << slot_desc->col_name() << ") value is incorrect " - << "while strict mode is " << std::boolalpha << _strict_mode - << ", src value is " << raw_string; - _state->append_error_msg_to_file(_src_tuple_row->to_string(*(_row_desc.get())), error_msg.str()); + << "while strict mode is " << std::boolalpha << _strict_mode + << ", src value is " << raw_string; + _state->append_error_msg_to_file(_src_tuple_row->to_string(*(_row_desc.get())), + error_msg.str()); _counter->num_rows_filtered++; return false; } @@ -177,7 +181,8 @@ bool BaseScanner::fill_dest_tuple(Tuple* dest_tuple, MemPool* mem_pool) { std::stringstream error_msg; error_msg << "column(" << slot_desc->col_name() << ") value is null " << "while columns is not nullable"; - _state->append_error_msg_to_file(_src_tuple_row->to_string(*(_row_desc.get())), error_msg.str()); + _state->append_error_msg_to_file(_src_tuple_row->to_string(*(_row_desc.get())), + error_msg.str()); _counter->num_rows_filtered++; return false; } @@ -194,7 +199,8 @@ bool BaseScanner::fill_dest_tuple(Tuple* dest_tuple, MemPool* mem_pool) { return true; } -void BaseScanner::fill_slots_of_columns_from_path(int start, const std::vector& columns_from_path) { +void BaseScanner::fill_slots_of_columns_from_path( + int start, const std::vector& columns_from_path) { // values of columns from path can not be null for (int i = 0; i < columns_from_path.size(); ++i) { auto slot_desc = _src_slot_descs.at(i + start); @@ -207,4 +213,4 @@ void BaseScanner::fill_slots_of_columns_from_path(int start, 
const std::vector& columns_from_path); + void fill_slots_of_columns_from_path(int start, + const std::vector& columns_from_path); protected: RuntimeState* _state; diff --git a/be/src/exec/blocking_join_node.cpp b/be/src/exec/blocking_join_node.cpp index 7f60b81b095ef8..b4b2f057c303a8 100644 --- a/be/src/exec/blocking_join_node.cpp +++ b/be/src/exec/blocking_join_node.cpp @@ -20,22 +20,17 @@ #include #include "exprs/expr.h" +#include "gen_cpp/PlanNodes_types.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" #include "util/runtime_profile.h" -#include "gen_cpp/PlanNodes_types.h" namespace doris { -BlockingJoinNode::BlockingJoinNode(const std::string& node_name, - const TJoinOp::type join_op, - ObjectPool* pool, - const TPlanNode& tnode, +BlockingJoinNode::BlockingJoinNode(const std::string& node_name, const TJoinOp::type join_op, + ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), - _node_name(node_name), - _join_op(join_op) { -} + : ExecNode(pool, tnode, descs), _node_name(node_name), _join_op(join_op) {} Status BlockingJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { return ExecNode::init(tnode, state); @@ -54,8 +49,7 @@ Status BlockingJoinNode::prepare(RuntimeState* state) { _build_timer = ADD_TIMER(runtime_profile(), "BuildTime"); _left_child_timer = ADD_TIMER(runtime_profile(), "LeftChildTime"); _build_row_counter = ADD_COUNTER(runtime_profile(), "BuildRows", TUnit::UNIT); - _left_child_row_counter = ADD_COUNTER(runtime_profile(), "LeftChildRows", - TUnit::UNIT); + _left_child_row_counter = ADD_COUNTER(runtime_profile(), "LeftChildRows", TUnit::UNIT); _result_tuple_row_size = _row_descriptor.tuple_descriptors().size() * sizeof(Tuple*); @@ -167,8 +161,7 @@ Status BlockingJoinNode::open(RuntimeState* state) { void BlockingJoinNode::debug_string(int indentation_level, std::stringstream* out) const { *out << std::string(indentation_level * 2, ' '); *out << _node_name; - *out << 
"(eos=" << (_eos ? "true" : "false") - << " left_batch_pos=" << _left_batch_pos; + *out << "(eos=" << (_eos ? "true" : "false") << " left_batch_pos=" << _left_batch_pos; add_to_debug_string(indentation_level, out); ExecNode::debug_string(indentation_level, out); *out << ")"; @@ -185,7 +178,7 @@ std::string BlockingJoinNode::get_left_child_row_string(TupleRow* row) { } int* is_build_tuple = - std::find(_build_tuple_idx_ptr, _build_tuple_idx_ptr + _build_tuple_size, i); + std::find(_build_tuple_idx_ptr, _build_tuple_idx_ptr + _build_tuple_size, i); if (is_build_tuple != _build_tuple_idx_ptr + _build_tuple_size) { out << Tuple::to_string(NULL, *row_desc().tuple_descriptors()[i]); @@ -214,4 +207,4 @@ void BlockingJoinNode::create_output_row(TupleRow* out, TupleRow* left, TupleRow } } -} +} // namespace doris diff --git a/be/src/exec/blocking_join_node.h b/be/src/exec/blocking_join_node.h index e35dde6be4e1f7..f0406d3d8fbcc4 100644 --- a/be/src/exec/blocking_join_node.h +++ b/be/src/exec/blocking_join_node.h @@ -35,8 +35,8 @@ class TupleRow; // right child in open(). class BlockingJoinNode : public ExecNode { public: - BlockingJoinNode(const std::string& node_name, const TJoinOp::type join_op, - ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); + BlockingJoinNode(const std::string& node_name, const TJoinOp::type join_op, ObjectPool* pool, + const TPlanNode& tnode, const DescriptorTbl& descs); virtual ~BlockingJoinNode(); @@ -60,15 +60,15 @@ class BlockingJoinNode : public ExecNode { private: const std::string _node_name; TJoinOp::type _join_op; - bool _eos; // if true, nothing left to return in get_next() - boost::scoped_ptr _build_pool; // holds everything referenced from build side + bool _eos; // if true, nothing left to return in get_next() + boost::scoped_ptr _build_pool; // holds everything referenced from build side // _left_batch must be cleared before calling get_next(). 
The child node // does not initialize all tuple ptrs in the row, only the ones that it // is responsible for. boost::scoped_ptr _left_batch; - int _left_batch_pos; // current scan pos in _left_batch - bool _left_side_eos; // if true, left child has no more rows to process + int _left_batch_pos; // current scan pos in _left_batch + bool _left_side_eos; // if true, left child has no more rows to process TupleRow* _current_left_child_row; // _build_tuple_idx[i] is the tuple index of child(1)'s tuple[i] in the output row @@ -84,10 +84,10 @@ class BlockingJoinNode : public ExecNode { // This should be the same size as the left child tuple row. int _result_tuple_row_size; - RuntimeProfile::Counter* _build_timer; // time to prepare build side - RuntimeProfile::Counter* _left_child_timer; // time to process left child batch - RuntimeProfile::Counter* _build_row_counter; // num build rows - RuntimeProfile::Counter* _left_child_row_counter; // num left child rows + RuntimeProfile::Counter* _build_timer; // time to prepare build side + RuntimeProfile::Counter* _left_child_timer; // time to process left child batch + RuntimeProfile::Counter* _build_row_counter; // num build rows + RuntimeProfile::Counter* _left_child_row_counter; // num left child rows // Init the build-side state for a new left child row (e.g. hash table iterator or list // iterator) given the first row. Used in open() to prepare for get_next(). @@ -102,8 +102,7 @@ class BlockingJoinNode : public ExecNode { // Gives subclasses an opportunity to add debug output to the debug string printed by // debug_string(). - virtual void add_to_debug_string(int indentation_level, std::stringstream* out) const { - } + virtual void add_to_debug_string(int indentation_level, std::stringstream* out) const {} // Subclasses should not override, use add_to_debug_string() to add to the result. 
virtual void debug_string(int indentation_level, std::stringstream* out) const; @@ -122,12 +121,13 @@ class BlockingJoinNode : public ExecNode { void create_output_row(TupleRow* out_row, TupleRow* left_row, TupleRow* build_row); friend class CrossJoinNode; + private: // Supervises ConstructBuildSide in a separate thread, and returns its status in the // promise parameter. void build_side_thread(RuntimeState* state, boost::promise* status); }; -} +} // namespace doris #endif diff --git a/be/src/exec/broker_reader.cpp b/be/src/exec/broker_reader.cpp index 2125bb31cbb2a9..e5f67b618c427a 100644 --- a/be/src/exec/broker_reader.cpp +++ b/be/src/exec/broker_reader.cpp @@ -155,7 +155,8 @@ Status BrokerReader::readat(int64_t position, int64_t nbytes, int64_t* bytes_rea return status; } - VLOG_RPC << "send pread request to broker:" << broker_addr << " position:" << position << ", read bytes length:" << nbytes; + VLOG_RPC << "send pread request to broker:" << broker_addr << " position:" << position + << ", read bytes length:" << nbytes; try { client->pread(response, request); @@ -255,4 +256,3 @@ void BrokerReader::close() { } } // namespace doris - diff --git a/be/src/exec/broker_reader.h b/be/src/exec/broker_reader.h index 6aa5c065d489ba..4730260588c4f3 100644 --- a/be/src/exec/broker_reader.h +++ b/be/src/exec/broker_reader.h @@ -19,13 +19,13 @@ #include -#include #include +#include #include "common/status.h" #include "exec/file_reader.h" -#include "gen_cpp/Types_types.h" #include "gen_cpp/PaloBrokerService_types.h" +#include "gen_cpp/Types_types.h" namespace doris { @@ -39,19 +39,17 @@ class BrokerReader : public FileReader { public: // If the reader need the file size, set it when construct BrokerReader. // There is no other way to set the file size. 
- BrokerReader(ExecEnv* env, - const std::vector& broker_addresses, - const std::map& properties, - const std::string& path, - int64_t start_offset, - int64_t file_size = 0); + BrokerReader(ExecEnv* env, const std::vector& broker_addresses, + const std::map& properties, const std::string& path, + int64_t start_offset, int64_t file_size = 0); virtual ~BrokerReader(); virtual Status open() override; - // Read + // Read virtual Status read(uint8_t* buf, size_t* buf_len, bool* eof) override; - virtual Status readat(int64_t position, int64_t nbytes, int64_t* bytes_read, void* out) override; + virtual Status readat(int64_t position, int64_t nbytes, int64_t* bytes_read, + void* out) override; virtual Status read_one_message(uint8_t** buf, size_t* length) override; virtual int64_t size() override; virtual Status seek(int64_t position) override; @@ -74,5 +72,4 @@ class BrokerReader : public FileReader { int _addr_idx; }; -} - +} // namespace doris diff --git a/be/src/exec/broker_scan_node.cpp b/be/src/exec/broker_scan_node.cpp index 1acf4a144a93fe..233c6c6aeeefda 100644 --- a/be/src/exec/broker_scan_node.cpp +++ b/be/src/exec/broker_scan_node.cpp @@ -21,32 +21,29 @@ #include #include "common/object_pool.h" -#include "runtime/runtime_state.h" -#include "runtime/row_batch.h" -#include "runtime/dpp_sink_internal.h" #include "exec/broker_scanner.h" -#include "exec/parquet_scanner.h" -#include "exec/orc_scanner.h" #include "exec/json_scanner.h" +#include "exec/orc_scanner.h" +#include "exec/parquet_scanner.h" #include "exprs/expr.h" +#include "runtime/dpp_sink_internal.h" +#include "runtime/row_batch.h" +#include "runtime/runtime_state.h" #include "util/runtime_profile.h" namespace doris { -BrokerScanNode::BrokerScanNode( - ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : - ScanNode(pool, tnode, descs), - _tuple_id(tnode.broker_scan_node.tuple_id), - _runtime_state(nullptr), - _tuple_desc(nullptr), - _num_running_scanners(0), - _scan_finished(false), - 
_max_buffered_batches(32), - _wait_scanner_timer(nullptr) { -} +BrokerScanNode::BrokerScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ScanNode(pool, tnode, descs), + _tuple_id(tnode.broker_scan_node.tuple_id), + _runtime_state(nullptr), + _tuple_desc(nullptr), + _num_running_scanners(0), + _scan_finished(false), + _max_buffered_batches(32), + _wait_scanner_timer(nullptr) {} -BrokerScanNode::~BrokerScanNode() { -} +BrokerScanNode::~BrokerScanNode() {} // We use the PartitionRange to compare here. It should not be a member function of PartitionInfo // class because there are some other member in it. @@ -59,17 +56,15 @@ Status BrokerScanNode::init(const TPlanNode& tnode, RuntimeState* state) { auto& broker_scan_node = tnode.broker_scan_node; if (broker_scan_node.__isset.partition_exprs) { // ASSERT broker_scan_node.__isset.partition_infos == true - RETURN_IF_ERROR(Expr::create_expr_trees( - _pool, broker_scan_node.partition_exprs, &_partition_expr_ctxs)); + RETURN_IF_ERROR(Expr::create_expr_trees(_pool, broker_scan_node.partition_exprs, + &_partition_expr_ctxs)); for (auto& t_partition_info : broker_scan_node.partition_infos) { PartitionInfo* info = _pool->add(new PartitionInfo()); RETURN_IF_ERROR(PartitionInfo::from_thrift(_pool, t_partition_info, info)); _partition_infos.emplace_back(info); } // partitions should be in ascending order - std::sort(_partition_infos.begin(), - _partition_infos.end(), - compare_part_use_range); + std::sort(_partition_infos.begin(), _partition_infos.end(), compare_part_use_range); } return Status::OK(); } @@ -98,8 +93,7 @@ Status BrokerScanNode::prepare(RuntimeState* state) { // prepare partition if (_partition_expr_ctxs.size() > 0) { - RETURN_IF_ERROR(Expr::prepare( - _partition_expr_ctxs, state, row_desc(), expr_mem_tracker())); + RETURN_IF_ERROR(Expr::prepare(_partition_expr_ctxs, state, row_desc(), expr_mem_tracker())); for (auto iter : _partition_infos) { RETURN_IF_ERROR(iter->prepare(state, 
row_desc(), expr_mem_tracker())); } @@ -158,10 +152,8 @@ Status BrokerScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* std::shared_ptr scanner_batch; { std::unique_lock l(_batch_queue_lock); - while (_process_status.ok() && - !_runtime_state->is_cancelled() && - _num_running_scanners > 0 && - _batch_queue.empty()) { + while (_process_status.ok() && !_runtime_state->is_cancelled() && + _num_running_scanners > 0 && _batch_queue.empty()) { SCOPED_TIMER(_wait_scanner_timer); _queue_reader_cond.wait_for(l, std::chrono::seconds(1)); } @@ -215,10 +207,10 @@ Status BrokerScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* for (int i = 0; i < row_batch->num_rows(); ++i) { TupleRow* row = row_batch->get_row(i); VLOG_ROW << "BrokerScanNode output row: " - << Tuple::to_string(row->get_tuple(0), *_tuple_desc); + << Tuple::to_string(row->get_tuple(0), *_tuple_desc); } } - + return Status::OK(); } @@ -243,7 +235,7 @@ Status BrokerScanNode::close(RuntimeState* state) { } } - // Close + // Close _batch_queue.clear(); return ExecNode::close(state); @@ -254,7 +246,7 @@ Status BrokerScanNode::set_scan_ranges(const std::vector& scan _scan_ranges = scan_ranges; // Now we initialize partition information - if (_partition_expr_ctxs.size() > 0) { + if (_partition_expr_ctxs.size() > 0) { for (auto& range : _scan_ranges) { auto& params = range.scan_range.broker_scan_range.params; if (params.__isset.partition_ids) { @@ -271,61 +263,42 @@ void BrokerScanNode::debug_string(int ident_level, std::stringstream* out) const } std::unique_ptr BrokerScanNode::create_scanner(const TBrokerScanRange& scan_range, - ScannerCounter* counter) { - BaseScanner *scan = nullptr; + ScannerCounter* counter) { + BaseScanner* scan = nullptr; switch (scan_range.ranges[0].format_type) { case TFileFormatType::FORMAT_PARQUET: - scan = new ParquetScanner(_runtime_state, - runtime_profile(), - scan_range.params, - scan_range.ranges, - scan_range.broker_addresses, - counter); + scan = new 
ParquetScanner(_runtime_state, runtime_profile(), scan_range.params, + scan_range.ranges, scan_range.broker_addresses, counter); break; case TFileFormatType::FORMAT_ORC: - scan = new ORCScanner(_runtime_state, - runtime_profile(), - scan_range.params, - scan_range.ranges, - scan_range.broker_addresses, - counter); + scan = new ORCScanner(_runtime_state, runtime_profile(), scan_range.params, + scan_range.ranges, scan_range.broker_addresses, counter); break; case TFileFormatType::FORMAT_JSON: - scan = new JsonScanner(_runtime_state, - runtime_profile(), - scan_range.params, - scan_range.ranges, - scan_range.broker_addresses, - counter); + scan = new JsonScanner(_runtime_state, runtime_profile(), scan_range.params, + scan_range.ranges, scan_range.broker_addresses, counter); break; default: - scan = new BrokerScanner( - _runtime_state, - runtime_profile(), - scan_range.params, - scan_range.ranges, - scan_range.broker_addresses, - counter); + scan = new BrokerScanner(_runtime_state, runtime_profile(), scan_range.params, + scan_range.ranges, scan_range.broker_addresses, counter); } std::unique_ptr scanner(scan); return scanner; } -Status BrokerScanNode::scanner_scan( - const TBrokerScanRange& scan_range, - const std::vector& conjunct_ctxs, - const std::vector& partition_expr_ctxs, - ScannerCounter* counter) { - +Status BrokerScanNode::scanner_scan(const TBrokerScanRange& scan_range, + const std::vector& conjunct_ctxs, + const std::vector& partition_expr_ctxs, + ScannerCounter* counter) { //create scanner object and open std::unique_ptr scanner = create_scanner(scan_range, counter); RETURN_IF_ERROR(scanner->open()); bool scanner_eof = false; - + while (!scanner_eof) { // Fill one row batch std::shared_ptr row_batch( - new RowBatch(row_desc(), _runtime_state->batch_size(), mem_tracker().get())); + new RowBatch(row_desc(), _runtime_state->batch_size(), mem_tracker().get())); // create new tuple buffer for row_batch MemPool* tuple_pool = row_batch->tuple_data_pool(); @@ 
-375,14 +348,13 @@ Status BrokerScanNode::scanner_scan( // Row batch has been filled, push this to the queue if (row_batch->num_rows() > 0) { std::unique_lock l(_batch_queue_lock); - while (_process_status.ok() && - !_scan_finished.load() && + while (_process_status.ok() && !_scan_finished.load() && !_runtime_state->is_cancelled() && - // stop pushing more batch if - // 1. too many batches in queue, or - // 2. at least one batch in queue and memory exceed limit. - (_batch_queue.size() >= _max_buffered_batches - || (mem_tracker()->AnyLimitExceeded(MemLimit::HARD) && !_batch_queue.empty()))) { + // stop pushing more batch if + // 1. too many batches in queue, or + // 2. at least one batch in queue and memory exceed limit. + (_batch_queue.size() >= _max_buffered_batches || + (mem_tracker()->AnyLimitExceeded(MemLimit::HARD) && !_batch_queue.empty()))) { _queue_writer_cond.wait_for(l, std::chrono::seconds(1)); } // Process already set failed, so we just return OK @@ -400,7 +372,7 @@ Status BrokerScanNode::scanner_scan( // Queue size Must be smaller than _max_buffered_batches _batch_queue.push_back(row_batch); - // Notify reader to + // Notify reader to _queue_reader_cond.notify_one(); } } @@ -415,22 +387,23 @@ void BrokerScanNode::scanner_worker(int start_idx, int length) { if (!status.ok()) { LOG(WARNING) << "Clone conjuncts failed."; } - std::vector partition_expr_ctxs;; + std::vector partition_expr_ctxs; + ; if (status.ok()) { - status = Expr::clone_if_not_exists( - _partition_expr_ctxs, _runtime_state, &partition_expr_ctxs); + status = Expr::clone_if_not_exists(_partition_expr_ctxs, _runtime_state, + &partition_expr_ctxs); if (!status.ok()) { LOG(WARNING) << "Clone conjuncts failed."; } } ScannerCounter counter; for (int i = 0; i < length && status.ok(); ++i) { - const TBrokerScanRange& scan_range = - _scan_ranges[start_idx + i].scan_range.broker_scan_range; + const TBrokerScanRange& scan_range = + _scan_ranges[start_idx + i].scan_range.broker_scan_range; status = 
scanner_scan(scan_range, scanner_expr_ctxs, partition_expr_ctxs, &counter); if (!status.ok()) { - LOG(WARNING) << "Scanner[" << start_idx + i << "] process failed. status=" - << status.get_error_msg(); + LOG(WARNING) << "Scanner[" << start_idx + i + << "] process failed. status=" << status.get_error_msg(); } } @@ -438,7 +411,7 @@ void BrokerScanNode::scanner_worker(int start_idx, int length) { _runtime_state->update_num_rows_load_filtered(counter.num_rows_filtered); _runtime_state->update_num_rows_load_unselected(counter.num_rows_unselected); - // scanner is going to finish + // scanner is going to finish { std::lock_guard l(_batch_queue_lock); if (!status.ok()) { @@ -475,8 +448,8 @@ int64_t BrokerScanNode::binary_find_partition_id(const PartRangeKey& key) const return -1; } -int64_t BrokerScanNode::get_partition_id( - const std::vector& partition_expr_ctxs, TupleRow* row) const { +int64_t BrokerScanNode::get_partition_id(const std::vector& partition_expr_ctxs, + TupleRow* row) const { if (_partition_infos.size() == 0) { return -1; } @@ -494,4 +467,4 @@ int64_t BrokerScanNode::get_partition_id( return binary_find_partition_id(part_key); } -} +} // namespace doris diff --git a/be/src/exec/broker_scan_node.h b/be/src/exec/broker_scan_node.h index 9f37d31073b30b..dedd358c521641 100644 --- a/be/src/exec/broker_scan_node.h +++ b/be/src/exec/broker_scan_node.h @@ -20,10 +20,10 @@ #include #include #include -#include -#include #include +#include #include +#include #include "base_scanner.h" #include "common/status.h" @@ -64,8 +64,7 @@ class BrokerScanNode : public ScanNode { // If there is no partition information, return -1 // Return partition id if we find the partition match this row, // return -1, if there is no such partition. - int64_t get_partition_id( - const std::vector& partition_exprs, TupleRow* row) const; + int64_t get_partition_id(const std::vector& partition_exprs, TupleRow* row) const; protected: // Write debug string of this into out. 
@@ -98,7 +97,7 @@ class BrokerScanNode : public ScanNode { int64_t binary_find_partition_id(const PartRangeKey& key) const; std::unique_ptr create_scanner(const TBrokerScanRange& scan_range, - ScannerCounter* counter); + ScannerCounter* counter); private: TupleId _tuple_id; @@ -133,4 +132,4 @@ class BrokerScanNode : public ScanNode { RuntimeProfile::Counter* _wait_scanner_timer; }; -} +} // namespace doris diff --git a/be/src/exec/broker_scanner.cpp b/be/src/exec/broker_scanner.cpp index 39bbaa23f9eb96..4741163a77656d 100644 --- a/be/src/exec/broker_scanner.cpp +++ b/be/src/exec/broker_scanner.cpp @@ -17,9 +17,16 @@ #include "exec/broker_scanner.h" -#include #include +#include +#include "exec/broker_reader.h" +#include "exec/decompressor.h" +#include "exec/local_file_reader.h" +#include "exec/plain_text_line_reader.h" +#include "exec/text_converter.h" +#include "exec/text_converter.hpp" +#include "exprs/expr.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/mem_tracker.h" @@ -27,44 +34,36 @@ #include "runtime/stream_load/load_stream_mgr.h" #include "runtime/stream_load/stream_load_pipe.h" #include "runtime/tuple.h" -#include "exprs/expr.h" -#include "exec/text_converter.h" -#include "exec/text_converter.hpp" -#include "exec/plain_text_line_reader.h" -#include "exec/local_file_reader.h" -#include "exec/broker_reader.h" -#include "exec/decompressor.h" #include "util/utf8_check.h" namespace doris { -BrokerScanner::BrokerScanner(RuntimeState* state, - RuntimeProfile* profile, +BrokerScanner::BrokerScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params, const std::vector& ranges, const std::vector& broker_addresses, - ScannerCounter* counter) : BaseScanner(state, profile, params, counter), - _ranges(ranges), - _broker_addresses(broker_addresses), - // _splittable(params.splittable), - _value_separator(static_cast(params.column_separator)), - _line_delimiter(static_cast(params.line_delimiter)), - 
_cur_file_reader(nullptr), - _cur_line_reader(nullptr), - _cur_decompressor(nullptr), - _next_range(0), - _cur_line_reader_eof(false), - _scanner_eof(false), - _skip_next_line(false) { -} + ScannerCounter* counter) + : BaseScanner(state, profile, params, counter), + _ranges(ranges), + _broker_addresses(broker_addresses), + // _splittable(params.splittable), + _value_separator(static_cast(params.column_separator)), + _line_delimiter(static_cast(params.line_delimiter)), + _cur_file_reader(nullptr), + _cur_line_reader(nullptr), + _cur_decompressor(nullptr), + _next_range(0), + _cur_line_reader_eof(false), + _scanner_eof(false), + _skip_next_line(false) {} BrokerScanner::~BrokerScanner() { close(); } Status BrokerScanner::open() { - RETURN_IF_ERROR(BaseScanner::open());// base default function - _text_converter.reset(new(std::nothrow) TextConverter('\\')); + RETURN_IF_ERROR(BaseScanner::open()); // base default function + _text_converter.reset(new (std::nothrow) TextConverter('\\')); if (_text_converter == nullptr) { return Status::InternalError("No memory error."); } @@ -84,8 +83,7 @@ Status BrokerScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { } const uint8_t* ptr = nullptr; size_t size = 0; - RETURN_IF_ERROR(_cur_line_reader->read_line( - &ptr, &size, &_cur_line_reader_eof)); + RETURN_IF_ERROR(_cur_line_reader->read_line(&ptr, &size, &_cur_line_reader_eof)); if (_skip_next_line) { _skip_next_line = false; continue; @@ -147,8 +145,9 @@ Status BrokerScanner::open_file_reader() { break; } case TFileType::FILE_BROKER: { - BrokerReader* broker_reader = new BrokerReader( - _state->exec_env(), _broker_addresses, _params.properties, range.path, start_offset); + BrokerReader* broker_reader = + new BrokerReader(_state->exec_env(), _broker_addresses, _params.properties, + range.path, start_offset); RETURN_IF_ERROR(broker_reader->open()); _cur_file_reader = broker_reader; break; @@ -204,8 +203,7 @@ Status 
BrokerScanner::create_decompressor(TFileFormatType::type type) { return Status::InternalError(ss.str()); } } - RETURN_IF_ERROR(Decompressor::create_decompressor( - compress_type, &_cur_decompressor)); + RETURN_IF_ERROR(Decompressor::create_decompressor(compress_type, &_cur_decompressor)); return Status::OK(); } @@ -247,10 +245,8 @@ Status BrokerScanner::open_line_reader() { case TFileFormatType::FORMAT_CSV_LZ4FRAME: case TFileFormatType::FORMAT_CSV_LZOP: case TFileFormatType::FORMAT_CSV_DEFLATE: - _cur_line_reader = new PlainTextLineReader( - _profile, - _cur_file_reader, _cur_decompressor, - size, _line_delimiter); + _cur_line_reader = new PlainTextLineReader(_profile, _cur_file_reader, _cur_decompressor, + size, _line_delimiter); break; default: { std::stringstream ss; @@ -286,8 +282,7 @@ void BrokerScanner::close() { } } -void BrokerScanner::split_line( - const Slice& line, std::vector* values) { +void BrokerScanner::split_line(const Slice& line, std::vector* values) { // line-begin char and line-end char are considered to be 'delimiter' const char* value = line.data; const char* ptr = line.data; @@ -300,9 +295,8 @@ void BrokerScanner::split_line( values->emplace_back(value, ptr - value); } -void BrokerScanner::fill_fix_length_string( - const Slice& value, MemPool* pool, - char** new_value_p, const int new_value_length) { +void BrokerScanner::fill_fix_length_string(const Slice& value, MemPool* pool, char** new_value_p, + const int new_value_length) { if (new_value_length != 0 && value.size < new_value_length) { *new_value_p = reinterpret_cast(pool->allocate(new_value_length)); @@ -318,18 +312,16 @@ void BrokerScanner::fill_fix_length_string( // .123 1.23 123. -1.23 // ATTN: The decimal point and (for negative numbers) the "-" sign are not counted. 
// like '.123', it will be regarded as '0.123', but it match decimal(3, 3) -bool BrokerScanner::check_decimal_input( - const Slice& slice, - int precision, int scale, - std::stringstream* error_msg) { +bool BrokerScanner::check_decimal_input(const Slice& slice, int precision, int scale, + std::stringstream* error_msg) { const char* value = slice.data; size_t value_length = slice.size; if (value_length > (precision + 2)) { (*error_msg) << "the length of decimal value is overflow. " - << "precision in schema: (" << precision << ", " << scale << "); " - << "value: [" << slice.to_string() << "]; " - << "str actual length: " << value_length << ";"; + << "precision in schema: (" << precision << ", " << scale << "); " + << "value: [" << slice.to_string() << "]; " + << "str actual length: " << value_length << ";"; return false; } @@ -357,7 +349,7 @@ bool BrokerScanner::check_decimal_input( int value_int_len = 0; int value_frac_len = 0; value_int_len = point_index - begin_index; - value_frac_len = end_index- point_index; + value_frac_len = end_index - point_index; if (point_index == -1) { // an int value: like 123 @@ -365,33 +357,27 @@ bool BrokerScanner::check_decimal_input( value_frac_len = 0; } else { value_int_len = point_index - begin_index; - value_frac_len = end_index- point_index; + value_frac_len = end_index - point_index; } if (value_int_len > (precision - scale)) { - (*error_msg) << "the int part length longer than schema precision [" - << precision << "]. " - << "value [" << slice.to_string() << "]. "; + (*error_msg) << "the int part length longer than schema precision [" << precision << "]. " + << "value [" << slice.to_string() << "]. "; return false; } else if (value_frac_len > scale) { - (*error_msg) << "the frac part length longer than schema scale [" - << scale << "]. " - << "value [" << slice.to_string() << "]. "; + (*error_msg) << "the frac part length longer than schema scale [" << scale << "]. " + << "value [" << slice.to_string() << "]. 
"; return false; } return true; } bool is_null(const Slice& slice) { - return slice.size == 2 && - slice.data[0] == '\\' && - slice.data[1] == 'N'; + return slice.size == 2 && slice.data[0] == '\\' && slice.data[1] == 'N'; } // Convert one row to this tuple -bool BrokerScanner::convert_one_row( - const Slice& line, - Tuple* tuple, MemPool* tuple_pool) { +bool BrokerScanner::convert_one_row(const Slice& line, Tuple* tuple, MemPool* tuple_pool) { if (!line_to_src_tuple(line)) { return false; } @@ -400,20 +386,16 @@ bool BrokerScanner::convert_one_row( // Convert one row to this tuple bool BrokerScanner::line_to_src_tuple(const Slice& line) { - if (!validate_utf8(line.data, line.size)) { std::stringstream error_msg; error_msg << "data is not encoded by UTF-8"; - _state->append_error_msg_to_file(std::string(line.data, line.size), - error_msg.str()); + _state->append_error_msg_to_file(std::string(line.data, line.size), error_msg.str()); _counter->num_rows_filtered++; return false; } std::vector values; - { - split_line(line, &values); - } + { split_line(line, &values); } // range of current file const TBrokerRangeDesc& range = _ranges.at(_next_range - 1); @@ -421,19 +403,17 @@ bool BrokerScanner::line_to_src_tuple(const Slice& line) { if (values.size() + columns_from_path.size() < _src_slot_descs.size()) { std::stringstream error_msg; error_msg << "actual column number is less than schema column number. 
" - << "actual number: " << values.size() << " sep: " << _value_separator << ", " - << "schema number: " << _src_slot_descs.size() << "; "; - _state->append_error_msg_to_file(std::string(line.data, line.size), - error_msg.str()); + << "actual number: " << values.size() << " sep: " << _value_separator << ", " + << "schema number: " << _src_slot_descs.size() << "; "; + _state->append_error_msg_to_file(std::string(line.data, line.size), error_msg.str()); _counter->num_rows_filtered++; return false; } else if (values.size() + columns_from_path.size() > _src_slot_descs.size()) { std::stringstream error_msg; error_msg << "actual column number is more than schema column number. " - << "actual number: " << values.size() << " sep: " << _value_separator << ", " - << "schema number: " << _src_slot_descs.size() << "; "; - _state->append_error_msg_to_file(std::string(line.data, line.size), - error_msg.str()); + << "actual number: " << values.size() << " sep: " << _value_separator << ", " + << "schema number: " << _src_slot_descs.size() << "; "; + _state->append_error_msg_to_file(std::string(line.data, line.size), error_msg.str()); _counter->num_rows_filtered++; return false; } @@ -459,4 +439,4 @@ bool BrokerScanner::line_to_src_tuple(const Slice& line) { return true; } -} +} // namespace doris diff --git a/be/src/exec/broker_scanner.h b/be/src/exec/broker_scanner.h index 1eb11221a5d8be..0c02baf1f0e6b7 100644 --- a/be/src/exec/broker_scanner.h +++ b/be/src/exec/broker_scanner.h @@ -17,19 +17,19 @@ #pragma once -#include -#include -#include #include +#include #include +#include +#include -#include "exec/base_scanner.h" #include "common/status.h" +#include "exec/base_scanner.h" #include "gen_cpp/PlanNodes_types.h" #include "gen_cpp/Types_types.h" #include "runtime/mem_pool.h" -#include "util/slice.h" #include "util/runtime_profile.h" +#include "util/slice.h" namespace doris { @@ -52,13 +52,9 @@ class StreamLoadPipe; // Broker scanner convert the data read from broker to doris's 
tuple. class BrokerScanner : public BaseScanner { public: - BrokerScanner( - RuntimeState* state, - RuntimeProfile* profile, - const TBrokerScanRangeParams& params, - const std::vector& ranges, - const std::vector& broker_addresses, - ScannerCounter* counter); + BrokerScanner(RuntimeState* state, RuntimeProfile* profile, + const TBrokerScanRangeParams& params, const std::vector& ranges, + const std::vector& broker_addresses, ScannerCounter* counter); ~BrokerScanner(); // Open this scanner, will initialize information need to @@ -78,17 +74,13 @@ class BrokerScanner : public BaseScanner { Status open_next_reader(); // Split one text line to values - void split_line( - const Slice& line, std::vector* values); + void split_line(const Slice& line, std::vector* values); - void fill_fix_length_string( - const Slice& value, MemPool* pool, - char** new_value_p, int new_value_length); + void fill_fix_length_string(const Slice& value, MemPool* pool, char** new_value_p, + int new_value_length); - bool check_decimal_input( - const Slice& value, - int precision, int scale, - std::stringstream* error_msg); + bool check_decimal_input(const Slice& value, int precision, int scale, + std::stringstream* error_msg); // Convert one row to one tuple // 'ptr' and 'len' is csv text line @@ -99,7 +91,9 @@ class BrokerScanner : public BaseScanner { Status line_to_src_tuple(); bool line_to_src_tuple(const Slice& line); -private:; + +private: + ; const std::vector& _ranges; const std::vector& _broker_addresses; @@ -125,4 +119,4 @@ private:; std::shared_ptr _stream_load_pipe; }; -} +} // namespace doris diff --git a/be/src/exec/broker_writer.cpp b/be/src/exec/broker_writer.cpp index 4474cd06ae9206..40a711af22564a 100644 --- a/be/src/exec/broker_writer.cpp +++ b/be/src/exec/broker_writer.cpp @@ -29,20 +29,16 @@ namespace doris { -BrokerWriter::BrokerWriter( - ExecEnv* env, - const std::vector& broker_addresses, - const std::map& properties, - const std::string& path, - int64_t start_offset) : - 
_env(env), - _addresses(broker_addresses), - _properties(properties), - _path(path), - _cur_offset(start_offset), - _is_closed(false), - _addr_idx(0) { -} +BrokerWriter::BrokerWriter(ExecEnv* env, const std::vector& broker_addresses, + const std::map& properties, + const std::string& path, int64_t start_offset) + : _env(env), + _addresses(broker_addresses), + _properties(properties), + _path(path), + _cur_offset(start_offset), + _is_closed(false), + _addr_idx(0) {} BrokerWriter::~BrokerWriter() { close(); @@ -79,7 +75,7 @@ Status BrokerWriter::open() { request.__set_properties(_properties); VLOG_ROW << "debug: send broker open writer request: " - << apache::thrift::ThriftDebugString(request).c_str(); + << apache::thrift::ThriftDebugString(request).c_str(); TBrokerOpenWriterResponse response; try { @@ -87,8 +83,7 @@ Status BrokerWriter::open() { BrokerServiceConnection client(client_cache(_env), broker_addr, 10000, &status); if (!status.ok()) { LOG(WARNING) << "Create broker writer client failed. 
" - << "broker=" << broker_addr - << ", status=" << status.get_error_msg(); + << "broker=" << broker_addr << ", status=" << status.get_error_msg(); return status; } @@ -106,12 +101,12 @@ Status BrokerWriter::open() { } VLOG_ROW << "debug: send broker open writer response: " - << apache::thrift::ThriftDebugString(response).c_str(); + << apache::thrift::ThriftDebugString(response).c_str(); if (response.opStatus.statusCode != TBrokerOperationStatusCode::OK) { std::stringstream ss; ss << "Open broker writer failed, broker:" << broker_addr - << " failed:" << response.opStatus.message; + << " failed:" << response.opStatus.message; LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } @@ -134,7 +129,7 @@ Status BrokerWriter::write(const uint8_t* buf, size_t buf_len, size_t* written_l request.__set_data(std::string(reinterpret_cast(buf), buf_len)); VLOG_ROW << "debug: send broker pwrite request: " - << apache::thrift::ThriftDebugString(request).c_str(); + << apache::thrift::ThriftDebugString(request).c_str(); TBrokerOperationStatus response; try { @@ -142,8 +137,7 @@ Status BrokerWriter::write(const uint8_t* buf, size_t buf_len, size_t* written_l BrokerServiceConnection client(client_cache(_env), broker_addr, 10000, &status); if (!status.ok()) { LOG(WARNING) << "Create broker write client failed. 
" - << "broker=" << broker_addr - << ", status=" << status.get_error_msg(); + << "broker=" << broker_addr << ", status=" << status.get_error_msg(); return status; } @@ -166,12 +160,11 @@ Status BrokerWriter::write(const uint8_t* buf, size_t buf_len, size_t* written_l } VLOG_ROW << "debug: send broker pwrite response: " - << apache::thrift::ThriftDebugString(response).c_str(); + << apache::thrift::ThriftDebugString(response).c_str(); if (response.statusCode != TBrokerOperationStatusCode::OK) { std::stringstream ss; - ss << "Fail to write to broker, broker:" << broker_addr - << " msg:" << response.message; + ss << "Fail to write to broker, broker:" << broker_addr << " msg:" << response.message; LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } @@ -192,7 +185,7 @@ Status BrokerWriter::close() { request.__set_fd(_fd); VLOG_ROW << "debug: send broker close writer request: " - << apache::thrift::ThriftDebugString(request).c_str(); + << apache::thrift::ThriftDebugString(request).c_str(); const TNetworkAddress& broker_addr = _addresses[_addr_idx]; TBrokerOperationStatus response; @@ -203,7 +196,7 @@ Status BrokerWriter::close() { BrokerServiceConnection client(client_cache(_env), broker_addr, 20000, &status); if (!status.ok()) { LOG(WARNING) << "Create broker write client failed. broker=" << broker_addr - << ", status=" << status.get_error_msg(); + << ", status=" << status.get_error_msg(); return status; } @@ -211,30 +204,28 @@ Status BrokerWriter::close() { client->closeWriter(response, request); } catch (apache::thrift::transport::TTransportException& e) { LOG(WARNING) << "Close broker writer failed. broker=" << broker_addr - << ", status=" << status.get_error_msg(); + << ", status=" << status.get_error_msg(); status = client.reopen(); if (!status.ok()) { LOG(WARNING) << "Reopen broker writer failed. 
broker=" << broker_addr - << ", status=" << status.get_error_msg(); + << ", status=" << status.get_error_msg(); return status; } client->closeWriter(response, request); } } catch (apache::thrift::TException& e) { std::stringstream ss; - ss << "Close broker writer failed, broker:" << broker_addr - << " msg:" << e.what(); + ss << "Close broker writer failed, broker:" << broker_addr << " msg:" << e.what(); LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } VLOG_ROW << "debug: send broker close writer response: " - << apache::thrift::ThriftDebugString(response).c_str(); + << apache::thrift::ThriftDebugString(response).c_str(); if (response.statusCode != TBrokerOperationStatusCode::OK) { std::stringstream ss; - ss << "Close broker writer failed, broker:" << broker_addr - << " msg:" << response.message; + ss << "Close broker writer failed, broker:" << broker_addr << " msg:" << response.message; LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } diff --git a/be/src/exec/broker_writer.h b/be/src/exec/broker_writer.h index 1bb5b757fa3b43..5858530387e87d 100644 --- a/be/src/exec/broker_writer.h +++ b/be/src/exec/broker_writer.h @@ -20,13 +20,13 @@ #include -#include #include +#include #include "common/status.h" #include "exec/file_writer.h" -#include "gen_cpp/Types_types.h" #include "gen_cpp/PaloBrokerService_types.h" +#include "gen_cpp/Types_types.h" namespace doris { @@ -37,11 +37,9 @@ class TNetworkAddress; // Reader of broker file class BrokerWriter : public FileWriter { public: - BrokerWriter(ExecEnv* env, - const std::vector& broker_addresses, - const std::map& properties, - const std::string& path, - int64_t start_offset); + BrokerWriter(ExecEnv* env, const std::vector& broker_addresses, + const std::map& properties, const std::string& path, + int64_t start_offset); virtual ~BrokerWriter(); virtual Status open() override; diff --git a/be/src/exec/buffered_reader.cpp b/be/src/exec/buffered_reader.cpp index 
696067fb6db70c..96640b8a0703f3 100644 --- a/be/src/exec/buffered_reader.cpp +++ b/be/src/exec/buffered_reader.cpp @@ -17,8 +17,8 @@ #include "exec/buffered_reader.h" -#include #include +#include #include "common/logging.h" @@ -78,7 +78,8 @@ Status BufferedReader::readat(int64_t position, int64_t nbytes, int64_t* bytes_r } while (*bytes_read < nbytes) { int64_t len; - RETURN_IF_ERROR(_read_once(position + *bytes_read, nbytes - *bytes_read, &len, reinterpret_cast(out) + *bytes_read)); + RETURN_IF_ERROR(_read_once(position + *bytes_read, nbytes - *bytes_read, &len, + reinterpret_cast(out) + *bytes_read)); // EOF if (len <= 0) { break; @@ -88,7 +89,8 @@ Status BufferedReader::readat(int64_t position, int64_t nbytes, int64_t* bytes_r return Status::OK(); } -Status BufferedReader::_read_once(int64_t position, int64_t nbytes, int64_t* bytes_read, void* out) { +Status BufferedReader::_read_once(int64_t position, int64_t nbytes, int64_t* bytes_read, + void* out) { // requested bytes missed the local buffer if (position >= _buffer_limit || position < _buffer_offset) { // if requested length is larger than the capacity of buffer, do not @@ -102,7 +104,7 @@ Status BufferedReader::_read_once(int64_t position, int64_t nbytes, int64_t* byt *bytes_read = 0; return Status::OK(); } - } + } int64_t len = std::min(_buffer_limit - position, nbytes); int64_t off = position - _buffer_offset; memcpy(out, _buffer + off, len); @@ -149,4 +151,3 @@ bool BufferedReader::closed() { } } // namespace doris - diff --git a/be/src/exec/buffered_reader.h b/be/src/exec/buffered_reader.h index d7f2fbd7e675cb..c347ba4ce5a837 100644 --- a/be/src/exec/buffered_reader.h +++ b/be/src/exec/buffered_reader.h @@ -20,14 +20,14 @@ #include #include "common/status.h" -#include "olap/olap_define.h" #include "exec/file_reader.h" +#include "olap/olap_define.h" namespace doris { // Buffered Reader -// Add a cache layer between the caller and the file reader to reduce the -// times of calls to the read function to 
speed up. +// Add a cache layer between the caller and the file reader to reduce the +// times of calls to the read function to speed up. class BufferedReader : public FileReader { public: // If the reader need the file size, set it when construct FileReader. @@ -37,9 +37,10 @@ class BufferedReader : public FileReader { virtual Status open() override; - // Read + // Read virtual Status read(uint8_t* buf, size_t* buf_len, bool* eof) override; - virtual Status readat(int64_t position, int64_t nbytes, int64_t* bytes_read, void* out) override; + virtual Status readat(int64_t position, int64_t nbytes, int64_t* bytes_read, + void* out) override; virtual Status read_one_message(uint8_t** buf, size_t* length) override; virtual int64_t size() override; virtual Status seek(int64_t position) override; @@ -50,6 +51,7 @@ class BufferedReader : public FileReader { private: Status _fill(); Status _read_once(int64_t position, int64_t nbytes, int64_t* bytes_read, void* out); + private: FileReader* _reader; char* _buffer; @@ -59,4 +61,4 @@ class BufferedReader : public FileReader { int64_t _cur_offset; }; -} +} // namespace doris diff --git a/be/src/exec/cross_join_node.cpp b/be/src/exec/cross_join_node.cpp index c85877df15438a..177df55196af16 100644 --- a/be/src/exec/cross_join_node.cpp +++ b/be/src/exec/cross_join_node.cpp @@ -28,10 +28,8 @@ namespace doris { -CrossJoinNode::CrossJoinNode( - ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : BlockingJoinNode("CrossJoinNode", TJoinOp::CROSS_JOIN, pool, tnode, descs) { -} +CrossJoinNode::CrossJoinNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : BlockingJoinNode("CrossJoinNode", TJoinOp::CROSS_JOIN, pool, tnode, descs) {} Status CrossJoinNode::prepare(RuntimeState* state) { DCHECK(_join_op == TJoinOp::CROSS_JOIN); @@ -71,8 +69,7 @@ Status CrossJoinNode::construct_build_side(RuntimeState* state) { SCOPED_TIMER(_build_timer); _build_batches.add_row_batch(batch); VLOG_ROW << 
build_list_debug_string(); - COUNTER_SET(_build_row_counter, - static_cast(_build_batches.total_num_rows())); + COUNTER_SET(_build_row_counter, static_cast(_build_batches.total_num_rows())); if (eos) { break; @@ -111,7 +108,7 @@ Status CrossJoinNode::get_next(RuntimeState* state, RowBatch* output_batch, bool // Continue processing this row batch _num_rows_returned += - process_left_child_batch(output_batch, _left_batch.get(), max_added_rows); + process_left_child_batch(output_batch, _left_batch.get(), max_added_rows); COUNTER_SET(_rows_returned_counter, _num_rows_returned); if (reached_limit() || output_batch->is_full()) { @@ -153,7 +150,7 @@ std::string CrossJoinNode::build_list_debug_string() { // TODO: this can be replaced with a codegen'd function int CrossJoinNode::process_left_child_batch(RowBatch* output_batch, RowBatch* batch, - int max_added_rows) { + int max_added_rows) { int row_idx = output_batch->add_rows(max_added_rows); DCHECK(row_idx != RowBatch::INVALID_ROW_INDEX); uint8_t* output_row_mem = reinterpret_cast(output_batch->get_row(row_idx)); @@ -200,4 +197,4 @@ int CrossJoinNode::process_left_child_batch(RowBatch* output_batch, RowBatch* ba output_batch->commit_rows(rows_returned); return rows_returned; } -} +} // namespace doris diff --git a/be/src/exec/cross_join_node.h b/be/src/exec/cross_join_node.h index 797a86c63fd483..54cb4fb483f7fb 100644 --- a/be/src/exec/cross_join_node.h +++ b/be/src/exec/cross_join_node.h @@ -19,16 +19,16 @@ #define DORIS_BE_SRC_QUERY_EXEC_CROSS_JOIN_NODE_H #include -#include #include +#include #include -#include "exec/exec_node.h" #include "exec/blocking_join_node.h" +#include "exec/exec_node.h" #include "exec/row_batch_list.h" +#include "gen_cpp/PlanNodes_types.h" #include "runtime/descriptors.h" #include "runtime/mem_pool.h" -#include "gen_cpp/PlanNodes_types.h" namespace doris { @@ -73,6 +73,6 @@ class CrossJoinNode : public BlockingJoinNode { std::string build_list_debug_string(); }; -} +} // namespace doris #endif 
diff --git a/be/src/exec/csv_scan_node.cpp b/be/src/exec/csv_scan_node.cpp index 58baecc43f1f15..7b073ed3fb6fa5 100644 --- a/be/src/exec/csv_scan_node.cpp +++ b/be/src/exec/csv_scan_node.cpp @@ -17,71 +17,57 @@ #include "csv_scan_node.h" +#include + #include #include -#include - #include "exec/text_converter.hpp" #include "exprs/hll_hash_function.h" #include "gen_cpp/PlanNodes_types.h" -#include "runtime/runtime_state.h" +#include "olap/olap_common.h" +#include "olap/utils.h" #include "runtime/row_batch.h" +#include "runtime/runtime_state.h" #include "runtime/string_value.h" #include "runtime/tuple_row.h" -#include "util/file_utils.h" -#include "util/runtime_profile.h" #include "util/debug_util.h" +#include "util/file_utils.h" #include "util/hash_util.hpp" -#include "olap/olap_common.h" -#include "olap/utils.h" +#include "util/runtime_profile.h" namespace doris { class StringRef { public: - StringRef(char const* const begin, int const size) : - _begin(begin), _size(size) { - } + StringRef(char const* const begin, int const size) : _begin(begin), _size(size) {} ~StringRef() { // No need to delete _begin, because it only record the index in a std::string. // The c-string will be released along with the std::string object. 
} - int size() const { - return _size; - } - int length() const { - return _size; - } + int size() const { return _size; } + int length() const { return _size; } - char const* c_str() const { - return _begin; - } - char const* begin() const { - return _begin; - } + char const* c_str() const { return _begin; } + char const* begin() const { return _begin; } + + char const* end() const { return _begin + _size; } - char const* end() const { - return _begin + _size; - } private: char const* _begin; int _size; }; void split_line(const std::string& str, char delimiter, std::vector& result) { - enum State { - IN_DELIM = 1, - IN_TOKEN = 0 - }; + enum State { IN_DELIM = 1, IN_TOKEN = 0 }; // line-begin char and line-end char are considered to be 'delimeter' State state = IN_DELIM; - char const* p_begin = str.c_str(); // Begin of either a token or a delimiter + char const* p_begin = str.c_str(); // Begin of either a token or a delimiter for (string::const_iterator it = str.begin(); it != str.end(); ++it) { - State const new_state = (*it == delimiter? IN_DELIM : IN_TOKEN); + State const new_state = (*it == delimiter ? 
IN_DELIM : IN_TOKEN); if (new_state != state) { if (new_state == IN_DELIM) { result.push_back(StringRef(p_begin, &*it - p_begin)); @@ -98,24 +84,21 @@ void split_line(const std::string& str, char delimiter, std::vector& result.push_back(StringRef(p_begin, (&*str.end() - p_begin) - state)); } -CsvScanNode::CsvScanNode( - ObjectPool* pool, - const TPlanNode& tnode, - const DescriptorTbl& descs) : - ScanNode(pool, tnode, descs), - _tuple_id(tnode.csv_scan_node.tuple_id), - _file_paths(tnode.csv_scan_node.file_paths), - _column_separator(tnode.csv_scan_node.column_separator), - _column_type_map(tnode.csv_scan_node.column_type_mapping), - _column_function_map(tnode.csv_scan_node.column_function_mapping), - _columns(tnode.csv_scan_node.columns), - _unspecified_columns(tnode.csv_scan_node.unspecified_columns), - _default_values(tnode.csv_scan_node.default_values), - _is_init(false), - _tuple_desc(nullptr), - _tuple_pool(nullptr), - _text_converter(nullptr), - _hll_column_num(0) { +CsvScanNode::CsvScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ScanNode(pool, tnode, descs), + _tuple_id(tnode.csv_scan_node.tuple_id), + _file_paths(tnode.csv_scan_node.file_paths), + _column_separator(tnode.csv_scan_node.column_separator), + _column_type_map(tnode.csv_scan_node.column_type_mapping), + _column_function_map(tnode.csv_scan_node.column_function_mapping), + _columns(tnode.csv_scan_node.columns), + _unspecified_columns(tnode.csv_scan_node.unspecified_columns), + _default_values(tnode.csv_scan_node.default_values), + _is_init(false), + _tuple_desc(nullptr), + _tuple_pool(nullptr), + _text_converter(nullptr), + _hll_column_num(0) { // do nothing LOG(INFO) << "csv scan node: " << apache::thrift::ThriftDebugString(tnode).c_str(); } @@ -178,10 +161,8 @@ Status CsvScanNode::prepare(RuntimeState* state) { } // add 'unspecified_columns' which have default values - if (_unspecified_columns.end() != std::find( - _unspecified_columns.begin(), - 
_unspecified_columns.end(), - column_name)) { + if (_unspecified_columns.end() != + std::find(_unspecified_columns.begin(), _unspecified_columns.end(), column_name)) { _column_slot_map[column_name] = slot; } } @@ -204,17 +185,17 @@ Status CsvScanNode::prepare(RuntimeState* state) { } // new one scanner - _csv_scanner.reset(new(std::nothrow) CsvScanner(_file_paths)); + _csv_scanner.reset(new (std::nothrow) CsvScanner(_file_paths)); if (_csv_scanner.get() == nullptr) { return Status::InternalError("new a csv scanner failed."); } - _tuple_pool.reset(new(std::nothrow) MemPool(state->instance_mem_tracker().get())); + _tuple_pool.reset(new (std::nothrow) MemPool(state->instance_mem_tracker().get())); if (_tuple_pool.get() == nullptr) { return Status::InternalError("new a mem pool failed."); } - _text_converter.reset(new(std::nothrow) TextConverter('\\')); + _text_converter.reset(new (std::nothrow) TextConverter('\\')); if (_text_converter.get() == nullptr) { return Status::InternalError("new a text convertor failed."); } @@ -322,7 +303,7 @@ Status CsvScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos state->update_num_rows_load_total(_num_rows_load_total); state->update_num_rows_load_filtered(_num_rows_load_filtered); VLOG_ROW << "normal_row_number: " << state->num_rows_load_success() - << "; error_row_number: " << state->num_rows_load_filtered() << std::endl; + << "; error_row_number: " << state->num_rows_load_filtered() << std::endl; row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), false); @@ -354,7 +335,7 @@ Status CsvScanNode::close(RuntimeState* state) { // Summary normal line and error line number info std::stringstream summary_msg; summary_msg << "error line: " << _num_rows_load_filtered - << "; normal line: " << state->num_rows_load_success(); + << "; normal line: " << state->num_rows_load_success(); state->append_error_msg_to_file("", summary_msg.str(), true); } @@ -375,9 +356,8 @@ Status CsvScanNode::set_scan_ranges(const 
std::vector& scan_ra return Status::OK(); } -void CsvScanNode::fill_fix_length_string( - const char* value, const int value_length, MemPool* pool, - char** new_value_p, const int new_value_length) { +void CsvScanNode::fill_fix_length_string(const char* value, const int value_length, MemPool* pool, + char** new_value_p, const int new_value_length) { if (new_value_length != 0 && value_length < new_value_length) { DCHECK(pool != nullptr); *new_value_p = reinterpret_cast(pool->allocate(new_value_length)); @@ -388,10 +368,10 @@ void CsvScanNode::fill_fix_length_string( (*new_value_p)[i] = '\0'; } VLOG_ROW << "Fill fix length string. " - << "value: [" << std::string(value, value_length) << "]; " - << "value_length: " << value_length << "; " - << "*new_value_p: [" << *new_value_p << "]; " - << "new value length: " << new_value_length << std::endl; + << "value: [" << std::string(value, value_length) << "]; " + << "value_length: " << value_length << "; " + << "*new_value_p: [" << *new_value_p << "]; " + << "new value length: " << new_value_length << std::endl; } } @@ -399,15 +379,14 @@ void CsvScanNode::fill_fix_length_string( // .123 1.23 123. -1.23 // ATTN: The decimal point and (for negative numbers) the "-" sign are not counted. // like '.123', it will be regarded as '0.123', but it match decimal(3, 3) -bool CsvScanNode::check_decimal_input( - const char* value, const int value_length, - const int precision, const int scale, - std::stringstream* error_msg) { +bool CsvScanNode::check_decimal_input(const char* value, const int value_length, + const int precision, const int scale, + std::stringstream* error_msg) { if (value_length > (precision + 2)) { (*error_msg) << "the length of decimal value is overflow. 
" - << "precision in schema: (" << precision << ", " << scale << "); " - << "value: [" << std::string(value, value_length) << "]; " - << "str actual length: " << value_length << ";"; + << "precision in schema: (" << precision << ", " << scale << "); " + << "value: [" << std::string(value, value_length) << "]; " + << "str actual length: " << value_length << ";"; return false; } @@ -435,7 +414,7 @@ bool CsvScanNode::check_decimal_input( int value_int_len = 0; int value_frac_len = 0; value_int_len = point_index - begin_index; - value_frac_len = end_index- point_index; + value_frac_len = end_index - point_index; if (point_index == -1) { // an int value: like 123 @@ -443,18 +422,16 @@ bool CsvScanNode::check_decimal_input( value_frac_len = 0; } else { value_int_len = point_index - begin_index; - value_frac_len = end_index- point_index; + value_frac_len = end_index - point_index; } if (value_int_len > (precision - scale)) { - (*error_msg) << "the int part length longer than schema precision [" - << precision << "]. " - << "value [" << std::string(value, value_length) << "]. "; + (*error_msg) << "the int part length longer than schema precision [" << precision << "]. " + << "value [" << std::string(value, value_length) << "]. "; return false; } else if (value_frac_len > scale) { - (*error_msg) << "the frac part length longer than schema scale [" - << scale << "]. " - << "value [" << std::string(value, value_length) << "]. "; + (*error_msg) << "the frac part length longer than schema scale [" << scale << "]. " + << "value [" << std::string(value, value_length) << "]. "; return false; } return true; @@ -465,17 +442,15 @@ static bool is_null(const char* value, int value_length) { } // Writes a slot in _tuple from an value containing text data. 
-bool CsvScanNode::check_and_write_text_slot( - const std::string& column_name, const TColumnType& column_type, - const char* value, int value_length, - const SlotDescriptor* slot, RuntimeState* state, - std::stringstream* error_msg) { - +bool CsvScanNode::check_and_write_text_slot(const std::string& column_name, + const TColumnType& column_type, const char* value, + int value_length, const SlotDescriptor* slot, + RuntimeState* state, std::stringstream* error_msg) { if (value_length == 0 && !slot->type().is_string_type()) { (*error_msg) << "the length of input should not be 0. " - << "column_name: " << column_name << "; " - << "type: " << slot->type() << "; " - << "input_str: [" << std::string(value, value_length) << "]."; + << "column_name: " << column_name << "; " + << "type: " << slot->type() << "; " + << "input_str: [" << std::string(value, value_length) << "]."; return false; } @@ -485,16 +460,16 @@ bool CsvScanNode::check_and_write_text_slot( return true; } else { (*error_msg) << "value cannot be null. column name: " << column_name - << "; type: " << slot->type() << "; input_str: [" - << std::string(value, value_length) << "]."; + << "; type: " << slot->type() << "; input_str: [" + << std::string(value, value_length) << "]."; return false; } } if (!slot->is_nullable() && is_null(value, value_length)) { (*error_msg) << "value cannot be null. column name: " << column_name - << "; type: " << slot->type() << "; input_str: [" - << std::string(value, value_length) << "]."; + << "; type: " << slot->type() << "; input_str: [" + << std::string(value, value_length) << "]."; return false; } @@ -506,17 +481,16 @@ bool CsvScanNode::check_and_write_text_slot( int char_len = column_type.len; if (slot->type().type != TYPE_HLL && value_length > char_len) { (*error_msg) << "the length of input is too long than schema. 
" - << "column_name: " << column_name << "; " - << "input_str: [" << std::string(value, value_length) << "] " - << "type: " << slot->type() << "; " - << "schema length: " << char_len << "; " - << "actual length: " << value_length << "; "; + << "column_name: " << column_name << "; " + << "input_str: [" << std::string(value, value_length) << "] " + << "type: " << slot->type() << "; " + << "schema length: " << char_len << "; " + << "actual length: " << value_length << "; "; return false; } if (slot->type().type == TYPE_CHAR && value_length < char_len) { - fill_fix_length_string( - value, value_length, _tuple_pool.get(), - &value_to_convert, char_len); + fill_fix_length_string(value, value_length, _tuple_pool.get(), &value_to_convert, + char_len); value_to_convert_length = char_len; } } else if (slot->type().is_decimal_type()) { @@ -528,13 +502,11 @@ bool CsvScanNode::check_and_write_text_slot( } } - - if (!_text_converter->write_slot(slot, _tuple, value_to_convert, value_to_convert_length, - true, false, _tuple_pool.get())) { - (*error_msg) << "convert csv string to " - << slot->type() << " failed. " - << "column_name: " << column_name << "; " - << "input_str: [" << std::string(value, value_length) << "]; "; + if (!_text_converter->write_slot(slot, _tuple, value_to_convert, value_to_convert_length, true, + false, _tuple_pool.get())) { + (*error_msg) << "convert csv string to " << slot->type() << " failed. " + << "column_name: " << column_name << "; " + << "input_str: [" << std::string(value, value_length) << "]; "; return false; } @@ -554,14 +526,14 @@ bool CsvScanNode::split_check_fill(const std::string& line, RuntimeState* state) if (_hll_column_num == 0 && fields.size() < _columns.size()) { error_msg << "actual column number is less than schema column number. 
" - << "actual number: " << fields.size() << " ," - << "schema number: " << _columns.size() << "; "; + << "actual number: " << fields.size() << " ," + << "schema number: " << _columns.size() << "; "; _runtime_state->append_error_msg_to_file(line, error_msg.str()); return false; } else if (_hll_column_num == 0 && fields.size() > _columns.size()) { error_msg << "actual column number is more than schema column number. " - << "actual number: " << fields.size() << " ," - << "schema number: " << _columns.size() << "; "; + << "actual number: " << fields.size() << " ," + << "schema number: " << _columns.size() << "; "; _runtime_state->append_error_msg_to_file(line, error_msg.str()); return false; } @@ -583,11 +555,8 @@ bool CsvScanNode::split_check_fill(const std::string& line, RuntimeState* state) } const TColumnType& column_type = _column_type_vec[i]; - bool flag = check_and_write_text_slot( - column_name, column_type, - fields[i].c_str(), - fields[i].length(), - slot, state, &error_msg); + bool flag = check_and_write_text_slot(column_name, column_type, fields[i].c_str(), + fields[i].length(), slot, state, &error_msg); if (flag == false) { _runtime_state->append_error_msg_to_file(line, error_msg.str()); @@ -611,11 +580,8 @@ bool CsvScanNode::split_check_fill(const std::string& line, RuntimeState* state) } const TColumnType& column_type = _unspecified_colomn_type_vec[i]; - bool flag = check_and_write_text_slot( - column_name, column_type, - _default_values[i].c_str(), - _default_values[i].length(), - slot, state, &error_msg); + bool flag = check_and_write_text_slot(column_name, column_type, _default_values[i].c_str(), + _default_values[i].length(), slot, state, &error_msg); if (flag == false) { _runtime_state->append_error_msg_to_file(line, error_msg.str()); @@ -624,8 +590,7 @@ bool CsvScanNode::split_check_fill(const std::string& line, RuntimeState* state) } for (std::map::iterator iter = _column_function_map.begin(); - iter != _column_function_map.end(); - iter++) { + 
iter != _column_function_map.end(); iter++) { TMiniLoadEtlFunction& function = iter->second; const std::string& column_name = iter->first; const SlotDescriptor* slot = _column_slot_map[column_name]; @@ -634,11 +599,8 @@ bool CsvScanNode::split_check_fill(const std::string& line, RuntimeState* state) const char* src = fields[function.param_column_index].c_str(); int src_column_len = fields[function.param_column_index].length(); hll_hash(src, src_column_len, &column_string); - bool flag = check_and_write_text_slot( - column_name, column_type, - column_string.c_str(), - column_string.length(), - slot, state, &error_msg); + bool flag = check_and_write_text_slot(column_name, column_type, column_string.c_str(), + column_string.length(), slot, state, &error_msg); if (flag == false) { _runtime_state->append_error_msg_to_file(line, error_msg.str()); return false; @@ -649,9 +611,8 @@ bool CsvScanNode::split_check_fill(const std::string& line, RuntimeState* state) } bool CsvScanNode::check_hll_function(TMiniLoadEtlFunction& function) { - if (function.function_name.empty() - || function.function_name != "hll_hash" - || function.param_column_index < 0) { + if (function.function_name.empty() || function.function_name != "hll_hash" || + function.param_column_index < 0) { return false; } return true; @@ -676,4 +637,3 @@ void CsvScanNode::hll_hash(const char* src, int len, std::string* result) { } } // end namespace doris - diff --git a/be/src/exec/csv_scan_node.h b/be/src/exec/csv_scan_node.h index d320df6a0a7547..5f830e83023276 100644 --- a/be/src/exec/csv_scan_node.h +++ b/be/src/exec/csv_scan_node.h @@ -18,11 +18,10 @@ #ifndef DORIS_BE_SRC_QUERY_EXEC_CSV_SCAN_NODE_H #define DORIS_BE_SRC_QUERY_EXEC_CSV_SCAN_NODE_H +#include #include #include -#include - #include "common/config.h" #include "exec/csv_scanner.h" #include "exec/scan_node.h" @@ -64,25 +63,20 @@ class CsvScanNode : public ScanNode { virtual void debug_string(int indentation_level, std::stringstream* out) const; 
private: - bool check_and_write_text_slot( - const std::string& column_name, const TColumnType& column_type, - const char* value, int value_length, - const SlotDescriptor* slot, RuntimeState* state, - std::stringstream* error_msg); + bool check_and_write_text_slot(const std::string& column_name, const TColumnType& column_type, + const char* value, int value_length, const SlotDescriptor* slot, + RuntimeState* state, std::stringstream* error_msg); // split one line into fields, check every fields, fill every field into tuple bool split_check_fill(const std::string& line, RuntimeState* state); - void fill_fix_length_string( - const char* value, int value_length, MemPool* pool, - char** new_value, int new_value_length); - bool check_decimal_input( - const char* value, int value_length, - int precision, int scale, - std::stringstream* error_msg); - - void hll_hash(const char* src, int len, std::string* result); - + void fill_fix_length_string(const char* value, int value_length, MemPool* pool, + char** new_value, int new_value_length); + bool check_decimal_input(const char* value, int value_length, int precision, int scale, + std::stringstream* error_msg); + + void hll_hash(const char* src, int len, std::string* result); + bool check_hll_function(TMiniLoadEtlFunction& function); // Tuple id resolved in prepare() to set _tuple_desc; @@ -139,4 +133,3 @@ class CsvScanNode : public ScanNode { } // end namespace doris #endif // DORIS_BE_SRC_QUERY_EXEC_CSV_SCAN_NODE_H - diff --git a/be/src/exec/csv_scanner.cpp b/be/src/exec/csv_scanner.cpp index e21bef76bab5fd..5841ecb64b8af2 100644 --- a/be/src/exec/csv_scanner.cpp +++ b/be/src/exec/csv_scanner.cpp @@ -20,73 +20,72 @@ #include namespace doris { - CsvScanner::CsvScanner(const std::vector& csv_file_paths) : - _is_open(false), - _file_paths(csv_file_paths), - _current_file(nullptr), - _current_file_idx(0){ - // do nothing - } +CsvScanner::CsvScanner(const std::vector& csv_file_paths) + : _is_open(false), + 
_file_paths(csv_file_paths), + _current_file(nullptr), + _current_file_idx(0) { + // do nothing +} - CsvScanner::~CsvScanner() { - // close file - if (_current_file != nullptr) { - if (_current_file->is_open()) { - _current_file->close(); - } - delete _current_file; - _current_file = nullptr; +CsvScanner::~CsvScanner() { + // close file + if (_current_file != nullptr) { + if (_current_file->is_open()) { + _current_file->close(); } + delete _current_file; + _current_file = nullptr; } +} - Status CsvScanner::open() { - VLOG(1) << "CsvScanner::Connect"; +Status CsvScanner::open() { + VLOG(1) << "CsvScanner::Connect"; - if (_is_open) { - LOG(INFO) << "this scanner already opened"; - return Status::OK(); - } + if (_is_open) { + LOG(INFO) << "this scanner already opened"; + return Status::OK(); + } - if (_file_paths.empty()) { - return Status::InternalError("no file specified."); - } + if (_file_paths.empty()) { + return Status::InternalError("no file specified."); + } + + _is_open = true; + return Status::OK(); +} - _is_open = true; +// TODO(lingbin): read more than one line at a time to reduce IO comsumption +Status CsvScanner::get_next_row(std::string* line_str, bool* eos) { + if (_current_file == nullptr && _current_file_idx == _file_paths.size()) { + *eos = true; return Status::OK(); } - // TODO(lingbin): read more than one line at a time to reduce IO comsumption - Status CsvScanner::get_next_row(std::string* line_str, bool* eos) { - if (_current_file == nullptr && _current_file_idx == _file_paths.size()) { - *eos = true; - return Status::OK(); - } - - if (_current_file == nullptr && _current_file_idx < _file_paths.size()) { - std::string& file_path = _file_paths[_current_file_idx]; - LOG(INFO) << "open csv file: [" << _current_file_idx << "] " << file_path; + if (_current_file == nullptr && _current_file_idx < _file_paths.size()) { + std::string& file_path = _file_paths[_current_file_idx]; + LOG(INFO) << "open csv file: [" << _current_file_idx << "] " << file_path; 
- _current_file = new std::ifstream(file_path, std::ifstream::in); - if (!_current_file->is_open()) { - return Status::InternalError("Fail to read csv file: " + file_path); - } - ++_current_file_idx; + _current_file = new std::ifstream(file_path, std::ifstream::in); + if (!_current_file->is_open()) { + return Status::InternalError("Fail to read csv file: " + file_path); } + ++_current_file_idx; + } - getline(*_current_file, *line_str); - if (_current_file->eof()) { - _current_file->close(); - delete _current_file; - _current_file = nullptr; + getline(*_current_file, *line_str); + if (_current_file->eof()) { + _current_file->close(); + delete _current_file; + _current_file = nullptr; - if (_current_file_idx == _file_paths.size()) { - *eos = true; - return Status::OK(); - } + if (_current_file_idx == _file_paths.size()) { + *eos = true; + return Status::OK(); } - - *eos = false; - return Status::OK(); } -} // end namespace doris + *eos = false; + return Status::OK(); +} +} // end namespace doris diff --git a/be/src/exec/csv_scanner.h b/be/src/exec/csv_scanner.h index aaf5b4b4716de2..3de2fefed61aec 100644 --- a/be/src/exec/csv_scanner.h +++ b/be/src/exec/csv_scanner.h @@ -33,6 +33,7 @@ class CsvScanner { Status open(); Status get_next_row(std::string* line_str, bool* eos); + private: bool _is_open; std::vector _file_paths; @@ -43,4 +44,3 @@ class CsvScanner { } // end namespace doris #endif // DORIS_BE_SRC_QUERY_EXEC_CSV_SCANNER_H - diff --git a/be/src/exec/data_sink.cpp b/be/src/exec/data_sink.cpp index 50a27cbb9ef15a..47bea39bb23df6 100644 --- a/be/src/exec/data_sink.cpp +++ b/be/src/exec/data_sink.cpp @@ -17,33 +17,31 @@ #include "exec/data_sink.h" -#include #include #include +#include #include "common/logging.h" #include "exec/exec_node.h" #include "exec/tablet_sink.h" #include "exprs/expr.h" #include "gen_cpp/PaloInternalService_types.h" +#include "runtime/data_spliter.h" #include "runtime/data_stream_sender.h" -#include "runtime/result_sink.h" +#include 
"runtime/export_sink.h" #include "runtime/memory_scratch_sink.h" #include "runtime/mysql_table_sink.h" -#include "runtime/data_spliter.h" -#include "runtime/export_sink.h" +#include "runtime/result_sink.h" #include "runtime/runtime_state.h" #include "util/logging.h" namespace doris { -Status DataSink::create_data_sink( - ObjectPool* pool, - const TDataSink& thrift_sink, - const std::vector& output_exprs, - const TPlanFragmentExecParams& params, - const RowDescriptor& row_desc, - boost::scoped_ptr* sink) { +Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink, + const std::vector& output_exprs, + const TPlanFragmentExecParams& params, + const RowDescriptor& row_desc, + boost::scoped_ptr* sink) { DataSink* tmp_sink = NULL; switch (thrift_sink.type) { @@ -51,13 +49,14 @@ Status DataSink::create_data_sink( if (!thrift_sink.__isset.stream_sink) { return Status::InternalError("Missing data stream sink."); } - bool send_query_statistics_with_every_batch = params.__isset.send_query_statistics_with_every_batch ? - params.send_query_statistics_with_every_batch : false; + bool send_query_statistics_with_every_batch = + params.__isset.send_query_statistics_with_every_batch + ? 
params.send_query_statistics_with_every_batch + : false; // TODO: figure out good buffer size based on size of output row - tmp_sink = new DataStreamSender( - pool, params.sender_id, row_desc, - thrift_sink.stream_sink, params.destinations, 16 * 1024, - send_query_statistics_with_every_batch); + tmp_sink = new DataStreamSender(pool, params.sender_id, row_desc, thrift_sink.stream_sink, + params.destinations, 16 * 1024, + send_query_statistics_with_every_batch); // RETURN_IF_ERROR(sender->prepare(state->obj_pool(), thrift_sink.stream_sink)); sink->reset(tmp_sink); break; @@ -86,12 +85,12 @@ Status DataSink::create_data_sink( } // TODO: figure out good buffer size based on size of output row - MysqlTableSink* mysql_tbl_sink = new MysqlTableSink( - pool, row_desc, output_exprs); + MysqlTableSink* mysql_tbl_sink = new MysqlTableSink(pool, row_desc, output_exprs); sink->reset(mysql_tbl_sink); break; #else - return Status::InternalError("Don't support MySQL table, you should rebuild Doris with WITH_MYSQL option ON"); + return Status::InternalError( + "Don't support MySQL table, you should rebuild Doris with WITH_MYSQL option ON"); #endif } @@ -102,9 +101,7 @@ Status DataSink::create_data_sink( // TODO: figure out good buffer size based on size of output row std::unique_ptr data_spliter(new DataSpliter(row_desc)); - RETURN_IF_ERROR(DataSpliter::from_thrift(pool, - thrift_sink.split_sink, - data_spliter.get())); + RETURN_IF_ERROR(DataSpliter::from_thrift(pool, thrift_sink.split_sink, data_spliter.get())); sink->reset(data_spliter.release()); break; } @@ -129,7 +126,7 @@ Status DataSink::create_data_sink( default: std::stringstream error_msg; std::map::const_iterator i = - _TDataSinkType_VALUES_TO_NAMES.find(thrift_sink.type); + _TDataSinkType_VALUES_TO_NAMES.find(thrift_sink.type); const char* str = "Unknown data sink type "; if (i != _TDataSinkType_VALUES_TO_NAMES.end()) { @@ -152,9 +149,10 @@ Status DataSink::init(const TDataSink& thrift_sink) { } Status 
DataSink::prepare(RuntimeState* state) { - _expr_mem_tracker = MemTracker::CreateTracker(-1, std::string("DataSink:") + std::to_string(state->load_job_id()), - state->instance_mem_tracker()); + _expr_mem_tracker = MemTracker::CreateTracker( + -1, std::string("DataSink:") + std::to_string(state->load_job_id()), + state->instance_mem_tracker()); return Status::OK(); } -} // namespace doris +} // namespace doris diff --git a/be/src/exec/data_sink.h b/be/src/exec/data_sink.h index 1c26e2d4fa3470..daf06ed2d46ec8 100644 --- a/be/src/exec/data_sink.h +++ b/be/src/exec/data_sink.h @@ -64,17 +64,17 @@ class DataSink { // be ignored. virtual Status close(RuntimeState* state, Status exec_status) { _expr_mem_tracker.reset(); - _closed = true; + _closed = true; return Status::OK(); } // Creates a new data sink from thrift_sink. A pointer to the // new sink is written to *sink, and is owned by the caller. - static Status create_data_sink( - ObjectPool* pool, - const TDataSink& thrift_sink, const std::vector& output_exprs, - const TPlanFragmentExecParams& params, - const RowDescriptor& row_desc, boost::scoped_ptr* sink); + static Status create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink, + const std::vector& output_exprs, + const TPlanFragmentExecParams& params, + const RowDescriptor& row_desc, + boost::scoped_ptr* sink); // Returns the runtime profile for the sink. virtual RuntimeProfile* profile() = 0; @@ -82,6 +82,7 @@ class DataSink { virtual void set_query_statistics(std::shared_ptr statistics) { _query_statistics = statistics; } + protected: // Set to true after close() has been called. subclasses should check and set this in // close(). 
@@ -92,5 +93,5 @@ class DataSink { std::shared_ptr _query_statistics; }; -} // namespace doris +} // namespace doris #endif diff --git a/be/src/exec/decompressor.cpp b/be/src/exec/decompressor.cpp index c64a0174a7da3a..ea6cc0eca1a2c7 100644 --- a/be/src/exec/decompressor.cpp +++ b/be/src/exec/decompressor.cpp @@ -19,9 +19,8 @@ namespace doris { -Status Decompressor::create_decompressor(CompressType type, - Decompressor** decompressor) { - switch(type) { +Status Decompressor::create_decompressor(CompressType type, Decompressor** decompressor) { + switch (type) { case CompressType::UNCOMPRESSED: *decompressor = nullptr; break; @@ -52,25 +51,23 @@ Status Decompressor::create_decompressor(CompressType type, if (*decompressor != nullptr) { st = (*decompressor)->init(); } - + return st; } -Decompressor::~Decompressor() { -} +Decompressor::~Decompressor() {} std::string Decompressor::debug_info() { return "Decompressor"; } // Gzip -GzipDecompressor::GzipDecompressor(bool is_deflate): - Decompressor(_is_deflate ? CompressType::DEFLATE : CompressType::GZIP), - _is_deflate(is_deflate) { -} +GzipDecompressor::GzipDecompressor(bool is_deflate) + : Decompressor(_is_deflate ? CompressType::DEFLATE : CompressType::GZIP), + _is_deflate(is_deflate) {} GzipDecompressor::~GzipDecompressor() { - (void) inflateEnd(&_z_strm); + (void)inflateEnd(&_z_strm); } Status GzipDecompressor::init() { @@ -90,11 +87,10 @@ Status GzipDecompressor::init() { return Status::OK(); } -Status GzipDecompressor::decompress( - uint8_t* input, size_t input_len, size_t* input_bytes_read, - uint8_t* output, size_t output_max_len, - size_t* decompressed_len, bool* stream_end, - size_t* more_input_bytes, size_t* more_output_bytes) { +Status GzipDecompressor::decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, + uint8_t* output, size_t output_max_len, + size_t* decompressed_len, bool* stream_end, + size_t* more_input_bytes, size_t* more_output_bytes) { // 1. 
set input and output _z_strm.next_in = input; _z_strm.avail_in = input_len; @@ -109,14 +105,13 @@ Status GzipDecompressor::decompress( // Provide more output starting at next_out and update next_out and avail_out // accordingly. // inflate() returns Z_OK if some progress has been made (more input processed - // or more output produced) + // or more output produced) int ret = inflate(&_z_strm, Z_NO_FLUSH); *input_bytes_read = input_len - _z_strm.avail_in; *decompressed_len = output_max_len - _z_strm.avail_out; - VLOG(10) << "gzip dec ret: " << ret - << " input_bytes_read: " << *input_bytes_read + VLOG(10) << "gzip dec ret: " << ret << " input_bytes_read: " << *input_bytes_read << " decompressed_len: " << *decompressed_len; if (ret == Z_BUF_ERROR) { @@ -150,7 +145,8 @@ Status GzipDecompressor::decompress( std::string GzipDecompressor::debug_info() { std::stringstream ss; - ss << "GzipDecompressor." << " is_deflate: " << _is_deflate; + ss << "GzipDecompressor." + << " is_deflate: " << _is_deflate; return ss.str(); } @@ -171,12 +167,10 @@ Status Bzip2Decompressor::init() { return Status::OK(); } -Status Bzip2Decompressor::decompress( - uint8_t* input, size_t input_len, size_t* input_bytes_read, - uint8_t* output, size_t output_max_len, - size_t* decompressed_len, bool* stream_end, - size_t* more_input_bytes, size_t* more_output_bytes) { - +Status Bzip2Decompressor::decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, + uint8_t* output, size_t output_max_len, + size_t* decompressed_len, bool* stream_end, + size_t* more_input_bytes, size_t* more_output_bytes) { // 1. 
set input and output _bz_strm.next_in = const_cast(reinterpret_cast(input)); _bz_strm.avail_in = input_len; @@ -252,17 +246,15 @@ Status Lz4FrameDecompressor::init() { return Status::OK(); } -Status Lz4FrameDecompressor::decompress( - uint8_t* input, size_t input_len, size_t* input_bytes_read, - uint8_t* output, size_t output_max_len, - size_t* decompressed_len, bool* stream_end, - size_t* more_input_bytes, size_t* more_output_bytes) { - +Status Lz4FrameDecompressor::decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, + uint8_t* output, size_t output_max_len, + size_t* decompressed_len, bool* stream_end, + size_t* more_input_bytes, size_t* more_output_bytes) { uint8_t* src = input; size_t src_size = input_len; size_t ret = 1; *input_bytes_read = 0; - + if (_expect_dec_buf_size == -1) { // init expected decompress buf size, and check if output_max_len is large enough // ATTN: _expect_dec_buf_size is uninit, which means this is the first time to call @@ -277,7 +269,7 @@ Status Lz4FrameDecompressor::decompress( } LZ4F_frameInfo_t info; - ret = LZ4F_getFrameInfo(_dctx, &info, (void*) src, &src_size); + ret = LZ4F_getFrameInfo(_dctx, &info, (void*)src, &src_size); if (LZ4F_isError(ret)) { std::stringstream ss; ss << "LZ4F_getFrameInfo error: " << std::string(LZ4F_getErrorName(ret)); @@ -298,11 +290,11 @@ Status Lz4FrameDecompressor::decompress( src_size = input_len - src_size; LOG(INFO) << "lz4 block size: " << _expect_dec_buf_size; - } - + } + // decompress size_t output_len = output_max_len; - ret = LZ4F_decompress(_dctx, (void*) output, &output_len, (void*) src, &src_size, + ret = LZ4F_decompress(_dctx, (void*)output, &output_len, (void*)src, &src_size, /* LZ4F_decompressOptions_t */ NULL); if (LZ4F_isError(ret)) { std::stringstream ss; @@ -332,15 +324,19 @@ std::string Lz4FrameDecompressor::debug_info() { size_t Lz4FrameDecompressor::get_block_size(const LZ4F_frameInfo_t* info) { switch (info->blockSizeID) { - case LZ4F_default: - case 
LZ4F_max64KB: return 1 << 16; - case LZ4F_max256KB: return 1 << 18; - case LZ4F_max1MB: return 1 << 20; - case LZ4F_max4MB: return 1 << 22; - default: - // error - return -1; + case LZ4F_default: + case LZ4F_max64KB: + return 1 << 16; + case LZ4F_max256KB: + return 1 << 18; + case LZ4F_max1MB: + return 1 << 20; + case LZ4F_max4MB: + return 1 << 22; + default: + // error + return -1; } } -} // namespace +} // namespace doris diff --git a/be/src/exec/decompressor.h b/be/src/exec/decompressor.h index 9f81c187fa792d..08228da3be24a1 100644 --- a/be/src/exec/decompressor.h +++ b/be/src/exec/decompressor.h @@ -17,27 +17,20 @@ #pragma once -#include #include #include +#include #ifdef DORIS_WITH_LZO -#include #include +#include #endif #include "common/status.h" namespace doris { -enum CompressType { - UNCOMPRESSED, - GZIP, - DEFLATE, - BZIP2, - LZ4FRAME, - LZOP -}; +enum CompressType { UNCOMPRESSED, GZIP, DEFLATE, BZIP2, LZ4FRAME, LZOP }; class Decompressor { public: @@ -55,16 +48,14 @@ class Decompressor { // more_input_bytes(out): decompressor need more bytes to consume // more_output_bytes(out): decompressor need more space to save decompressed data // - // input and output buf should be allocated and released outside - virtual Status decompress( - uint8_t* input, size_t input_len, size_t* input_bytes_read, - uint8_t* output, size_t output_max_len, - size_t* decompressed_len, bool* stream_end, - size_t* more_input_bytes, size_t* more_output_bytes) = 0; + // input and output buf should be allocated and released outside + virtual Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, + uint8_t* output, size_t output_max_len, size_t* decompressed_len, + bool* stream_end, size_t* more_input_bytes, + size_t* more_output_bytes) = 0; public: - static Status create_decompressor(CompressType type, - Decompressor** decompressor); + static Status create_decompressor(CompressType type, Decompressor** decompressor); virtual std::string debug_info(); @@ -73,7 
+64,7 @@ class Decompressor { protected: virtual Status init() = 0; - Decompressor(CompressType ctype):_ctype(ctype) {} + Decompressor(CompressType ctype) : _ctype(ctype) {} CompressType _ctype; }; @@ -82,11 +73,10 @@ class GzipDecompressor : public Decompressor { public: virtual ~GzipDecompressor(); - virtual Status decompress( - uint8_t* input, size_t input_len, size_t* input_bytes_read, - uint8_t* output, size_t output_max_len, - size_t* decompressed_len, bool* stream_end, - size_t* more_input_bytes, size_t* more_output_bytes) override; + virtual Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, + uint8_t* output, size_t output_max_len, size_t* decompressed_len, + bool* stream_end, size_t* more_input_bytes, + size_t* more_output_bytes) override; virtual std::string debug_info() override; @@ -100,20 +90,19 @@ class GzipDecompressor : public Decompressor { z_stream _z_strm; - // These are magic numbers from zlib.h. Not clear why they are not defined there. - const static int WINDOW_BITS = 15; // Maximum window size - const static int DETECT_CODEC = 32; // Determine if this is libz or gzip from header. + // These are magic numbers from zlib.h. Not clear why they are not defined there. + const static int WINDOW_BITS = 15; // Maximum window size + const static int DETECT_CODEC = 32; // Determine if this is libz or gzip from header. 
}; class Bzip2Decompressor : public Decompressor { public: virtual ~Bzip2Decompressor(); - virtual Status decompress( - uint8_t* input, size_t input_len, size_t* input_bytes_read, - uint8_t* output, size_t output_max_len, - size_t* decompressed_len, bool* stream_end, - size_t* more_input_bytes, size_t* more_output_bytes) override; + virtual Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, + uint8_t* output, size_t output_max_len, size_t* decompressed_len, + bool* stream_end, size_t* more_input_bytes, + size_t* more_output_bytes) override; virtual std::string debug_info() override; @@ -130,11 +119,10 @@ class Lz4FrameDecompressor : public Decompressor { public: virtual ~Lz4FrameDecompressor(); - virtual Status decompress( - uint8_t* input, size_t input_len, size_t* input_bytes_read, - uint8_t* output, size_t output_max_len, - size_t* decompressed_len, bool* stream_end, - size_t* more_input_bytes, size_t* more_output_bytes) override; + virtual Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, + uint8_t* output, size_t output_max_len, size_t* decompressed_len, + bool* stream_end, size_t* more_input_bytes, + size_t* more_output_bytes) override; virtual std::string debug_info() override; @@ -156,28 +144,21 @@ class LzopDecompressor : public Decompressor { public: virtual ~LzopDecompressor(); - virtual Status decompress( - uint8_t* input, size_t input_len, size_t* input_bytes_read, - uint8_t* output, size_t output_max_len, - size_t* decompressed_len, bool* stream_end, - size_t* more_input_bytes, size_t* more_output_bytes) override; + virtual Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, + uint8_t* output, size_t output_max_len, size_t* decompressed_len, + bool* stream_end, size_t* more_input_bytes, + size_t* more_output_bytes) override; virtual std::string debug_info() override; private: friend class Decompressor; - LzopDecompressor() : - Decompressor(CompressType::LZOP), - 
_header_info({0}), - _is_header_loaded(false) {} + LzopDecompressor() + : Decompressor(CompressType::LZOP), _header_info({0}), _is_header_loaded(false) {} virtual Status init() override; private: - enum LzoChecksum { - CHECK_NONE, - CHECK_CRC32, - CHECK_ADLER - }; + enum LzoChecksum { CHECK_NONE, CHECK_CRC32, CHECK_ADLER }; private: inline uint8_t* get_uint8(uint8_t* ptr, uint8_t* value) { @@ -200,22 +181,18 @@ class LzopDecompressor : public Decompressor { } inline LzoChecksum input_type(int flags) { - return (flags & F_CRC32_C) ? CHECK_CRC32 : - (flags & F_ADLER32_C) ? CHECK_ADLER : CHECK_NONE; + return (flags & F_CRC32_C) ? CHECK_CRC32 : (flags & F_ADLER32_C) ? CHECK_ADLER : CHECK_NONE; } inline LzoChecksum output_type(int flags) { - return (flags & F_CRC32_D) ? CHECK_CRC32 : - (flags & F_ADLER32_D) ? CHECK_ADLER : CHECK_NONE; + return (flags & F_CRC32_D) ? CHECK_CRC32 : (flags & F_ADLER32_D) ? CHECK_ADLER : CHECK_NONE; } - Status parse_header_info(uint8_t* input, size_t input_len, - size_t* input_bytes_read, + Status parse_header_info(uint8_t* input, size_t input_len, size_t* input_bytes_read, size_t* more_bytes_needed); - Status checksum(LzoChecksum type, const std::string& source, - uint32_t expected, - uint8_t* ptr, size_t len); + Status checksum(LzoChecksum type, const std::string& source, uint32_t expected, uint8_t* ptr, + size_t len); private: // lzop header info @@ -259,6 +236,6 @@ class LzopDecompressor : public Decompressor { const static uint64_t F_CRC32_D; const static uint64_t F_ADLER32_D; }; -#endif // DORIS_WITH_LZO +#endif // DORIS_WITH_LZO -} // namespace +} // namespace doris diff --git a/be/src/exec/empty_set_node.cpp b/be/src/exec/empty_set_node.cpp index 4284d63e32075e..b03ff33b8fa1dc 100644 --- a/be/src/exec/empty_set_node.cpp +++ b/be/src/exec/empty_set_node.cpp @@ -19,15 +19,12 @@ namespace doris { -EmptySetNode::EmptySetNode(ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs) { -} 
+EmptySetNode::EmptySetNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs) {} Status EmptySetNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { - *eos = true; - return Status::OK(); -} - + *eos = true; + return Status::OK(); } +} // namespace doris diff --git a/be/src/exec/empty_set_node.h b/be/src/exec/empty_set_node.h index 93e52433ab1703..d31dd2ee167021 100644 --- a/be/src/exec/empty_set_node.h +++ b/be/src/exec/empty_set_node.h @@ -29,5 +29,4 @@ class EmptySetNode : public ExecNode { virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override; }; -} - +} // namespace doris diff --git a/be/src/exec/es/es_predicate.cpp b/be/src/exec/es/es_predicate.cpp index 1070b45f9b6c13..b4558149f81a0a 100644 --- a/be/src/exec/es/es_predicate.cpp +++ b/be/src/exec/es/es_predicate.cpp @@ -17,30 +17,29 @@ #include "exec/es/es_predicate.h" +#include #include + +#include #include #include -#include -#include -#include "common/status.h" #include "common/logging.h" +#include "common/status.h" #include "exec/es/es_query_builder.h" #include "exprs/expr.h" #include "exprs/expr_context.h" #include "exprs/in_predicate.h" - #include "gen_cpp/PlanNodes_types.h" #include "olap/olap_common.h" #include "olap/utils.h" #include "runtime/client_cache.h" -#include "runtime/runtime_state.h" -#include "runtime/row_batch.h" #include "runtime/datetime_value.h" #include "runtime/large_int_value.h" +#include "runtime/row_batch.h" +#include "runtime/runtime_state.h" #include "runtime/string_value.h" #include "runtime/tuple_row.h" - #include "service/backend_options.h" #include "util/debug_util.h" #include "util/runtime_profile.h" @@ -49,64 +48,63 @@ namespace doris { using namespace std; -#define RETURN_ERROR_IF_EXPR_IS_NOT_SLOTREF(expr) \ - do { \ - const Expr* expr_without_cast = Expr::expr_without_cast(expr); \ - if (expr_without_cast->node_type() != TExprNodeType::SLOT_REF) { \ - return 
Status::InternalError("build disjuncts failed: child is not slot ref"); \ - } \ +#define RETURN_ERROR_IF_EXPR_IS_NOT_SLOTREF(expr) \ + do { \ + const Expr* expr_without_cast = Expr::expr_without_cast(expr); \ + if (expr_without_cast->node_type() != TExprNodeType::SLOT_REF) { \ + return Status::InternalError("build disjuncts failed: child is not slot ref"); \ + } \ } while (false) std::string ExtLiteral::value_to_string() { std::stringstream ss; switch (_type) { - case TYPE_TINYINT: - ss << std::to_string(get_byte()); - break; - case TYPE_SMALLINT: - ss << std::to_string(get_short()); - break; - case TYPE_INT: - ss << std::to_string(get_int()); - break; - case TYPE_BIGINT: - ss << std::to_string(get_long()); - break; - case TYPE_FLOAT: - ss << std::to_string(get_float()); - break; - case TYPE_DOUBLE: - ss << std::to_string(get_double()); - break; - case TYPE_CHAR: - case TYPE_VARCHAR: - ss << get_string(); - break; - case TYPE_DATE: - case TYPE_DATETIME: - ss << get_date_string(); - break; - case TYPE_BOOLEAN: - ss << std::to_string(get_bool()); - break; - case TYPE_DECIMAL: - ss << get_decimal_string(); - break; - case TYPE_DECIMALV2: - ss << get_decimalv2_string(); - break; - case TYPE_LARGEINT: - ss << get_largeint_string(); - break; - default: - DCHECK(false); - break; + case TYPE_TINYINT: + ss << std::to_string(get_byte()); + break; + case TYPE_SMALLINT: + ss << std::to_string(get_short()); + break; + case TYPE_INT: + ss << std::to_string(get_int()); + break; + case TYPE_BIGINT: + ss << std::to_string(get_long()); + break; + case TYPE_FLOAT: + ss << std::to_string(get_float()); + break; + case TYPE_DOUBLE: + ss << std::to_string(get_double()); + break; + case TYPE_CHAR: + case TYPE_VARCHAR: + ss << get_string(); + break; + case TYPE_DATE: + case TYPE_DATETIME: + ss << get_date_string(); + break; + case TYPE_BOOLEAN: + ss << std::to_string(get_bool()); + break; + case TYPE_DECIMAL: + ss << get_decimal_string(); + break; + case TYPE_DECIMALV2: + ss << 
get_decimalv2_string(); + break; + case TYPE_LARGEINT: + ss << get_largeint_string(); + break; + default: + DCHECK(false); + break; } return ss.str(); } -ExtLiteral::~ExtLiteral(){ -} +ExtLiteral::~ExtLiteral() {} int8_t ExtLiteral::get_byte() { DCHECK(_type == TYPE_TINYINT); @@ -152,7 +150,7 @@ std::string ExtLiteral::get_date_string() { char str[MAX_DTVALUE_STR_LEN]; date_value.to_string(str); - return std::string(str, strlen(str)); + return std::string(str, strlen(str)); } bool ExtLiteral::get_bool() { @@ -175,17 +173,15 @@ std::string ExtLiteral::get_largeint_string() { return LargeIntValue::to_string(*reinterpret_cast<__int128*>(_value)); } -EsPredicate::EsPredicate(ExprContext* context, - const TupleDescriptor* tuple_desc, ObjectPool* pool) : - _context(context), - _disjuncts_num(0), - _tuple_desc(tuple_desc), - _es_query_status(Status::OK()), - _pool(pool) { -} +EsPredicate::EsPredicate(ExprContext* context, const TupleDescriptor* tuple_desc, ObjectPool* pool) + : _context(context), + _disjuncts_num(0), + _tuple_desc(tuple_desc), + _es_query_status(Status::OK()), + _pool(pool) {} EsPredicate::~EsPredicate() { - for(int i=0; i < _disjuncts.size(); i++) { + for (int i = 0; i < _disjuncts.size(); i++) { delete _disjuncts[i]; } _disjuncts.clear(); @@ -196,7 +192,7 @@ Status EsPredicate::build_disjuncts_list() { } // make sure to build by build_disjuncts_list -const std::vector& EsPredicate::get_predicate_list(){ +const std::vector& EsPredicate::get_predicate_list() { return _disjuncts; } @@ -212,16 +208,16 @@ static bool ignore_cast(const SlotDescriptor* slot, const Expr* expr) { static bool is_literal_node(const Expr* expr) { switch (expr->node_type()) { - case TExprNodeType::BOOL_LITERAL: - case TExprNodeType::INT_LITERAL: - case TExprNodeType::LARGE_INT_LITERAL: - case TExprNodeType::FLOAT_LITERAL: - case TExprNodeType::DECIMAL_LITERAL: - case TExprNodeType::STRING_LITERAL: - case TExprNodeType::DATE_LITERAL: - return true; - default: - return false; + case 
TExprNodeType::BOOL_LITERAL: + case TExprNodeType::INT_LITERAL: + case TExprNodeType::LARGE_INT_LITERAL: + case TExprNodeType::FLOAT_LITERAL: + case TExprNodeType::DECIMAL_LITERAL: + case TExprNodeType::STRING_LITERAL: + case TExprNodeType::DATE_LITERAL: + return true; + default: + return false; } } @@ -240,8 +236,8 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { // conjunct->get_child(1)->node_type()return FLOAT_LITERAL // the left child is literal and right child is SlotRef maybe not happened, but here we just process // this situation regardless of the rewrite logic from the FE's Query Engine - if (TExprNodeType::SLOT_REF == conjunct->get_child(0)->node_type() - || TExprNodeType::CAST_EXPR == conjunct->get_child(0)->node_type()) { + if (TExprNodeType::SLOT_REF == conjunct->get_child(0)->node_type() || + TExprNodeType::CAST_EXPR == conjunct->get_child(0)->node_type()) { expr = conjunct->get_child(1); // process such as sub-query: select * from (select split_part(k, "_", 1) as new_field from table) t where t.new_field > 1; RETURN_ERROR_IF_EXPR_IS_NOT_SLOTREF(conjunct->get_child(0)); @@ -249,8 +245,8 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { // k (float) > 2.0, k(int) > 3.2 slot_ref = (SlotRef*)Expr::expr_without_cast(conjunct->get_child(0)); op = conjunct->op(); - } else if (TExprNodeType::SLOT_REF == conjunct->get_child(1)->node_type() - || TExprNodeType::CAST_EXPR == conjunct->get_child(1)->node_type()) { + } else if (TExprNodeType::SLOT_REF == conjunct->get_child(1)->node_type() || + TExprNodeType::CAST_EXPR == conjunct->get_child(1)->node_type()) { expr = conjunct->get_child(0); RETURN_ERROR_IF_EXPR_IS_NOT_SLOTREF(conjunct->get_child(1)); slot_ref = (SlotRef*)Expr::expr_without_cast(conjunct->get_child(1)); @@ -273,12 +269,8 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { if (_field_context.find(col) != _field_context.end()) { col = _field_context[col]; } - ExtPredicate* predicate = new 
ExtBinaryPredicate( - TExprNodeType::BINARY_PRED, - col, - slot_desc->type(), - op, - literal); + ExtPredicate* predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, col, + slot_desc->type(), op, literal); _disjuncts.push_back(predicate); return Status::OK(); @@ -295,13 +287,10 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { std::vector query_conditions; query_conditions.emplace_back(literal); std::vector cols; - ExtPredicate* predicate = new ExtFunction( - TExprNodeType::FUNCTION_CALL, - "esquery", - cols, - query_conditions); + ExtPredicate* predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, "esquery", cols, + query_conditions); if (_es_query_status.ok()) { - _es_query_status = BooleanQueryBuilder::check_es_query(*(ExtFunction *)predicate); + _es_query_status = BooleanQueryBuilder::check_es_query(*(ExtFunction*)predicate); if (!_es_query_status.ok()) { delete predicate; return _es_query_status; @@ -326,7 +315,8 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { col = _field_context[col]; } // use TExprNodeType::IS_NULL_PRED for BooleanQueryBuilder translate - ExtIsNullPredicate* predicate = new ExtIsNullPredicate(TExprNodeType::IS_NULL_PRED, col, slot_desc->type(), is_not_null); + ExtIsNullPredicate* predicate = new ExtIsNullPredicate(TExprNodeType::IS_NULL_PRED, col, + slot_desc->type(), is_not_null); _disjuncts.push_back(predicate); } else if (fname == "like") { if (conjunct->children().size() != 2) { @@ -357,30 +347,26 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { col = _field_context[col]; } ExtLiteral literal(type, _context->get_value(expr, NULL)); - ExtPredicate* predicate = new ExtLikePredicate( - TExprNodeType::LIKE_PRED, - col, - slot_desc->type(), - literal); + ExtPredicate* predicate = + new ExtLikePredicate(TExprNodeType::LIKE_PRED, col, slot_desc->type(), literal); _disjuncts.push_back(predicate); } else { std::stringstream ss; - ss << "can not process function predicate[ " - 
<< fname - << " ]"; + ss << "can not process function predicate[ " << fname << " ]"; return Status::InternalError(ss.str()); } return Status::OK(); } - + if (TExprNodeType::IN_PRED == conjunct->node_type()) { // the op code maybe FILTER_NEW_IN, it means there is function in list // like col_a in (abs(1)) - if (TExprOpcode::FILTER_IN != conjunct->op() - && TExprOpcode::FILTER_NOT_IN != conjunct->op()) { - return Status::InternalError("build disjuncts failed: " - "opcode in IN_PRED is neither FILTER_IN nor FILTER_NOT_IN"); + if (TExprOpcode::FILTER_IN != conjunct->op() && + TExprOpcode::FILTER_NOT_IN != conjunct->op()) { + return Status::InternalError( + "build disjuncts failed: " + "opcode in IN_PRED is neither FILTER_IN nor FILTER_NOT_IN"); } std::vector in_pred_values; @@ -390,7 +376,7 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { return Status::InternalError("build disjuncts failed: node type is not slot ref"); } - const SlotDescriptor* slot_desc = get_slot_desc((const SlotRef *)expr); + const SlotDescriptor* slot_desc = get_slot_desc((const SlotRef*)expr); if (slot_desc == nullptr) { return Status::InternalError("build disjuncts failed: slot_desc is null"); } @@ -407,7 +393,7 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { return Status::InternalError("build disjuncts failed: hybrid set has a null value"); } - ExtLiteral literal(slot_desc->type().type, const_cast(iter->get_value())); + ExtLiteral literal(slot_desc->type().type, const_cast(iter->get_value())); in_pred_values.emplace_back(literal); iter->next(); } @@ -415,12 +401,8 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { if (_field_context.find(col) != _field_context.end()) { col = _field_context[col]; } - ExtPredicate* predicate = new ExtInPredicate( - TExprNodeType::IN_PRED, - pred->is_not_in(), - col, - slot_desc->type(), - in_pred_values); + ExtPredicate* predicate = new ExtInPredicate(TExprNodeType::IN_PRED, pred->is_not_in(), col, + 
slot_desc->type(), in_pred_values); _disjuncts.push_back(predicate); return Status::OK(); @@ -432,7 +414,7 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { if (conjunct->op() == TExprOpcode::COMPOUND_AND) { std::vector conjuncts; for (int i = 0; i < conjunct->get_num_children(); ++i) { - EsPredicate *predicate = _pool->add(new EsPredicate(_context, _tuple_desc, _pool)); + EsPredicate* predicate = _pool->add(new EsPredicate(_context, _tuple_desc, _pool)); predicate->set_field_context(_field_context); Status status = predicate->build_disjuncts_list(conjunct->children()[i]); if (status.ok()) { @@ -441,10 +423,11 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { return Status::InternalError("build COMPOUND_AND conjuncts failed"); } } - ExtCompPredicates *compound_predicate = new ExtCompPredicates(TExprOpcode::COMPOUND_AND, conjuncts); + ExtCompPredicates* compound_predicate = + new ExtCompPredicates(TExprOpcode::COMPOUND_AND, conjuncts); _disjuncts.push_back(compound_predicate); return Status::OK(); - } else if (conjunct->op() == TExprOpcode::COMPOUND_NOT){ + } else if (conjunct->op() == TExprOpcode::COMPOUND_NOT) { // reserved for processing COMPOUND_NOT return Status::InternalError("currently do not support COMPOUND_NOT push-down"); } @@ -477,4 +460,4 @@ const SlotDescriptor* EsPredicate::get_slot_desc(const SlotRef* slotRef) { return slot_desc; } -} +} // namespace doris diff --git a/be/src/exec/es/es_predicate.h b/be/src/exec/es/es_predicate.h index a3c6c4fbdf694c..0ffbe543889c5d 100644 --- a/be/src/exec/es/es_predicate.h +++ b/be/src/exec/es/es_predicate.h @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef BE_EXEC_ES_PREDICATE_H -#define BE_EXEC_ES_PREDICATE_H +#ifndef BE_EXEC_ES_PREDICATE_H +#define BE_EXEC_ES_PREDICATE_H #include #include @@ -26,8 +26,8 @@ #include "gen_cpp/Opcodes_types.h" #include "gen_cpp/PaloExternalDataSourceService_types.h" #include "runtime/descriptors.h" -#include "runtime/tuple.h" #include "runtime/primitive_type.h" +#include "runtime/tuple.h" namespace doris { @@ -38,15 +38,11 @@ class EsPredicate; class ExtLiteral { public: - ExtLiteral(PrimitiveType type, void *value) : - _type(type), - _value(value) { - _str = value_to_string(); + ExtLiteral(PrimitiveType type, void* value) : _type(type), _value(value) { + _str = value_to_string(); } ~ExtLiteral(); - const std::string& to_string() const { - return _str; - } + const std::string& to_string() const { return _str; } private: int8_t get_byte(); @@ -70,18 +66,14 @@ class ExtLiteral { }; struct ExtColumnDesc { - ExtColumnDesc(const std::string& name, const TypeDescriptor& type) : - name(name), - type(type) { - } + ExtColumnDesc(const std::string& name, const TypeDescriptor& type) : name(name), type(type) {} std::string name; TypeDescriptor type; }; struct ExtPredicate { - ExtPredicate(TExprNodeType::type node_type) : node_type(node_type) { - } + ExtPredicate(TExprNodeType::type node_type) : node_type(node_type) {} virtual ~ExtPredicate() {} TExprNodeType::type node_type; @@ -90,30 +82,17 @@ struct ExtPredicate { // this used for placeholder for compound_predicate // reserved for compound_not struct ExtCompPredicates : public ExtPredicate { - ExtCompPredicates( - TExprOpcode::type expr_op, - const std::vector& es_predicates) : - ExtPredicate(TExprNodeType::COMPOUND_PRED), - op(expr_op), - conjuncts(es_predicates) { - } + ExtCompPredicates(TExprOpcode::type expr_op, const std::vector& es_predicates) + : ExtPredicate(TExprNodeType::COMPOUND_PRED), op(expr_op), conjuncts(es_predicates) {} TExprOpcode::type op; std::vector conjuncts; }; struct ExtBinaryPredicate : public ExtPredicate { - 
ExtBinaryPredicate( - TExprNodeType::type node_type, - const std::string& name, - const TypeDescriptor& type, - TExprOpcode::type op, - const ExtLiteral& value) : - ExtPredicate(node_type), - col(name, type), - op(op), - value(value) { - } + ExtBinaryPredicate(TExprNodeType::type node_type, const std::string& name, + const TypeDescriptor& type, TExprOpcode::type op, const ExtLiteral& value) + : ExtPredicate(node_type), col(name, type), op(op), value(value) {} ExtColumnDesc col; TExprOpcode::type op; @@ -121,17 +100,9 @@ struct ExtBinaryPredicate : public ExtPredicate { }; struct ExtInPredicate : public ExtPredicate { - ExtInPredicate( - TExprNodeType::type node_type, - bool is_not_in, - const std::string& name, - const TypeDescriptor& type, - const std::vector& values) : - ExtPredicate(node_type), - is_not_in(is_not_in), - col(name, type), - values(values) { - } + ExtInPredicate(TExprNodeType::type node_type, bool is_not_in, const std::string& name, + const TypeDescriptor& type, const std::vector& values) + : ExtPredicate(node_type), is_not_in(is_not_in), col(name, type), values(values) {} bool is_not_in; ExtColumnDesc col; @@ -139,45 +110,27 @@ struct ExtInPredicate : public ExtPredicate { }; struct ExtLikePredicate : public ExtPredicate { - ExtLikePredicate( - TExprNodeType::type node_type, - const std::string& name, - const TypeDescriptor& type, - ExtLiteral value) : - ExtPredicate(node_type), - col(name, type), - value(value) { - } + ExtLikePredicate(TExprNodeType::type node_type, const std::string& name, + const TypeDescriptor& type, ExtLiteral value) + : ExtPredicate(node_type), col(name, type), value(value) {} ExtColumnDesc col; ExtLiteral value; }; struct ExtIsNullPredicate : public ExtPredicate { - ExtIsNullPredicate( - TExprNodeType::type node_type, - const std::string& name, - const TypeDescriptor& type, - bool is_not_null) : - ExtPredicate(node_type), - col(name, type), - is_not_null(is_not_null) { - } + ExtIsNullPredicate(TExprNodeType::type node_type, 
const std::string& name, + const TypeDescriptor& type, bool is_not_null) + : ExtPredicate(node_type), col(name, type), is_not_null(is_not_null) {} ExtColumnDesc col; bool is_not_null; }; struct ExtFunction : public ExtPredicate { - ExtFunction(TExprNodeType::type node_type, - const std::string& func_name, - std::vector cols, - std::vector values) : - ExtPredicate(node_type), - func_name(func_name), - cols(cols), - values(values) { - } + ExtFunction(TExprNodeType::type node_type, const std::string& func_name, + std::vector cols, std::vector values) + : ExtPredicate(node_type), func_name(func_name), cols(cols), values(values) {} const std::string func_name; std::vector cols; @@ -191,13 +144,9 @@ class EsPredicate { const std::vector& get_predicate_list(); Status build_disjuncts_list(); // public for tests - EsPredicate(const std::vector& all_predicates) { - _disjuncts = all_predicates; - }; + EsPredicate(const std::vector& all_predicates) { _disjuncts = all_predicates; }; - Status get_es_query_status() { - return _es_query_status; - } + Status get_es_query_status() { return _es_query_status; } void set_field_context(const std::map& field_context) { _field_context = field_context; @@ -207,15 +156,15 @@ class EsPredicate { Status build_disjuncts_list(const Expr* conjunct); const SlotDescriptor* get_slot_desc(const SlotRef* slotRef); - ExprContext* _context; + ExprContext* _context; int _disjuncts_num; const TupleDescriptor* _tuple_desc; std::vector _disjuncts; Status _es_query_status; - ObjectPool *_pool; + ObjectPool* _pool; std::map _field_context; }; -} +} // namespace doris #endif diff --git a/be/src/exec/es/es_query_builder.cpp b/be/src/exec/es/es_query_builder.cpp index 441aa551566ddf..a75ed0eb0e7576 100644 --- a/be/src/exec/es/es_query_builder.cpp +++ b/be/src/exec/es/es_query_builder.cpp @@ -18,16 +18,15 @@ #include "exec/es/es_query_builder.h" #include + +#include "common/logging.h" #include "rapidjson/rapidjson.h" #include "rapidjson/stringbuffer.h" #include 
"rapidjson/writer.h" -#include "common/logging.h" namespace doris { -ESQueryBuilder::ESQueryBuilder(const std::string& es_query_str) : _es_query_str(es_query_str) { - -} +ESQueryBuilder::ESQueryBuilder(const std::string& es_query_str) : _es_query_str(es_query_str) {} ESQueryBuilder::ESQueryBuilder(const ExtFunction& es_query) { auto first = es_query.values.front(); _es_query_str = first.to_string(); @@ -46,17 +45,15 @@ void ESQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* qu query_key.CopyFrom(first->name, allocator); // if we found one key, then end loop as QueryDSL only support one `query` root query_value.CopyFrom(first->value, allocator); - // Move Semantics, reference http://rapidjson.org/md_doc_tutorial.html#MoveSemantics + // Move Semantics, reference http://rapidjson.org/md_doc_tutorial.html#MoveSemantics query->AddMember(query_key, query_value, allocator); } -TermQueryBuilder::TermQueryBuilder(const std::string& field, const std::string& term) : _field(field), _term(term) { +TermQueryBuilder::TermQueryBuilder(const std::string& field, const std::string& term) + : _field(field), _term(term) {} -} - -TermQueryBuilder::TermQueryBuilder(const ExtBinaryPredicate& binary_predicate) : _field(binary_predicate.col.name), _term(binary_predicate.value.to_string()) { - -} +TermQueryBuilder::TermQueryBuilder(const ExtBinaryPredicate& binary_predicate) + : _field(binary_predicate.col.name), _term(binary_predicate.value.to_string()) {} void TermQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); @@ -68,8 +65,10 @@ void TermQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query->AddMember("term", term_node, allocator); } -RangeQueryBuilder::RangeQueryBuilder(const ExtBinaryPredicate& range_predicate) : _field(range_predicate.col.name), _value(range_predicate.value.to_string()), _op(range_predicate.op) { -} 
+RangeQueryBuilder::RangeQueryBuilder(const ExtBinaryPredicate& range_predicate) + : _field(range_predicate.col.name), + _value(range_predicate.value.to_string()), + _op(range_predicate.op) {} void RangeQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); @@ -78,20 +77,20 @@ void RangeQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* rapidjson::Value op_node(rapidjson::kObjectType); op_node.SetObject(); switch (_op) { - case TExprOpcode::LT: - op_node.AddMember("lt", value, allocator); - break; - case TExprOpcode::LE: - op_node.AddMember("lte", value, allocator); - break; - case TExprOpcode::GT: - op_node.AddMember("gt", value, allocator); - break; - case TExprOpcode::GE: - op_node.AddMember("gte", value, allocator); - break; - default: - break; + case TExprOpcode::LT: + op_node.AddMember("lt", value, allocator); + break; + case TExprOpcode::LE: + op_node.AddMember("lte", value, allocator); + break; + case TExprOpcode::GT: + op_node.AddMember("gt", value, allocator); + break; + case TExprOpcode::GE: + op_node.AddMember("gte", value, allocator); + break; + default: + break; } rapidjson::Value field_node(rapidjson::kObjectType); field_node.SetObject(); @@ -108,7 +107,8 @@ void WildCardQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Val term_node.AddMember(field_value, term_value, allocator); query->AddMember("wildcard", term_node, allocator); } -WildCardQueryBuilder::WildCardQueryBuilder(const ExtLikePredicate& like_predicate) : _field(like_predicate.col.name) { +WildCardQueryBuilder::WildCardQueryBuilder(const ExtLikePredicate& like_predicate) + : _field(like_predicate.col.name) { _like_value = like_predicate.value.to_string(); // example of translation : // abc_123 ===> abc?123 @@ -119,13 +119,13 @@ WildCardQueryBuilder::WildCardQueryBuilder(const ExtLikePredicate& like_predicat // abc\\_123 ===> abc\\_123 // abc\\%123 ===> 
abc\\%123 // NOTE. user must input sql like 'abc\\_123' or 'abc\\%ykz' - for (int i = 0; i< _like_value.size(); i++) { + for (int i = 0; i < _like_value.size(); i++) { if (_like_value[i] == '_' || _like_value[i] == '%') { - if (i == 0) { - _like_value[i] = (_like_value[i] == '_') ? '?' : '*'; - } else if (_like_value[i - 1] != '\\' ) { - _like_value[i] = (_like_value[i] == '_') ? '?' : '*'; - } + if (i == 0) { + _like_value[i] = (_like_value[i] == '_') ? '?' : '*'; + } else if (_like_value[i - 1] != '\\') { + _like_value[i] = (_like_value[i] == '_') ? '?' : '*'; + } } } } @@ -143,7 +143,8 @@ void TermsInSetQueryBuilder::to_json(rapidjson::Document* document, rapidjson::V query->AddMember("terms", terms_node, allocator); } -TermsInSetQueryBuilder::TermsInSetQueryBuilder(const ExtInPredicate& in_predicate) : _field(in_predicate.col.name) { +TermsInSetQueryBuilder::TermsInSetQueryBuilder(const ExtInPredicate& in_predicate) + : _field(in_predicate.col.name) { for (auto& value : in_predicate.values) { _values.push_back(value.to_string()); } @@ -156,9 +157,8 @@ void MatchAllQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Val query->AddMember("match_all", match_all_node, allocator); } -ExistsQueryBuilder::ExistsQueryBuilder(const ExtIsNullPredicate& is_null_predicate) : _field(is_null_predicate.col.name) { - -} +ExistsQueryBuilder::ExistsQueryBuilder(const ExtIsNullPredicate& is_null_predicate) + : _field(is_null_predicate.col.name) {} void ExistsQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); @@ -169,9 +169,7 @@ void ExistsQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value query->AddMember("exists", term_node, allocator); } -BooleanQueryBuilder::BooleanQueryBuilder() { - -} +BooleanQueryBuilder::BooleanQueryBuilder() {} BooleanQueryBuilder::~BooleanQueryBuilder() { for (auto clause : _must_clauses) { delete clause; @@ -194,96 
+192,96 @@ BooleanQueryBuilder::~BooleanQueryBuilder() { BooleanQueryBuilder::BooleanQueryBuilder(const std::vector& predicates) { for (auto predicate : predicates) { switch (predicate->node_type) { - case TExprNodeType::BINARY_PRED: { - ExtBinaryPredicate* binary_predicate = (ExtBinaryPredicate*)predicate; - switch (binary_predicate->op) { - case TExprOpcode::EQ: { - TermQueryBuilder* term_query = new TermQueryBuilder(*binary_predicate); - _should_clauses.push_back(term_query); - break; - } - case TExprOpcode::NE:{ // process NE - TermQueryBuilder* term_query = new TermQueryBuilder(*binary_predicate); - BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); - bool_query->must_not(term_query); - _should_clauses.push_back(bool_query); - break; - } - case TExprOpcode::LT: - case TExprOpcode::LE: - case TExprOpcode::GT: - case TExprOpcode::GE: { - RangeQueryBuilder* range_query = new RangeQueryBuilder(*binary_predicate); - _should_clauses.push_back(range_query); - break; - } - default: - break; - } + case TExprNodeType::BINARY_PRED: { + ExtBinaryPredicate* binary_predicate = (ExtBinaryPredicate*)predicate; + switch (binary_predicate->op) { + case TExprOpcode::EQ: { + TermQueryBuilder* term_query = new TermQueryBuilder(*binary_predicate); + _should_clauses.push_back(term_query); break; } - case TExprNodeType::IN_PRED: { - ExtInPredicate* in_predicate = (ExtInPredicate *)predicate; - bool is_not_in = in_predicate->is_not_in; - if (is_not_in) { // process not in predicate - TermsInSetQueryBuilder* terms_predicate = new TermsInSetQueryBuilder(*in_predicate); - BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); - bool_query->must_not(terms_predicate); - _should_clauses.push_back(bool_query); - } else { // process in predicate - TermsInSetQueryBuilder* terms_query= new TermsInSetQueryBuilder(*in_predicate); - _should_clauses.push_back(terms_query); - } - break; - } - case TExprNodeType::LIKE_PRED: { - ExtLikePredicate* like_predicate = (ExtLikePredicate 
*)predicate; - WildCardQueryBuilder* wild_card_query = new WildCardQueryBuilder(*like_predicate); - _should_clauses.push_back(wild_card_query); + case TExprOpcode::NE: { // process NE + TermQueryBuilder* term_query = new TermQueryBuilder(*binary_predicate); + BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); + bool_query->must_not(term_query); + _should_clauses.push_back(bool_query); break; } - case TExprNodeType::IS_NULL_PRED: { - ExtIsNullPredicate* is_null_predicate = (ExtIsNullPredicate *)predicate; - ExistsQueryBuilder* exists_query = new ExistsQueryBuilder(*is_null_predicate); - if (is_null_predicate->is_not_null) { - _should_clauses.push_back(exists_query); - } else { - BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); - bool_query->must_not(exists_query); - _should_clauses.push_back(bool_query); - } + case TExprOpcode::LT: + case TExprOpcode::LE: + case TExprOpcode::GT: + case TExprOpcode::GE: { + RangeQueryBuilder* range_query = new RangeQueryBuilder(*binary_predicate); + _should_clauses.push_back(range_query); break; } - case TExprNodeType::FUNCTION_CALL: { - ExtFunction* function_predicate = (ExtFunction *)predicate; - if ("esquery" == function_predicate->func_name ) { - ESQueryBuilder* es_query = new ESQueryBuilder(*function_predicate); - _should_clauses.push_back(es_query); - }; + default: break; } - case TExprNodeType::COMPOUND_PRED: { - ExtCompPredicates* compound_predicates = (ExtCompPredicates *)predicate; - // reserved for compound_not - if (compound_predicates->op == TExprOpcode::COMPOUND_AND) { - BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); - for (auto es_predicate : compound_predicates->conjuncts) { - std::vector or_predicates = es_predicate->get_predicate_list(); - BooleanQueryBuilder* inner_bool_query = new BooleanQueryBuilder(or_predicates); - bool_query->must(inner_bool_query); - } - _should_clauses.push_back(bool_query); + break; + } + case TExprNodeType::IN_PRED: { + ExtInPredicate* in_predicate = 
(ExtInPredicate*)predicate; + bool is_not_in = in_predicate->is_not_in; + if (is_not_in) { // process not in predicate + TermsInSetQueryBuilder* terms_predicate = new TermsInSetQueryBuilder(*in_predicate); + BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); + bool_query->must_not(terms_predicate); + _should_clauses.push_back(bool_query); + } else { // process in predicate + TermsInSetQueryBuilder* terms_query = new TermsInSetQueryBuilder(*in_predicate); + _should_clauses.push_back(terms_query); + } + break; + } + case TExprNodeType::LIKE_PRED: { + ExtLikePredicate* like_predicate = (ExtLikePredicate*)predicate; + WildCardQueryBuilder* wild_card_query = new WildCardQueryBuilder(*like_predicate); + _should_clauses.push_back(wild_card_query); + break; + } + case TExprNodeType::IS_NULL_PRED: { + ExtIsNullPredicate* is_null_predicate = (ExtIsNullPredicate*)predicate; + ExistsQueryBuilder* exists_query = new ExistsQueryBuilder(*is_null_predicate); + if (is_null_predicate->is_not_null) { + _should_clauses.push_back(exists_query); + } else { + BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); + bool_query->must_not(exists_query); + _should_clauses.push_back(bool_query); + } + break; + } + case TExprNodeType::FUNCTION_CALL: { + ExtFunction* function_predicate = (ExtFunction*)predicate; + if ("esquery" == function_predicate->func_name) { + ESQueryBuilder* es_query = new ESQueryBuilder(*function_predicate); + _should_clauses.push_back(es_query); + }; + break; + } + case TExprNodeType::COMPOUND_PRED: { + ExtCompPredicates* compound_predicates = (ExtCompPredicates*)predicate; + // reserved for compound_not + if (compound_predicates->op == TExprOpcode::COMPOUND_AND) { + BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); + for (auto es_predicate : compound_predicates->conjuncts) { + std::vector or_predicates = es_predicate->get_predicate_list(); + BooleanQueryBuilder* inner_bool_query = new BooleanQueryBuilder(or_predicates); + 
bool_query->must(inner_bool_query); } - break; + _should_clauses.push_back(bool_query); } - default: - break; + break; + } + default: + break; } } } void BooleanQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { - rapidjson::Document::AllocatorType &allocator = document->GetAllocator(); + rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); rapidjson::Value root_node_object(rapidjson::kObjectType); if (_filter_clauses.size() > 0) { rapidjson::Value filter_node(rapidjson::kArrayType); @@ -361,61 +359,61 @@ Status BooleanQueryBuilder::check_es_query(const ExtFunction& extFunction) { return Status::OK(); } -void BooleanQueryBuilder::validate(const std::vector& espredicates, std::vector* result) { +void BooleanQueryBuilder::validate(const std::vector& espredicates, + std::vector* result) { int conjunct_size = espredicates.size(); result->reserve(conjunct_size); for (auto espredicate : espredicates) { bool flag = true; for (auto predicate : espredicate->get_predicate_list()) { switch (predicate->node_type) { - case TExprNodeType::BINARY_PRED: { - ExtBinaryPredicate* binary_predicate = (ExtBinaryPredicate*)predicate; - TExprOpcode::type op = binary_predicate->op; - if (op != TExprOpcode::EQ && op != TExprOpcode::NE - && op != TExprOpcode::LT && op != TExprOpcode::LE - && op != TExprOpcode::GT && op != TExprOpcode::GE) { - flag = false; - } - break; - } - case TExprNodeType::COMPOUND_PRED: { - ExtCompPredicates* compound_predicates = (ExtCompPredicates *)predicate; - if (compound_predicates->op == TExprOpcode::COMPOUND_AND) { - std::vector list; - validate(compound_predicates->conjuncts, &list); - for(int i = list.size() - 1; i >= 0; i--) { - if(!list[i]) { - flag = false; - break; - } - } - } else { - // reserved for compound_not - flag = false; - } - break; - } - case TExprNodeType::LIKE_PRED: - case TExprNodeType::IS_NULL_PRED: - case TExprNodeType::IN_PRED: { - break; + case TExprNodeType::BINARY_PRED: { + 
ExtBinaryPredicate* binary_predicate = (ExtBinaryPredicate*)predicate; + TExprOpcode::type op = binary_predicate->op; + if (op != TExprOpcode::EQ && op != TExprOpcode::NE && op != TExprOpcode::LT && + op != TExprOpcode::LE && op != TExprOpcode::GT && op != TExprOpcode::GE) { + flag = false; } - case TExprNodeType::FUNCTION_CALL: { - ExtFunction* function_predicate = (ExtFunction *)predicate; - if ("esquery" == function_predicate->func_name ) { - Status st = check_es_query(*function_predicate); - if (!st.ok()) { + break; + } + case TExprNodeType::COMPOUND_PRED: { + ExtCompPredicates* compound_predicates = (ExtCompPredicates*)predicate; + if (compound_predicates->op == TExprOpcode::COMPOUND_AND) { + std::vector list; + validate(compound_predicates->conjuncts, &list); + for (int i = list.size() - 1; i >= 0; i--) { + if (!list[i]) { flag = false; + break; } - } else { - flag = false; - } - break; + } + } else { + // reserved for compound_not + flag = false; } - default: { + break; + } + case TExprNodeType::LIKE_PRED: + case TExprNodeType::IS_NULL_PRED: + case TExprNodeType::IN_PRED: { + break; + } + case TExprNodeType::FUNCTION_CALL: { + ExtFunction* function_predicate = (ExtFunction*)predicate; + if ("esquery" == function_predicate->func_name) { + Status st = check_es_query(*function_predicate); + if (!st.ok()) { + flag = false; + } + } else { flag = false; - break; } + break; + } + default: { + flag = false; + break; + } } if (!flag) { break; @@ -425,7 +423,8 @@ void BooleanQueryBuilder::validate(const std::vector& espredicates } } -void BooleanQueryBuilder::to_query(const std::vector& predicates, rapidjson::Document* root, rapidjson::Value* query) { +void BooleanQueryBuilder::to_query(const std::vector& predicates, + rapidjson::Document* root, rapidjson::Value* query) { if (predicates.size() == 0) { MatchAllQueryBuilder match_all_query; match_all_query.to_json(root, query); @@ -438,5 +437,6 @@ void BooleanQueryBuilder::to_query(const std::vector& predicates, 
BooleanQueryBuilder* inner_bool_query = new BooleanQueryBuilder(or_predicates); bool_query.must(inner_bool_query); } - bool_query.to_json(root, query);} + bool_query.to_json(root, query); } +} // namespace doris diff --git a/be/src/exec/es/es_query_builder.h b/be/src/exec/es/es_query_builder.h index 52222efbab9e20..6fb66bf7884a09 100644 --- a/be/src/exec/es/es_query_builder.h +++ b/be/src/exec/es/es_query_builder.h @@ -17,12 +17,12 @@ #pragma once -#include -#include +#include +#include -#include "rapidjson/document.h" -#include "exec/es/es_predicate.h" #include "common/status.h" +#include "exec/es/es_predicate.h" +#include "rapidjson/document.h" namespace doris { @@ -38,13 +38,13 @@ class ESQueryBuilder : public QueryBuilder { ESQueryBuilder(const std::string& es_query_str); ESQueryBuilder(const ExtFunction& es_query); void to_json(rapidjson::Document* document, rapidjson::Value* query) override; + private: std::string _es_query_str; }; -// process field = value +// process field = value class TermQueryBuilder : public QueryBuilder { - public: TermQueryBuilder(const std::string& field, const std::string& term); TermQueryBuilder(const ExtBinaryPredicate& binary_predicate); @@ -57,10 +57,10 @@ class TermQueryBuilder : public QueryBuilder { // process range predicate field >= value or field < value etc. 
class RangeQueryBuilder : public QueryBuilder { - public: RangeQueryBuilder(const ExtBinaryPredicate& range_predicate); void to_json(rapidjson::Document* document, rapidjson::Value* query) override; + private: std::string _field; std::string _value; @@ -69,10 +69,10 @@ class RangeQueryBuilder : public QueryBuilder { // process in predicate : field in [value1, value2] class TermsInSetQueryBuilder : public QueryBuilder { - public: TermsInSetQueryBuilder(const ExtInPredicate& in_predicate); void to_json(rapidjson::Document* document, rapidjson::Value* query) override; + private: std::string _field; std::vector _values; @@ -80,7 +80,6 @@ class TermsInSetQueryBuilder : public QueryBuilder { // process like predicate : field like "a%b%c_" class WildCardQueryBuilder : public QueryBuilder { - public: WildCardQueryBuilder(const ExtLikePredicate& like_predicate); void to_json(rapidjson::Document* document, rapidjson::Value* query) override; @@ -92,15 +91,12 @@ class WildCardQueryBuilder : public QueryBuilder { // no predicates: all document match class MatchAllQueryBuilder : public QueryBuilder { - public: void to_json(rapidjson::Document* document, rapidjson::Value* query) override; }; - // process like predicate : k1 is null or k1 is not null" class ExistsQueryBuilder : public QueryBuilder { - public: ExistsQueryBuilder(const ExtIsNullPredicate& like_predicate); void to_json(rapidjson::Document* document, rapidjson::Value* query) override; @@ -111,13 +107,13 @@ class ExistsQueryBuilder : public QueryBuilder { // process bool compound query, and play the role of a bridge for transferring predicates to es native query class BooleanQueryBuilder : public QueryBuilder { - public: BooleanQueryBuilder(const std::vector& predicates); BooleanQueryBuilder(); virtual ~BooleanQueryBuilder(); // class method for transfer predicate to es query value, invoker should enclose this value with `query` - static void to_query(const std::vector& predicates, rapidjson::Document* root, 
rapidjson::Value* query); + static void to_query(const std::vector& predicates, rapidjson::Document* root, + rapidjson::Value* query); // validate esquery syntax static Status check_es_query(const ExtFunction& extFunction); // decide which predicate can process @@ -137,4 +133,4 @@ class BooleanQueryBuilder : public QueryBuilder { std::vector _should_clauses; }; -} +} // namespace doris diff --git a/be/src/exec/es/es_scan_reader.cpp b/be/src/exec/es/es_scan_reader.cpp index 81cbc060e5f577..fe4dbedd1c3b84 100644 --- a/be/src/exec/es/es_scan_reader.cpp +++ b/be/src/exec/es/es_scan_reader.cpp @@ -18,8 +18,8 @@ #include "exec/es/es_scan_reader.h" #include -#include #include +#include #include "common/config.h" #include "common/logging.h" @@ -29,26 +29,29 @@ namespace doris { // hits.hits._id used for obtain ES document `_id` -const std::string SOURCE_SCROLL_SEARCH_FILTER_PATH = "filter_path=_scroll_id,hits.hits._source,hits.total,hits.hits._id"; +const std::string SOURCE_SCROLL_SEARCH_FILTER_PATH = + "filter_path=_scroll_id,hits.hits._source,hits.total,hits.hits._id"; // hits.hits._score used for processing field not exists in one batch -const std::string DOCVALUE_SCROLL_SEARCH_FILTER_PATH = "filter_path=_scroll_id,hits.total,hits.hits._score,hits.hits.fields"; +const std::string DOCVALUE_SCROLL_SEARCH_FILTER_PATH = + "filter_path=_scroll_id,hits.total,hits.hits._score,hits.hits.fields"; const std::string REQUEST_SCROLL_PATH = "_scroll"; const std::string REQUEST_PREFERENCE_PREFIX = "&preference=_shards:"; const std::string REQUEST_SEARCH_SCROLL_PATH = "/_search/scroll"; const std::string REQUEST_SEPARATOR = "/"; -ESScanReader::ESScanReader(const std::string& target, const std::map& props, bool doc_value_mode) : - _scroll_keep_alive(config::es_scroll_keepalive), - _http_timeout_ms(config::es_http_timeout_ms), - _doc_value_mode(doc_value_mode) { +ESScanReader::ESScanReader(const std::string& target, + const std::map& props, bool doc_value_mode) + : 
_scroll_keep_alive(config::es_scroll_keepalive), + _http_timeout_ms(config::es_http_timeout_ms), + _doc_value_mode(doc_value_mode) { _target = target; _index = props.at(KEY_INDEX); _type = props.at(KEY_TYPE); if (props.find(KEY_USER_NAME) != props.end()) { _user_name = props.at(KEY_USER_NAME); } - if (props.find(KEY_PASS_WORD) != props.end()){ + if (props.find(KEY_PASS_WORD) != props.end()) { _passwd = props.at(KEY_PASS_WORD); } if (props.find(KEY_SHARD) != props.end()) { @@ -60,35 +63,34 @@ ESScanReader::ESScanReader(const std::string& target, const std::map& scr _network_client.set_content_type("application/json"); _network_client.set_timeout_ms(_http_timeout_ms); RETURN_IF_ERROR(_network_client.execute_post_request( - ESScrollQueryBuilder::build_next_scroll_body(_scroll_id, _scroll_keep_alive), &response)); + ESScrollQueryBuilder::build_next_scroll_body(_scroll_id, _scroll_keep_alive), + &response)); long status = _network_client.get_http_status(); if (status == 404) { - LOG(WARNING) << "request scroll search failure 404[" + LOG(WARNING) << "request scroll search failure 404[" << ", response: " << (response.empty() ? "empty response" : response); return Status::InternalError("No search context found for " + _scroll_id); } if (status != 200) { - LOG(WARNING) << "request scroll search failure[" + LOG(WARNING) << "request scroll search failure[" << "http status" << status << ", response: " << (response.empty() ? "empty response" : response); - return Status::InternalError("request scroll search failure: " + (response.empty() ? "empty response" : response)); + return Status::InternalError("request scroll search failure: " + + (response.empty() ? 
"empty response" : response)); } } scroll_parser.reset(new ScrollParser(_doc_value_mode)); VLOG(1) << "get_next request ES, returned response: " << response; Status status = scroll_parser->parse(response, _exactly_once); - if (!status.ok()){ + if (!status.ok()) { _eos = true; LOG(WARNING) << status.get_error_msg(); return status; @@ -185,11 +189,12 @@ Status ESScanReader::close() { _network_client.set_content_type("application/json"); _network_client.set_timeout_ms(5 * 1000); std::string response; - RETURN_IF_ERROR(_network_client.execute_delete_request(ESScrollQueryBuilder::build_clear_scroll_body(_scroll_id), &response)); + RETURN_IF_ERROR(_network_client.execute_delete_request( + ESScrollQueryBuilder::build_clear_scroll_body(_scroll_id), &response)); if (_network_client.get_http_status() == 200) { return Status::OK(); } else { return Status::InternalError("es_scan_reader delete scroll context failure"); } } -} +} // namespace doris diff --git a/be/src/exec/es/es_scan_reader.h b/be/src/exec/es/es_scan_reader.h index c428770e02a699..6a1f9d4a41066c 100644 --- a/be/src/exec/es/es_scan_reader.h +++ b/be/src/exec/es/es_scan_reader.h @@ -29,7 +29,6 @@ namespace doris { class Status; class ESScanReader { - public: static constexpr const char* KEY_USER_NAME = "user"; static constexpr const char* KEY_PASS_WORD = "password"; @@ -41,16 +40,17 @@ class ESScanReader { static constexpr const char* KEY_BATCH_SIZE = "batch_size"; static constexpr const char* KEY_TERMINATE_AFTER = "limit"; static constexpr const char* KEY_DOC_VALUES_MODE = "doc_values_mode"; - ESScanReader(const std::string& target, const std::map& props, bool doc_value_mode); + ESScanReader(const std::string& target, const std::map& props, + bool doc_value_mode); ~ESScanReader(); // launch the first scroll request, this method will cache the first scroll response, and return the this cached response when invoke get_next Status open(); // invoke get_next to get next batch documents from elasticsearch - Status 
get_next(bool *eos, std::unique_ptr& parser); + Status get_next(bool* eos, std::unique_ptr& parser); // clear scroll context from elasticsearch Status close(); - + private: std::string _target; std::string _user_name; @@ -73,14 +73,14 @@ class ESScanReader { std::string _init_scroll_url; // The result from the above request includes a _scroll_id, which should be passed to the scroll API in order to retrieve the next batch of results // _next_scroll_url for the subsequent scroll request, like /_search/scroll - // POST /_search/scroll + // POST /_search/scroll // { - // "scroll" : "1m", - // "scroll_id" : "DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAAAD4WYm9laVYtZndUQlNsdDcwakFMNjU1QQ==" + // "scroll" : "1m", + // "scroll_id" : "DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAAAD4WYm9laVYtZndUQlNsdDcwakFMNjU1QQ==" // } // Each call to the scroll API returns the next batch of results until there are no more results left to return std::string _next_scroll_url; - + // _search_url used to execute just only one search request to Elasticsearch // _search_url would go into effect when `limit` specified: // select * from es_table limit 10 -> /es_table/doc/_search?terminate_after=10 @@ -95,8 +95,7 @@ class ESScanReader { int _http_timeout_ms; bool _exactly_once; - + bool _doc_value_mode; }; -} - +} // namespace doris diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index 4bf44dc5e53091..0a4c5c7ac8dc0c 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -17,22 +17,21 @@ #include "exec/es/es_scroll_parser.h" -#include #include + +#include #include #include "common/logging.h" #include "common/status.h" -#include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" -#include "rapidjson/rapidjson.h" #include "rapidjson/document.h" +#include "rapidjson/rapidjson.h" #include "rapidjson/stringbuffer.h" #include "rapidjson/writer.h" +#include "runtime/mem_pool.h" +#include "runtime/mem_tracker.h" #include "util/string_parser.hpp" - - 
namespace doris { static const char* FIELD_SCROLL_ID = "_scroll_id"; @@ -41,25 +40,24 @@ static const char* FIELD_INNER_HITS = "hits"; static const char* FIELD_SOURCE = "_source"; static const char* FIELD_ID = "_id"; - // get the original json data type std::string json_type_to_string(rapidjson::Type type) { switch (type) { - case rapidjson::kNumberType: - return "Number"; - case rapidjson::kStringType: - return "Varchar/Char"; - case rapidjson::kArrayType: - return "Array"; - case rapidjson::kObjectType: - return "Object"; - case rapidjson::kNullType: - return "Null Type"; - case rapidjson::kFalseType: - case rapidjson::kTrueType: - return "True/False"; - default: - return "Unknown Type"; + case rapidjson::kNumberType: + return "Number"; + case rapidjson::kStringType: + return "Varchar/Char"; + case rapidjson::kArrayType: + return "Array"; + case rapidjson::kObjectType: + return "Object"; + case rapidjson::kNullType: + return "Null Type"; + case rapidjson::kFalseType: + case rapidjson::kTrueType: + return "True/False"; + default: + return "Unknown Type"; } } @@ -71,75 +69,73 @@ std::string json_value_to_string(const rapidjson::Value& value) { return scratch_buffer.GetString(); } -static const std::string ERROR_INVALID_COL_DATA = "Data source returned inconsistent column data. " - "Expected value of type $0 based on column metadata. This likely indicates a " - "problem with the data source library."; -static const std::string ERROR_MEM_LIMIT_EXCEEDED = "DataSourceScanNode::$0() failed to allocate " - "$1 bytes for $2."; -static const std::string ERROR_COL_DATA_IS_ARRAY = "Data source returned an array for the type $0" - "based on column metadata."; - -#define RETURN_ERROR_IF_COL_IS_ARRAY(col, type) \ - do { \ - if (col.IsArray()) { \ - std::stringstream ss; \ - ss << "Expected value of type: " \ - << type_to_string(type) \ +static const std::string ERROR_INVALID_COL_DATA = + "Data source returned inconsistent column data. 
" + "Expected value of type $0 based on column metadata. This likely indicates a " + "problem with the data source library."; +static const std::string ERROR_MEM_LIMIT_EXCEEDED = + "DataSourceScanNode::$0() failed to allocate " + "$1 bytes for $2."; +static const std::string ERROR_COL_DATA_IS_ARRAY = + "Data source returned an array for the type $0" + "based on column metadata."; + +#define RETURN_ERROR_IF_COL_IS_ARRAY(col, type) \ + do { \ + if (col.IsArray()) { \ + std::stringstream ss; \ + ss << "Expected value of type: " << type_to_string(type) \ << "; but found type: " << json_type_to_string(col.GetType()) \ - << "; Document slice is : " << json_value_to_string(col); \ - return Status::RuntimeError(ss.str()); \ - } \ + << "; Document slice is : " << json_value_to_string(col); \ + return Status::RuntimeError(ss.str()); \ + } \ } while (false) - -#define RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type) \ - do { \ - if (!col.IsString()) { \ - std::stringstream ss; \ - ss << "Expected value of type: " \ - << type_to_string(type) \ - << "; but found type: " << json_type_to_string(col.GetType()) \ +#define RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type) \ + do { \ + if (!col.IsString()) { \ + std::stringstream ss; \ + ss << "Expected value of type: " << type_to_string(type) \ + << "; but found type: " << json_type_to_string(col.GetType()) \ << "; Document source slice is : " << json_value_to_string(col); \ - return Status::RuntimeError(ss.str()); \ - } \ + return Status::RuntimeError(ss.str()); \ + } \ } while (false) -#define RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col, type) \ - do { \ - if (!col.IsNumber()) { \ - std::stringstream ss; \ - ss << "Expected value of type: " \ - << type_to_string(type) \ - << "; but found type: " < -static Status get_int_value(const rapidjson::Value &col, PrimitiveType type, void* slot, bool pure_doc_value) { +static Status get_int_value(const rapidjson::Value& col, PrimitiveType type, void* slot, + bool pure_doc_value) { if (col.IsNumber()) { 
*reinterpret_cast(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64()); return Status::OK(); @@ -151,11 +147,9 @@ static Status get_int_value(const rapidjson::Value &col, PrimitiveType type, voi return Status::OK(); } - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); - StringParser::ParseResult result; const std::string& val = col.GetString(); size_t len = col.GetStringLength(); @@ -173,7 +167,8 @@ static Status get_int_value(const rapidjson::Value &col, PrimitiveType type, voi } template -static Status get_float_value(const rapidjson::Value &col, PrimitiveType type, void* slot, bool pure_doc_value) { +static Status get_float_value(const rapidjson::Value& col, PrimitiveType type, void* slot, + bool pure_doc_value) { DCHECK(sizeof(T) == 4 || sizeof(T) == 8); if (col.IsNumber()) { *reinterpret_cast(slot) = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble()); @@ -198,15 +193,10 @@ static Status get_float_value(const rapidjson::Value &col, PrimitiveType type, v return Status::OK(); } -ScrollParser::ScrollParser(bool doc_value_mode) : - _scroll_id(""), - _size(0), - _line_index(0), - _doc_value_mode(doc_value_mode) { -} +ScrollParser::ScrollParser(bool doc_value_mode) + : _scroll_id(""), _size(0), _line_index(0), _doc_value_mode(doc_value_mode) {} -ScrollParser::~ScrollParser() { -} +ScrollParser::~ScrollParser() {} Status ScrollParser::parse(const std::string& scroll_result, bool exactly_once) { // rely on `_size !=0 ` to determine whether scroll ends @@ -224,16 +214,16 @@ Status ScrollParser::parse(const std::string& scroll_result, bool exactly_once) } if (!exactly_once) { - const rapidjson::Value &scroll_node = _document_node[FIELD_SCROLL_ID]; + const rapidjson::Value& scroll_node = _document_node[FIELD_SCROLL_ID]; _scroll_id = scroll_node.GetString(); } // { hits: { total : 2, "hits" : [ {}, {}, {} ]}} - const rapidjson::Value &outer_hits_node = _document_node[FIELD_HITS]; + const rapidjson::Value& outer_hits_node = 
_document_node[FIELD_HITS]; // if has no inner hits, there has no data in this index if (!outer_hits_node.HasMember(FIELD_INNER_HITS)) { return Status::OK(); } - const rapidjson::Value &inner_hits_node = outer_hits_node[FIELD_INNER_HITS]; + const rapidjson::Value& inner_hits_node = outer_hits_node[FIELD_INNER_HITS]; // this happened just the end of scrolling if (!inner_hits_node.IsArray()) { return Status::OK(); @@ -252,8 +242,9 @@ const std::string& ScrollParser::get_scroll_id() { return _scroll_id; } -Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, - Tuple* tuple, MemPool* tuple_pool, bool* line_eof, const std::map& docvalue_context) { +Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple, + MemPool* tuple_pool, bool* line_eof, + const std::map& docvalue_context) { *line_eof = true; if (_size <= 0 || _line_index >= _size) { @@ -301,8 +292,8 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, size_t len = _id.length(); char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(len)); if (UNLIKELY(buffer == NULL)) { - std::string details = strings::Substitute(ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", - len, "string slot"); + std::string details = strings::Substitute(ERROR_MEM_LIMIT_EXCEEDED, + "MaterializeNextRow", len, "string slot"); return tuple_pool->mem_tracker()->MemLimitExceeded(NULL, details, len); } memcpy(buffer, _id.data(), len); @@ -314,7 +305,8 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, // if pure_doc_value enabled, docvalue_context must contains the key // todo: need move all `pure_docvalue` for every tuple outside fill_tuple // should check pure_docvalue for one table scan not every tuple - const char* col_name = pure_doc_value ? docvalue_context.at(slot_desc->col_name()).c_str() : slot_desc->col_name().c_str(); + const char* col_name = pure_doc_value ? 
docvalue_context.at(slot_desc->col_name()).c_str() + : slot_desc->col_name().c_str(); rapidjson::Value::ConstMemberIterator itr = line.FindMember(col_name); if (itr == line.MemberEnd()) { @@ -323,160 +315,159 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, } tuple->set_not_null(slot_desc->null_indicator_offset()); - const rapidjson::Value &col = line[col_name]; + const rapidjson::Value& col = line[col_name]; void* slot = tuple->get_slot(slot_desc->tuple_offset()); PrimitiveType type = slot_desc->type().type; switch (type) { - case TYPE_CHAR: - case TYPE_VARCHAR: { - // sometimes elasticsearch user post some not-string value to Elasticsearch Index. - // because of reading value from _source, we can not process all json type and then just transfer the value to original string representation - // this may be a tricky, but we can workaround this issue - std::string val; - if (pure_doc_value) { - if (!col[0].IsString()) { - val = json_value_to_string(col[0]); - } else { - val = col[0].GetString(); - } + case TYPE_CHAR: + case TYPE_VARCHAR: { + // sometimes elasticsearch user post some not-string value to Elasticsearch Index. 
+ // because of reading value from _source, we can not process all json type and then just transfer the value to original string representation + // this may be a tricky, but we can workaround this issue + std::string val; + if (pure_doc_value) { + if (!col[0].IsString()) { + val = json_value_to_string(col[0]); } else { - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); - if (!col.IsString()) { - val = json_value_to_string(col); - } else { - val = col.GetString(); - } + val = col[0].GetString(); } - size_t val_size = val.length(); - char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size)); - if (UNLIKELY(buffer == NULL)) { - std::string details = strings::Substitute(ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", - val_size, "string slot"); - return tuple_pool->mem_tracker()->MemLimitExceeded(NULL, details, val_size); + } else { + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + if (!col.IsString()) { + val = json_value_to_string(col); + } else { + val = col.GetString(); } - memcpy(buffer, val.data(), val_size); - reinterpret_cast(slot)->ptr = buffer; - reinterpret_cast(slot)->len = val_size; - break; } + size_t val_size = val.length(); + char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size)); + if (UNLIKELY(buffer == NULL)) { + std::string details = strings::Substitute( + ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", val_size, "string slot"); + return tuple_pool->mem_tracker()->MemLimitExceeded(NULL, details, val_size); + } + memcpy(buffer, val.data(), val_size); + reinterpret_cast(slot)->ptr = buffer; + reinterpret_cast(slot)->len = val_size; + break; + } - case TYPE_TINYINT: { - Status status = get_int_value(col, type, slot, pure_doc_value); - if (!status.ok()) { - return status; - } - break; + case TYPE_TINYINT: { + Status status = get_int_value(col, type, slot, pure_doc_value); + if (!status.ok()) { + return status; } + break; + } - case TYPE_SMALLINT: { - Status status = get_int_value(col, type, slot, pure_doc_value); - if 
(!status.ok()) { - return status; - } - break; + case TYPE_SMALLINT: { + Status status = get_int_value(col, type, slot, pure_doc_value); + if (!status.ok()) { + return status; } + break; + } - case TYPE_INT: { - Status status = get_int_value(col, type, slot, pure_doc_value); - if (!status.ok()) { - return status; - } - break; + case TYPE_INT: { + Status status = get_int_value(col, type, slot, pure_doc_value); + if (!status.ok()) { + return status; } + break; + } - case TYPE_BIGINT: { - Status status = get_int_value(col, type, slot, pure_doc_value); - if (!status.ok()) { - return status; - } - break; + case TYPE_BIGINT: { + Status status = get_int_value(col, type, slot, pure_doc_value); + if (!status.ok()) { + return status; } + break; + } - case TYPE_LARGEINT: { - Status status = get_int_value<__int128>(col, type, slot, pure_doc_value); - if (!status.ok()) { - return status; - } - break; + case TYPE_LARGEINT: { + Status status = get_int_value<__int128>(col, type, slot, pure_doc_value); + if (!status.ok()) { + return status; } + break; + } - case TYPE_DOUBLE: { - Status status = get_float_value(col, type, slot, pure_doc_value); - if (!status.ok()) { - return status; - } + case TYPE_DOUBLE: { + Status status = get_float_value(col, type, slot, pure_doc_value); + if (!status.ok()) { + return status; + } + break; + } + + case TYPE_FLOAT: { + Status status = get_float_value(col, type, slot, pure_doc_value); + if (!status.ok()) { + return status; + } + break; + } + + case TYPE_BOOLEAN: { + if (col.IsBool()) { + *reinterpret_cast(slot) = col.GetBool(); break; } - case TYPE_FLOAT: { - Status status = get_float_value(col, type, slot, pure_doc_value); - if (!status.ok()) { - return status; - } + if (col.IsNumber()) { + *reinterpret_cast(slot) = col.GetInt(); + break; + } + if (pure_doc_value && col.IsArray()) { + *reinterpret_cast(slot) = col[0].GetBool(); break; } - case TYPE_BOOLEAN: { - if (col.IsBool()) { - *reinterpret_cast(slot) = col.GetBool(); - break; - } + 
RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); - if (col.IsNumber()) { - *reinterpret_cast(slot) = col.GetInt(); - break; - } - if (pure_doc_value && col.IsArray()) { - *reinterpret_cast(slot) = col[0].GetBool(); + const std::string& val = col.GetString(); + size_t val_size = col.GetStringLength(); + StringParser::ParseResult result; + bool b = StringParser::string_to_bool(val.c_str(), val_size, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, col, type); + *reinterpret_cast(slot) = b; + break; + } + + case TYPE_DATE: + case TYPE_DATETIME: { + // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source + if (col.IsNumber()) { + // ES process date/datetime field would use millisecond timestamp for index or docvalue + // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms + // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds + RETURN_IF_ERROR(fill_date_slot_with_timestamp(slot, col, type)); + } else if (col.IsArray() && pure_doc_value) { + // this would happened just only when `enable_docvalue_scan = true` + // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose + // a standard date-format for date field as `2020-06-16T00:00:00.000Z` + // At present, we just process this string format date. 
After some PR were merged into Doris, we would impose `epoch_mills` for + // date field's docvalue + if (col[0].IsString()) { + RETURN_IF_ERROR(fill_date_slot_with_strval(slot, col[0], type)); break; } - + // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds + RETURN_IF_ERROR(fill_date_slot_with_timestamp(slot, col[0], type)); + } else { + // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source RETURN_ERROR_IF_COL_IS_ARRAY(col, type); RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); - - const std::string& val = col.GetString(); - size_t val_size = col.GetStringLength(); - StringParser::ParseResult result; - bool b = - StringParser::string_to_bool(val.c_str(), val_size, &result); - RETURN_ERROR_IF_PARSING_FAILED(result, col, type); - *reinterpret_cast(slot) = b; - break; - } - - case TYPE_DATE: - case TYPE_DATETIME: { - // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source - if (col.IsNumber()) { - // ES process date/datetime field would use millisecond timestamp for index or docvalue - // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms - // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds - RETURN_IF_ERROR(fill_date_slot_with_timestamp(slot, col, type)); - } else if (col.IsArray() && pure_doc_value) { - // this would happened just only when `enable_docvalue_scan = true` - // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose - // a standard date-format for date field as `2020-06-16T00:00:00.000Z` - // At present, we just process this string format date. 
After some PR were merged into Doris, we would impose `epoch_mills` for - // date field's docvalue - if (col[0].IsString()) { - RETURN_IF_ERROR(fill_date_slot_with_strval(slot, col[0], type)); - break; - } - // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds - RETURN_IF_ERROR(fill_date_slot_with_timestamp(slot, col[0], type)); - } else { - // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); - RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); - RETURN_IF_ERROR(fill_date_slot_with_strval(slot, col, type)); - } - break; - } - default: { - DCHECK(false); - break; + RETURN_IF_ERROR(fill_date_slot_with_strval(slot, col, type)); } + break; + } + default: { + DCHECK(false); + break; + } } } @@ -484,7 +475,8 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, return Status::OK(); } -Status ScrollParser::fill_date_slot_with_strval(void* slot, const rapidjson::Value& col, PrimitiveType type) { +Status ScrollParser::fill_date_slot_with_strval(void* slot, const rapidjson::Value& col, + PrimitiveType type) { DateTimeValue* ts_slot = reinterpret_cast(slot); const std::string& val = col.GetString(); size_t val_size = col.GetStringLength(); @@ -499,7 +491,8 @@ Status ScrollParser::fill_date_slot_with_strval(void* slot, const rapidjson::Val return Status::OK(); } -Status ScrollParser::fill_date_slot_with_timestamp(void* slot, const rapidjson::Value& col, PrimitiveType type) { +Status ScrollParser::fill_date_slot_with_timestamp(void* slot, const rapidjson::Value& col, + PrimitiveType type) { if (!reinterpret_cast(slot)->from_unixtime(col.GetInt64() / 1000, "+08:00")) { RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type); } @@ -511,4 +504,4 @@ Status ScrollParser::fill_date_slot_with_timestamp(void* slot, const rapidjson:: return Status::OK(); } -} +} // namespace doris diff --git 
a/be/src/exec/es/es_scroll_parser.h b/be/src/exec/es/es_scroll_parser.h index b5804900572995..f2ff855a7cbda0 100644 --- a/be/src/exec/es/es_scroll_parser.h +++ b/be/src/exec/es/es_scroll_parser.h @@ -28,14 +28,13 @@ namespace doris { class Status; class ScrollParser { - public: ScrollParser(bool doc_value_mode); ~ScrollParser(); Status parse(const std::string& scroll_result, bool exactly_once = false); - Status fill_tuple(const TupleDescriptor* _tuple_desc, Tuple* tuple, - MemPool* mem_pool, bool* line_eof, const std::map& docvalue_context); + Status fill_tuple(const TupleDescriptor* _tuple_desc, Tuple* tuple, MemPool* mem_pool, + bool* line_eof, const std::map& docvalue_context); const std::string& get_scroll_id(); int get_size(); @@ -46,21 +45,21 @@ class ScrollParser { // fill date slot with string format date Status fill_date_slot_with_strval(void* slot, const rapidjson::Value& col, PrimitiveType type); // fill date slot with timestamp - Status fill_date_slot_with_timestamp(void* slot, const rapidjson::Value& col, PrimitiveType type); + Status fill_date_slot_with_timestamp(void* slot, const rapidjson::Value& col, + PrimitiveType type); private: - std::string _scroll_id; int _size; rapidjson::SizeType _line_index; rapidjson::Document _document_node; rapidjson::Value _inner_hits_node; - - // todo(milimin): ScrollParser should be divided into two classes: SourceParser and DocValueParser, - // including remove some variables in the current implementation, e.g. pure_doc_value. + + // todo(milimin): ScrollParser should be divided into two classes: SourceParser and DocValueParser, + // including remove some variables in the current implementation, e.g. pure_doc_value. // All above will be done in the DOE refactoring projects. // Current bug fixes minimize the scope of changes to avoid introducing other new bugs. 
bool _doc_value_mode; }; -} +} // namespace doris diff --git a/be/src/exec/es/es_scroll_query.cpp b/be/src/exec/es/es_scroll_query.cpp index 0f42bf2c328dd1..dd86d39fbd2d7c 100644 --- a/be/src/exec/es/es_scroll_query.cpp +++ b/be/src/exec/es/es_scroll_query.cpp @@ -28,17 +28,14 @@ namespace doris { -ESScrollQueryBuilder::ESScrollQueryBuilder() { +ESScrollQueryBuilder::ESScrollQueryBuilder() {} -} - -ESScrollQueryBuilder::~ESScrollQueryBuilder() { - -} +ESScrollQueryBuilder::~ESScrollQueryBuilder() {} -std::string ESScrollQueryBuilder::build_next_scroll_body(const std::string& scroll_id, const std::string& scroll) { +std::string ESScrollQueryBuilder::build_next_scroll_body(const std::string& scroll_id, + const std::string& scroll) { rapidjson::Document scroll_dsl; - rapidjson::Document::AllocatorType &allocator = scroll_dsl.GetAllocator(); + rapidjson::Document::AllocatorType& allocator = scroll_dsl.GetAllocator(); scroll_dsl.SetObject(); rapidjson::Value scroll_id_value(scroll_id.c_str(), allocator); scroll_dsl.AddMember("scroll_id", scroll_id_value, allocator); @@ -51,22 +48,23 @@ std::string ESScrollQueryBuilder::build_next_scroll_body(const std::string& scro } std::string ESScrollQueryBuilder::build_clear_scroll_body(const std::string& scroll_id) { rapidjson::Document delete_scroll_dsl; - rapidjson::Document::AllocatorType &allocator = delete_scroll_dsl.GetAllocator(); + rapidjson::Document::AllocatorType& allocator = delete_scroll_dsl.GetAllocator(); delete_scroll_dsl.SetObject(); rapidjson::Value scroll_id_value(scroll_id.c_str(), allocator); delete_scroll_dsl.AddMember("scroll_id", scroll_id_value, allocator); - rapidjson::StringBuffer buffer; + rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); delete_scroll_dsl.Accept(writer); return buffer.GetString(); } std::string ESScrollQueryBuilder::build(const std::map& properties, - const std::vector& fields, - std::vector& predicates, const std::map& docvalue_context, - bool* doc_value_mode) { + const 
std::vector& fields, + std::vector& predicates, + const std::map& docvalue_context, + bool* doc_value_mode) { rapidjson::Document es_query_dsl; - rapidjson::Document::AllocatorType &allocator = es_query_dsl.GetAllocator(); + rapidjson::Document::AllocatorType& allocator = es_query_dsl.GetAllocator(); es_query_dsl.SetObject(); // generate the filter clause rapidjson::Document scratch_document; @@ -135,7 +133,6 @@ std::string ESScrollQueryBuilder::build(const std::map es_query_dsl.Accept(writer); std::string es_query_dsl_json = buffer.GetString(); LOG(INFO) << "Generated ES queryDSL [ " << es_query_dsl_json << " ]"; - return es_query_dsl_json; - -} + return es_query_dsl_json; } +} // namespace doris diff --git a/be/src/exec/es/es_scroll_query.h b/be/src/exec/es/es_scroll_query.h index f57df642ae824e..c9df1d960c7465 100644 --- a/be/src/exec/es/es_scroll_query.h +++ b/be/src/exec/es/es_scroll_query.h @@ -17,25 +17,27 @@ #pragma once -#include -#include +#include +#include #include "exec/es/es_predicate.h" namespace doris { class ESScrollQueryBuilder { - public: ESScrollQueryBuilder(); ~ESScrollQueryBuilder(); // build the query DSL for elasticsearch - static std::string build_next_scroll_body(const std::string& scroll_id, const std::string& scroll); + static std::string build_next_scroll_body(const std::string& scroll_id, + const std::string& scroll); static std::string build_clear_scroll_body(const std::string& scroll_id); - // @note: predicates should processed before pass it to this method, + // @note: predicates should processed before pass it to this method, // tie breaker for predicate whether can push down es can reference the push-down filters static std::string build(const std::map& properties, - const std::vector& fields, std::vector& predicates, const std::map& docvalue_context, - bool* doc_value_mode); + const std::vector& fields, + std::vector& predicates, + const std::map& docvalue_context, + bool* doc_value_mode); }; -} +} // namespace doris diff --git 
a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index 190b3c4f61494e..d00a4aabb4f61b 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -26,29 +26,26 @@ #include "exec/es/es_scan_reader.h" #include "exec/es/es_scroll_query.h" #include "exprs/expr.h" -#include "runtime/runtime_state.h" -#include "runtime/row_batch.h" #include "runtime/dpp_sink_internal.h" +#include "runtime/row_batch.h" +#include "runtime/runtime_state.h" #include "service/backend_options.h" #include "util/runtime_profile.h" namespace doris { -EsHttpScanNode::EsHttpScanNode( - ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : - ScanNode(pool, tnode, descs), - _tuple_id(tnode.es_scan_node.tuple_id), - _runtime_state(nullptr), - _tuple_desc(nullptr), - _num_running_scanners(0), - _scan_finished(false), - _eos(false), - _max_buffered_batches(1024), - _wait_scanner_timer(nullptr) { -} +EsHttpScanNode::EsHttpScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ScanNode(pool, tnode, descs), + _tuple_id(tnode.es_scan_node.tuple_id), + _runtime_state(nullptr), + _tuple_desc(nullptr), + _num_running_scanners(0), + _scan_finished(false), + _eos(false), + _max_buffered_batches(1024), + _wait_scanner_timer(nullptr) {} -EsHttpScanNode::~EsHttpScanNode() { -} +EsHttpScanNode::~EsHttpScanNode() {} Status EsHttpScanNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(ScanNode::init(tnode)); @@ -91,12 +88,11 @@ Status EsHttpScanNode::prepare(RuntimeState* state) { return Status::OK(); } -// build predicate +// build predicate Status EsHttpScanNode::build_conjuncts_list() { Status status = Status::OK(); for (int i = 0; i < _conjunct_ctxs.size(); ++i) { - EsPredicate* predicate = _pool->add( - new EsPredicate(_conjunct_ctxs[i], _tuple_desc, _pool)); + EsPredicate* predicate = _pool->add(new EsPredicate(_conjunct_ctxs[i], _tuple_desc, _pool)); 
predicate->set_field_context(_fields_context); status = predicate->build_disjuncts_list(); if (status.ok()) { @@ -137,8 +133,8 @@ Status EsHttpScanNode::open(RuntimeState* state) { std::vector list; BooleanQueryBuilder::validate(_predicates, &list); DCHECK(list.size() == _predicate_to_conjunct.size()); - for(int i = list.size() - 1; i >= 0; i--) { - if(!list[i]) { + for (int i = list.size() - 1; i >= 0; i--) { + if (!list[i]) { _predicate_to_conjunct.erase(_predicate_to_conjunct.begin() + i); _predicates.erase(_predicates.begin() + i); } @@ -164,15 +160,15 @@ Status EsHttpScanNode::start_scanners() { _scanners_status.resize(_scan_ranges.size()); for (int i = 0; i < _scan_ranges.size(); i++) { - _scanner_threads.emplace_back(&EsHttpScanNode::scanner_worker, this, i, - _scan_ranges.size(), std::ref(_scanners_status[i])); + _scanner_threads.emplace_back(&EsHttpScanNode::scanner_worker, this, i, _scan_ranges.size(), + std::ref(_scanners_status[i])); } return Status::OK(); } Status EsHttpScanNode::collect_scanners_status() { // NOTE. if open() was called, but set_range() was NOT called for some reason. - // then close() was called. + // then close() was called. // there would cause a core because _scanners_status's iterator was in [0, _scan_ranges) other than [0, _scanners_status) // it is said that the fragment-call-frame is calling scan-node in this way.... // in my options, it's better fixed in fragment-call-frame. e.g. 
call close() according the return value of open() @@ -183,8 +179,7 @@ Status EsHttpScanNode::collect_scanners_status() { return Status::OK(); } -Status EsHttpScanNode::get_next(RuntimeState* state, RowBatch* row_batch, - bool* eos) { +Status EsHttpScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); if (state->is_cancelled()) { std::unique_lock l(_batch_queue_lock); @@ -206,10 +201,8 @@ Status EsHttpScanNode::get_next(RuntimeState* state, RowBatch* row_batch, std::shared_ptr scanner_batch; { std::unique_lock l(_batch_queue_lock); - while (_process_status.ok() && - !_runtime_state->is_cancelled() && - _num_running_scanners > 0 && - _batch_queue.empty()) { + while (_process_status.ok() && !_runtime_state->is_cancelled() && + _num_running_scanners > 0 && _batch_queue.empty()) { SCOPED_TIMER(_wait_scanner_timer); _queue_reader_cond.wait_for(l, std::chrono::seconds(1)); } @@ -263,7 +256,7 @@ Status EsHttpScanNode::get_next(RuntimeState* state, RowBatch* row_batch, for (int i = 0; i < row_batch->num_rows(); ++i) { TupleRow* row = row_batch->get_row(i); VLOG_ROW << "EsHttpScanNode output row: " - << Tuple::to_string(row->get_tuple(0), *_tuple_desc); + << Tuple::to_string(row->get_tuple(0), *_tuple_desc); } } @@ -271,8 +264,6 @@ Status EsHttpScanNode::get_next(RuntimeState* state, RowBatch* row_batch, } Status EsHttpScanNode::close(RuntimeState* state) { - - if (is_closed()) { return Status::OK(); } @@ -307,17 +298,16 @@ void EsHttpScanNode::debug_string(int ident_level, std::stringstream* out) const (*out) << "EsHttpScanNode"; } -Status EsHttpScanNode::scanner_scan( - std::unique_ptr scanner, - const std::vector& conjunct_ctxs, - EsScanCounter* counter) { +Status EsHttpScanNode::scanner_scan(std::unique_ptr scanner, + const std::vector& conjunct_ctxs, + EsScanCounter* counter) { RETURN_IF_ERROR(scanner->open()); bool scanner_eof = false; - + while (!scanner_eof) { // Fill one row batch 
std::shared_ptr row_batch( - new RowBatch(row_desc(), _runtime_state->batch_size(), mem_tracker().get())); + new RowBatch(row_desc(), _runtime_state->batch_size(), mem_tracker().get())); // create new tuple buffer for row_batch MemPool* tuple_pool = row_batch->tuple_data_pool(); @@ -367,8 +357,7 @@ Status EsHttpScanNode::scanner_scan( // Row batch has been filled, push this to the queue if (row_batch->num_rows() > 0) { std::unique_lock l(_batch_queue_lock); - while (_process_status.ok() && - !_scan_finished.load() && + while (_process_status.ok() && !_scan_finished.load() && !_runtime_state->is_cancelled() && _batch_queue.size() >= _max_buffered_batches) { _queue_writer_cond.wait_for(l, std::chrono::seconds(1)); @@ -388,7 +377,7 @@ Status EsHttpScanNode::scanner_scan( // Queue size Must be smaller than _max_buffered_batches _batch_queue.push_back(row_batch); - // Notify reader to + // Notify reader to _queue_reader_cond.notify_one(); } } @@ -398,19 +387,18 @@ Status EsHttpScanNode::scanner_scan( // Prefer to the local host static std::string get_host_port(const std::vector& es_hosts) { - std::string host_port; std::string localhost = BackendOptions::get_localhost(); TNetworkAddress host = es_hosts[0]; for (auto& es_host : es_hosts) { if (es_host.hostname == localhost) { - host = es_host; - break; + host = es_host; + break; } } - host_port = host.hostname; + host_port = host.hostname; host_port += ":"; host_port += std::to_string(host.port); return host_port; @@ -420,15 +408,13 @@ void EsHttpScanNode::scanner_worker(int start_idx, int length, std::promise scanner_expr_ctxs; DCHECK(start_idx < length); - auto status = Expr::clone_if_not_exists(_conjunct_ctxs, _runtime_state, - &scanner_expr_ctxs); + auto status = Expr::clone_if_not_exists(_conjunct_ctxs, _runtime_state, &scanner_expr_ctxs); if (!status.ok()) { LOG(WARNING) << "Clone conjuncts failed."; } EsScanCounter counter; - const TEsScanRange& es_scan_range = - _scan_ranges[start_idx].scan_range.es_scan_range; + 
const TEsScanRange& es_scan_range = _scan_ranges[start_idx].scan_range.es_scan_range; // Collect the information from scan range to properties std::map properties(_properties); @@ -446,22 +432,20 @@ void EsHttpScanNode::scanner_worker(int start_idx, int length, std::promise scanner(new EsHttpScanner( - _runtime_state, runtime_profile(), _tuple_id, - properties, scanner_expr_ctxs, &counter, doc_value_mode)); + std::unique_ptr scanner( + new EsHttpScanner(_runtime_state, runtime_profile(), _tuple_id, properties, + scanner_expr_ctxs, &counter, doc_value_mode)); status = scanner_scan(std::move(scanner), scanner_expr_ctxs, &counter); if (!status.ok()) { - LOG(WARNING) << "Scanner[" << start_idx << "] process failed. status=" - << status.get_error_msg(); + LOG(WARNING) << "Scanner[" << start_idx + << "] process failed. status=" << status.get_error_msg(); } - - // scanner is going to finish + // scanner is going to finish { std::lock_guard l(_batch_queue_lock); if (!status.ok()) { @@ -478,4 +462,4 @@ void EsHttpScanNode::scanner_worker(int start_idx, int length, std::promise #include +#include #include -#include -#include #include +#include #include -#include +#include #include "common/status.h" -#include "exec/scan_node.h" #include "exec/es_http_scanner.h" +#include "exec/scan_node.h" #include "gen_cpp/PaloInternalService_types.h" namespace doris { @@ -78,8 +78,7 @@ class EsHttpScanNode : public ScanNode { // Scan one range Status scanner_scan(std::unique_ptr scanner, - const std::vector& conjunct_ctxs, - EsScanCounter* counter); + const std::vector& conjunct_ctxs, EsScanCounter* counter); Status build_conjuncts_list(); @@ -113,6 +112,6 @@ class EsHttpScanNode : public ScanNode { std::vector _predicate_to_conjunct; }; -} +} // namespace doris #endif diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index 6890010ef8bda1..c5205c51871572 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -17,16 +17,16 @@ 
#include "exec/es_http_scanner.h" -#include #include +#include +#include "exprs/expr.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/mem_tracker.h" #include "runtime/raw_value.h" #include "runtime/runtime_state.h" #include "runtime/tuple.h" -#include "exprs/expr.h" namespace doris { @@ -86,7 +86,8 @@ Status EsHttpScanner::open() { return Status::OK(); } -Status EsHttpScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, const std::map& docvalue_context) { +Status EsHttpScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, + const std::map& docvalue_context) { SCOPED_TIMER(_read_timer); if (_line_eof && _batch_eof) { *eof = true; @@ -104,8 +105,8 @@ Status EsHttpScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, con COUNTER_UPDATE(_rows_read_counter, 1); SCOPED_TIMER(_materialize_timer); - RETURN_IF_ERROR(_es_scroll_parser->fill_tuple( - _tuple_desc, tuple, tuple_pool, &_line_eof, docvalue_context)); + RETURN_IF_ERROR(_es_scroll_parser->fill_tuple(_tuple_desc, tuple, tuple_pool, &_line_eof, + docvalue_context)); if (!_line_eof) { break; } @@ -122,4 +123,4 @@ void EsHttpScanner::close() { Expr::close(_conjunct_ctxs, _state); } -} +} // namespace doris diff --git a/be/src/exec/es_http_scanner.h b/be/src/exec/es_http_scanner.h index 3ffa1eae14a3c3..2bade5ae3f640b 100644 --- a/be/src/exec/es_http_scanner.h +++ b/be/src/exec/es_http_scanner.h @@ -16,17 +16,17 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef BE_EXEC_ES_HTTP_SCANNER_H -#define BE_EXEC_ES_HTTP_SCANNER_H +#ifndef BE_EXEC_ES_HTTP_SCANNER_H +#define BE_EXEC_ES_HTTP_SCANNER_H -#include -#include -#include #include +#include #include +#include +#include -#include "common/status.h" #include "common/global_types.h" +#include "common/status.h" #include "exec/es/es_scan_reader.h" #include "gen_cpp/PlanNodes_types.h" #include "gen_cpp/Types_types.h" @@ -47,38 +47,33 @@ class MemTracker; class RuntimeProfile; struct EsScanCounter { - EsScanCounter() : num_rows_returned(0), num_rows_filtered(0) { - } - + EsScanCounter() : num_rows_returned(0), num_rows_filtered(0) {} + int64_t num_rows_returned; int64_t num_rows_filtered; }; class EsHttpScanner { public: - EsHttpScanner( - RuntimeState* state, - RuntimeProfile* profile, - TupleId tuple_id, - const std::map& properties, - const std::vector& conjunct_ctxs, - EsScanCounter* counter, - bool doc_value_mode); + EsHttpScanner(RuntimeState* state, RuntimeProfile* profile, TupleId tuple_id, + const std::map& properties, + const std::vector& conjunct_ctxs, EsScanCounter* counter, + bool doc_value_mode); ~EsHttpScanner(); Status open(); - Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, const std::map& docvalue_context); + Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, + const std::map& docvalue_context); void close(); private: - RuntimeState* _state; RuntimeProfile* _profile; TupleId _tuple_id; const std::map& _properties; - const std::vector& _conjunct_ctxs; + const std::vector& _conjunct_ctxs; int _next_range; bool _line_eof; @@ -103,6 +98,6 @@ class EsHttpScanner { RuntimeProfile::Counter* _materialize_timer; }; -} +} // namespace doris #endif diff --git a/be/src/exec/es_scan_node.cpp b/be/src/exec/es_scan_node.cpp index abde62493118ff..097879cdd95742 100644 --- a/be/src/exec/es_scan_node.cpp +++ b/be/src/exec/es_scan_node.cpp @@ -17,50 +17,47 @@ #include "es_scan_node.h" -#include -#include #include -#include 
"gen_cpp/PlanNodes_types.h" +#include +#include + +#include "exprs/expr.h" +#include "exprs/expr_context.h" +#include "exprs/in_predicate.h" +#include "exprs/slot_ref.h" #include "gen_cpp/Exprs_types.h" -#include "runtime/runtime_state.h" +#include "gen_cpp/PlanNodes_types.h" +#include "olap/olap_common.h" +#include "olap/utils.h" +#include "runtime/client_cache.h" #include "runtime/row_batch.h" +#include "runtime/runtime_state.h" #include "runtime/string_value.h" #include "runtime/tuple_row.h" -#include "runtime/client_cache.h" -#include "util/runtime_profile.h" -#include "util/debug_util.h" #include "service/backend_options.h" -#include "olap/olap_common.h" -#include "olap/utils.h" -#include "exprs/expr_context.h" -#include "exprs/expr.h" -#include "exprs/in_predicate.h" -#include "exprs/slot_ref.h" +#include "util/debug_util.h" +#include "util/runtime_profile.h" namespace doris { // $0 = column type (e.g. INT) -const std::string ERROR_INVALID_COL_DATA = "Data source returned inconsistent column data. " - "Expected value of type $0 based on column metadata. This likely indicates a " - "problem with the data source library."; -const std::string ERROR_MEM_LIMIT_EXCEEDED = "DataSourceScanNode::$0() failed to allocate " - "$1 bytes for $2."; - -EsScanNode::EsScanNode( - ObjectPool* pool, - const TPlanNode& tnode, - const DescriptorTbl& descs) : - ScanNode(pool, tnode, descs), - _tuple_id(tnode.es_scan_node.tuple_id), - _scan_range_idx(0) { +const std::string ERROR_INVALID_COL_DATA = + "Data source returned inconsistent column data. " + "Expected value of type $0 based on column metadata. 
This likely indicates a " + "problem with the data source library."; +const std::string ERROR_MEM_LIMIT_EXCEEDED = + "DataSourceScanNode::$0() failed to allocate " + "$1 bytes for $2."; + +EsScanNode::EsScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ScanNode(pool, tnode, descs), _tuple_id(tnode.es_scan_node.tuple_id), _scan_range_idx(0) { if (tnode.es_scan_node.__isset.properties) { _properties = tnode.es_scan_node.properties; } } -EsScanNode::~EsScanNode() { -} +EsScanNode::~EsScanNode() {} Status EsScanNode::prepare(RuntimeState* state) { VLOG(1) << "EsScanNode::Prepare"; @@ -122,7 +119,6 @@ Status EsScanNode::open(RuntimeState* state) { return Status::InternalError(ss.str()); } - // TExtOpenParams TExtOpenParams params; params.__set_query_id(state->query_id()); @@ -142,32 +138,31 @@ Status EsScanNode::open(RuntimeState* state) { bool is_success = false; for (int j = 0; j < 2; ++j) { for (auto& es_host : es_scan_range.es_hosts) { - if ((j == 0 && es_host.hostname != localhost) - || (j == 1 && es_host.hostname == localhost)) { + if ((j == 0 && es_host.hostname != localhost) || + (j == 1 && es_host.hostname == localhost)) { continue; } Status status = open_es(es_host, result, params); if (status.ok()) { - is_success = true; - _addresses.push_back(es_host); - _scan_handles.push_back(result.scan_handle); - if (result.__isset.accepted_conjuncts) { - for (int index : result.accepted_conjuncts) { - conjunct_accepted_times[predicate_to_conjunct[index]]++; - } - } - break; + is_success = true; + _addresses.push_back(es_host); + _scan_handles.push_back(result.scan_handle); + if (result.__isset.accepted_conjuncts) { + for (int index : result.accepted_conjuncts) { + conjunct_accepted_times[predicate_to_conjunct[index]]++; + } + } + break; } else if (status.code() == TStatusCode::ES_SHARD_NOT_FOUND) { // if shard not found, try other nodes LOG(WARNING) << "shard not found on es node: " - << ", address=" << es_host - << ", scan_range_idx=" 
<< i << ", try other nodes"; + << ", address=" << es_host << ", scan_range_idx=" << i + << ", try other nodes"; } else { - LOG(WARNING) << "es open error: scan_range_idx=" << i - << ", address=" << es_host + LOG(WARNING) << "es open error: scan_range_idx=" << i << ", address=" << es_host << ", msg=" << status.get_error_msg(); return status; - } + } } if (is_success) { break; @@ -176,8 +171,7 @@ Status EsScanNode::open(RuntimeState* state) { if (!is_success) { std::stringstream ss; - ss << "es open error: scan_range_idx=" << i - << ", can't find shard on any node"; + ss << "es open error: scan_range_idx=" << i << ", can't find shard on any node"; return Status::InternalError(ss.str()); } } @@ -193,7 +187,8 @@ Status EsScanNode::open(RuntimeState* state) { for (int i = 0; i < _conjunct_ctxs.size(); ++i) { if (!check_left_conjuncts(_conjunct_ctxs[i]->root())) { - return Status::InternalError("esquery could only be executed on es, but could not push down to es"); + return Status::InternalError( + "esquery could only be executed on es, but could not push down to es"); } } @@ -211,9 +206,10 @@ Status EsScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) MemPool* tuple_pool = row_batch->tuple_data_pool(); int64_t tuple_buffer_size; uint8_t* tuple_buffer = nullptr; - RETURN_IF_ERROR(row_batch->resize_and_allocate_tuple_buffer(state, &tuple_buffer_size, &tuple_buffer)); + RETURN_IF_ERROR( + row_batch->resize_and_allocate_tuple_buffer(state, &tuple_buffer_size, &tuple_buffer)); Tuple* tuple = reinterpret_cast(tuple_buffer); - + // get batch TExtGetNextResult result; RETURN_IF_ERROR(get_next_from_es(result)); @@ -223,7 +219,7 @@ Status EsScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) VLOG(1) << "begin to convert: scan_range_idx=" << _scan_range_idx << ", num_rows=" << result.rows.num_rows; std::vector& cols = result.rows.cols; - // indexes of the next non-null value in the row batch, per column. 
+ // indexes of the next non-null value in the row batch, per column. std::vector cols_next_val_idx(_tuple_desc->slots().size(), 0); for (int row_idx = 0; row_idx < result.rows.num_rows; row_idx++) { if (reached_limit()) { @@ -235,8 +231,8 @@ Status EsScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) tuple_row->set_tuple(0, tuple); if (ExecNode::eval_conjuncts(_conjunct_ctxs.data(), _conjunct_ctxs.size(), tuple_row)) { row_batch->commit_last_row(); - tuple = reinterpret_cast( - reinterpret_cast(tuple) + _tuple_desc->byte_size()); + tuple = reinterpret_cast(reinterpret_cast(tuple) + + _tuple_desc->byte_size()); ++_num_rows_returned; } } @@ -274,8 +270,7 @@ Status EsScanNode::close(RuntimeState* state) { ExtDataSourceServiceConnection client(client_cache, address, 10000, &status); if (!status.ok()) { LOG(WARNING) << "es create client error: scan_range_idx=" << i - << ", address=" << address - << ", msg=" << status.get_error_msg(); + << ", address=" << address << ", msg=" << status.get_error_msg(); return status; } @@ -287,10 +282,9 @@ Status EsScanNode::close(RuntimeState* state) { RETURN_IF_ERROR(client.reopen()); client->close(result, params); } - } catch (apache::thrift::TException &e) { + } catch (apache::thrift::TException& e) { std::stringstream ss; - ss << "es close error: scan_range_idx=" << i - << ", msg=" << e.what(); + ss << "es close error: scan_range_idx=" << i << ", msg=" << e.what(); LOG(WARNING) << ss.str(); return Status::ThriftRpcError(ss.str()); } @@ -333,8 +327,8 @@ Status EsScanNode::set_scan_ranges(const std::vector& scan_ran return Status::OK(); } -Status EsScanNode::open_es(TNetworkAddress& address, TExtOpenResult& result, TExtOpenParams& params) { - +Status EsScanNode::open_es(TNetworkAddress& address, TExtOpenResult& result, + TExtOpenParams& params) { VLOG(1) << "es open param=" << apache::thrift::ThriftDebugString(params); #ifndef BE_TEST try { @@ -357,7 +351,7 @@ Status EsScanNode::open_es(TNetworkAddress& address, 
TExtOpenResult& result, TEx } VLOG(1) << "es open result=" << apache::thrift::ThriftDebugString(result); return Status(result.status); - } catch (apache::thrift::TException &e) { + } catch (apache::thrift::TException& e) { std::stringstream ss; ss << "es open error: address=" << address << ", msg=" << e.what(); return Status::InternalError(ss.str()); @@ -426,8 +420,7 @@ bool EsScanNode::get_disjuncts(ExprContext* context, Expr* conjunct, TExtLiteral literal; if (!to_ext_literal(context, expr, &literal)) { - VLOG(1) << "get disjuncts fail: can't get literal, node_type=" - << expr->node_type(); + VLOG(1) << "get disjuncts fail: can't get literal, node_type=" << expr->node_type(); return false; } @@ -444,13 +437,12 @@ bool EsScanNode::get_disjuncts(ExprContext* context, Expr* conjunct, disjuncts.push_back(std::move(predicate)); return true; } else if (is_match_func(conjunct)) { - // if this is a function call expr and function name is match, then push + // if this is a function call expr and function name is match, then push // down it to es TExtFunction match_function; match_function.__set_func_name(conjunct->fn().name.function_name); std::vector query_conditions; - TExtLiteral literal; if (!to_ext_literal(context, conjunct->get_child(1), &literal)) { VLOG(1) << "get disjuncts fail: can't get literal, node_type=" @@ -468,8 +460,8 @@ bool EsScanNode::get_disjuncts(ExprContext* context, Expr* conjunct, } else if (TExprNodeType::IN_PRED == conjunct->node_type()) { // the op code maybe FILTER_NEW_IN, it means there is function in list // like col_a in (abs(1)) - if (TExprOpcode::FILTER_IN != conjunct->op() - && TExprOpcode::FILTER_NOT_IN != conjunct->op()) { + if (TExprOpcode::FILTER_IN != conjunct->op() && + TExprOpcode::FILTER_NOT_IN != conjunct->op()) { return false; } TExtInPredicate ext_in_predicate; @@ -502,7 +494,8 @@ bool EsScanNode::get_disjuncts(ExprContext* context, Expr* conjunct, return false; } TExtLiteral literal; - if 
(!to_ext_literal(slot_desc->type().type, const_cast(iter->get_value()), &literal)) { + if (!to_ext_literal(slot_desc->type().type, const_cast(iter->get_value()), + &literal)) { VLOG(1) << "get disjuncts fail: can't get literal, node_type=" << slot_desc->type().type; return false; @@ -536,9 +529,9 @@ bool EsScanNode::get_disjuncts(ExprContext* context, Expr* conjunct, } bool EsScanNode::is_match_func(Expr* conjunct) { - if (TExprNodeType::FUNCTION_CALL == conjunct->node_type() - && conjunct->fn().name.function_name == "esquery") { - return true; + if (TExprNodeType::FUNCTION_CALL == conjunct->node_type() && + conjunct->fn().name.function_name == "esquery") { + return true; } return false; } @@ -681,11 +674,11 @@ Status EsScanNode::get_next_from_es(TExtGetNextResult& result) { params.__set_offset(_offsets[_scan_range_idx]); // getNext - const TNetworkAddress &address = _addresses[_scan_range_idx]; + const TNetworkAddress& address = _addresses[_scan_range_idx]; #ifndef BE_TEST try { Status create_client_status; - ExtDataSourceServiceClientCache *client_cache = _env->extdatasource_client_cache(); + ExtDataSourceServiceClientCache* client_cache = _env->extdatasource_client_cache(); ExtDataSourceServiceConnection client(client_cache, address, 10000, &create_client_status); if (!create_client_status.ok()) { LOG(WARNING) << "es create client error: scan_range_idx=" << _scan_range_idx @@ -699,16 +692,14 @@ Status EsScanNode::get_next_from_es(TExtGetNextResult& result) { client->getNext(result, params); } catch (apache::thrift::transport::TTransportException& e) { std::stringstream ss; - ss << "es get_next error: scan_range_idx=" << _scan_range_idx - << ", msg=" << e.what(); + ss << "es get_next error: scan_range_idx=" << _scan_range_idx << ", msg=" << e.what(); LOG(WARNING) << ss.str(); RETURN_IF_ERROR(client.reopen()); return Status::ThriftRpcError(ss.str()); } - } catch (apache::thrift::TException &e) { + } catch (apache::thrift::TException& e) { std::stringstream ss; - 
ss << "es get_next error: scan_range_idx=" << _scan_range_idx - << ", msg=" << e.what(); + ss << "es get_next error: scan_range_idx=" << _scan_range_idx << ", msg=" << e.what(); LOG(WARNING) << ss.str(); return Status::ThriftRpcError(ss.str()); } @@ -738,8 +729,7 @@ Status EsScanNode::get_next_from_es(TExtGetNextResult& result) { Status get_next_status(result.status); if (!get_next_status.ok()) { LOG(WARNING) << "es get_next error: scan_range_idx=" << _scan_range_idx - << ", address=" << address - << ", msg=" << get_next_status.get_error_msg(); + << ", address=" << address << ", msg=" << get_next_status.get_error_msg(); return get_next_status; } if (!result.__isset.rows || !result.rows.__isset.num_rows) { @@ -756,121 +746,130 @@ Status EsScanNode::get_next_from_es(TExtGetNextResult& result) { Status EsScanNode::materialize_row(MemPool* tuple_pool, Tuple* tuple, const std::vector& cols, int row_idx, std::vector& cols_next_val_idx) { - tuple->init(_tuple_desc->byte_size()); + tuple->init(_tuple_desc->byte_size()); - for (int i = 0; i < _tuple_desc->slots().size(); ++i) { - const SlotDescriptor* slot_desc = _tuple_desc->slots()[i]; + for (int i = 0; i < _tuple_desc->slots().size(); ++i) { + const SlotDescriptor* slot_desc = _tuple_desc->slots()[i]; - if (!slot_desc->is_materialized()) { - continue; - } + if (!slot_desc->is_materialized()) { + continue; + } - void* slot = tuple->get_slot(slot_desc->tuple_offset()); - const TExtColumnData& col = cols[i]; + void* slot = tuple->get_slot(slot_desc->tuple_offset()); + const TExtColumnData& col = cols[i]; - if (col.is_null[row_idx]) { - tuple->set_null(slot_desc->null_indicator_offset()); - continue; - } else { - tuple->set_not_null(slot_desc->null_indicator_offset()); - } - - int val_idx = cols_next_val_idx[i]++; - switch (slot_desc->type().type) { - case TYPE_CHAR: - case TYPE_VARCHAR: { - if (val_idx >= col.string_vals.size()) { - return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "STRING")); - } - 
const string& val = col.string_vals[val_idx]; - size_t val_size = val.size(); - char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size)); - if (UNLIKELY(buffer == NULL)) { - std::string details = strings::Substitute(ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", - val_size, "string slot"); - return tuple_pool->mem_tracker()->MemLimitExceeded(NULL, details, val_size); - } - memcpy(buffer, val.data(), val_size); - reinterpret_cast(slot)->ptr = buffer; - reinterpret_cast(slot)->len = val_size; - break; - } - case TYPE_TINYINT: - if (val_idx >= col.byte_vals.size()) { - return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "TINYINT")); - } - *reinterpret_cast(slot) = col.byte_vals[val_idx]; - break; - case TYPE_SMALLINT: - if (val_idx >= col.short_vals.size()) { - return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "SMALLINT")); - } - *reinterpret_cast(slot) = col.short_vals[val_idx]; - break; - case TYPE_INT: - if (val_idx >= col.int_vals.size()) { - return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "INT")); - } - *reinterpret_cast(slot) = col.int_vals[val_idx]; - break; - case TYPE_BIGINT: - if (val_idx >= col.long_vals.size()) { - return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "BIGINT")); - } - *reinterpret_cast(slot) = col.long_vals[val_idx]; - break; - case TYPE_LARGEINT: - if (val_idx >= col.long_vals.size()) { - return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "LARGEINT")); - } - *reinterpret_cast(slot) = col.long_vals[val_idx]; - break; - case TYPE_DOUBLE: - if (val_idx >= col.double_vals.size()) { - return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "DOUBLE")); - } - *reinterpret_cast(slot) = col.double_vals[val_idx]; - break; - case TYPE_FLOAT: - if (val_idx >= col.double_vals.size()) { - return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "FLOAT")); + if (col.is_null[row_idx]) { + 
tuple->set_null(slot_desc->null_indicator_offset()); + continue; + } else { + tuple->set_not_null(slot_desc->null_indicator_offset()); } - *reinterpret_cast(slot) = col.double_vals[val_idx]; - break; - case TYPE_BOOLEAN: - if (val_idx >= col.bool_vals.size()) { - return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN")); + + int val_idx = cols_next_val_idx[i]++; + switch (slot_desc->type().type) { + case TYPE_CHAR: + case TYPE_VARCHAR: { + if (val_idx >= col.string_vals.size()) { + return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "STRING")); + } + const string& val = col.string_vals[val_idx]; + size_t val_size = val.size(); + char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size)); + if (UNLIKELY(buffer == NULL)) { + std::string details = strings::Substitute( + ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", val_size, "string slot"); + return tuple_pool->mem_tracker()->MemLimitExceeded(NULL, details, val_size); + } + memcpy(buffer, val.data(), val_size); + reinterpret_cast(slot)->ptr = buffer; + reinterpret_cast(slot)->len = val_size; + break; } - *reinterpret_cast(slot) = col.bool_vals[val_idx]; - break; - case TYPE_DATE: - if (val_idx >= col.long_vals.size() || - !reinterpret_cast(slot)->from_unixtime(col.long_vals[val_idx], "+08:00")) { - return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATE")); + case TYPE_TINYINT: + if (val_idx >= col.byte_vals.size()) { + return Status::InternalError( + strings::Substitute(ERROR_INVALID_COL_DATA, "TINYINT")); + } + *reinterpret_cast(slot) = col.byte_vals[val_idx]; + break; + case TYPE_SMALLINT: + if (val_idx >= col.short_vals.size()) { + return Status::InternalError( + strings::Substitute(ERROR_INVALID_COL_DATA, "SMALLINT")); + } + *reinterpret_cast(slot) = col.short_vals[val_idx]; + break; + case TYPE_INT: + if (val_idx >= col.int_vals.size()) { + return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, 
"INT")); + } + *reinterpret_cast(slot) = col.int_vals[val_idx]; + break; + case TYPE_BIGINT: + if (val_idx >= col.long_vals.size()) { + return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "BIGINT")); + } + *reinterpret_cast(slot) = col.long_vals[val_idx]; + break; + case TYPE_LARGEINT: + if (val_idx >= col.long_vals.size()) { + return Status::InternalError( + strings::Substitute(ERROR_INVALID_COL_DATA, "LARGEINT")); + } + *reinterpret_cast(slot) = col.long_vals[val_idx]; + break; + case TYPE_DOUBLE: + if (val_idx >= col.double_vals.size()) { + return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "DOUBLE")); + } + *reinterpret_cast(slot) = col.double_vals[val_idx]; + break; + case TYPE_FLOAT: + if (val_idx >= col.double_vals.size()) { + return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "FLOAT")); + } + *reinterpret_cast(slot) = col.double_vals[val_idx]; + break; + case TYPE_BOOLEAN: + if (val_idx >= col.bool_vals.size()) { + return Status::InternalError( + strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN")); + } + *reinterpret_cast(slot) = col.bool_vals[val_idx]; + break; + case TYPE_DATE: + if (val_idx >= col.long_vals.size() || + !reinterpret_cast(slot)->from_unixtime(col.long_vals[val_idx], + "+08:00")) { + return Status::InternalError( + strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATE")); + } + reinterpret_cast(slot)->cast_to_date(); + break; + case TYPE_DATETIME: { + if (val_idx >= col.long_vals.size() || + !reinterpret_cast(slot)->from_unixtime(col.long_vals[val_idx], + "+08:00")) { + return Status::InternalError( + strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATETIME")); + } + reinterpret_cast(slot)->set_type(TIME_DATETIME); + break; } - reinterpret_cast(slot)->cast_to_date(); - break; - case TYPE_DATETIME: { - if (val_idx >= col.long_vals.size() || - !reinterpret_cast(slot)->from_unixtime(col.long_vals[val_idx], "+08:00")) { - return 
Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATETIME")); + case TYPE_DECIMAL: { + if (val_idx >= col.binary_vals.size()) { + return Status::InternalError( + strings::Substitute(ERROR_INVALID_COL_DATA, "DECIMAL")); + } + const string& val = col.binary_vals[val_idx]; + *reinterpret_cast(slot) = *reinterpret_cast(&val); + break; } - reinterpret_cast(slot)->set_type(TIME_DATETIME); - break; - } - case TYPE_DECIMAL: { - if (val_idx >= col.binary_vals.size()) { - return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "DECIMAL")); + default: + DCHECK(false); } - const string& val = col.binary_vals[val_idx]; - *reinterpret_cast(slot) = *reinterpret_cast(&val); - break; - } - default: - DCHECK(false); } - } - return Status::OK(); + return Status::OK(); } -} +} // namespace doris diff --git a/be/src/exec/es_scan_node.h b/be/src/exec/es_scan_node.h index a4563b59f32bb7..518d18e0611936 100644 --- a/be/src/exec/es_scan_node.h +++ b/be/src/exec/es_scan_node.h @@ -20,13 +20,13 @@ #include #include -#include "runtime/descriptors.h" -#include "runtime/tuple.h" #include "exec/scan_node.h" #include "exprs/slot_ref.h" -#include "runtime/exec_env.h" -#include "gen_cpp/TExtDataSourceService.h" #include "gen_cpp/PaloExternalDataSourceService_types.h" +#include "gen_cpp/TExtDataSourceService.h" +#include "runtime/descriptors.h" +#include "runtime/exec_env.h" +#include "runtime/tuple.h" namespace doris { @@ -51,9 +51,8 @@ class EsScanNode : public ScanNode { private: Status open_es(TNetworkAddress& address, TExtOpenResult& result, TExtOpenParams& params); - Status materialize_row(MemPool* tuple_pool, Tuple* tuple, - const vector& cols, int next_row_idx, - vector& cols_next_val_idx); + Status materialize_row(MemPool* tuple_pool, Tuple* tuple, const vector& cols, + int next_row_idx, vector& cols_next_val_idx); Status get_next_from_es(TExtGetNextResult& result); bool get_disjuncts(ExprContext* context, Expr* conjunct, vector& disjuncts); @@ -86,5 
+85,4 @@ class EsScanNode : public ScanNode { std::vector _pushdown_conjunct_ctxs; }; -} - +} // namespace doris diff --git a/be/src/exec/except_node.cpp b/be/src/exec/except_node.cpp index 18db629881a045..7ac5cd4b2b77da 100644 --- a/be/src/exec/except_node.cpp +++ b/be/src/exec/except_node.cpp @@ -69,7 +69,8 @@ Status ExceptNode::open(RuntimeState* state) { temp_tbl->close(); } // probe - _probe_batch.reset(new RowBatch(child(i)->row_desc(), state->batch_size(), mem_tracker().get())); + _probe_batch.reset( + new RowBatch(child(i)->row_desc(), state->batch_size(), mem_tracker().get())); ScopedTimer probe_timer(_probe_timer); RETURN_IF_ERROR(child(i)->open(state)); eos = false; @@ -84,7 +85,8 @@ Status ExceptNode::open(RuntimeState* state) { if (_hash_tbl_iterator != _hash_tbl->end()) { _hash_tbl_iterator.set_matched(); VLOG_ROW << "probe matched: " - << get_row_output_string(_hash_tbl_iterator.get_row(), child(0)->row_desc()); + << get_row_output_string(_hash_tbl_iterator.get_row(), + child(0)->row_desc()); } } _probe_batch->reset(); diff --git a/be/src/exec/exchange_node.cpp b/be/src/exec/exchange_node.cpp index 39a688ab808b01..94fe0a0bfa8a8b 100644 --- a/be/src/exec/exchange_node.cpp +++ b/be/src/exec/exchange_node.cpp @@ -19,31 +19,28 @@ #include +#include "gen_cpp/PlanNodes_types.h" #include "runtime/data_stream_mgr.h" #include "runtime/data_stream_recvr.h" #include "runtime/exec_env.h" -#include "runtime/runtime_state.h" #include "runtime/row_batch.h" +#include "runtime/runtime_state.h" #include "util/runtime_profile.h" -#include "gen_cpp/PlanNodes_types.h" namespace doris { -ExchangeNode::ExchangeNode( - ObjectPool* pool, - const TPlanNode& tnode, - const DescriptorTbl& descs) : - ExecNode(pool, tnode, descs), - _num_senders(0), - _stream_recvr(NULL), - _input_row_desc(descs, tnode.exchange_node.input_row_tuples, - std::vector( - tnode.nullable_tuples.begin(), - tnode.nullable_tuples.begin() + tnode.exchange_node.input_row_tuples.size())), - _next_row_idx(0), 
- _is_merging(tnode.exchange_node.__isset.sort_info), - _offset(tnode.exchange_node.__isset.offset ? tnode.exchange_node.offset : 0), - _num_rows_skipped(0) { +ExchangeNode::ExchangeNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), + _num_senders(0), + _stream_recvr(NULL), + _input_row_desc(descs, tnode.exchange_node.input_row_tuples, + std::vector(tnode.nullable_tuples.begin(), + tnode.nullable_tuples.begin() + + tnode.exchange_node.input_row_tuples.size())), + _next_row_idx(0), + _is_merging(tnode.exchange_node.__isset.sort_info), + _offset(tnode.exchange_node.__isset.offset ? tnode.exchange_node.offset : 0), + _num_rows_skipped(0) { DCHECK_GE(_offset, 0); DCHECK(_is_merging || (_offset == 0)); } @@ -67,13 +64,12 @@ Status ExchangeNode::prepare(RuntimeState* state) { DCHECK_GT(_num_senders, 0); _sub_plan_query_statistics_recvr.reset(new QueryStatisticsRecvr()); _stream_recvr = state->exec_env()->stream_mgr()->create_recvr( - state, _input_row_desc, - state->fragment_instance_id(), _id, - _num_senders, config::exchg_node_buffer_size_bytes, - _runtime_profile.get(), _is_merging, _sub_plan_query_statistics_recvr); + state, _input_row_desc, state->fragment_instance_id(), _id, _num_senders, + config::exchg_node_buffer_size_bytes, _runtime_profile.get(), _is_merging, + _sub_plan_query_statistics_recvr); if (_is_merging) { - RETURN_IF_ERROR(_sort_exec_exprs.prepare( - state, _row_descriptor, _row_descriptor, expr_mem_tracker())); + RETURN_IF_ERROR(_sort_exec_exprs.prepare(state, _row_descriptor, _row_descriptor, + expr_mem_tracker())); // AddExprCtxsToFree(_sort_exec_exprs); } return Status::OK(); @@ -122,9 +118,9 @@ Status ExchangeNode::fill_input_row_batch(RuntimeState* state) { ret_status = _stream_recvr->get_batch(&_input_batch); } VLOG_FILE << "exch: has batch=" << (_input_batch == NULL ? "false" : "true") - << " #rows=" << (_input_batch != NULL ? 
_input_batch->num_rows() : 0) - << " is_cancelled=" << (ret_status.is_cancelled() ? "true" : "false") - << " instance_id=" << state->fragment_instance_id(); + << " #rows=" << (_input_batch != NULL ? _input_batch->num_rows() : 0) + << " is_cancelled=" << (ret_status.is_cancelled() ? "true" : "false") + << " instance_id=" << state->fragment_instance_id(); return ret_status; } @@ -152,8 +148,8 @@ Status ExchangeNode::get_next(RuntimeState* state, RowBatch* output_batch, bool* SCOPED_TIMER(_convert_row_batch_timer); RETURN_IF_CANCELLED(state); // copy rows until we hit the limit/capacity or until we exhaust _input_batch - while (!reached_limit() && !output_batch->at_capacity() - && _input_batch != NULL && _next_row_idx < _input_batch->capacity()) { + while (!reached_limit() && !output_batch->at_capacity() && _input_batch != NULL && + _next_row_idx < _input_batch->capacity()) { TupleRow* src = _input_batch->get_row(_next_row_idx); if (ExecNode::eval_conjuncts(ctxs, num_ctxs, src)) { @@ -251,4 +247,4 @@ void ExchangeNode::debug_string(int indentation_level, std::stringstream* out) c *out << ")"; } -} +} // namespace doris diff --git a/be/src/exec/exchange_node.h b/be/src/exec/exchange_node.h index 18df78283562b2..7551d88699806f 100644 --- a/be/src/exec/exchange_node.h +++ b/be/src/exec/exchange_node.h @@ -19,6 +19,7 @@ #define DORIS_BE_SRC_QUERY_EXEC_EXCHANGE_NODE_H #include + #include "exec/exec_node.h" #include "exec/sort_exec_exprs.h" #include "runtime/data_stream_recvr.h" @@ -54,15 +55,12 @@ class ExchangeNode : public ExecNode { // the number of senders needs to be set after the c'tor, because it's not // recorded in TPlanNode, and before calling prepare() - void set_num_senders(int num_senders) { - _num_senders = num_senders; - } + void set_num_senders(int num_senders) { _num_senders = num_senders; } protected: virtual void debug_string(int indentation_level, std::stringstream* out) const; private: - // Implements GetNext() for the case where _is_merging is true. 
Delegates the GetNext() // call to the underlying DataStreamRecvr. Status get_next_merging(RuntimeState* state, RowBatch* output_batch, bool* eos); @@ -71,7 +69,7 @@ class ExchangeNode : public ExecNode { // Only used when _is_merging is false. Status fill_input_row_batch(RuntimeState* state); - int _num_senders; // needed for _stream_recvr construction + int _num_senders; // needed for _stream_recvr construction // created in prepare() and owned by the RuntimeState boost::shared_ptr _stream_recvr; @@ -110,12 +108,12 @@ class ExchangeNode : public ExecNode { // Number of rows skipped so far. int64_t _num_rows_skipped; - // Sub plan query statistics receiver. It is shared with DataStreamRecvr and will be + // Sub plan query statistics receiver. It is shared with DataStreamRecvr and will be // called in two different threads. When ExchangeNode is destructed, this may be accessed // by recvr thread in DataStreamMgr's transmit_data. std::shared_ptr _sub_plan_query_statistics_recvr; }; -}; +}; // namespace doris #endif diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index 1f9531343a8942..81c55649b72178 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -17,10 +17,11 @@ #include "exec/exec_node.h" -#include #include #include +#include + #include "common/object_pool.h" #include "common/status.h" #include "exec/aggregation_node.h" @@ -39,9 +40,9 @@ #include "exec/merge_join_node.h" #include "exec/merge_node.h" #include "exec/mysql_scan_node.h" +#include "exec/odbc_scan_node.h" #include "exec/olap_rewrite_node.h" #include "exec/olap_scan_node.h" -#include "exec/odbc_scan_node.h" #include "exec/partitioned_aggregation_node.h" #include "exec/repeat_node.h" #include "exec/schema_scan_node.h" @@ -50,8 +51,9 @@ #include "exec/topn_node.h" #include "exec/union_node.h" #include "exprs/expr_context.h" -#include "runtime/exec_env.h" +#include "odbc_scan_node.h" #include "runtime/descriptors.h" +#include "runtime/exec_env.h" #include 
"runtime/initial_reservations.h" #include "runtime/mem_pool.h" #include "runtime/mem_tracker.h" @@ -59,7 +61,6 @@ #include "runtime/runtime_state.h" #include "util/debug_util.h" #include "util/runtime_profile.h" -#include "odbc_scan_node.h" namespace doris { @@ -69,74 +70,70 @@ int ExecNode::get_node_id_from_profile(RuntimeProfile* p) { return p->metadata(); } -ExecNode::RowBatchQueue::RowBatchQueue(int max_batches) : - BlockingQueue(max_batches) { -} +ExecNode::RowBatchQueue::RowBatchQueue(int max_batches) : BlockingQueue(max_batches) {} ExecNode::RowBatchQueue::~RowBatchQueue() { DCHECK(cleanup_queue_.empty()); } void ExecNode::RowBatchQueue::AddBatch(RowBatch* batch) { - if (!blocking_put(batch)) { - std::lock_guard lock(lock_); - cleanup_queue_.push_back(batch); - } + if (!blocking_put(batch)) { + std::lock_guard lock(lock_); + cleanup_queue_.push_back(batch); + } } -bool ExecNode::RowBatchQueue::AddBatchWithTimeout(RowBatch* batch, - int64_t timeout_micros) { +bool ExecNode::RowBatchQueue::AddBatchWithTimeout(RowBatch* batch, int64_t timeout_micros) { // return blocking_put_with_timeout(batch, timeout_micros); return blocking_put(batch); } RowBatch* ExecNode::RowBatchQueue::GetBatch() { - RowBatch* result = NULL; - if (blocking_get(&result)) return result; - return NULL; + RowBatch* result = NULL; + if (blocking_get(&result)) return result; + return NULL; } int ExecNode::RowBatchQueue::Cleanup() { - int num_io_buffers = 0; - - // RowBatch* batch = NULL; - // while ((batch = GetBatch()) != NULL) { - // num_io_buffers += batch->num_io_buffers(); - // delete batch; - // } - - lock_guard l(lock_); - for (std::list::iterator it = cleanup_queue_.begin(); - it != cleanup_queue_.end(); ++it) { - // num_io_buffers += (*it)->num_io_buffers(); - delete *it; - } - cleanup_queue_.clear(); - return num_io_buffers; -} - -ExecNode::ExecNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : - _id(tnode.node_id), - _type(tnode.node_type), - _pool(pool), - 
_tuple_ids(tnode.row_tuples), - _row_descriptor(descs, tnode.row_tuples, tnode.nullable_tuples), - _resource_profile(tnode.resource_profile), - _debug_phase(TExecNodePhase::INVALID), - _debug_action(TDebugAction::WAIT), - _limit(tnode.limit), - _num_rows_returned(0), - _rows_returned_counter(NULL), - _rows_returned_rate(NULL), - _memory_used_counter(NULL), - _is_closed(false){ + int num_io_buffers = 0; + + // RowBatch* batch = NULL; + // while ((batch = GetBatch()) != NULL) { + // num_io_buffers += batch->num_io_buffers(); + // delete batch; + // } + + lock_guard l(lock_); + for (std::list::iterator it = cleanup_queue_.begin(); it != cleanup_queue_.end(); + ++it) { + // num_io_buffers += (*it)->num_io_buffers(); + delete *it; + } + cleanup_queue_.clear(); + return num_io_buffers; +} + +ExecNode::ExecNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : _id(tnode.node_id), + _type(tnode.node_type), + _pool(pool), + _tuple_ids(tnode.row_tuples), + _row_descriptor(descs, tnode.row_tuples, tnode.nullable_tuples), + _resource_profile(tnode.resource_profile), + _debug_phase(TExecNodePhase::INVALID), + _debug_action(TDebugAction::WAIT), + _limit(tnode.limit), + _num_rows_returned(0), + _rows_returned_counter(NULL), + _rows_returned_rate(NULL), + _memory_used_counter(NULL), + _is_closed(false) { init_runtime_profile(print_plan_node_type(tnode.node_type)); } ExecNode::~ExecNode() {} -void ExecNode::push_down_predicate( - RuntimeState* state, std::list* expr_ctxs) { +void ExecNode::push_down_predicate(RuntimeState* state, std::list* expr_ctxs) { if (_type != TPlanNodeType::AGGREGATION_NODE) { for (int i = 0; i < _children.size(); ++i) { _children[i]->push_down_predicate(state, expr_ctxs); @@ -162,23 +159,22 @@ void ExecNode::push_down_predicate( } Status ExecNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR( - Expr::create_expr_trees(_pool, tnode.conjuncts, &_conjunct_ctxs)); + RETURN_IF_ERROR(Expr::create_expr_trees(_pool, 
tnode.conjuncts, &_conjunct_ctxs)); return Status::OK(); } Status ExecNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::PREPARE)); DCHECK(_runtime_profile.get() != NULL); - _rows_returned_counter = - ADD_COUNTER(_runtime_profile, "RowsReturned", TUnit::UNIT); + _rows_returned_counter = ADD_COUNTER(_runtime_profile, "RowsReturned", TUnit::UNIT); _rows_returned_rate = runtime_profile()->add_derived_counter( - ROW_THROUGHPUT_COUNTER, TUnit::UNIT_PER_SECOND, - boost::bind(&RuntimeProfile::units_per_second, - _rows_returned_counter, - runtime_profile()->total_time_counter()), - ""); - _mem_tracker = MemTracker::CreateTracker(_runtime_profile.get(), -1, "ExecNode "+ _runtime_profile->name(), state->instance_mem_tracker()); + ROW_THROUGHPUT_COUNTER, TUnit::UNIT_PER_SECOND, + boost::bind(&RuntimeProfile::units_per_second, _rows_returned_counter, + runtime_profile()->total_time_counter()), + ""); + _mem_tracker = MemTracker::CreateTracker(_runtime_profile.get(), -1, + "ExecNode " + _runtime_profile->name(), + state->instance_mem_tracker()); _expr_mem_tracker = MemTracker::CreateTracker(-1, "ExecNode Exprs", _mem_tracker); _expr_mem_pool.reset(new MemPool(_expr_mem_tracker.get())); // TODO chenhao @@ -198,12 +194,11 @@ Status ExecNode::open(RuntimeState* state) { return Expr::open(_conjunct_ctxs, state); } - Status ExecNode::reset(RuntimeState* state) { _num_rows_returned = 0; for (int i = 0; i < _children.size(); ++i) { RETURN_IF_ERROR(_children[i]->reset(state)); - } + } return Status::OK(); } @@ -211,7 +206,7 @@ Status ExecNode::collect_query_statistics(QueryStatistics* statistics) { DCHECK(statistics != nullptr); for (auto child_node : _children) { child_node->collect_query_statistics(statistics); - } + } return Status::OK(); } @@ -242,8 +237,8 @@ Status ExecNode::close(RuntimeState* state) { if (_buffer_pool_client.is_registered()) { VLOG_FILE << _id << " returning reservation " << _resource_profile.min_reservation; - 
state->initial_reservations()->Return( - &_buffer_pool_client, _resource_profile.min_reservation); + state->initial_reservations()->Return(&_buffer_pool_client, + _resource_profile.min_reservation); state->exec_env()->buffer_pool()->DeregisterClient(&_buffer_pool_client); } @@ -264,7 +259,7 @@ void ExecNode::add_runtime_exec_option(const std::string& str) { } Status ExecNode::create_tree(RuntimeState* state, ObjectPool* pool, const TPlan& plan, - const DescriptorTbl& descs, ExecNode** root) { + const DescriptorTbl& descs, ExecNode** root) { if (plan.nodes.size() == 0) { *root = NULL; return Status::OK(); @@ -276,20 +271,16 @@ Status ExecNode::create_tree(RuntimeState* state, ObjectPool* pool, const TPlan& if (node_idx + 1 != plan.nodes.size()) { // TODO: print thrift msg for diagnostic purposes. return Status::InternalError( - "Plan tree only partially reconstructed. Not all thrift nodes were used."); + "Plan tree only partially reconstructed. Not all thrift nodes were used."); } return Status::OK(); } -Status ExecNode::create_tree_helper( - RuntimeState* state, - ObjectPool* pool, - const std::vector& tnodes, - const DescriptorTbl& descs, - ExecNode* parent, - int* node_idx, - ExecNode** root) { +Status ExecNode::create_tree_helper(RuntimeState* state, ObjectPool* pool, + const std::vector& tnodes, + const DescriptorTbl& descs, ExecNode* parent, int* node_idx, + ExecNode** root) { // propagate error case if (*node_idx >= tnodes.size()) { // TODO: print thrift msg @@ -336,7 +327,7 @@ Status ExecNode::create_tree_helper( } Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs, ExecNode** node) { + const DescriptorTbl& descs, ExecNode** node) { std::stringstream error_msg; VLOG(2) << "tnode:\n" << apache::thrift::ThriftDebugString(tnode); @@ -350,7 +341,8 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN *node = pool->add(new MysqlScanNode(pool, tnode, descs)); return 
Status::OK(); #else - return Status::InternalError("Don't support MySQL table, you should rebuild Doris with WITH_MYSQL option ON"); + return Status::InternalError( + "Don't support MySQL table, you should rebuild Doris with WITH_MYSQL option ON"); #endif case TPlanNodeType::ODBC_SCAN_NODE: *node = pool->add(new OdbcScanNode(pool, tnode, descs)); @@ -449,7 +441,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN default: map::const_iterator i = - _TPlanNodeType_VALUES_TO_NAMES.find(tnode.node_type); + _TPlanNodeType_VALUES_TO_NAMES.find(tnode.node_type); const char* str = "unknown node type"; if (i != _TPlanNodeType_VALUES_TO_NAMES.end()) { @@ -463,9 +455,8 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN return Status::OK(); } -void ExecNode::set_debug_options( - int node_id, TExecNodePhase::type phase, TDebugAction::type action, - ExecNode* root) { +void ExecNode::set_debug_options(int node_id, TExecNodePhase::type phase, TDebugAction::type action, + ExecNode* root) { if (root->_id == node_id) { root->_debug_phase = phase; root->_debug_action = action; @@ -577,22 +568,23 @@ Status ExecNode::claim_buffer_reservation(RuntimeState* state) { BufferPool* buffer_pool = ExecEnv::GetInstance()->buffer_pool(); // Check the minimum buffer size in case the minimum buffer size used by the planner // doesn't match this backend's. 
- std::stringstream ss; + std::stringstream ss; if (_resource_profile.__isset.spillable_buffer_size && _resource_profile.spillable_buffer_size < buffer_pool->min_buffer_len()) { - ss << "Spillable buffer size for node " << _id << " of " << _resource_profile.spillable_buffer_size + ss << "Spillable buffer size for node " << _id << " of " + << _resource_profile.spillable_buffer_size << "bytes is less than the minimum buffer pool buffer size of " - << buffer_pool->min_buffer_len() << "bytes"; + << buffer_pool->min_buffer_len() << "bytes"; return Status::InternalError(ss.str()); - } - + } + ss << print_plan_node_type(_type) << " id=" << _id << " ptr=" << this; RETURN_IF_ERROR(buffer_pool->RegisterClient(ss.str(), state->instance_buffer_reservation(), mem_tracker(), buffer_pool->GetSystemBytesLimit(), runtime_profile(), &_buffer_pool_client)); state->initial_reservations()->Claim(&_buffer_pool_client, _resource_profile.min_reservation); -/* + /* if (debug_action_ == TDebugAction::SET_DENY_RESERVATION_PROBABILITY && (debug_phase_ == TExecNodePhase::PREPARE || debug_phase_ == TExecNodePhase::OPEN)) { // We may not have been able to enable the debug action at the start of Prepare() or @@ -600,12 +592,12 @@ Status ExecNode::claim_buffer_reservation(RuntimeState* state) { // effective. 
RETURN_IF_ERROR(EnableDenyReservationDebugAction()); } -*/ +*/ return Status::OK(); } Status ExecNode::release_unused_reservation() { - return _buffer_pool_client.DecreaseReservationTo(_resource_profile.min_reservation); + return _buffer_pool_client.DecreaseReservationTo(_resource_profile.min_reservation); } /* Status ExecNode::enable_deny_reservation_debug_action() { @@ -626,9 +618,9 @@ Status ExecNode::enable_deny_reservation_debug_action() { */ Status ExecNode::QueryMaintenance(RuntimeState* state, const std::string& msg) { - // TODO chenhao , when introduce latest AnalyticEvalNode open it - // ScalarExprEvaluator::FreeLocalAllocations(evals_to_free_); - return state->check_query_state(msg); + // TODO chenhao , when introduce latest AnalyticEvalNode open it + // ScalarExprEvaluator::FreeLocalAllocations(evals_to_free_); + return state->check_query_state(msg); } -} +} // namespace doris diff --git a/be/src/exec/exec_node.h b/be/src/exec/exec_node.h index 03c3eca4d75ee7..f0decce8f37dec 100644 --- a/be/src/exec/exec_node.h +++ b/be/src/exec/exec_node.h @@ -18,19 +18,19 @@ #ifndef DORIS_BE_SRC_QUERY_EXEC_EXEC_NODE_H #define DORIS_BE_SRC_QUERY_EXEC_EXEC_NODE_H +#include #include #include -#include #include "common/status.h" #include "gen_cpp/PlanNodes_types.h" +#include "runtime/bufferpool/buffer_pool.h" #include "runtime/descriptors.h" #include "runtime/mem_pool.h" -#include "util/runtime_profile.h" -#include "util/blocking_queue.hpp" -#include "runtime/bufferpool/buffer_pool.h" #include "runtime/query_statistics.h" #include "service/backend_options.h" +#include "util/blocking_queue.hpp" +#include "util/runtime_profile.h" #include "util/uid_util.h" // for print_id namespace doris { @@ -134,11 +134,11 @@ class ExecNode { // traversal. All nodes are placed in pool. // Returns error if 'plan' is corrupted, otherwise success. 
static Status create_tree(RuntimeState* state, ObjectPool* pool, const TPlan& plan, - const DescriptorTbl& descs, ExecNode** root); + const DescriptorTbl& descs, ExecNode** root); // Set debug action for node with given id in 'tree' static void set_debug_options(int node_id, TExecNodePhase::type phase, - TDebugAction::type action, ExecNode* tree); + TDebugAction::type action, ExecNode* tree); // Collect all nodes of given 'node_type' that are part of this subtree, and return in // 'nodes'. @@ -172,50 +172,24 @@ class ExecNode { // out: Stream to accumulate debug string. virtual void debug_string(int indentation_level, std::stringstream* out) const; - const std::vector& conjunct_ctxs() const { - return _conjunct_ctxs; - } - - int id() const { - return _id; - } - TPlanNodeType::type type() const { - return _type; - } - const RowDescriptor& row_desc() const { - return _row_descriptor; - } - int64_t rows_returned() const { - return _num_rows_returned; - } - int64_t limit() const { - return _limit; - } - bool reached_limit() { - return _limit != -1 && _num_rows_returned >= _limit; - } - const std::vector& get_tuple_ids() const { - return _tuple_ids; - } - - RuntimeProfile* runtime_profile() { - return _runtime_profile.get(); - } - RuntimeProfile::Counter* memory_used_counter() const { - return _memory_used_counter; - } - - std::shared_ptr mem_tracker() const { - return _mem_tracker; - } - - std::shared_ptr expr_mem_tracker() const { - return _expr_mem_tracker; - } - - MemPool* expr_mem_pool() { - return _expr_mem_pool.get(); - } + const std::vector& conjunct_ctxs() const { return _conjunct_ctxs; } + + int id() const { return _id; } + TPlanNodeType::type type() const { return _type; } + const RowDescriptor& row_desc() const { return _row_descriptor; } + int64_t rows_returned() const { return _num_rows_returned; } + int64_t limit() const { return _limit; } + bool reached_limit() { return _limit != -1 && _num_rows_returned >= _limit; } + const std::vector& get_tuple_ids() 
const { return _tuple_ids; } + + RuntimeProfile* runtime_profile() { return _runtime_profile.get(); } + RuntimeProfile::Counter* memory_used_counter() const { return _memory_used_counter; } + + std::shared_ptr mem_tracker() const { return _mem_tracker; } + + std::shared_ptr expr_mem_tracker() const { return _expr_mem_tracker; } + + MemPool* expr_mem_pool() { return _expr_mem_pool.get(); } // Extract node id from p->name(). static int get_node_id_from_profile(RuntimeProfile* p); @@ -253,7 +227,7 @@ class ExecNode { /// be cleaned up during Close(). /// All functions are thread safe. class RowBatchQueue : public BlockingQueue { - public: + public: /// max_batches is the maximum number of row batches that can be queued. /// When the queue is full, producers will block. RowBatchQueue(int max_batches); @@ -279,7 +253,7 @@ class ExecNode { /// Returns the number of io buffers that were released (for debug tracking) int Cleanup(); - private: + private: /// Lock protecting cleanup_queue_ // SpinLock lock_; // TODO(dhc): need to modify spinlock @@ -289,7 +263,7 @@ class ExecNode { std::list cleanup_queue_; }; - int _id; // unique w/in single plan tree + int _id; // unique w/in single plan tree TPlanNodeType::type _type; ObjectPool* _pool; std::vector _conjuncts; @@ -307,14 +281,14 @@ class ExecNode { TExecNodePhase::type _debug_phase; TDebugAction::type _debug_action; - int64_t _limit; // -1: no limit + int64_t _limit; // -1: no limit int64_t _num_rows_returned; boost::scoped_ptr _runtime_profile; - + /// Account for peak memory used by this node std::shared_ptr _mem_tracker; - + /// MemTracker used by 'expr_mem_pool_'. std::shared_ptr _expr_mem_tracker; @@ -332,20 +306,16 @@ class ExecNode { // "Codegen Enabled" boost::mutex _exec_options_lock; std::string _runtime_exec_options; - + /// Buffer pool client for this node. Initialized with the node's minimum reservation /// in ClaimBufferReservation(). 
After initialization, the client must hold onto at /// least the minimum reservation so that it can be returned to the initial /// reservations pool in Close(). BufferPool::ClientHandle _buffer_pool_client; - ExecNode* child(int i) { - return _children[i]; - } + ExecNode* child(int i) { return _children[i]; } - bool is_closed() const { - return _is_closed; - } + bool is_closed() const { return _is_closed; } // TODO(zc) /// Pointer to the containing SubplanNode or NULL if not inside a subplan. @@ -358,14 +328,14 @@ class ExecNode { // Create a single exec node derived from thrift node; place exec node in 'pool'. static Status create_node(RuntimeState* state, ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs, ExecNode** node); + const DescriptorTbl& descs, ExecNode** node); - static Status create_tree_helper(RuntimeState* state, ObjectPool* pool, const std::vector& tnodes, - const DescriptorTbl& descs, ExecNode* parent, int* node_idx, ExecNode** root); + static Status create_tree_helper(RuntimeState* state, ObjectPool* pool, + const std::vector& tnodes, + const DescriptorTbl& descs, ExecNode* parent, int* node_idx, + ExecNode** root); - virtual bool is_scan_node() const { - return false; - } + virtual bool is_scan_node() const { return false; } void init_runtime_profile(const std::string& name); @@ -389,26 +359,25 @@ class ExecNode { bool _is_closed; }; -#define LIMIT_EXCEEDED(tracker, state, msg) \ - do { \ - stringstream str; \ - str << "Memory exceed limit. " << msg << " "; \ - str << "Backend: " << BackendOptions::get_localhost() << ", "; \ - str << "fragment: " << print_id(state->fragment_instance_id()) << " "; \ +#define LIMIT_EXCEEDED(tracker, state, msg) \ + do { \ + stringstream str; \ + str << "Memory exceed limit. 
" << msg << " "; \ + str << "Backend: " << BackendOptions::get_localhost() << ", "; \ + str << "fragment: " << print_id(state->fragment_instance_id()) << " "; \ str << "Used: " << tracker->consumption() << ", Limit: " << tracker->limit() << ". "; \ - str << "You can change the limit by session variable exec_mem_limit."; \ - return Status::MemoryLimitExceeded(str.str()); \ + str << "You can change the limit by session variable exec_mem_limit."; \ + return Status::MemoryLimitExceeded(str.str()); \ } while (false) -#define RETURN_IF_LIMIT_EXCEEDED(state, msg) \ - do { \ - /* if (UNLIKELY(MemTracker::limit_exceeded(*(state)->mem_trackers()))) { */ \ +#define RETURN_IF_LIMIT_EXCEEDED(state, msg) \ + do { \ + /* if (UNLIKELY(MemTracker::limit_exceeded(*(state)->mem_trackers()))) { */ \ MemTracker* tracker = state->instance_mem_tracker()->find_limit_exceeded_tracker(); \ - if (tracker != nullptr) { \ - LIMIT_EXCEEDED(tracker, state, msg); \ - } \ + if (tracker != nullptr) { \ + LIMIT_EXCEEDED(tracker, state, msg); \ + } \ } while (false) -} +} // namespace doris #endif - diff --git a/be/src/exec/file_reader.h b/be/src/exec/file_reader.h index a883b51cf103f3..19b4660a58b815 100644 --- a/be/src/exec/file_reader.h +++ b/be/src/exec/file_reader.h @@ -25,8 +25,7 @@ namespace doris { class FileReader { public: - virtual ~FileReader() { - } + virtual ~FileReader() {} virtual Status open() = 0; // Read content to 'buf', 'buf_len' is the max size of this buffer. // Return ok when read success, and 'buf_len' is set to size of read content @@ -46,11 +45,11 @@ class FileReader { * !! Important !! 
*/ virtual Status read_one_message(uint8_t** buf, size_t* length) = 0; - virtual int64_t size () = 0; + virtual int64_t size() = 0; virtual Status seek(int64_t position) = 0; virtual Status tell(int64_t* position) = 0; virtual void close() = 0; virtual bool closed() = 0; }; -} +} // namespace doris diff --git a/be/src/exec/file_writer.h b/be/src/exec/file_writer.h index 9c0ebf4488be18..a77c788cc7af0b 100644 --- a/be/src/exec/file_writer.h +++ b/be/src/exec/file_writer.h @@ -26,8 +26,7 @@ namespace doris { class FileWriter { public: - virtual ~FileWriter() { - } + virtual ~FileWriter() {} virtual Status open() = 0; diff --git a/be/src/exec/hash_join_node.cpp b/be/src/exec/hash_join_node.cpp index 4b55462caad811..7c735f05c04c17 100644 --- a/be/src/exec/hash_join_node.cpp +++ b/be/src/exec/hash_join_node.cpp @@ -23,26 +23,25 @@ #include "exprs/expr.h" #include "exprs/in_predicate.h" #include "exprs/slot_ref.h" +#include "gen_cpp/PlanNodes_types.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" #include "util/runtime_profile.h" -#include "gen_cpp/PlanNodes_types.h" namespace doris { -HashJoinNode::HashJoinNode( - ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : - ExecNode(pool, tnode, descs), - _join_op(tnode.hash_join_node.join_op), - _probe_eos(false), - _process_build_batch_fn(NULL), - _process_probe_batch_fn(NULL), - _anti_join_last_pos(NULL) { +HashJoinNode::HashJoinNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), + _join_op(tnode.hash_join_node.join_op), + _probe_eos(false), + _process_build_batch_fn(NULL), + _process_probe_batch_fn(NULL), + _anti_join_last_pos(NULL) { _match_all_probe = - (_join_op == TJoinOp::LEFT_OUTER_JOIN || _join_op == TJoinOp::FULL_OUTER_JOIN); + (_join_op == TJoinOp::LEFT_OUTER_JOIN || _join_op == TJoinOp::FULL_OUTER_JOIN); _match_one_build = (_join_op == TJoinOp::LEFT_SEMI_JOIN); _match_all_build = - (_join_op == 
TJoinOp::RIGHT_OUTER_JOIN || _join_op == TJoinOp::FULL_OUTER_JOIN); + (_join_op == TJoinOp::RIGHT_OUTER_JOIN || _join_op == TJoinOp::FULL_OUTER_JOIN); _is_push_down = tnode.hash_join_node.is_push_down; _build_unique = _join_op == TJoinOp::LEFT_ANTI_JOIN || _join_op == TJoinOp::LEFT_SEMI_JOIN; } @@ -63,17 +62,16 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { _probe_expr_ctxs.push_back(ctx); RETURN_IF_ERROR(Expr::create_expr_tree(_pool, eq_join_conjuncts[i].right, &ctx)); _build_expr_ctxs.push_back(ctx); - if (eq_join_conjuncts[i].__isset.opcode - && eq_join_conjuncts[i].opcode == TExprOpcode::EQ_FOR_NULL) { + if (eq_join_conjuncts[i].__isset.opcode && + eq_join_conjuncts[i].opcode == TExprOpcode::EQ_FOR_NULL) { _is_null_safe_eq_join.push_back(true); } else { _is_null_safe_eq_join.push_back(false); } } - RETURN_IF_ERROR( - Expr::create_expr_trees(_pool, tnode.hash_join_node.other_join_conjuncts, - &_other_join_conjunct_ctxs)); + RETURN_IF_ERROR(Expr::create_expr_trees(_pool, tnode.hash_join_node.other_join_conjuncts, + &_other_join_conjunct_ctxs)); if (!_other_join_conjunct_ctxs.empty()) { // If LEFT SEMI JOIN/LEFT ANTI JOIN with not equal predicate, @@ -88,33 +86,26 @@ Status HashJoinNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); _build_pool.reset(new MemPool(mem_tracker().get())); - _build_timer = - ADD_TIMER(runtime_profile(), "BuildTime"); - _push_down_timer = - ADD_TIMER(runtime_profile(), "PushDownTime"); - _push_compute_timer = - ADD_TIMER(runtime_profile(), "PushDownComputeTime"); - _probe_timer = - ADD_TIMER(runtime_profile(), "ProbeTime"); - _build_rows_counter = - ADD_COUNTER(runtime_profile(), "BuildRows", TUnit::UNIT); - _build_buckets_counter = - ADD_COUNTER(runtime_profile(), "BuildBuckets", TUnit::UNIT); - _probe_rows_counter = - ADD_COUNTER(runtime_profile(), "ProbeRows", TUnit::UNIT); + _build_timer = ADD_TIMER(runtime_profile(), "BuildTime"); + _push_down_timer = 
ADD_TIMER(runtime_profile(), "PushDownTime"); + _push_compute_timer = ADD_TIMER(runtime_profile(), "PushDownComputeTime"); + _probe_timer = ADD_TIMER(runtime_profile(), "ProbeTime"); + _build_rows_counter = ADD_COUNTER(runtime_profile(), "BuildRows", TUnit::UNIT); + _build_buckets_counter = ADD_COUNTER(runtime_profile(), "BuildBuckets", TUnit::UNIT); + _probe_rows_counter = ADD_COUNTER(runtime_profile(), "ProbeRows", TUnit::UNIT); _hash_tbl_load_factor_counter = - ADD_COUNTER(runtime_profile(), "LoadFactor", TUnit::DOUBLE_VALUE); + ADD_COUNTER(runtime_profile(), "LoadFactor", TUnit::DOUBLE_VALUE); // build and probe exprs are evaluated in the context of the rows produced by our // right and left children, respectively - RETURN_IF_ERROR(Expr::prepare( - _build_expr_ctxs, state, child(1)->row_desc(), expr_mem_tracker())); - RETURN_IF_ERROR(Expr::prepare( - _probe_expr_ctxs, state, child(0)->row_desc(), expr_mem_tracker())); + RETURN_IF_ERROR( + Expr::prepare(_build_expr_ctxs, state, child(1)->row_desc(), expr_mem_tracker())); + RETURN_IF_ERROR( + Expr::prepare(_probe_expr_ctxs, state, child(0)->row_desc(), expr_mem_tracker())); // _other_join_conjuncts are evaluated in the context of the rows produced by this node - RETURN_IF_ERROR(Expr::prepare( - _other_join_conjunct_ctxs, state, _row_descriptor, expr_mem_tracker())); + RETURN_IF_ERROR( + Expr::prepare(_other_join_conjunct_ctxs, state, _row_descriptor, expr_mem_tracker())); _result_tuple_row_size = _row_descriptor.tuple_descriptors().size() * sizeof(Tuple*); @@ -135,17 +126,16 @@ Status HashJoinNode::prepare(RuntimeState* state) { _build_tuple_row_size = num_build_tuples * sizeof(Tuple*); // TODO: default buckets - const bool stores_nulls = _join_op == TJoinOp::RIGHT_OUTER_JOIN - || _join_op == TJoinOp::FULL_OUTER_JOIN - || _join_op == TJoinOp::RIGHT_ANTI_JOIN - || _join_op == TJoinOp::RIGHT_SEMI_JOIN - || (std::find(_is_null_safe_eq_join.begin(), _is_null_safe_eq_join.end(), - true) != 
_is_null_safe_eq_join.end()); - _hash_tbl.reset(new HashTable( - _build_expr_ctxs, _probe_expr_ctxs, _build_tuple_size, - stores_nulls, _is_null_safe_eq_join, id(), mem_tracker(), 1024)); - - _probe_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); + const bool stores_nulls = + _join_op == TJoinOp::RIGHT_OUTER_JOIN || _join_op == TJoinOp::FULL_OUTER_JOIN || + _join_op == TJoinOp::RIGHT_ANTI_JOIN || _join_op == TJoinOp::RIGHT_SEMI_JOIN || + (std::find(_is_null_safe_eq_join.begin(), _is_null_safe_eq_join.end(), true) != + _is_null_safe_eq_join.end()); + _hash_tbl.reset(new HashTable(_build_expr_ctxs, _probe_expr_ctxs, _build_tuple_size, + stores_nulls, _is_null_safe_eq_join, id(), mem_tracker(), 1024)); + + _probe_batch.reset( + new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); return Status::OK(); } @@ -250,8 +240,8 @@ Status HashJoinNode::open(RuntimeState* state) { thread_status.set_value(construct_hash_table(state)); } - if (_children[0]->type() == TPlanNodeType::EXCHANGE_NODE - && _children[1]->type() == TPlanNodeType::EXCHANGE_NODE) { + if (_children[0]->type() == TPlanNodeType::EXCHANGE_NODE && + _children[1]->type() == TPlanNodeType::EXCHANGE_NODE) { _is_push_down = false; } @@ -259,8 +249,8 @@ Status HashJoinNode::open(RuntimeState* state) { // The in predicate will filter the null value in child[0] while it is needed in the Null-safe equal join. // For example: select * from a join b where a.id<=>b.id // the null value in table a should be return by scan node instead of filtering it by In-predicate. 
- if (std::find(_is_null_safe_eq_join.begin(), _is_null_safe_eq_join.end(), - true) != _is_null_safe_eq_join.end()) { + if (std::find(_is_null_safe_eq_join.begin(), _is_null_safe_eq_join.end(), true) != + _is_null_safe_eq_join.end()) { _is_push_down = false; } @@ -314,7 +304,7 @@ Status HashJoinNode::open(RuntimeState* state) { HashTable::Iterator iter = _hash_tbl->begin(); while (iter.has_next()) { - TupleRow* row = iter.get_row(); + TupleRow* row = iter.get_row(); std::list::iterator ctx_iter = _push_down_expr_ctxs.begin(); for (int i = 0; i < _build_expr_ctxs.size(); ++i, ++ctx_iter) { @@ -338,7 +328,6 @@ Status HashJoinNode::open(RuntimeState* state) { Status open_status = child(0)->open(state); RETURN_IF_ERROR(open_status); } else { - // Open the probe-side child so that it may perform any initialisation in parallel. // Don't exit even if we see an error, we still need to wait for the build thread // to finish. @@ -393,8 +382,8 @@ Status HashJoinNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* eo } // These cases are simpler and use a more efficient processing loop - if (!(_match_all_build || _join_op == TJoinOp::RIGHT_SEMI_JOIN - || _join_op == TJoinOp::RIGHT_ANTI_JOIN)) { + if (!(_match_all_build || _join_op == TJoinOp::RIGHT_SEMI_JOIN || + _join_op == TJoinOp::RIGHT_ANTI_JOIN)) { if (_eos) { *eos = true; return Status::OK(); @@ -423,8 +412,8 @@ Status HashJoinNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* eo TupleRow* matched_build_row = _hash_tbl_iterator.get_row(); VLOG_ROW << "matched_build_row: " << matched_build_row->to_string(child(1)->row_desc()); - if ((_join_op == TJoinOp::RIGHT_ANTI_JOIN || _join_op == TJoinOp::RIGHT_SEMI_JOIN) - && _hash_tbl_iterator.matched()) { + if ((_join_op == TJoinOp::RIGHT_ANTI_JOIN || _join_op == TJoinOp::RIGHT_SEMI_JOIN) && + _hash_tbl_iterator.matched()) { // We have already matched this build row, continue to next match. 
// _hash_tbl_iterator.next(); _hash_tbl_iterator.next(); @@ -439,10 +428,10 @@ Status HashJoinNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* eo // 2. judge if set matched with other join predicates // 3. scans hash table to choose row which is't set matched and meets conjuncts if (_join_op == TJoinOp::RIGHT_ANTI_JOIN) { - create_output_row(out_row, _current_probe_row, matched_build_row); + create_output_row(out_row, _current_probe_row, matched_build_row); if (eval_conjuncts(other_conjunct_ctxs, num_other_conjunct_ctxs, out_row)) { _hash_tbl_iterator.set_matched(); - } + } _hash_tbl_iterator.next(); continue; } else { @@ -451,13 +440,13 @@ Status HashJoinNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* eo // 2. check if the row meets other join predicates // 3. check if the row meets conjuncts // right join and full join - // 1. find pos in hash table which meets equi-join + // 1. find pos in hash table which meets equi-join // 2. check if the row meets other join predicates // 3. check if the row meets conjuncts // 4. output left and right meeting other predicates and conjuncts - // 5. if full join, output left meeting and right no meeting other + // 5. if full join, output left meeting and right no meeting other // join predicates and conjuncts - // 6. output left no meeting and right meeting other join predicate + // 6. 
output left no meeting and right meeting other join predicate // and conjuncts create_output_row(out_row, _current_probe_row, matched_build_row); } @@ -467,7 +456,6 @@ Status HashJoinNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* eo continue; } - if (_join_op == TJoinOp::RIGHT_SEMI_JOIN) { _hash_tbl_iterator.set_matched(); } @@ -626,8 +614,7 @@ Status HashJoinNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* eo return Status::OK(); } -Status HashJoinNode::left_join_get_next(RuntimeState* state, - RowBatch* out_batch, bool* eos) { +Status HashJoinNode::left_join_get_next(RuntimeState* state, RowBatch* out_batch, bool* eos) { *eos = _eos; ScopedTimer probe_timer(_probe_timer); @@ -643,12 +630,12 @@ Status HashJoinNode::left_join_get_next(RuntimeState* state, // Continue processing this row batch if (_process_probe_batch_fn == NULL) { _num_rows_returned += - process_probe_batch(out_batch, _probe_batch.get(), max_added_rows); + process_probe_batch(out_batch, _probe_batch.get(), max_added_rows); COUNTER_SET(_rows_returned_counter, _num_rows_returned); } else { // Use codegen'd function _num_rows_returned += - _process_probe_batch_fn(this, out_batch, _probe_batch.get(), max_added_rows); + _process_probe_batch_fn(this, out_batch, _probe_batch.get(), max_added_rows); COUNTER_SET(_rows_returned_counter, _num_rows_returned); } @@ -692,7 +679,7 @@ std::string HashJoinNode::get_probe_row_output_string(TupleRow* probe_row) { } int* is_build_tuple = - std::find(_build_tuple_idx_ptr, _build_tuple_idx_ptr + _build_tuple_size, i); + std::find(_build_tuple_idx_ptr, _build_tuple_idx_ptr + _build_tuple_size, i); if (is_build_tuple != _build_tuple_idx_ptr + _build_tuple_size) { out << Tuple::to_string(NULL, *row_desc().tuple_descriptors()[i]); @@ -707,13 +694,12 @@ std::string HashJoinNode::get_probe_row_output_string(TupleRow* probe_row) { void HashJoinNode::debug_string(int indentation_level, std::stringstream* out) const { *out << 
string(indentation_level * 2, ' '); - *out << "_hashJoin(eos=" << (_eos ? "true" : "false") - << " probe_batch_pos=" << _probe_batch_pos + *out << "_hashJoin(eos=" << (_eos ? "true" : "false") << " probe_batch_pos=" << _probe_batch_pos << " hash_tbl="; *out << string(indentation_level * 2, ' '); *out << "HashTbl("; - // << " build_exprs=" << Expr::debug_string(_build_expr_ctxs) - // << " probe_exprs=" << Expr::debug_string(_probe_expr_ctxs); + // << " build_exprs=" << Expr::debug_string(_build_expr_ctxs) + // << " probe_exprs=" << Expr::debug_string(_probe_expr_ctxs); *out << ")"; ExecNode::debug_string(indentation_level, out); *out << ")"; @@ -735,4 +721,4 @@ void HashJoinNode::create_output_row(TupleRow* out, TupleRow* probe, TupleRow* b } } -} +} // namespace doris diff --git a/be/src/exec/hash_join_node.h b/be/src/exec/hash_join_node.h index eaa04879ccfe32..b8645ceec51f6c 100644 --- a/be/src/exec/hash_join_node.h +++ b/be/src/exec/hash_join_node.h @@ -19,8 +19,8 @@ #define DORIS_BE_SRC_QUERY_EXEC_HASH_JOIN_NODE_H #include -#include #include +#include #include #include "exec/exec_node.h" @@ -85,14 +85,14 @@ class HashJoinNode : public ExecNode { std::vector _other_join_conjunct_ctxs; // derived from _join_op - bool _match_all_probe; // output all rows coming from the probe input - bool _match_one_build; // match at most one build row to each probe row - bool _match_all_build; // output all rows coming from the build input - bool _build_unique; // build a hash table without duplicated rows + bool _match_all_probe; // output all rows coming from the probe input + bool _match_one_build; // match at most one build row to each probe row + bool _match_all_build; // output all rows coming from the build input + bool _build_unique; // build a hash table without duplicated rows - bool _matched_probe; // if true, we have matched the current probe row - bool _eos; // if true, nothing left to return in get_next() - boost::scoped_ptr _build_pool; // holds everything 
referenced in _hash_tbl + bool _matched_probe; // if true, we have matched the current probe row + bool _eos; // if true, nothing left to return in get_next() + boost::scoped_ptr _build_pool; // holds everything referenced in _hash_tbl // Size of the TupleRow (just the Tuple ptrs) from the build (right) and probe (left) // sides. Set to zero if the build/probe tuples are not returned, e.g., for semi joins. @@ -104,8 +104,8 @@ class HashJoinNode : public ExecNode { // does not initialize all tuple ptrs in the row, only the ones that it // is responsible for. boost::scoped_ptr _probe_batch; - int _probe_batch_pos; // current scan pos in _probe_batch - bool _probe_eos; // if true, probe child has no more rows to process + int _probe_batch_pos; // current scan pos in _probe_batch + bool _probe_eos; // if true, probe child has no more rows to process TupleRow* _current_probe_row; // _build_tuple_idx[i] is the tuple index of child(1)'s tuple[i] in the output row @@ -129,13 +129,13 @@ class HashJoinNode : public ExecNode { // record anti join pos in get_next() HashTable::Iterator* _anti_join_last_pos; - RuntimeProfile::Counter* _build_timer; // time to build hash table - RuntimeProfile::Counter* _push_down_timer; // time to build hash table + RuntimeProfile::Counter* _build_timer; // time to build hash table + RuntimeProfile::Counter* _push_down_timer; // time to build hash table RuntimeProfile::Counter* _push_compute_timer; - RuntimeProfile::Counter* _probe_timer; // time to probe - RuntimeProfile::Counter* _build_rows_counter; // num build rows - RuntimeProfile::Counter* _probe_rows_counter; // num probe rows - RuntimeProfile::Counter* _build_buckets_counter; // num buckets in hash table + RuntimeProfile::Counter* _probe_timer; // time to probe + RuntimeProfile::Counter* _build_rows_counter; // num build rows + RuntimeProfile::Counter* _probe_rows_counter; // num probe rows + RuntimeProfile::Counter* _build_buckets_counter; // num buckets in hash table 
RuntimeProfile::Counter* _hash_tbl_load_factor_counter; // Supervises ConstructHashTable in a separate thread, and @@ -176,6 +176,6 @@ class HashJoinNode : public ExecNode { std::string get_probe_row_output_string(TupleRow* probe_row); }; -} +} // namespace doris #endif diff --git a/be/src/exec/hash_table.cpp b/be/src/exec/hash_table.cpp index 471663dc0ab311..d3f145a02a5327 100644 --- a/be/src/exec/hash_table.cpp +++ b/be/src/exec/hash_table.cpp @@ -18,10 +18,10 @@ #include "exec/hash_table.hpp" #include "exprs/expr.h" -#include "runtime/raw_value.h" -#include "runtime/string_value.hpp" #include "runtime/mem_tracker.h" +#include "runtime/raw_value.h" #include "runtime/runtime_state.h" +#include "runtime/string_value.hpp" #include "util/doris_metrics.h" namespace doris { @@ -29,24 +29,22 @@ namespace doris { const float HashTable::MAX_BUCKET_OCCUPANCY_FRACTION = 0.75f; HashTable::HashTable(const std::vector& build_expr_ctxs, - const std::vector& probe_expr_ctxs, - int num_build_tuples, bool stores_nulls, - const std::vector& finds_nulls, - int32_t initial_seed, - const std::shared_ptr& mem_tracker, int64_t num_buckets) : - _build_expr_ctxs(build_expr_ctxs), - _probe_expr_ctxs(probe_expr_ctxs), - _num_build_tuples(num_build_tuples), - _stores_nulls(stores_nulls), - _finds_nulls(finds_nulls), - _initial_seed(initial_seed), - _node_byte_size(sizeof(Node) + sizeof(Tuple*) * _num_build_tuples), - _num_filled_buckets(0), - _nodes(NULL), - _num_nodes(0), - _exceeded_limit(false), - _mem_tracker(mem_tracker), - _mem_limit_exceeded(false) { + const std::vector& probe_expr_ctxs, int num_build_tuples, + bool stores_nulls, const std::vector& finds_nulls, int32_t initial_seed, + const std::shared_ptr& mem_tracker, int64_t num_buckets) + : _build_expr_ctxs(build_expr_ctxs), + _probe_expr_ctxs(probe_expr_ctxs), + _num_build_tuples(num_build_tuples), + _stores_nulls(stores_nulls), + _finds_nulls(finds_nulls), + _initial_seed(initial_seed), + _node_byte_size(sizeof(Node) + 
sizeof(Tuple*) * _num_build_tuples), + _num_filled_buckets(0), + _nodes(NULL), + _num_nodes(0), + _exceeded_limit(false), + _mem_tracker(mem_tracker), + _mem_limit_exceeded(false) { DCHECK(_mem_tracker); DCHECK_EQ(_build_expr_ctxs.size(), _probe_expr_ctxs.size()); @@ -57,8 +55,8 @@ HashTable::HashTable(const std::vector& build_expr_ctxs, _mem_tracker->Consume(_buckets.capacity() * sizeof(Bucket)); // Compute the layout and buffer size to store the evaluated expr results - _results_buffer_size = Expr::compute_results_layout(_build_expr_ctxs, - &_expr_values_buffer_offsets, &_var_result_begin); + _results_buffer_size = Expr::compute_results_layout( + _build_expr_ctxs, &_expr_values_buffer_offsets, &_var_result_begin); _expr_values_buffer = new uint8_t[_results_buffer_size]; memset(_expr_values_buffer, 0, sizeof(uint8_t) * _results_buffer_size); _expr_value_null_bits = new uint8_t[_build_expr_ctxs.size()]; @@ -73,8 +71,7 @@ HashTable::HashTable(const std::vector& build_expr_ctxs, } } -HashTable::~HashTable() { -} +HashTable::~HashTable() {} void HashTable::close() { // TODO: use tr1::array? 
@@ -149,7 +146,6 @@ uint32_t HashTable::hash_variable_len_row() { hash = decimal->hash(hash); } } - } return hash; @@ -175,7 +171,6 @@ bool HashTable::equals(TupleRow* build_row) { if (!RawValue::eq(loc, val, _build_expr_ctxs[i]->root()->type())) { return false; - } } @@ -299,4 +294,4 @@ std::string HashTable::debug_string(bool skip_empty, const RowDescriptor* desc) return ss.str(); } -} +} // namespace doris diff --git a/be/src/exec/hash_table.h b/be/src/exec/hash_table.h index ddf7f36bed7f58..511c61e817b281 100644 --- a/be/src/exec/hash_table.h +++ b/be/src/exec/hash_table.h @@ -18,8 +18,8 @@ #ifndef DORIS_BE_SRC_QUERY_EXEC_HASH_TABLE_H #define DORIS_BE_SRC_QUERY_EXEC_HASH_TABLE_H -#include #include +#include #include "codegen/doris_ir.h" #include "common/logging.h" @@ -76,6 +76,7 @@ using std::vector; class HashTable { private: struct Node; + public: class Iterator; @@ -88,14 +89,10 @@ class HashTable { // - mem_limits: if non-empty, all memory allocation for nodes and for buckets is // tracked against those limits; the limits must be valid until the d'tor is called // - initial_seed: Initial seed value to use when computing hashes for rows - HashTable( - const std::vector& build_exprs, - const std::vector& probe_exprs, - int num_build_tuples, bool stores_nulls, - const std::vector& finds_nulls, - int32_t initial_seed, - const std::shared_ptr& mem_tracker, - int64_t num_buckets); + HashTable(const std::vector& build_exprs, + const std::vector& probe_exprs, int num_build_tuples, bool stores_nulls, + const std::vector& finds_nulls, int32_t initial_seed, + const std::shared_ptr& mem_tracker, int64_t num_buckets); ~HashTable(); @@ -120,7 +117,6 @@ class HashTable { } } - // Returns the start iterator for all rows that match 'probe_row'. 'probe_row' is // evaluated with _probe_expr_ctxs. The iterator can be iterated until HashTable::end() // to find all the matching rows. 
@@ -133,24 +129,16 @@ class HashTable { Iterator IR_ALWAYS_INLINE find(TupleRow* probe_row, bool probe = true); // Returns number of elements in the hash table - int64_t size() { - return _num_nodes; - } + int64_t size() { return _num_nodes; } // Returns the number of buckets - int64_t num_buckets() { - return _buckets.size(); - } + int64_t num_buckets() { return _buckets.size(); } // true if any of the MemTrackers was exceeded - bool exceeded_limit() const { - return _exceeded_limit; - } + bool exceeded_limit() const { return _exceeded_limit; } // Returns the load factor (the number of non-empty buckets) - float load_factor() { - return _num_filled_buckets / static_cast(_buckets.size()); - } + float load_factor() { return _num_filled_buckets / static_cast(_buckets.size()); } // Returns the number of bytes allocated to the hash table int64_t byte_size() const { @@ -167,18 +155,14 @@ class HashTable { } // Returns if the expr at 'expr_idx' evaluated to NULL for the last row. - bool last_expr_value_null(int expr_idx) const { - return _expr_value_null_bits[expr_idx]; - } + bool last_expr_value_null(int expr_idx) const { return _expr_value_null_bits[expr_idx]; } // Return beginning of hash table. Advancing this iterator will traverse all // elements. Iterator begin(); // Returns end marker - Iterator end() { - return Iterator(); - } + Iterator end() { return Iterator(); } // Dump out the entire hash table to string. If skip_empty, empty buckets are // skipped. If build_desc is non-null, the build rows will be output. Otherwise @@ -188,13 +172,12 @@ class HashTable { // stl-like iterator interface. class Iterator { public: - Iterator() : _table(NULL), _bucket_idx(-1), _node_idx(-1) { - } + Iterator() : _table(NULL), _bucket_idx(-1), _node_idx(-1) {} // Iterates to the next element. In the case where the iterator was // from a Find, this will lazily evaluate that bucket, only returning // TupleRows that match the current scan row. 
- template + template void IR_ALWAYS_INLINE next(); // Returns the current row or NULL if at end. @@ -206,32 +189,26 @@ class HashTable { } // Returns Hash - uint32_t get_hash() { - return _table->get_node(_node_idx)->_hash; - } + uint32_t get_hash() { return _table->get_node(_node_idx)->_hash; } // Returns if the iterator is at the end - bool has_next() { - return _node_idx != -1; - } + bool has_next() { return _node_idx != -1; } // Returns true if this iterator is at the end, i.e. get_row() cannot be called. - bool at_end() { - return _node_idx == -1; - } + bool at_end() { return _node_idx == -1; } // Sets as matched the node currently pointed by the iterator. The iterator // cannot be AtEnd(). void set_matched() { DCHECK(!at_end()); - Node *node = _table->get_node(_node_idx); + Node* node = _table->get_node(_node_idx); node->matched = true; } bool matched() { - DCHECK(!at_end()); - Node *node = _table->get_node(_node_idx); - return node->matched; + DCHECK(!at_end()); + Node* node = _table->get_node(_node_idx); + return node->matched; } bool operator==(const Iterator& rhs) { @@ -245,12 +222,8 @@ class HashTable { private: friend class HashTable; - Iterator(HashTable* table, int bucket_idx, int64_t node, uint32_t hash) : - _table(table), - _bucket_idx(bucket_idx), - _node_idx(node), - _scan_hash(hash) { - } + Iterator(HashTable* table, int bucket_idx, int64_t node, uint32_t hash) + : _table(table), _bucket_idx(bucket_idx), _node_idx(node), _scan_hash(hash) {} HashTable* _table; // Current bucket idx @@ -268,14 +241,11 @@ class HashTable { // Header portion of a Node. The node data (TupleRow) is right after the // node memory to maximize cache hits. 
struct Node { - int64_t _next_idx; // chain to next node for collisions - uint32_t _hash; // Cache of the hash for _data + int64_t _next_idx; // chain to next node for collisions + uint32_t _hash; // Cache of the hash for _data bool matched; - Node():_next_idx(-1), - _hash(-1), - matched(false) { - } + Node() : _next_idx(-1), _hash(-1), matched(false) {} TupleRow* data() { uint8_t* mem = reinterpret_cast(this); @@ -287,9 +257,7 @@ class HashTable { struct Bucket { int64_t _node_idx; - Bucket() { - _node_idx = -1; - } + Bucket() { _node_idx = -1; } }; // Returns the next non-empty bucket and updates idx to be the index of that bucket. @@ -316,7 +284,7 @@ class HashTable { // Moves a node from one bucket to another. 'previous_node' refers to the // node (if any) that's chained before this node in from_bucket's linked list. void move_node(Bucket* from_bucket, Bucket* to_bucket, int64_t node_idx, Node* node, - Node* previous_node); + Node* previous_node); // Evaluate the exprs over row and cache the results in '_expr_values_buffer'. // Returns whether any expr evaluated to NULL @@ -328,15 +296,11 @@ class HashTable { // cross compiled because we need to be able to differentiate between EvalBuildRow // and EvalProbeRow by name and the _build_expr_ctxs/_probe_expr_ctxs are baked into // the codegen'd function. - bool IR_NO_INLINE eval_build_row(TupleRow* row) { - return eval_row(row, _build_expr_ctxs); - } + bool IR_NO_INLINE eval_build_row(TupleRow* row) { return eval_row(row, _build_expr_ctxs); } // Evaluate 'row' over _probe_expr_ctxs caching the results in '_expr_values_buffer' // This will be replaced by codegen. - bool IR_NO_INLINE eval_probe_row(TupleRow* row) { - return eval_row(row, _probe_expr_ctxs); - } + bool IR_NO_INLINE eval_probe_row(TupleRow* row) { return eval_row(row, _probe_expr_ctxs); } // Compute the hash of the values in _expr_values_buffer. // This will be replaced by codegen. 
We don't want this inlined for replacing @@ -399,7 +363,7 @@ class HashTable { // max number of nodes that can be stored in '_nodes' before realloc int64_t _nodes_capacity; - bool _exceeded_limit; // true if any of _mem_trackers[].limit_exceeded() + bool _exceeded_limit; // true if any of _mem_trackers[].limit_exceeded() std::shared_ptr _mem_tracker; // Set to true if the hash table exceeds the memory limit. If this is set, @@ -433,6 +397,6 @@ class HashTable { uint8_t* _expr_value_null_bits; }; -} +} // namespace doris #endif diff --git a/be/src/exec/intersect_node.cpp b/be/src/exec/intersect_node.cpp old mode 100755 new mode 100644 index 59f6c2b178669b..df78798036dcd6 --- a/be/src/exec/intersect_node.cpp +++ b/be/src/exec/intersect_node.cpp @@ -77,7 +77,8 @@ Status IntersectNode::open(RuntimeState* state) { } } // probe - _probe_batch.reset(new RowBatch(child(i)->row_desc(), state->batch_size(), mem_tracker().get())); + _probe_batch.reset( + new RowBatch(child(i)->row_desc(), state->batch_size(), mem_tracker().get())); ScopedTimer probe_timer(_probe_timer); RETURN_IF_ERROR(child(i)->open(state)); eos = false; @@ -92,7 +93,8 @@ Status IntersectNode::open(RuntimeState* state) { if (_hash_tbl_iterator != _hash_tbl->end()) { _hash_tbl_iterator.set_matched(); VLOG_ROW << "probe matched: " - << get_row_output_string(_hash_tbl_iterator.get_row(), child(0)->row_desc()); + << get_row_output_string(_hash_tbl_iterator.get_row(), + child(0)->row_desc()); } } _probe_batch->reset(); diff --git a/be/src/exec/json_scanner.cpp b/be/src/exec/json_scanner.cpp index aa0633b0337e10..3191bf16c847e5 100644 --- a/be/src/exec/json_scanner.cpp +++ b/be/src/exec/json_scanner.cpp @@ -15,36 +15,35 @@ // specific language governing permissions and limitations // under the License. 
- #include "exec/json_scanner.h" + #include + +#include "env/env.h" +#include "exec/broker_reader.h" +#include "exec/local_file_reader.h" +#include "exprs/expr.h" +#include "exprs/json_functions.h" #include "gutil/strings/split.h" #include "runtime/exec_env.h" #include "runtime/mem_tracker.h" #include "runtime/raw_value.h" #include "runtime/runtime_state.h" -#include "exprs/expr.h" -#include "env/env.h" -#include "exec/local_file_reader.h" -#include "exec/broker_reader.h" -#include "exprs/json_functions.h" namespace doris { -JsonScanner::JsonScanner(RuntimeState* state, - RuntimeProfile* profile, +JsonScanner::JsonScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params, const std::vector& ranges, const std::vector& broker_addresses, - ScannerCounter* counter) : BaseScanner(state, profile, params, counter), - _ranges(ranges), - _broker_addresses(broker_addresses), - _cur_file_reader(nullptr), - _next_range(0), - _cur_file_eof(false), - _scanner_eof(false) { - -} + ScannerCounter* counter) + : BaseScanner(state, profile, params, counter), + _ranges(ranges), + _broker_addresses(broker_addresses), + _cur_file_reader(nullptr), + _next_range(0), + _cur_file_eof(false), + _scanner_eof(false) {} JsonScanner::~JsonScanner() { close(); @@ -66,7 +65,8 @@ Status JsonScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { } _cur_file_eof = false; } - RETURN_IF_ERROR(_cur_file_reader->read(_src_tuple, _src_slot_descs, tuple_pool, &_cur_file_eof)); + RETURN_IF_ERROR( + _cur_file_reader->read(_src_tuple, _src_slot_descs, tuple_pool, &_cur_file_eof)); if (_cur_file_eof) { continue; // read next file @@ -102,7 +102,7 @@ Status JsonScanner::open_next_reader() { if (start_offset != 0) { start_offset -= 1; } - FileReader *file = nullptr; + FileReader* file = nullptr; switch (range.file_type) { case TFileType::FILE_LOCAL: { LocalFileReader* file_reader = new LocalFileReader(range.path, start_offset); @@ -111,8 +111,9 @@ Status 
JsonScanner::open_next_reader() { break; } case TFileType::FILE_BROKER: { - BrokerReader* broker_reader = new BrokerReader( - _state->exec_env(), _broker_addresses, _params.properties, range.path, start_offset); + BrokerReader* broker_reader = + new BrokerReader(_state->exec_env(), _broker_addresses, _params.properties, + range.path, start_offset); RETURN_IF_ERROR(broker_reader->open()); file = broker_reader; break; @@ -163,11 +164,10 @@ void JsonScanner::close() { } ////// class JsonDataInternal -JsonDataInternal::JsonDataInternal(rapidjson::Value* v) : - _json_values(v) { - if (v != nullptr) { - _iterator = v->Begin(); - } +JsonDataInternal::JsonDataInternal(rapidjson::Value* v) : _json_values(v) { + if (v != nullptr) { + _iterator = v->Begin(); + } } rapidjson::Value::ConstValueIterator JsonDataInternal::get_next() { @@ -178,21 +178,18 @@ rapidjson::Value::ConstValueIterator JsonDataInternal::get_next() { } ////// class JsonReader -JsonReader::JsonReader( - RuntimeState* state, ScannerCounter* counter, - RuntimeProfile* profile, - FileReader* file_reader, - bool strip_outer_array) : - _handle_json_callback(nullptr), - _next_line(0), - _total_lines(0), - _state(state), - _counter(counter), - _profile(profile), - _file_reader(file_reader), - _closed(false), - _strip_outer_array(strip_outer_array), - _json_doc(nullptr) { +JsonReader::JsonReader(RuntimeState* state, ScannerCounter* counter, RuntimeProfile* profile, + FileReader* file_reader, bool strip_outer_array) + : _handle_json_callback(nullptr), + _next_line(0), + _total_lines(0), + _state(state), + _counter(counter), + _profile(profile), + _file_reader(file_reader), + _closed(false), + _strip_outer_array(strip_outer_array), + _json_doc(nullptr) { _bytes_read_counter = ADD_COUNTER(_profile, "BytesRead", TUnit::BYTES); _read_timer = ADD_TIMER(_profile, "FileReadTime"); } @@ -224,7 +221,8 @@ Status JsonReader::init(const std::string& jsonpath, const std::string& json_roo return Status::OK(); } -Status 
JsonReader::_generate_json_paths(const std::string& jsonpath, std::vector>* vect) { +Status JsonReader::_generate_json_paths(const std::string& jsonpath, + std::vector>* vect) { rapidjson::Document jsonpaths_doc; if (!jsonpaths_doc.Parse(jsonpath.c_str()).HasParseError()) { if (!jsonpaths_doc.IsArray()) { @@ -251,7 +249,8 @@ void JsonReader::_close() { if (_closed) { return; } - if (typeid(*_file_reader) == typeid(doris::BrokerReader) || typeid(*_file_reader) == typeid(doris::LocalFileReader)) { + if (typeid(*_file_reader) == typeid(doris::BrokerReader) || + typeid(*_file_reader) == typeid(doris::LocalFileReader)) { _file_reader->close(); delete _file_reader; } @@ -274,9 +273,10 @@ Status JsonReader::_parse_json_doc(bool* eof) { // parse jsondata to JsonDoc if (_origin_json_doc.Parse((char*)json_str, length).HasParseError()) { std::stringstream str_error; - str_error << "Parse json data for JsonDoc failed. code = " << _origin_json_doc.GetParseError() - << ", error-info:" << rapidjson::GetParseError_En(_origin_json_doc.GetParseError()); - _state->append_error_msg_to_file(std::string((char*) json_str, length), str_error.str()); + str_error << "Parse json data for JsonDoc failed. 
code = " + << _origin_json_doc.GetParseError() << ", error-info:" + << rapidjson::GetParseError_En(_origin_json_doc.GetParseError()); + _state->append_error_msg_to_file(std::string((char*)json_str, length), str_error.str()); _counter->num_rows_filtered++; delete[] json_str; return Status::DataQualityError(str_error.str()); @@ -285,7 +285,8 @@ Status JsonReader::_parse_json_doc(bool* eof) { // set json root if (_parsed_json_root.size() != 0) { - _json_doc = JsonFunctions::get_json_object_from_parsed_json(_parsed_json_root, &_origin_json_doc, _origin_json_doc.GetAllocator()); + _json_doc = JsonFunctions::get_json_object_from_parsed_json( + _parsed_json_root, &_origin_json_doc, _origin_json_doc.GetAllocator()); if (_json_doc == nullptr) { std::stringstream str_error; str_error << "JSON Root not found."; @@ -332,7 +333,8 @@ std::string JsonReader::_print_jsonpath(const std::vector& path) { return ss.str(); } -void JsonReader::_fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, const uint8_t* value, int32_t len) { +void JsonReader::_fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, + const uint8_t* value, int32_t len) { tuple->set_not_null(slot_desc->null_indicator_offset()); void* slot = tuple->get_slot(slot_desc->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); @@ -342,67 +344,73 @@ void JsonReader::_fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* me return; } -void JsonReader::_write_data_to_tuple(rapidjson::Value::ConstValueIterator value, SlotDescriptor* desc, Tuple* tuple, MemPool* tuple_pool, bool* valid) { +void JsonReader::_write_data_to_tuple(rapidjson::Value::ConstValueIterator value, + SlotDescriptor* desc, Tuple* tuple, MemPool* tuple_pool, + bool* valid) { const char* str_value = nullptr; uint8_t tmp_buf[128] = {0}; int32_t wbytes = 0; switch (value->GetType()) { - case rapidjson::Type::kStringType: - str_value = value->GetString(); - _fill_slot(tuple, desc, tuple_pool, (uint8_t*)str_value, 
strlen(str_value)); - break; - case rapidjson::Type::kNumberType: - if (value->IsUint()) { - wbytes = sprintf((char*)tmp_buf, "%u", value->GetUint()); - _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes); - } else if (value->IsInt()) { - wbytes = sprintf((char*)tmp_buf, "%d", value->GetInt()); - _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes); - } else if (value->IsUint64()) { - wbytes = sprintf((char*)tmp_buf, "%lu", value->GetUint64()); - _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes); - } else if (value->IsInt64()) { - wbytes = sprintf((char*)tmp_buf, "%ld", value->GetInt64()); - _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes); - } else { - wbytes = sprintf((char*)tmp_buf, "%f", value->GetDouble()); - _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes); - } - break; - case rapidjson::Type::kFalseType: - _fill_slot(tuple, desc, tuple_pool, (uint8_t*)"0", 1); - break; - case rapidjson::Type::kTrueType: - _fill_slot(tuple, desc, tuple_pool, (uint8_t*)"1", 1); - break; - case rapidjson::Type::kNullType: - if (desc->is_nullable()) { - tuple->set_null(desc->null_indicator_offset()); - } else { - std::stringstream str_error; - str_error << "Json value is null, but the column `" << desc->col_name() << "` is not nullable."; - _state->append_error_msg_to_file(_print_json_value(*value), str_error.str()); - _counter->num_rows_filtered++; - *valid = false; - return; - } - break; - default: - // for other type like array or object. 
we convert it to string to save - std::string json_str = _print_json_value(*value); - _fill_slot(tuple, desc, tuple_pool, (uint8_t*) json_str.c_str(), json_str.length()); - break; + case rapidjson::Type::kStringType: + str_value = value->GetString(); + _fill_slot(tuple, desc, tuple_pool, (uint8_t*)str_value, strlen(str_value)); + break; + case rapidjson::Type::kNumberType: + if (value->IsUint()) { + wbytes = sprintf((char*)tmp_buf, "%u", value->GetUint()); + _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes); + } else if (value->IsInt()) { + wbytes = sprintf((char*)tmp_buf, "%d", value->GetInt()); + _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes); + } else if (value->IsUint64()) { + wbytes = sprintf((char*)tmp_buf, "%lu", value->GetUint64()); + _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes); + } else if (value->IsInt64()) { + wbytes = sprintf((char*)tmp_buf, "%ld", value->GetInt64()); + _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes); + } else { + wbytes = sprintf((char*)tmp_buf, "%f", value->GetDouble()); + _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes); + } + break; + case rapidjson::Type::kFalseType: + _fill_slot(tuple, desc, tuple_pool, (uint8_t*)"0", 1); + break; + case rapidjson::Type::kTrueType: + _fill_slot(tuple, desc, tuple_pool, (uint8_t*)"1", 1); + break; + case rapidjson::Type::kNullType: + if (desc->is_nullable()) { + tuple->set_null(desc->null_indicator_offset()); + } else { + std::stringstream str_error; + str_error << "Json value is null, but the column `" << desc->col_name() + << "` is not nullable."; + _state->append_error_msg_to_file(_print_json_value(*value), str_error.str()); + _counter->num_rows_filtered++; + *valid = false; + return; + } + break; + default: + // for other type like array or object. 
we convert it to string to save + std::string json_str = _print_json_value(*value); + _fill_slot(tuple, desc, tuple_pool, (uint8_t*)json_str.c_str(), json_str.length()); + break; } *valid = true; return; } // for simple format json -void JsonReader::_set_tuple_value(rapidjson::Value& objectValue, Tuple* tuple, const std::vector& slot_descs, MemPool* tuple_pool, bool *valid) { +void JsonReader::_set_tuple_value(rapidjson::Value& objectValue, Tuple* tuple, + const std::vector& slot_descs, + MemPool* tuple_pool, bool* valid) { if (!objectValue.IsObject()) { // Here we expect the incoming `objectValue` to be a Json Object, such as {"key" : "value"}, // not other type of Json format. - _state->append_error_msg_to_file(_print_json_value(objectValue), "Expect json object value"); + _state->append_error_msg_to_file(_print_json_value(objectValue), + "Expect json object value"); _counter->num_rows_filtered++; *valid = false; // current row is invalid return; @@ -420,9 +428,10 @@ void JsonReader::_set_tuple_value(rapidjson::Value& objectValue, Tuple* tuple, c if (v->is_nullable()) { tuple->set_null(v->null_indicator_offset()); nullcount++; - } else { + } else { std::stringstream str_error; - str_error << "The column `" << v->col_name() << "` is not nullable, but it's not found in jsondata."; + str_error << "The column `" << v->col_name() + << "` is not nullable, but it's not found in jsondata."; _state->append_error_msg_to_file(_print_json_value(objectValue), str_error.str()); _counter->num_rows_filtered++; *valid = false; // current row is invalid @@ -432,7 +441,8 @@ void JsonReader::_set_tuple_value(rapidjson::Value& objectValue, Tuple* tuple, c } if (nullcount == slot_descs.size()) { - _state->append_error_msg_to_file(_print_json_value(objectValue), "All fields is null, this is a invalid row."); + _state->append_error_msg_to_file(_print_json_value(objectValue), + "All fields is null, this is a invalid row."); _counter->num_rows_filtered++; *valid = false; return; @@ -449,7 
+459,8 @@ void JsonReader::_set_tuple_value(rapidjson::Value& objectValue, Tuple* tuple, c ", "column2":30}] * case 2. {"column1":"value1", "column2":10} */ -Status JsonReader::_handle_simple_json(Tuple* tuple, const std::vector& slot_descs, MemPool* tuple_pool, bool* eof) { +Status JsonReader::_handle_simple_json(Tuple* tuple, const std::vector& slot_descs, + MemPool* tuple_pool, bool* eof) { do { bool valid = false; if (_next_line >= _total_lines) { // parse json and generic document @@ -458,7 +469,7 @@ Status JsonReader::_handle_simple_json(Tuple* tuple, const std::vectorIsArray()) { @@ -467,7 +478,8 @@ Status JsonReader::_handle_simple_json(Tuple* tuple, const std::vectorappend_error_msg_to_file(_print_json_value(*_json_doc), str_error.str()); + _state->append_error_msg_to_file(_print_json_value(*_json_doc), + str_error.str()); _counter->num_rows_filtered++; continue; } @@ -478,8 +490,8 @@ Status JsonReader::_handle_simple_json(Tuple* tuple, const std::vectorIsArray()) { // handle case 1 - rapidjson::Value& objectValue = (*_json_doc)[_next_line];// json object + if (_json_doc->IsArray()) { // handle case 1 + rapidjson::Value& objectValue = (*_json_doc)[_next_line]; // json object _set_tuple_value(objectValue, tuple, slot_descs, tuple_pool, &valid); } else { // handle case 2 _set_tuple_value(*_json_doc, tuple, slot_descs, tuple_pool, &valid); @@ -493,15 +505,18 @@ Status JsonReader::_handle_simple_json(Tuple* tuple, const std::vector& slot_descs) { +bool JsonReader::_write_values_by_jsonpath(rapidjson::Value& objectValue, MemPool* tuple_pool, + Tuple* tuple, + const std::vector& slot_descs) { int nullcount = 0; bool valid = true; size_t column_num = slot_descs.size(); for (size_t i = 0; i < column_num; i++) { rapidjson::Value* json_values = nullptr; - if (LIKELY( i < _parsed_jsonpaths.size())) { - json_values = JsonFunctions::get_json_array_from_parsed_json(_parsed_jsonpaths[i], &objectValue, _origin_json_doc.GetAllocator()); + if (LIKELY(i < 
_parsed_jsonpaths.size())) { + json_values = JsonFunctions::get_json_array_from_parsed_json( + _parsed_jsonpaths[i], &objectValue, _origin_json_doc.GetAllocator()); } if (json_values == nullptr) { @@ -509,9 +524,10 @@ bool JsonReader::_write_values_by_jsonpath(rapidjson::Value& objectValue, MemPoo if (slot_descs[i]->is_nullable()) { tuple->set_null(slot_descs[i]->null_indicator_offset()); nullcount++; - } else { + } else { std::stringstream str_error; - str_error << "The column `" << slot_descs[i]->col_name() << "` is not nullable, but it's not found in jsondata."; + str_error << "The column `" << slot_descs[i]->col_name() + << "` is not nullable, but it's not found in jsondata."; _state->append_error_msg_to_file(_print_json_value(objectValue), str_error.str()); _counter->num_rows_filtered++; valid = false; // current row is invalid @@ -533,7 +549,9 @@ bool JsonReader::_write_values_by_jsonpath(rapidjson::Value& objectValue, MemPoo } } if (nullcount == column_num) { - _state->append_error_msg_to_file(_print_json_value(objectValue), "All fields is null or not matched, this is a invalid row."); + _state->append_error_msg_to_file( + _print_json_value(objectValue), + "All fields is null or not matched, this is a invalid row."); _counter->num_rows_filtered++; valid = false; } @@ -547,8 +565,10 @@ bool JsonReader::_write_values_by_jsonpath(rapidjson::Value& objectValue, MemPoo * } * In this scene, generate only one row */ -Status JsonReader::_handle_nested_complex_json(Tuple* tuple, const std::vector& slot_descs, MemPool* tuple_pool, bool* eof) { - while(true) { +Status JsonReader::_handle_nested_complex_json(Tuple* tuple, + const std::vector& slot_descs, + MemPool* tuple_pool, bool* eof) { + while (true) { Status st = _parse_json_doc(eof); if (st.is_data_quality_error()) { continue; // continue to read next @@ -577,7 +597,9 @@ Status JsonReader::_handle_nested_complex_json(Tuple* tuple, const std::vector& slot_descs, MemPool* tuple_pool, bool* eof) { +Status 
JsonReader::_handle_flat_array_complex_json(Tuple* tuple, + const std::vector& slot_descs, + MemPool* tuple_pool, bool* eof) { do { if (_next_line >= _total_lines) { Status st = _parse_json_doc(eof); @@ -585,7 +607,7 @@ Status JsonReader::_handle_flat_array_complex_json(Tuple* tuple, const std::vect continue; // continue to read next } RETURN_IF_ERROR(st); // terminate if encounter other errors - if (*eof) { // read all data, then return + if (*eof) { // read all data, then return return Status::OK(); } _total_lines = _json_doc->Size(); @@ -600,9 +622,9 @@ Status JsonReader::_handle_flat_array_complex_json(Tuple* tuple, const std::vect return Status::OK(); } -Status JsonReader::read(Tuple* tuple, const std::vector& slot_descs, MemPool* tuple_pool, bool* eof) { - return (this->*_handle_json_callback)(tuple, slot_descs, tuple_pool, eof); +Status JsonReader::read(Tuple* tuple, const std::vector& slot_descs, + MemPool* tuple_pool, bool* eof) { + return (this->*_handle_json_callback)(tuple, slot_descs, tuple_pool, eof); } - -} // end of namespace +} // namespace doris diff --git a/be/src/exec/json_scanner.h b/be/src/exec/json_scanner.h index fe2894469d0b9a..16c274b0733e36 100644 --- a/be/src/exec/json_scanner.h +++ b/be/src/exec/json_scanner.h @@ -18,27 +18,28 @@ #ifndef BE_SRC_JSON_SCANNER_H_ #define BE_SRC_JSON_SCANNER_H_ -#include -#include -#include -#include -#include #include #include #include #include -#include "exec/base_scanner.h" +#include +#include +#include +#include +#include + #include "common/status.h" +#include "exec/base_scanner.h" #include "gen_cpp/PlanNodes_types.h" #include "gen_cpp/Types_types.h" -#include "util/slice.h" -#include "util/runtime_profile.h" -#include "runtime/mem_pool.h" -#include "runtime/tuple.h" #include "runtime/descriptors.h" -#include "runtime/stream_load/load_stream_mgr.h" +#include "runtime/mem_pool.h" #include "runtime/small_file_mgr.h" +#include "runtime/stream_load/load_stream_mgr.h" +#include "runtime/tuple.h" +#include 
"util/runtime_profile.h" +#include "util/slice.h" namespace doris { class Tuple; @@ -50,13 +51,9 @@ class JsonReader; class JsonScanner : public BaseScanner { public: - JsonScanner( - RuntimeState* state, - RuntimeProfile* profile, - const TBrokerScanRangeParams& params, - const std::vector& ranges, - const std::vector& broker_addresses, - ScannerCounter* counter); + JsonScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params, + const std::vector& ranges, + const std::vector& broker_addresses, ScannerCounter* counter); ~JsonScanner(); // Open this scanner, will initialize information needed @@ -67,8 +64,10 @@ class JsonScanner : public BaseScanner { // Close this scanner void close() override; + private: Status open_next_reader(); + private: const std::vector& _ranges; const std::vector& _broker_addresses; @@ -104,30 +103,43 @@ struct JsonPath; // return other error Status if encounter other errors. class JsonReader { public: - JsonReader(RuntimeState* state, ScannerCounter* counter, RuntimeProfile* profile, FileReader* file_reader, - bool strip_outer_array); + JsonReader(RuntimeState* state, ScannerCounter* counter, RuntimeProfile* profile, + FileReader* file_reader, bool strip_outer_array); ~JsonReader(); Status init(const std::string& jsonpath, const std::string& json_root); // must call before use - Status read(Tuple* tuple, const std::vector& slot_descs, MemPool* tuple_pool, bool* eof); + Status read(Tuple* tuple, const std::vector& slot_descs, MemPool* tuple_pool, + bool* eof); private: - Status (JsonReader::*_handle_json_callback)(Tuple* tuple, const std::vector& slot_descs, MemPool* tuple_pool, bool* eof); - Status _handle_simple_json(Tuple* tuple, const std::vector& slot_descs, MemPool* tuple_pool, bool* eof); - Status _handle_flat_array_complex_json(Tuple* tuple, const std::vector& slot_descs, MemPool* tuple_pool, bool* eof); - Status _handle_nested_complex_json(Tuple* tuple, const std::vector& slot_descs, MemPool* 
tuple_pool, bool* eof); - - void _fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, const uint8_t* value, int32_t len); + Status (JsonReader::*_handle_json_callback)(Tuple* tuple, + const std::vector& slot_descs, + MemPool* tuple_pool, bool* eof); + Status _handle_simple_json(Tuple* tuple, const std::vector& slot_descs, + MemPool* tuple_pool, bool* eof); + Status _handle_flat_array_complex_json(Tuple* tuple, + const std::vector& slot_descs, + MemPool* tuple_pool, bool* eof); + Status _handle_nested_complex_json(Tuple* tuple, const std::vector& slot_descs, + MemPool* tuple_pool, bool* eof); + + void _fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, + const uint8_t* value, int32_t len); Status _parse_json_doc(bool* eof); - void _set_tuple_value(rapidjson::Value& objectValue, Tuple* tuple, const std::vector& slot_descs, MemPool* tuple_pool, bool *valid); - void _write_data_to_tuple(rapidjson::Value::ConstValueIterator value, SlotDescriptor* desc, Tuple* tuple, MemPool* tuple_pool, bool* valid); - bool _write_values_by_jsonpath(rapidjson::Value& objectValue, MemPool* tuple_pool, Tuple* tuple, const std::vector& slot_descs); + void _set_tuple_value(rapidjson::Value& objectValue, Tuple* tuple, + const std::vector& slot_descs, MemPool* tuple_pool, + bool* valid); + void _write_data_to_tuple(rapidjson::Value::ConstValueIterator value, SlotDescriptor* desc, + Tuple* tuple, MemPool* tuple_pool, bool* valid); + bool _write_values_by_jsonpath(rapidjson::Value& objectValue, MemPool* tuple_pool, Tuple* tuple, + const std::vector& slot_descs); std::string _print_json_value(const rapidjson::Value& value); std::string _print_jsonpath(const std::vector& path); void _close(); - Status _generate_json_paths(const std::string& jsonpath, std::vector>* vect); + Status _generate_json_paths(const std::string& jsonpath, + std::vector>* vect); private: int _next_line; @@ -135,7 +147,7 @@ class JsonReader { RuntimeState* _state; ScannerCounter* 
_counter; RuntimeProfile* _profile; - FileReader*_file_reader; + FileReader* _file_reader; bool _closed; bool _strip_outer_array; RuntimeProfile::Counter* _bytes_read_counter; @@ -145,9 +157,8 @@ class JsonReader { std::vector _parsed_json_root; rapidjson::Document _origin_json_doc; // origin json document object from parsed json string - rapidjson::Value *_json_doc; // _json_doc equals _final_json_doc iff not set `json_root` + rapidjson::Value* _json_doc; // _json_doc equals _final_json_doc iff not set `json_root` }; - -} // end namespace +} // namespace doris #endif diff --git a/be/src/exec/line_reader.h b/be/src/exec/line_reader.h index d2fe9c1e8b269d..06450ed2efa0bc 100644 --- a/be/src/exec/line_reader.h +++ b/be/src/exec/line_reader.h @@ -24,11 +24,10 @@ namespace doris { // This class is used for CSV scanner, to read content line by line class LineReader { public: - virtual ~LineReader() { - } + virtual ~LineReader() {} virtual Status read_line(const uint8_t** ptr, size_t* size, bool* eof) = 0; virtual void close() = 0; }; -} +} // namespace doris diff --git a/be/src/exec/local_file_reader.cpp b/be/src/exec/local_file_reader.cpp index 3cbf66fdafbdb8..70245573777bf2 100644 --- a/be/src/exec/local_file_reader.cpp +++ b/be/src/exec/local_file_reader.cpp @@ -15,15 +15,16 @@ // specific language governing permissions and limitations // under the License. 
#include "exec/local_file_reader.h" -#include + #include +#include + #include "common/logging.h" namespace doris { -LocalFileReader::LocalFileReader(const std::string& path, int64_t start_offset) - : _path(path), _current_offset(start_offset), _file_size(-1), _fp(nullptr) { -} +LocalFileReader::LocalFileReader(const std::string& path, int64_t start_offset) + : _path(path), _current_offset(start_offset), _file_size(-1), _fp(nullptr) {} LocalFileReader::~LocalFileReader() { close(); @@ -34,8 +35,7 @@ Status LocalFileReader::open() { if (_fp == nullptr) { char err_buf[64]; std::stringstream ss; - ss << "Open file failed. path=" << _path - << ", error=" << strerror_r(errno, err_buf, 64); + ss << "Open file failed. path=" << _path << ", error=" << strerror_r(errno, err_buf, 64); return Status::InternalError(ss.str()); } return seek(_current_offset); @@ -84,31 +84,30 @@ Status LocalFileReader::read(uint8_t* buf, size_t* buf_len, bool* eof) { Status LocalFileReader::readat(int64_t position, int64_t nbytes, int64_t* bytes_read, void* out) { if (position != _current_offset) { int ret = fseek(_fp, position, SEEK_SET); - if (ret != 0) {// check fseek return value + if (ret != 0) { // check fseek return value return Status::InternalError(strerror(errno)); } } - *bytes_read = fread(out, 1, nbytes, _fp); + *bytes_read = fread(out, 1, nbytes, _fp); if (*bytes_read == 0 && ferror(_fp)) { char err_buf[64]; std::stringstream ss; - ss << "Read file failed. path=" << _path - << ", error=" << strerror_r(errno, err_buf, 64); + ss << "Read file failed. 
path=" << _path << ", error=" << strerror_r(errno, err_buf, 64); return Status::InternalError(ss.str()); } - _current_offset = ftell(_fp);// save offset with file + _current_offset = ftell(_fp); // save offset with file return Status::OK(); } -int64_t LocalFileReader::size () { +int64_t LocalFileReader::size() { if (_file_size == -1) { int ret; struct stat buf; ret = fstat(fileno(_fp), &buf); if (ret) { - LOG(WARNING) << "Get file size is error, errno: " << errno - << ", msg " << strerror(errno); + LOG(WARNING) << "Get file size is error, errno: " << errno << ", msg " + << strerror(errno); return -1; } _file_size = buf.st_size; @@ -122,7 +121,7 @@ Status LocalFileReader::seek(int64_t position) { char err_buf[64]; std::stringstream ss; ss << "Seek to start_offset failed. offset=" << position - << ", error=" << strerror_r(errno, err_buf, 64); + << ", error=" << strerror_r(errno, err_buf, 64); return Status::InternalError(ss.str()); } return Status::OK(); @@ -133,4 +132,4 @@ Status LocalFileReader::tell(int64_t* position) { return Status::OK(); } -} +} // namespace doris diff --git a/be/src/exec/local_file_reader.h b/be/src/exec/local_file_reader.h index 11bcbcc0350861..07c1d191d20bbb 100644 --- a/be/src/exec/local_file_reader.h +++ b/be/src/exec/local_file_reader.h @@ -36,13 +36,15 @@ class LocalFileReader : public FileReader { // If reach to end of file, the eof is set to true. meanwhile 'buf_len' // is set to zero. 
virtual Status read(uint8_t* buf, size_t* buf_len, bool* eof) override; - virtual Status readat(int64_t position, int64_t nbytes, int64_t* bytes_read, void* out) override; + virtual Status readat(int64_t position, int64_t nbytes, int64_t* bytes_read, + void* out) override; virtual Status read_one_message(uint8_t** buf, size_t* length) override; - virtual int64_t size () override; + virtual int64_t size() override; virtual Status seek(int64_t position) override; virtual Status tell(int64_t* position) override; virtual void close() override; virtual bool closed() override; + private: std::string _path; int64_t _current_offset; @@ -50,4 +52,4 @@ class LocalFileReader : public FileReader { FILE* _fp; }; -} +} // namespace doris diff --git a/be/src/exec/local_file_writer.cpp b/be/src/exec/local_file_writer.cpp index d140e6b97c4b03..056d4b0cea5369 100644 --- a/be/src/exec/local_file_writer.cpp +++ b/be/src/exec/local_file_writer.cpp @@ -16,13 +16,13 @@ // under the License. #include "exec/local_file_writer.h" + #include "util/error_util.h" namespace doris { LocalFileWriter::LocalFileWriter(const std::string& path, int64_t start_offset) - : _path(path), _start_offset(start_offset), _fp(nullptr) { -} + : _path(path), _start_offset(start_offset), _fp(nullptr) {} LocalFileWriter::~LocalFileWriter() { close(); @@ -32,9 +32,8 @@ Status LocalFileWriter::open() { _fp = fopen(_path.c_str(), "w+"); if (_fp == nullptr) { std::stringstream ss; - ss << "Open file failed. path=" << _path - << ", errno= " << errno - << ", description=" << get_str_err_msg(); + ss << "Open file failed. path=" << _path << ", errno= " << errno + << ", description=" << get_str_err_msg(); return Status::InternalError(ss.str()); } @@ -42,9 +41,8 @@ Status LocalFileWriter::open() { int success = fseek(_fp, _start_offset, SEEK_SET); if (success != 0) { std::stringstream ss; - ss << "Seek to start_offset failed. 
offset=" << _start_offset - << ", errno= " << errno - << ", description=" << get_str_err_msg(); + ss << "Seek to start_offset failed. offset=" << _start_offset << ", errno= " << errno + << ", description=" << get_str_err_msg(); return Status::InternalError(ss.str()); } } @@ -57,10 +55,8 @@ Status LocalFileWriter::write(const uint8_t* buf, size_t buf_len, size_t* writte if (bytes_written < buf_len) { std::stringstream error_msg; error_msg << "fail to write to file. " - << " len=" << buf_len - << ", path=" << _path - << ", failed with errno=" << errno - << ", description=" << get_str_err_msg(); + << " len=" << buf_len << ", path=" << _path << ", failed with errno=" << errno + << ", description=" << get_str_err_msg(); return Status::InternalError(error_msg.str()); } diff --git a/be/src/exec/lzo_decompressor.cpp b/be/src/exec/lzo_decompressor.cpp index c017c87711bc56..47042996b2cd4f 100644 --- a/be/src/exec/lzo_decompressor.cpp +++ b/be/src/exec/lzo_decompressor.cpp @@ -21,8 +21,8 @@ namespace doris { #ifdef DORIS_WITH_LZO // Lzop -const uint8_t LzopDecompressor::LZOP_MAGIC[9] = - { 0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a }; +const uint8_t LzopDecompressor::LZOP_MAGIC[9] = {0x89, 0x4c, 0x5a, 0x4f, 0x00, + 0x0d, 0x0a, 0x1a, 0x0a}; const uint64_t LzopDecompressor::LZOP_VERSION = 0x1030; const uint64_t LzopDecompressor::MIN_LZO_VERSION = 0x0100; @@ -30,37 +30,34 @@ const uint64_t LzopDecompressor::MIN_LZO_VERSION = 0x0100; // + lvl(1) + flags(4) + mode/mtime(12) + filename_len(1) // without the real file name, extra field and checksum const uint32_t LzopDecompressor::MIN_HEADER_SIZE = 34; -const uint32_t LzopDecompressor::LZO_MAX_BLOCK_SIZE = (64*1024l*1024l); +const uint32_t LzopDecompressor::LZO_MAX_BLOCK_SIZE = (64 * 1024l * 1024l); const uint32_t LzopDecompressor::CRC32_INIT_VALUE = 0; const uint32_t LzopDecompressor::ADLER32_INIT_VALUE = 1; -const uint64_t LzopDecompressor::F_H_CRC32 = 0x00001000L; -const uint64_t LzopDecompressor::F_MASK = 0x00003FFFL; 
-const uint64_t LzopDecompressor::F_OS_MASK = 0xff000000L; -const uint64_t LzopDecompressor::F_CS_MASK = 0x00f00000L; -const uint64_t LzopDecompressor::F_RESERVED = ((F_MASK | F_OS_MASK | F_CS_MASK) ^ 0xffffffffL); -const uint64_t LzopDecompressor::F_MULTIPART = 0x00000400L; -const uint64_t LzopDecompressor::F_H_FILTER = 0x00000800L; -const uint64_t LzopDecompressor::F_H_EXTRA_FIELD = 0x00000040L; -const uint64_t LzopDecompressor::F_CRC32_C = 0x00000200L; -const uint64_t LzopDecompressor::F_ADLER32_C = 0x00000002L; -const uint64_t LzopDecompressor::F_CRC32_D = 0x00000100L; -const uint64_t LzopDecompressor::F_ADLER32_D = 0x00000001L; - -LzopDecompressor::~LzopDecompressor() { -} +const uint64_t LzopDecompressor::F_H_CRC32 = 0x00001000L; +const uint64_t LzopDecompressor::F_MASK = 0x00003FFFL; +const uint64_t LzopDecompressor::F_OS_MASK = 0xff000000L; +const uint64_t LzopDecompressor::F_CS_MASK = 0x00f00000L; +const uint64_t LzopDecompressor::F_RESERVED = ((F_MASK | F_OS_MASK | F_CS_MASK) ^ 0xffffffffL); +const uint64_t LzopDecompressor::F_MULTIPART = 0x00000400L; +const uint64_t LzopDecompressor::F_H_FILTER = 0x00000800L; +const uint64_t LzopDecompressor::F_H_EXTRA_FIELD = 0x00000040L; +const uint64_t LzopDecompressor::F_CRC32_C = 0x00000200L; +const uint64_t LzopDecompressor::F_ADLER32_C = 0x00000002L; +const uint64_t LzopDecompressor::F_CRC32_D = 0x00000100L; +const uint64_t LzopDecompressor::F_ADLER32_D = 0x00000001L; + +LzopDecompressor::~LzopDecompressor() {} Status LzopDecompressor::init() { return Status::OK(); } -Status LzopDecompressor::decompress( - uint8_t* input, size_t input_len, size_t* input_bytes_read, - uint8_t* output, size_t output_max_len, - size_t* decompressed_len, bool* stream_end, - size_t* more_input_bytes, size_t* more_output_bytes) { - +Status LzopDecompressor::decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, + uint8_t* output, size_t output_max_len, + size_t* decompressed_len, bool* stream_end, + size_t* 
more_input_bytes, size_t* more_output_bytes) { if (!_is_header_loaded) { // this is the first time to call lzo decompress, parse the header info first RETURN_IF_ERROR(parse_header_info(input, input_len, input_bytes_read, more_input_bytes)); @@ -107,8 +104,8 @@ Status LzopDecompressor::decompress( left_input_len -= sizeof(uint32_t); if (compressed_size > LZO_MAX_BLOCK_SIZE) { std::stringstream ss; - ss << "lzo block size: " << compressed_size << " is greater than LZO_MAX_BLOCK_SIZE: " - << LZO_MAX_BLOCK_SIZE; + ss << "lzo block size: " << compressed_size + << " is greater than LZO_MAX_BLOCK_SIZE: " << LZO_MAX_BLOCK_SIZE; return Status::InternalError(ss.str()); } @@ -142,11 +139,11 @@ Status LzopDecompressor::decompress( // 5. checksum compressed data if (left_input_len < compressed_size) { - *more_input_bytes = compressed_size - left_input_len; + *more_input_bytes = compressed_size - left_input_len; return Status::OK(); } - RETURN_IF_ERROR(checksum(_header_info.input_checksum_type, - "compressed", in_checksum, ptr, compressed_size)); + RETURN_IF_ERROR(checksum(_header_info.input_checksum_type, "compressed", in_checksum, ptr, + compressed_size)); // 6. 
decompress if (output_max_len < uncompressed_size) { @@ -160,18 +157,17 @@ Status LzopDecompressor::decompress( } else { // decompress *decompressed_len = uncompressed_size; - int ret = lzo1x_decompress_safe(ptr, compressed_size, - output, reinterpret_cast(&uncompressed_size), nullptr); + int ret = lzo1x_decompress_safe(ptr, compressed_size, output, + reinterpret_cast(&uncompressed_size), nullptr); if (ret != LZO_E_OK || uncompressed_size != *decompressed_len) { std::stringstream ss; ss << "Lzo decompression failed with ret: " << ret - << " decompressed len: " << uncompressed_size - << " expected: " << *decompressed_len; + << " decompressed len: " << uncompressed_size << " expected: " << *decompressed_len; return Status::InternalError(ss.str()); } - RETURN_IF_ERROR(checksum(_header_info.output_checksum_type, "decompressed", - out_checksum, output, uncompressed_size)); + RETURN_IF_ERROR(checksum(_header_info.output_checksum_type, "decompressed", out_checksum, + output, uncompressed_size)); ptr += compressed_size; } @@ -210,10 +206,9 @@ Status LzopDecompressor::decompress( // // // -// -- presence indicated in flags, not currently used. +// -- presence indicated in flags, not currently used. Status LzopDecompressor::parse_header_info(uint8_t* input, size_t input_len, - size_t* input_bytes_read, - size_t* more_input_bytes) { + size_t* input_bytes_read, size_t* more_input_bytes) { if (input_len < MIN_HEADER_SIZE) { LOG(INFO) << "highly recommanded that Lzo header size is larger than " << MIN_HEADER_SIZE << ", or parsing header info may failed." @@ -288,7 +283,7 @@ Status LzopDecompressor::parse_header_info(uint8_t* input, size_t input_len, // 9. 
filename uint8_t filename_len; ptr = get_uint8(ptr, &filename_len); - + // here we already consume (MIN_HEADER_SIZE) // from now we have to check left input is enough for each step size_t left = input_len - (ptr - input); @@ -297,7 +292,7 @@ Status LzopDecompressor::parse_header_info(uint8_t* input, size_t input_len, return Status::OK(); } - _header_info.filename = std::string((char*) ptr, (size_t) filename_len); + _header_info.filename = std::string((char*)ptr, (size_t)filename_len); ptr += filename_len; left -= filename_len; @@ -338,7 +333,7 @@ Status LzopDecompressor::parse_header_info(uint8_t* input, size_t input_len, // add the checksum and the len to the total ptr size. if (left < sizeof(int32_t) + extra_len) { - *more_input_bytes = sizeof(int32_t) + extra_len - left; + *more_input_bytes = sizeof(int32_t) + extra_len - left; return Status::OK(); } left -= sizeof(int32_t) + extra_len; @@ -354,8 +349,7 @@ Status LzopDecompressor::parse_header_info(uint8_t* input, size_t input_len, return Status::OK(); } -Status LzopDecompressor::checksum(LzoChecksum type, const std::string& source, - uint32_t expected, +Status LzopDecompressor::checksum(LzoChecksum type, const std::string& source, uint32_t expected, uint8_t* ptr, size_t len) { uint32_t computed_checksum; switch (type) { @@ -376,8 +370,7 @@ Status LzopDecompressor::checksum(LzoChecksum type, const std::string& source, if (computed_checksum != expected) { std::stringstream ss; ss << "checksum of " << source << " block failed." - << " computed checksum: " << computed_checksum - << " expected: " << expected; + << " computed checksum: " << computed_checksum << " expected: " << expected; return Status::InternalError(ss.str()); } @@ -387,11 +380,9 @@ Status LzopDecompressor::checksum(LzoChecksum type, const std::string& source, std::string LzopDecompressor::debug_info() { std::stringstream ss; ss << "LzopDecompressor." 
- << " version: " << _header_info.version - << " lib version: " << _header_info.lib_version + << " version: " << _header_info.version << " lib version: " << _header_info.lib_version << " version needed: " << _header_info.version_needed - << " method: " << (uint16_t) _header_info.method - << " filename: " << _header_info.filename + << " method: " << (uint16_t)_header_info.method << " filename: " << _header_info.filename << " header size: " << _header_info.header_size << " header checksum type: " << _header_info.header_checksum_type << " input checksum type: " << _header_info.input_checksum_type @@ -400,4 +391,4 @@ std::string LzopDecompressor::debug_info() { } #endif // DORIS_WITH_LZO -} // namespace +} // namespace doris diff --git a/be/src/exec/merge_join_node.cpp b/be/src/exec/merge_join_node.cpp index 12a257199bf67b..dcd5984788061e 100644 --- a/be/src/exec/merge_join_node.cpp +++ b/be/src/exec/merge_join_node.cpp @@ -29,9 +29,9 @@ namespace doris { -template -int compare_value(const void* left_value, const void* right_value) { - if (*(T*)left_value < * (T*)right_value) { +template +int compare_value(const void* left_value, const void* right_value) { + if (*(T*)left_value < *(T*)right_value) { return -1; } else if (*(T*)left_value == *(T*)right_value) { return 0; @@ -40,25 +40,20 @@ int compare_value(const void* left_value, const void* right_value) { } } -template -int compare_value(const StringValue* left_value, const StringValue* right_value) { +template +int compare_value(const StringValue* left_value, const StringValue* right_value) { return left_value->compare(*right_value); } -MergeJoinNode::MergeJoinNode( - ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : - ExecNode(pool, tnode, descs), - _out_batch(NULL) { -} +MergeJoinNode::MergeJoinNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), _out_batch(NULL) {} -MergeJoinNode::~MergeJoinNode() { -} +MergeJoinNode::~MergeJoinNode() {} 
Status MergeJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { DCHECK(tnode.__isset.merge_join_node); RETURN_IF_ERROR(ExecNode::init(tnode, state)); - const std::vector& cmp_conjuncts = - tnode.merge_join_node.cmp_conjuncts; + const std::vector& cmp_conjuncts = tnode.merge_join_node.cmp_conjuncts; for (int i = 0; i < cmp_conjuncts.size(); ++i) { ExprContext* ctx = NULL; @@ -68,9 +63,8 @@ Status MergeJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { _right_expr_ctxs.push_back(ctx); } - RETURN_IF_ERROR(Expr::create_expr_trees( - _pool, tnode.merge_join_node.other_join_conjuncts, - &_other_join_conjunct_ctxs)); + RETURN_IF_ERROR(Expr::create_expr_trees(_pool, tnode.merge_join_node.other_join_conjuncts, + &_other_join_conjunct_ctxs)); return Status::OK(); } @@ -79,10 +73,10 @@ Status MergeJoinNode::prepare(RuntimeState* state) { // build and probe exprs are evaluated in the context of the rows produced by our // right and left children, respectively - RETURN_IF_ERROR(Expr::prepare( - _left_expr_ctxs, state, child(0)->row_desc(), expr_mem_tracker())); - RETURN_IF_ERROR(Expr::prepare( - _right_expr_ctxs, state, child(1)->row_desc(), expr_mem_tracker())); + RETURN_IF_ERROR( + Expr::prepare(_left_expr_ctxs, state, child(0)->row_desc(), expr_mem_tracker())); + RETURN_IF_ERROR( + Expr::prepare(_right_expr_ctxs, state, child(1)->row_desc(), expr_mem_tracker())); for (int i = 0; i < _left_expr_ctxs.size(); ++i) { switch (_left_expr_ctxs[i]->root()->type().type) { @@ -118,8 +112,8 @@ Status MergeJoinNode::prepare(RuntimeState* state) { } // _other_join_conjuncts are evaluated in the context of the rows produced by this node - RETURN_IF_ERROR(Expr::prepare( - _other_join_conjunct_ctxs, state, _row_descriptor, expr_mem_tracker())); + RETURN_IF_ERROR( + Expr::prepare(_other_join_conjunct_ctxs, state, _row_descriptor, expr_mem_tracker())); _result_tuple_row_size = _row_descriptor.tuple_descriptors().size() * sizeof(Tuple*); // pre-compute the tuple index of 
build tuples in the output row @@ -299,18 +293,14 @@ Status MergeJoinNode::get_input_row(RuntimeState* state, int child_idx) { } if (child_idx == 0) { - _left_child_ctx.reset( - new ChildReaderContext( - child(child_idx)->row_desc(), - state->batch_size(), - state->instance_mem_tracker())); + _left_child_ctx.reset(new ChildReaderContext(child(child_idx)->row_desc(), + state->batch_size(), + state->instance_mem_tracker())); ctx = _left_child_ctx.get(); } else { - _right_child_ctx.reset( - new ChildReaderContext( - child(child_idx)->row_desc(), - state->batch_size(), - state->instance_mem_tracker())); + _right_child_ctx.reset(new ChildReaderContext(child(child_idx)->row_desc(), + state->batch_size(), + state->instance_mem_tracker())); ctx = _right_child_ctx.get(); } @@ -333,12 +323,11 @@ void MergeJoinNode::debug_string(int indentation_level, std::stringstream* out) << " _right_child_pos=" << (_right_child_ctx.get() ? _right_child_ctx->row_idx : -1) << " join_conjuncts="; *out << "Conjunct("; - // << " left_exprs=" << Expr::debug_string(_left_exprs) - // << " right_exprs=" << Expr::debug_string(_right_exprs); + // << " left_exprs=" << Expr::debug_string(_left_exprs) + // << " right_exprs=" << Expr::debug_string(_right_exprs); *out << ")"; ExecNode::debug_string(indentation_level, out); *out << ")"; } -} - +} // namespace doris diff --git a/be/src/exec/merge_join_node.h b/be/src/exec/merge_join_node.h index e1e647989e86dd..3574520b10275b 100644 --- a/be/src/exec/merge_join_node.h +++ b/be/src/exec/merge_join_node.h @@ -19,13 +19,13 @@ #define DORIS_BE_SRC_QUERY_EXEC_MERGE_JOIN_NODE_H #include -#include #include +#include #include #include "exec/exec_node.h" +#include "gen_cpp/PlanNodes_types.h" // for TJoinOp #include "runtime/row_batch.h" -#include "gen_cpp/PlanNodes_types.h" // for TJoinOp namespace doris { @@ -59,7 +59,7 @@ class MergeJoinNode : public ExecNode { // non-equi-join conjuncts from the JOIN clause std::vector _other_join_conjunct_ctxs; - bool _eos; // 
if true, nothing left to return in get_next() + bool _eos; // if true, nothing left to return in get_next() struct ChildReaderContext { RowBatch batch; @@ -98,6 +98,6 @@ class MergeJoinNode : public ExecNode { Status get_input_row(RuntimeState* state, int child_idx); }; -} +} // namespace doris #endif diff --git a/be/src/exec/merge_node.cpp b/be/src/exec/merge_node.cpp index 92152ecb72a892..c7c058e01cd04f 100644 --- a/be/src/exec/merge_node.cpp +++ b/be/src/exec/merge_node.cpp @@ -19,24 +19,22 @@ #include "exprs/expr.h" #include "gen_cpp/PlanNodes_types.h" +#include "runtime/raw_value.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" -#include "runtime/raw_value.h" using std::vector; namespace doris { -MergeNode::MergeNode(ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs) : - ExecNode(pool, tnode, descs), - _tuple_id(tnode.merge_node.tuple_id), - _const_result_expr_idx(0), - _child_idx(INVALID_CHILD_IDX), - _child_row_batch(NULL), - _child_eos(false), - _child_row_idx(0) { -} +MergeNode::MergeNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), + _tuple_id(tnode.merge_node.tuple_id), + _const_result_expr_idx(0), + _child_idx(INVALID_CHILD_IDX), + _child_row_batch(NULL), + _child_eos(false), + _child_row_idx(0) {} Status MergeNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(ExecNode::init(tnode, state)); @@ -66,8 +64,8 @@ Status MergeNode::prepare(RuntimeState* state) { // Prepare const expr lists. 
for (int i = 0; i < _const_result_expr_ctx_lists.size(); ++i) { - RETURN_IF_ERROR(Expr::prepare( - _const_result_expr_ctx_lists[i], state, row_desc(), expr_mem_tracker())); + RETURN_IF_ERROR(Expr::prepare(_const_result_expr_ctx_lists[i], state, row_desc(), + expr_mem_tracker())); DCHECK_EQ(_const_result_expr_ctx_lists[i].size(), _tuple_desc->slots().size()); } @@ -81,8 +79,8 @@ Status MergeNode::prepare(RuntimeState* state) { // Prepare result expr lists. for (int i = 0; i < _result_expr_ctx_lists.size(); ++i) { - RETURN_IF_ERROR(Expr::prepare( - _result_expr_ctx_lists[i], state, child(i)->row_desc(), expr_mem_tracker())); + RETURN_IF_ERROR(Expr::prepare(_result_expr_ctx_lists[i], state, child(i)->row_desc(), + expr_mem_tracker())); // DCHECK_EQ(_result_expr_ctx_lists[i].size(), _tuple_desc->slots().size()); DCHECK_EQ(_result_expr_ctx_lists[i].size(), _materialized_slots.size()); } @@ -118,8 +116,8 @@ Status MergeNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) // Evaluate and materialize the const expr lists exactly once. while (_const_result_expr_idx < _const_result_expr_ctx_lists.size()) { // Materialize expr results into row_batch. - eval_and_materialize_exprs( - _const_result_expr_ctx_lists[_const_result_expr_idx], true, &tuple, row_batch); + eval_and_materialize_exprs(_const_result_expr_ctx_lists[_const_result_expr_idx], true, + &tuple, row_batch); ++_const_result_expr_idx; *eos = reached_limit(); @@ -137,12 +135,12 @@ Status MergeNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) // Row batch was either never set or we're moving on to a different child. if (_child_row_batch.get() == NULL) { RETURN_IF_CANCELLED(state); - _child_row_batch.reset( - new RowBatch(child(_child_idx)->row_desc(), state->batch_size(), mem_tracker().get())); + _child_row_batch.reset(new RowBatch(child(_child_idx)->row_desc(), state->batch_size(), + mem_tracker().get())); // Open child and fetch the first row batch. 
RETURN_IF_ERROR(child(_child_idx)->open(state)); - RETURN_IF_ERROR(child(_child_idx)->get_next(state, _child_row_batch.get(), - &_child_eos)); + RETURN_IF_ERROR( + child(_child_idx)->get_next(state, _child_row_batch.get(), &_child_eos)); _child_row_idx = 0; } @@ -150,7 +148,7 @@ Status MergeNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) while (true) { // Continue materializing exprs on _child_row_batch into row batch. if (eval_and_materialize_exprs(_result_expr_ctx_lists[_child_idx], false, &tuple, - row_batch)) { + row_batch)) { *eos = reached_limit(); if (*eos) { @@ -167,8 +165,8 @@ Status MergeNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) RETURN_IF_CANCELLED(state); _child_row_batch->reset(); - RETURN_IF_ERROR(child(_child_idx)->get_next(state, _child_row_batch.get(), - &_child_eos)); + RETURN_IF_ERROR( + child(_child_idx)->get_next(state, _child_row_batch.get(), &_child_eos)); _child_row_idx = 0; } @@ -198,11 +196,8 @@ Status MergeNode::close(RuntimeState* state) { return ExecNode::close(state); } -bool MergeNode::eval_and_materialize_exprs( - const std::vector& ctxs, - bool const_exprs, - Tuple** tuple, - RowBatch* row_batch) { +bool MergeNode::eval_and_materialize_exprs(const std::vector& ctxs, bool const_exprs, + Tuple** tuple, RowBatch* row_batch) { // Make sure there are rows left in the batch. 
if (!const_exprs && _child_row_idx >= _child_row_batch->num_rows()) { return false; @@ -258,4 +253,4 @@ bool MergeNode::eval_and_materialize_exprs( return false; } -} +} // namespace doris diff --git a/be/src/exec/merge_node.h b/be/src/exec/merge_node.h index 3d5214c4203be7..7344dabdc77892 100644 --- a/be/src/exec/merge_node.h +++ b/be/src/exec/merge_node.h @@ -22,7 +22,6 @@ #include "exec/exec_node.h" #include "runtime/mem_pool.h" -#include namespace doris { @@ -36,7 +35,7 @@ class TupleRow; class MergeNode : public ExecNode { public: MergeNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); - virtual ~MergeNode() { } + virtual ~MergeNode() {} // Create const exprs, child exprs and conjuncts from corresponding thrift exprs. virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr); @@ -88,9 +87,9 @@ class MergeNode : public ExecNode { // Returns true if row_batch should be returned to caller or limit has been // reached, false otherwise. bool eval_and_materialize_exprs(const std::vector& exprs, bool const_exprs, - Tuple** tuple, RowBatch* row_batch); + Tuple** tuple, RowBatch* row_batch); }; -} +} // namespace doris #endif diff --git a/be/src/exec/mysql_scan_node.cpp b/be/src/exec/mysql_scan_node.cpp index 7f087f3971e916..79775edb98a15a 100644 --- a/be/src/exec/mysql_scan_node.cpp +++ b/be/src/exec/mysql_scan_node.cpp @@ -21,27 +21,24 @@ #include "exec/text_converter.hpp" #include "gen_cpp/PlanNodes_types.h" -#include "runtime/runtime_state.h" #include "runtime/row_batch.h" +#include "runtime/runtime_state.h" #include "runtime/string_value.h" #include "runtime/tuple_row.h" #include "util/runtime_profile.h" namespace doris { -MysqlScanNode::MysqlScanNode(ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs) - : ScanNode(pool, tnode, descs), - _is_init(false), - _table_name(tnode.mysql_scan_node.table_name), - _tuple_id(tnode.mysql_scan_node.tuple_id), - _columns(tnode.mysql_scan_node.columns), - 
_filters(tnode.mysql_scan_node.filters), - _tuple_desc(nullptr) { -} +MysqlScanNode::MysqlScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ScanNode(pool, tnode, descs), + _is_init(false), + _table_name(tnode.mysql_scan_node.table_name), + _tuple_id(tnode.mysql_scan_node.tuple_id), + _columns(tnode.mysql_scan_node.columns), + _filters(tnode.mysql_scan_node.filters), + _tuple_desc(nullptr) {} -MysqlScanNode::~MysqlScanNode() { -} +MysqlScanNode::~MysqlScanNode() {} Status MysqlScanNode::prepare(RuntimeState* state) { VLOG(1) << "MysqlScanNode::Prepare"; @@ -65,7 +62,7 @@ Status MysqlScanNode::prepare(RuntimeState* state) { _slot_num = _tuple_desc->slots().size(); // get mysql info const MySQLTableDescriptor* mysql_table = - static_cast(_tuple_desc->table_desc()); + static_cast(_tuple_desc->table_desc()); if (NULL == mysql_table) { return Status::InternalError("mysql table pointer is NULL."); @@ -77,19 +74,19 @@ Status MysqlScanNode::prepare(RuntimeState* state) { _my_param.passwd = mysql_table->passwd(); _my_param.db = mysql_table->mysql_db(); // new one scanner - _mysql_scanner.reset(new(std::nothrow) MysqlScanner(_my_param)); + _mysql_scanner.reset(new (std::nothrow) MysqlScanner(_my_param)); if (_mysql_scanner.get() == NULL) { return Status::InternalError("new a mysql scanner failed."); } - _tuple_pool.reset(new(std::nothrow) MemPool(mem_tracker().get())); + _tuple_pool.reset(new (std::nothrow) MemPool(mem_tracker().get())); if (_tuple_pool.get() == NULL) { return Status::InternalError("new a mem pool failed."); } - _text_converter.reset(new(std::nothrow) TextConverter('\\')); + _text_converter.reset(new (std::nothrow) TextConverter('\\')); if (_text_converter.get() == NULL) { return Status::InternalError("new a text convertor failed."); @@ -134,10 +131,10 @@ Status MysqlScanNode::open(RuntimeState* state) { return Status::OK(); } -Status MysqlScanNode::write_text_slot(char* value, int value_length, - SlotDescriptor* slot, 
RuntimeState* state) { - if (!_text_converter->write_slot(slot, _tuple, value, value_length, - true, false, _tuple_pool.get())) { +Status MysqlScanNode::write_text_slot(char* value, int value_length, SlotDescriptor* slot, + RuntimeState* state) { + if (!_text_converter->write_slot(slot, _tuple, value, value_length, true, false, + _tuple_pool.get())) { std::stringstream ss; ss << "fail to convert mysql value '" << value << "' TO " << slot->type(); return Status::InternalError(ss.str()); @@ -214,7 +211,7 @@ Status MysqlScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* e } else { std::stringstream ss; ss << "nonnull column contains NULL. table=" << _table_name - << ", column=" << slot_desc->col_name(); + << ", column=" << slot_desc->col_name(); return Status::InternalError(ss.str()); } } else { @@ -264,4 +261,4 @@ Status MysqlScanNode::set_scan_ranges(const std::vector& scan_ return Status::OK(); } -} +} // namespace doris diff --git a/be/src/exec/mysql_scan_node.h b/be/src/exec/mysql_scan_node.h index 381a008c5df8de..6d9d602d6b2dd2 100644 --- a/be/src/exec/mysql_scan_node.h +++ b/be/src/exec/mysql_scan_node.h @@ -15,14 +15,14 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_QUERY_EXEC_MYSQL_SCAN_NODE_H -#define DORIS_BE_SRC_QUERY_EXEC_MYSQL_SCAN_NODE_H +#ifndef DORIS_BE_SRC_QUERY_EXEC_MYSQL_SCAN_NODE_H +#define DORIS_BE_SRC_QUERY_EXEC_MYSQL_SCAN_NODE_H #include -#include "runtime/descriptors.h" #include "exec/mysql_scanner.h" #include "exec/scan_node.h" +#include "runtime/descriptors.h" namespace doris { @@ -62,7 +62,7 @@ class MysqlScanNode : public ScanNode { // Writes a slot in _tuple from an MySQL value containing text data. // The Mysql value is converted into the appropriate target type. 
Status write_text_slot(char* value, int value_length, SlotDescriptor* slot, - RuntimeState* state); + RuntimeState* state); bool _is_init; MysqlScannerParam _my_param; @@ -91,6 +91,6 @@ class MysqlScanNode : public ScanNode { Tuple* _tuple = nullptr; }; -} +} // namespace doris #endif diff --git a/be/src/exec/mysql_scanner.cpp b/be/src/exec/mysql_scanner.cpp index 7cf5ba9c024b42..0b82c4fa9d0b42 100644 --- a/be/src/exec/mysql_scanner.cpp +++ b/be/src/exec/mysql_scanner.cpp @@ -19,20 +19,13 @@ #define __DorisMysql MYSQL #define __DorisMysqlRes MYSQL_RES -#include "mysql_scanner.h" - - #include "common/logging.h" +#include "mysql_scanner.h" namespace doris { MysqlScanner::MysqlScanner(const MysqlScannerParam& param) - : _my_param(param), - _my_conn(NULL), - _my_result(NULL), - _is_open(false), - _field_num(0) { -} + : _my_param(param), _my_conn(NULL), _my_result(NULL), _is_open(false), _field_num(0) {} MysqlScanner::~MysqlScanner() { if (_my_result) { @@ -63,9 +56,10 @@ Status MysqlScanner::open() { if (NULL == mysql_real_connect(_my_conn, _my_param.host.c_str(), _my_param.user.c_str(), _my_param.passwd.c_str(), _my_param.db.c_str(), atoi(_my_param.port.c_str()), NULL, _my_param.client_flag)) { - LOG(WARNING) << "connect Mysql: " << "Host: " << _my_param.host - << " user: " << _my_param.user << " passwd: " << _my_param.passwd - << " db: " << _my_param.db << " port: " << _my_param.port; + LOG(WARNING) << "connect Mysql: " + << "Host: " << _my_param.host << " user: " << _my_param.user + << " passwd: " << _my_param.passwd << " db: " << _my_param.db + << " port: " << _my_param.port; return _error_status("mysql real connect failed."); } @@ -147,7 +141,7 @@ Status MysqlScanner::query(const std::string& table, const std::vector + #include #include #include "common/status.h" - #ifndef __DorisMysql #define __DorisMysql void #endif @@ -42,7 +42,7 @@ struct MysqlScannerParam { std::string passwd; std::string db; unsigned long client_flag; - MysqlScannerParam(): client_flag(0) { 
} + MysqlScannerParam() : client_flag(0) {} }; // Mysql Scanner for scan data from mysql @@ -57,11 +57,10 @@ class MysqlScanner { // query for DORIS Status query(const std::string& table, const std::vector& fields, const std::vector& filters, const uint64_t limit); - Status get_next_row(char** *buf, unsigned long** lengths, bool* eos); + Status get_next_row(char*** buf, unsigned long** lengths, bool* eos); + + int field_num() const { return _field_num; } - int field_num() const { - return _field_num; - } private: Status _error_status(const std::string& prefix); @@ -73,6 +72,6 @@ class MysqlScanner { int _field_num; }; -} +} // namespace doris #endif diff --git a/be/src/exec/odbc_scan_node.cpp b/be/src/exec/odbc_scan_node.cpp index 91deca28547c3c..ce4d71bc7fc7ca 100644 --- a/be/src/exec/odbc_scan_node.cpp +++ b/be/src/exec/odbc_scan_node.cpp @@ -21,27 +21,24 @@ #include "exec/text_converter.hpp" #include "gen_cpp/PlanNodes_types.h" -#include "runtime/runtime_state.h" #include "runtime/row_batch.h" +#include "runtime/runtime_state.h" #include "runtime/string_value.h" #include "runtime/tuple_row.h" #include "util/runtime_profile.h" namespace doris { -OdbcScanNode::OdbcScanNode(ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs) +OdbcScanNode::OdbcScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : ScanNode(pool, tnode, descs), _is_init(false), _table_name(tnode.odbc_scan_node.table_name), _connect_string(std::move(tnode.odbc_scan_node.connect_string)), _query_string(std::move(tnode.odbc_scan_node.query_string)), _tuple_id(tnode.odbc_scan_node.tuple_id), - _tuple_desc(nullptr) { -} + _tuple_desc(nullptr) {} -OdbcScanNode::~OdbcScanNode() { -} +OdbcScanNode::~OdbcScanNode() {} Status OdbcScanNode::prepare(RuntimeState* state) { VLOG(1) << "OdbcScanNode::Prepare"; @@ -68,19 +65,19 @@ Status OdbcScanNode::prepare(RuntimeState* state) { _odbc_param.query_string = std::move(_query_string); _odbc_param.tuple_desc = 
_tuple_desc; - _odbc_scanner.reset(new (std::nothrow)ODBCScanner(_odbc_param)); + _odbc_scanner.reset(new (std::nothrow) ODBCScanner(_odbc_param)); if (_odbc_scanner.get() == nullptr) { return Status::InternalError("new a odbc scanner failed."); } - _tuple_pool.reset(new(std::nothrow) MemPool(mem_tracker().get())); + _tuple_pool.reset(new (std::nothrow) MemPool(mem_tracker().get())); if (_tuple_pool.get() == NULL) { return Status::InternalError("new a mem pool failed."); } - _text_converter.reset(new(std::nothrow) TextConverter('\\')); + _text_converter.reset(new (std::nothrow) TextConverter('\\')); if (_text_converter.get() == NULL) { return Status::InternalError("new a text convertor failed."); @@ -113,10 +110,10 @@ Status OdbcScanNode::open(RuntimeState* state) { return Status::OK(); } -Status OdbcScanNode::write_text_slot(char* value, int value_length, - SlotDescriptor* slot, RuntimeState* state) { - if (!_text_converter->write_slot(slot, _tuple, value, value_length, - true, false, _tuple_pool.get())) { +Status OdbcScanNode::write_text_slot(char* value, int value_length, SlotDescriptor* slot, + RuntimeState* state) { + if (!_text_converter->write_slot(slot, _tuple, value, value_length, true, false, + _tuple_pool.get())) { std::stringstream ss; ss << "fail to convert odbc value '" << value << "' TO " << slot->type(); return Status::InternalError(ss.str()); @@ -206,8 +203,8 @@ Status OdbcScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eo << ", column=" << slot_desc->col_name(); return Status::InternalError(ss.str()); } else { - RETURN_IF_ERROR( - write_text_slot(static_cast(column_data.target_value_ptr), column_data.strlen_or_ind, slot_desc, state)); + RETURN_IF_ERROR(write_text_slot(static_cast(column_data.target_value_ptr), + column_data.strlen_or_ind, slot_desc, state)); } j++; } @@ -253,4 +250,4 @@ Status OdbcScanNode::set_scan_ranges(const std::vector& scan_r return Status::OK(); } -} +} // namespace doris diff --git 
a/be/src/exec/odbc_scan_node.h b/be/src/exec/odbc_scan_node.h index 37b9bece8aa17f..5763b44155c1fe 100644 --- a/be/src/exec/odbc_scan_node.h +++ b/be/src/exec/odbc_scan_node.h @@ -15,14 +15,14 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_QUERY_EXEC_ODBC_SCAN_NODE_H -#define DORIS_BE_SRC_QUERY_EXEC_ODBC_SCAN_NODE_H +#ifndef DORIS_BE_SRC_QUERY_EXEC_ODBC_SCAN_NODE_H +#define DORIS_BE_SRC_QUERY_EXEC_ODBC_SCAN_NODE_H #include -#include "runtime/descriptors.h" -#include "exec/scan_node.h" #include "exec/odbc_scanner.h" +#include "exec/scan_node.h" +#include "runtime/descriptors.h" namespace doris { @@ -74,7 +74,6 @@ class OdbcScanNode : public ScanNode { // Tuple id resolved in prepare() to set _tuple_desc; TupleId _tuple_id; - // Descriptor of tuples read from ODBC table. const TupleDescriptor* _tuple_desc; // Tuple index in tuple row. @@ -90,6 +89,6 @@ class OdbcScanNode : public ScanNode { // Current tuple. Tuple* _tuple = nullptr; }; -} +} // namespace doris #endif diff --git a/be/src/exec/odbc_scanner.cpp b/be/src/exec/odbc_scanner.cpp index a83b8cc14cdf98..1df47ad5b38a03 100644 --- a/be/src/exec/odbc_scanner.cpp +++ b/be/src/exec/odbc_scanner.cpp @@ -15,25 +15,27 @@ // specific language governing permissions and limitations // under the License. 
+#include "exec/odbc_scanner.h" + +#include + #include #include -#include -#include "exec/odbc_scanner.h" #include "common/logging.h" #include "runtime/primitive_type.h" -#define ODBC_DISPOSE(h, ht, x, op) { auto rc = x;\ - if (rc != SQL_SUCCESS && rc != SQL_SUCCESS_WITH_INFO) \ - { \ - return error_status(op, handle_diagnostic_record(h, ht, rc)); \ - } \ - if (rc == SQL_ERROR) \ - { \ - auto err_msg = std::string("Error in") + std::string(op); \ - return Status::InternalError(err_msg.c_str()); \ - } \ - } \ +#define ODBC_DISPOSE(h, ht, x, op) \ + { \ + auto rc = x; \ + if (rc != SQL_SUCCESS && rc != SQL_SUCCESS_WITH_INFO) { \ + return error_status(op, handle_diagnostic_record(h, ht, rc)); \ + } \ + if (rc == SQL_ERROR) { \ + auto err_msg = std::string("Error in") + std::string(op); \ + return Status::InternalError(err_msg.c_str()); \ + } \ + } static constexpr uint32_t SMALL_COLUMN_SIZE_BUFFER = 100; // Now we only treat HLL, CHAR, VARCHAR as big column @@ -55,8 +57,7 @@ ODBCScanner::ODBCScanner(const ODBCScannerParam& param) _row_count(0), _env(nullptr), _dbc(nullptr), - _stmt(nullptr) { -} + _stmt(nullptr) {} ODBCScanner::~ODBCScanner() { if (_stmt != nullptr) { @@ -84,12 +85,16 @@ Status ODBCScanner::open() { return Status::InternalError("alloc env failed"); } // We want ODBC 3 support - ODBC_DISPOSE(_env, SQL_HANDLE_ENV, SQLSetEnvAttr(_env, SQL_ATTR_ODBC_VERSION, (void *) SQL_OV_ODBC3, 0), "set env attr"); + ODBC_DISPOSE(_env, SQL_HANDLE_ENV, + SQLSetEnvAttr(_env, SQL_ATTR_ODBC_VERSION, (void*)SQL_OV_ODBC3, 0), + "set env attr"); // Allocate a connection handle ODBC_DISPOSE(_env, SQL_HANDLE_ENV, SQLAllocHandle(SQL_HANDLE_DBC, _env, &_dbc), "alloc dbc"); // Connect to the Database - ODBC_DISPOSE(_dbc, SQL_HANDLE_DBC, SQLDriverConnect(_dbc, NULL, (SQLCHAR*)_connect_string.c_str(), SQL_NTS, - NULL, 0, NULL, SQL_DRIVER_COMPLETE_REQUIRED), "driver connect"); + ODBC_DISPOSE(_dbc, SQL_HANDLE_DBC, + SQLDriverConnect(_dbc, NULL, (SQLCHAR*)_connect_string.c_str(), 
SQL_NTS, NULL, 0, + NULL, SQL_DRIVER_COMPLETE_REQUIRED), + "driver connect"); LOG(INFO) << "connect success:" << _connect_string.substr(0, _connect_string.find("Pwd=")); @@ -99,20 +104,22 @@ Status ODBCScanner::open() { Status ODBCScanner::query() { if (!_is_open) { - return Status::InternalError( "Query before open."); + return Status::InternalError("Query before open."); } // Allocate a statement handle - ODBC_DISPOSE(_dbc, SQL_HANDLE_DBC, SQLAllocHandle(SQL_HANDLE_STMT, _dbc, &_stmt), "alloc statement"); + ODBC_DISPOSE(_dbc, SQL_HANDLE_DBC, SQLAllocHandle(SQL_HANDLE_STMT, _dbc, &_stmt), + "alloc statement"); // Translate utf8 string to utf16 to use unicode code auto wquery = utf8_to_wstring(_sql_str); - ODBC_DISPOSE(_stmt, SQL_HANDLE_STMT, SQLExecDirectW(_stmt, (SQLWCHAR*)(wquery.c_str()), SQL_NTS), "exec direct"); + ODBC_DISPOSE(_stmt, SQL_HANDLE_STMT, + SQLExecDirectW(_stmt, (SQLWCHAR*)(wquery.c_str()), SQL_NTS), "exec direct"); // How many columns are there */ ODBC_DISPOSE(_stmt, SQL_HANDLE_STMT, SQLNumResultCols(_stmt, &_field_num), "count num column"); - LOG(INFO) << "execute success:" << _sql_str << " column count:" << _field_num; + LOG(INFO) << "execute success:" << _sql_str << " column count:" << _field_num; // check materialize num equal _field_num int materialize_num = 0; @@ -126,20 +133,24 @@ Status ODBCScanner::query() { } // allocate memory for the binding - for (int i = 0 ; i < _field_num ; i++ ) { + for (int i = 0; i < _field_num; i++) { DataBinding* column_data = new DataBinding; column_data->target_type = SQL_C_CHAR; auto type = _tuple_desc->slots()[i]->type().type; - column_data->buffer_length = (type == TYPE_HLL || type == TYPE_CHAR || type == TYPE_VARCHAR) ? BIG_COLUMN_SIZE_BUFFER : - SMALL_COLUMN_SIZE_BUFFER; + column_data->buffer_length = (type == TYPE_HLL || type == TYPE_CHAR || type == TYPE_VARCHAR) + ? 
BIG_COLUMN_SIZE_BUFFER + : SMALL_COLUMN_SIZE_BUFFER; column_data->target_value_ptr = malloc(sizeof(char) * column_data->buffer_length); _columns_data.push_back(column_data); } // setup the binding - for (int i = 0 ; i < _field_num ; i++ ) { - ODBC_DISPOSE(_stmt, SQL_HANDLE_STMT, SQLBindCol(_stmt, (SQLUSMALLINT)i + 1, _columns_data[i].target_type, - _columns_data[i].target_value_ptr, _columns_data[i].buffer_length, &(_columns_data[i].strlen_or_ind)), "bind col"); + for (int i = 0; i < _field_num; i++) { + ODBC_DISPOSE(_stmt, SQL_HANDLE_STMT, + SQLBindCol(_stmt, (SQLUSMALLINT)i + 1, _columns_data[i].target_type, + _columns_data[i].target_value_ptr, _columns_data[i].buffer_length, + &(_columns_data[i].strlen_or_ind)), + "bind col"); } return Status::OK(); @@ -174,13 +185,12 @@ Status ODBCScanner::error_status(const std::string& prefix, const std::string& e // hHandle ODBC handle // hType Type of handle (HANDLE_STMT, HANDLE_ENV, HANDLE_DBC) // RetCode Return code of failing command -std::string ODBCScanner::handle_diagnostic_record(SQLHANDLE hHandle, - SQLSMALLINT hType, - RETCODE RetCode) { +std::string ODBCScanner::handle_diagnostic_record(SQLHANDLE hHandle, SQLSMALLINT hType, + RETCODE RetCode) { SQLSMALLINT rec = 0; - SQLINTEGER error; - CHAR message[1000]; - CHAR state[SQL_SQLSTATE_SIZE+1]; + SQLINTEGER error; + CHAR message[1000]; + CHAR state[SQL_SQLSTATE_SIZE + 1]; if (RetCode == SQL_INVALID_HANDLE) { return "Invalid handle!"; @@ -188,16 +198,12 @@ std::string ODBCScanner::handle_diagnostic_record(SQLHANDLE hHandle, std::string diagnostic_msg; - while (SQLGetDiagRec(hType, - hHandle, - ++rec, - (SQLCHAR *)(state), - &error, - reinterpret_cast(message), + while (SQLGetDiagRec(hType, hHandle, ++rec, (SQLCHAR*)(state), &error, + reinterpret_cast(message), (SQLSMALLINT)(sizeof(message) / sizeof(WCHAR)), - (SQLSMALLINT *)NULL) == SQL_SUCCESS) { + (SQLSMALLINT*)NULL) == SQL_SUCCESS) { // Hide data truncated.. 
- if (wcsncmp(reinterpret_cast(state), L"01004", 5)) { + if (wcsncmp(reinterpret_cast(state), L"01004", 5)) { boost::format msg_string("%s %s (%d)"); msg_string % state % message % error; diagnostic_msg += msg_string.str(); @@ -207,4 +213,4 @@ std::string ODBCScanner::handle_diagnostic_record(SQLHANDLE hHandle, return diagnostic_msg; } -} +} // namespace doris diff --git a/be/src/exec/odbc_scanner.h b/be/src/exec/odbc_scanner.h index 14c84a9df0b6d8..e384bcae4424b2 100644 --- a/be/src/exec/odbc_scanner.h +++ b/be/src/exec/odbc_scanner.h @@ -15,13 +15,14 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_QUERY_EXEC_ODBC_SCANNER_H -#define DORIS_BE_SRC_QUERY_EXEC_ODBC_SCANNER_H +#ifndef DORIS_BE_SRC_QUERY_EXEC_ODBC_SCANNER_H +#define DORIS_BE_SRC_QUERY_EXEC_ODBC_SCANNER_H + +#include #include #include #include -#include #include #include @@ -41,16 +42,14 @@ struct ODBCScannerParam { // Because the DataBinding have the mem alloc, so // this class should not be copyable struct DataBinding : public boost::noncopyable { - SQLSMALLINT target_type; - SQLINTEGER buffer_length; - SQLLEN strlen_or_ind; - SQLPOINTER target_value_ptr; + SQLSMALLINT target_type; + SQLINTEGER buffer_length; + SQLLEN strlen_or_ind; + SQLPOINTER target_value_ptr; - DataBinding() = default; + DataBinding() = default; - ~DataBinding() { - free(target_value_ptr); - } + ~DataBinding() { free(target_value_ptr); } }; // ODBC Scanner for scan data from ODBC @@ -66,16 +65,13 @@ class ODBCScanner { Status get_next_row(bool* eos); - const DataBinding& get_column_data(int i) const { - return _columns_data.at(i); - } + const DataBinding& get_column_data(int i) const { return _columns_data.at(i); } private: static Status error_status(const std::string& prefix, const std::string& error_msg); - static std::string handle_diagnostic_record (SQLHANDLE hHandle, - SQLSMALLINT hType, - RETCODE RetCode); + static std::string handle_diagnostic_record(SQLHANDLE 
hHandle, SQLSMALLINT hType, + RETCODE RetCode); std::string _connect_string; std::string _sql_str; @@ -92,6 +88,6 @@ class ODBCScanner { boost::ptr_vector _columns_data; }; -} +} // namespace doris #endif \ No newline at end of file diff --git a/be/src/exec/olap_common.cpp b/be/src/exec/olap_common.cpp index 9c31b3b88d7c10..6d51fb60714ea0 100644 --- a/be/src/exec/olap_common.cpp +++ b/be/src/exec/olap_common.cpp @@ -19,44 +19,43 @@ #include #include +#include #include #include #include -#include #include "exec/olap_utils.h" namespace doris { -template<> +template <> std::string cast_to_string(__int128 value) { std::stringstream ss; ss << value; return ss.str(); } -template<> +template <> void ColumnValueRange::convert_to_fixed_value() { return; } -template<> +template <> void ColumnValueRange::convert_to_fixed_value() { return; } -template<> +template <> void ColumnValueRange::convert_to_fixed_value() { return; } -template<> +template <> void ColumnValueRange<__int128>::convert_to_fixed_value() { return; } -Status OlapScanKeys::get_key_range( - std::vector>* key_range) { +Status OlapScanKeys::get_key_range(std::vector>* key_range) { key_range->clear(); for (int i = 0; i < _begin_scan_keys.size(); ++i) { @@ -71,6 +70,6 @@ Status OlapScanKeys::get_key_range( return Status::OK(); } -} // namespace doris +} // namespace doris /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h index 9d4c7cf7e3508c..b4cf31ee0b29ba 100644 --- a/be/src/exec/olap_common.h +++ b/be/src/exec/olap_common.h @@ -15,29 +15,29 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef DORIS_BE_SRC_QUERY_EXEC_OLAP_COMMON_H -#define DORIS_BE_SRC_QUERY_EXEC_OLAP_COMMON_H +#ifndef DORIS_BE_SRC_QUERY_EXEC_OLAP_COMMON_H +#define DORIS_BE_SRC_QUERY_EXEC_OLAP_COMMON_H + +#include -#include #include +#include #include -#include #include -#include +#include #include "common/logging.h" #include "exec/olap_utils.h" #include "exec/scan_node.h" #include "gen_cpp/PlanNodes_types.h" +#include "olap/tuple.h" +#include "runtime/datetime_value.h" #include "runtime/descriptors.h" #include "runtime/string_value.hpp" -#include "runtime/datetime_value.h" - -#include "olap/tuple.h" namespace doris { -template +template std::string cast_to_string(T value) { return boost::lexical_cast(value); } @@ -45,7 +45,7 @@ std::string cast_to_string(T value) { /** * @brief Column's value range **/ -template +template class ColumnValueRange { public: typedef typename std::set::iterator iterator_type; @@ -79,43 +79,25 @@ class ColumnValueRange { _high_value = _type_min; } - const std::set& get_fixed_value_set() const { - return _fixed_values; - } + const std::set& get_fixed_value_set() const { return _fixed_values; } - T get_range_max_value() const { - return _high_value; - } + T get_range_max_value() const { return _high_value; } - T get_range_min_value() const { - return _low_value; - } + T get_range_min_value() const { return _low_value; } - bool is_low_value_mininum() const { - return _low_value == _type_min; - } + bool is_low_value_mininum() const { return _low_value == _type_min; } - bool is_high_value_maximum() const { - return _high_value == _type_max; - } + bool is_high_value_maximum() const { return _high_value == _type_max; } - bool is_begin_include() const { - return _low_op == FILTER_LARGER_OR_EQUAL; - } + bool is_begin_include() const { return _low_op == FILTER_LARGER_OR_EQUAL; } - bool is_end_include() const { - return _high_op == FILTER_LESS_OR_EQUAL; - } + bool is_end_include() const { return _high_op == FILTER_LESS_OR_EQUAL; } - PrimitiveType type() const { 
- return _column_type; - } + PrimitiveType type() const { return _column_type; } - size_t get_fixed_value_size() const { - return _fixed_values.size(); - } + size_t get_fixed_value_size() const { return _fixed_values.size(); } - void to_olap_filter(std::list &filters) { + void to_olap_filter(std::list& filters) { if (is_fixed_value_range()) { TCondition condition; condition.__set_column_name(_column_name); @@ -160,37 +142,36 @@ class ColumnValueRange { _low_op = FILTER_LARGER_OR_EQUAL; _high_op = FILTER_LESS_OR_EQUAL; } + protected: bool is_in_range(const T& value); private: std::string _column_name; - PrimitiveType _column_type; // Column type (eg: TINYINT,SMALLINT,INT,BIGINT) - T _type_min; // Column type's min value - T _type_max; // Column type's max value - T _low_value; // Column's low value, closed interval at left - T _high_value; // Column's high value, open interval at right + PrimitiveType _column_type; // Column type (eg: TINYINT,SMALLINT,INT,BIGINT) + T _type_min; // Column type's min value + T _type_max; // Column type's max value + T _low_value; // Column's low value, closed interval at left + T _high_value; // Column's high value, open interval at right SQLFilterOp _low_op; SQLFilterOp _high_op; - std::set _fixed_values; // Column's fixed int value + std::set _fixed_values; // Column's fixed int value }; class OlapScanKeys { public: - OlapScanKeys() : - _has_range_value(false), - _begin_include(true), - _end_include(true), - _is_convertible(true) {} + OlapScanKeys() + : _has_range_value(false), + _begin_include(true), + _end_include(true), + _is_convertible(true) {} - template + template Status extend_scan_key(ColumnValueRange& range, int32_t max_scan_key_num); Status get_key_range(std::vector>* key_range); - bool has_range_value() { - return _has_range_value; - } + bool has_range_value() { return _has_range_value; } void clear() { _has_range_value = false; @@ -204,10 +185,8 @@ class OlapScanKeys { ss << "ScanKeys:"; for (int i = 0; i < 
_begin_scan_keys.size(); ++i) { - ss << "ScanKey=" << (_begin_include ? "[" : "(") - << _begin_scan_keys[i] << " : " - << _end_scan_keys[i] - << (_end_include ? "]" : ")"); + ss << "ScanKey=" << (_begin_include ? "[" : "(") << _begin_scan_keys[i] << " : " + << _end_scan_keys[i] << (_end_include ? "]" : ")"); } return ss.str(); } @@ -217,25 +196,15 @@ class OlapScanKeys { return _begin_scan_keys.size(); } - void set_begin_include(bool begin_include) { - _begin_include = begin_include; - } + void set_begin_include(bool begin_include) { _begin_include = begin_include; } - bool begin_include() const { - return _begin_include; - } + bool begin_include() const { return _begin_include; } - void set_end_include(bool end_include) { - _end_include = end_include; - } + void set_end_include(bool end_include) { _end_include = end_include; } - bool end_include() const { - return _end_include; - } + bool end_include() const { return _end_include; } - void set_is_convertible(bool is_convertible) { - _is_convertible = is_convertible; - } + void set_is_convertible(bool is_convertible) { _is_convertible = is_convertible; } private: std::vector _begin_scan_keys; @@ -246,35 +215,28 @@ class OlapScanKeys { bool _is_convertible; }; -typedef boost::variant < - ColumnValueRange, - ColumnValueRange, - ColumnValueRange, - ColumnValueRange, - ColumnValueRange<__int128>, - ColumnValueRange, - ColumnValueRange, - ColumnValueRange, - ColumnValueRange, - ColumnValueRange> ColumnValueRangeType; - -template -ColumnValueRange::ColumnValueRange() : _column_type(INVALID_TYPE) { -} +typedef boost::variant, ColumnValueRange, + ColumnValueRange, ColumnValueRange, + ColumnValueRange<__int128>, ColumnValueRange, + ColumnValueRange, ColumnValueRange, + ColumnValueRange, ColumnValueRange> + ColumnValueRangeType; -template -ColumnValueRange::ColumnValueRange(std::string col_name, PrimitiveType type, T min, T max) - : _column_name(col_name), - _column_type(type), - _type_min(min), - _type_max(max), - 
_low_value(min), - _high_value(max), - _low_op(FILTER_LARGER_OR_EQUAL), - _high_op(FILTER_LESS_OR_EQUAL) { -} +template +ColumnValueRange::ColumnValueRange() : _column_type(INVALID_TYPE) {} -template +template +ColumnValueRange::ColumnValueRange(std::string col_name, PrimitiveType type, T min, T max) + : _column_name(col_name), + _column_type(type), + _type_min(min), + _type_max(max), + _low_value(min), + _high_value(max), + _low_op(FILTER_LARGER_OR_EQUAL), + _high_op(FILTER_LESS_OR_EQUAL) {} + +template Status ColumnValueRange::add_fixed_value(T value) { if (INVALID_TYPE == _column_type) { return Status::InternalError("AddFixedValue failed, Invalid type"); @@ -284,12 +246,12 @@ Status ColumnValueRange::add_fixed_value(T value) { return Status::OK(); } -template +template bool ColumnValueRange::is_fixed_value_range() const { return _fixed_values.size() != 0; } -template +template bool ColumnValueRange::is_empty_value_range() const { if (INVALID_TYPE == _column_type) { return true; @@ -306,7 +268,7 @@ bool ColumnValueRange::is_empty_value_range() const { } } -template +template bool ColumnValueRange::is_fixed_value_convertible() const { if (is_fixed_value_range()) { return false; @@ -319,21 +281,20 @@ bool ColumnValueRange::is_fixed_value_convertible() const { return true; } -template +template bool ColumnValueRange::is_range_value_convertible() const { if (!is_fixed_value_range()) { return false; } - if (TYPE_NULL == _column_type - || TYPE_BOOLEAN == _column_type) { + if (TYPE_NULL == _column_type || TYPE_BOOLEAN == _column_type) { return false; } return true; } -template +template size_t ColumnValueRange::get_convertible_fixed_value_size() const { if (!is_fixed_value_convertible()) { return 0; @@ -342,19 +303,19 @@ size_t ColumnValueRange::get_convertible_fixed_value_size() const { return _high_value - _low_value; } -template<> +template <> void ColumnValueRange::convert_to_fixed_value(); -template<> +template <> void ColumnValueRange::convert_to_fixed_value(); 
-template<> +template <> void ColumnValueRange::convert_to_fixed_value(); -template<> +template <> void ColumnValueRange<__int128>::convert_to_fixed_value(); -template +template void ColumnValueRange::convert_to_fixed_value() { if (!is_fixed_value_convertible()) { return; @@ -375,7 +336,7 @@ void ColumnValueRange::convert_to_fixed_value() { } } -template +template void ColumnValueRange::convert_to_range_value() { if (!is_range_value_convertible()) { return; @@ -390,15 +351,14 @@ void ColumnValueRange::convert_to_range_value() { } } -template +template Status ColumnValueRange::add_range(SQLFilterOp op, T value) { if (INVALID_TYPE == _column_type) { return Status::InternalError("AddRange failed, Invalid type"); } if (is_fixed_value_range()) { - std::pair bound_pair - = _fixed_values.equal_range(value); + std::pair bound_pair = _fixed_values.equal_range(value); switch (op) { case FILTER_LARGER: { @@ -479,9 +439,8 @@ Status ColumnValueRange::add_range(SQLFilterOp op, T value) { } } - if (FILTER_LARGER_OR_EQUAL == _low_op && - FILTER_LESS_OR_EQUAL == _high_op && - _high_value == _low_value) { + if (FILTER_LARGER_OR_EQUAL == _low_op && FILTER_LESS_OR_EQUAL == _high_op && + _high_value == _low_value) { add_fixed_value(_high_value); _high_value = _type_min; _low_value = _type_max; @@ -491,7 +450,7 @@ Status ColumnValueRange::add_range(SQLFilterOp op, T value) { return Status::OK(); } -template +template bool ColumnValueRange::is_in_range(const T& value) { switch (_high_op) { case FILTER_LESS: { @@ -536,7 +495,7 @@ bool ColumnValueRange::is_in_range(const T& value) { return false; } -template +template bool ColumnValueRange::has_intersection(ColumnValueRange& range) { // 1. 
return false if column type not match if (_column_type != range._column_type) { @@ -551,12 +510,9 @@ bool ColumnValueRange::has_intersection(ColumnValueRange& range) { // 3.1 return false if two int fixedRange has no intersection if (is_fixed_value_range() && range.is_fixed_value_range()) { std::set result_values; - set_intersection( - _fixed_values.begin(), - _fixed_values.end(), - range._fixed_values.begin(), - range._fixed_values.end(), - std::inserter(result_values, result_values.begin())); + set_intersection(_fixed_values.begin(), _fixed_values.end(), range._fixed_values.begin(), + range._fixed_values.end(), + std::inserter(result_values, result_values.begin())); if (result_values.size() != 0) { return true; @@ -589,19 +545,16 @@ bool ColumnValueRange::has_intersection(ColumnValueRange& range) { return false; } else { - if (_low_value > range._high_value - || range._low_value > _high_value) { + if (_low_value > range._high_value || range._low_value > _high_value) { return false; } else if (_low_value == range._high_value) { - if (FILTER_LARGER_OR_EQUAL == _low_op && - FILTER_LESS_OR_EQUAL == range._high_op) { + if (FILTER_LARGER_OR_EQUAL == _low_op && FILTER_LESS_OR_EQUAL == range._high_op) { return true; } else { return false; } } else if (range._low_value == _high_value) { - if (FILTER_LARGER_OR_EQUAL == range._low_op && - FILTER_LESS_OR_EQUAL == _high_op) { + if (FILTER_LARGER_OR_EQUAL == range._low_op && FILTER_LESS_OR_EQUAL == _high_op) { return true; } else { return false; @@ -612,7 +565,7 @@ bool ColumnValueRange::has_intersection(ColumnValueRange& range) { } } -template +template Status OlapScanKeys::extend_scan_key(ColumnValueRange& range, int32_t max_scan_key_num) { using namespace std; typedef typename set::const_iterator const_iterator_type; @@ -633,7 +586,7 @@ Status OlapScanKeys::extend_scan_key(ColumnValueRange& range, int32_t max_sca //for this case, we need to add null value to fixed values bool has_converted = false; - auto scan_keys_size = 
_begin_scan_keys.empty() ? 1 : _begin_scan_keys.size(); + auto scan_keys_size = _begin_scan_keys.empty() ? 1 : _begin_scan_keys.size(); if (range.is_fixed_value_range()) { if (range.get_fixed_value_size() * scan_keys_size > max_scan_key_num) { if (range.is_range_value_convertible()) { @@ -668,10 +621,10 @@ Status OlapScanKeys::extend_scan_key(ColumnValueRange& range, int32_t max_sca } if (has_converted) { - _begin_scan_keys.emplace_back(); - _begin_scan_keys.back().add_null(); - _end_scan_keys.emplace_back(); - _end_scan_keys.back().add_null(); + _begin_scan_keys.emplace_back(); + _begin_scan_keys.back().add_null(); + _end_scan_keys.emplace_back(); + _end_scan_keys.back().add_null(); } } // 3.1.2 produces the Cartesian product of ScanKey and fixed_value else { @@ -715,22 +668,18 @@ Status OlapScanKeys::extend_scan_key(ColumnValueRange& range, int32_t max_sca if (_begin_scan_keys.empty()) { _begin_scan_keys.emplace_back(); - _begin_scan_keys.back().add_value( - cast_to_string(range.get_range_min_value()), - range.is_low_value_mininum()); + _begin_scan_keys.back().add_value(cast_to_string(range.get_range_min_value()), + range.is_low_value_mininum()); _end_scan_keys.emplace_back(); - _end_scan_keys.back().add_value( - cast_to_string(range.get_range_max_value())); + _end_scan_keys.back().add_value(cast_to_string(range.get_range_max_value())); } else { for (int i = 0; i < _begin_scan_keys.size(); ++i) { - _begin_scan_keys[i].add_value( - cast_to_string(range.get_range_min_value()), - range.is_low_value_mininum()); + _begin_scan_keys[i].add_value(cast_to_string(range.get_range_min_value()), + range.is_low_value_mininum()); } for (int i = 0; i < _end_scan_keys.size(); ++i) { - _end_scan_keys[i].add_value( - cast_to_string(range.get_range_max_value())); + _end_scan_keys[i].add_value(cast_to_string(range.get_range_max_value())); } } @@ -741,7 +690,7 @@ Status OlapScanKeys::extend_scan_key(ColumnValueRange& range, int32_t max_sca return Status::OK(); } -} // namespace doris 
+} // namespace doris #endif diff --git a/be/src/exec/olap_rewrite_node.cpp b/be/src/exec/olap_rewrite_node.cpp index d592c5fe2af554..b191e31f58adc1 100644 --- a/be/src/exec/olap_rewrite_node.cpp +++ b/be/src/exec/olap_rewrite_node.cpp @@ -21,28 +21,25 @@ #include "exprs/expr.h" #include "runtime/descriptors.h" -#include "runtime/runtime_state.h" -#include "runtime/row_batch.h" #include "runtime/raw_value.h" +#include "runtime/row_batch.h" +#include "runtime/runtime_state.h" #include "runtime/tuple.h" namespace doris { -OlapRewriteNode::OlapRewriteNode(ObjectPool* pool, - const TPlanNode& tnode, - const DescriptorTbl& descs) : - ExecNode(pool, tnode, descs), - _child_row_batch(nullptr), - _child_row_idx(0), - _child_eos(false) { -} +OlapRewriteNode::OlapRewriteNode(ObjectPool* pool, const TPlanNode& tnode, + const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), + _child_row_batch(nullptr), + _child_row_idx(0), + _child_eos(false) {} Status OlapRewriteNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(ExecNode::init(tnode, state)); DCHECK(tnode.__isset.olap_rewrite_node); // create columns - RETURN_IF_ERROR(Expr::create_expr_trees( - _pool, tnode.olap_rewrite_node.columns, &_columns)); + RETURN_IF_ERROR(Expr::create_expr_trees(_pool, tnode.olap_rewrite_node.columns, &_columns)); _column_types = tnode.olap_rewrite_node.column_types; _output_tuple_id = tnode.olap_rewrite_node.output_tuple_id; return Status::OK(); @@ -50,22 +47,20 @@ Status OlapRewriteNode::init(const TPlanNode& tnode, RuntimeState* state) { Status OlapRewriteNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); - RETURN_IF_ERROR(Expr::prepare( - _columns, state, child(0)->row_desc(), expr_mem_tracker())); + RETURN_IF_ERROR(Expr::prepare(_columns, state, child(0)->row_desc(), expr_mem_tracker())); _output_tuple_desc = state->desc_tbl().get_tuple_descriptor(_output_tuple_id); // _child_row_batch.reset(new RowBatch(child(0)->row_desc(), 
state->batch_size(), mem_tracker())); - _child_row_batch.reset( - new RowBatch(child(0)->row_desc(), state->batch_size(), state->fragment_mem_tracker().get())); + _child_row_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), + state->fragment_mem_tracker().get())); _max_decimal_val.resize(_column_types.size()); _max_decimalv2_val.resize(_column_types.size()); for (int i = 0; i < _column_types.size(); ++i) { if (_column_types[i].type == TPrimitiveType::DECIMAL) { - _max_decimal_val[i].to_max_decimal( - _column_types[i].precision, _column_types[i].scale); + _max_decimal_val[i].to_max_decimal(_column_types[i].precision, _column_types[i].scale); } else if (_column_types[i].type == TPrimitiveType::DECIMALV2) { - _max_decimalv2_val[i].to_max_decimal( - _column_types[i].precision, _column_types[i].scale); + _max_decimalv2_val[i].to_max_decimal(_column_types[i].precision, + _column_types[i].scale); } } return Status::OK(); @@ -101,8 +96,8 @@ Status OlapRewriteNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* } if (copy_rows(state, row_batch)) { - *eos = reached_limit() - || (_child_row_idx == _child_row_batch->num_rows() && _child_eos); + *eos = reached_limit() || + (_child_row_idx == _child_row_batch->num_rows() && _child_eos); return Status::OK(); } @@ -116,8 +111,7 @@ Status OlapRewriteNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* return Status::OK(); } -bool OlapRewriteNode::copy_one_row(TupleRow* src_row, Tuple* tuple, - MemPool* pool, +bool OlapRewriteNode::copy_one_row(TupleRow* src_row, Tuple* tuple, MemPool* pool, std::stringstream* ss) { memset(tuple, 0, _output_tuple_desc->num_null_bytes()); // check if valid @@ -148,10 +142,10 @@ bool OlapRewriteNode::copy_one_row(TupleRow* src_row, Tuple* tuple, StringValue* str_val = (StringValue*)src_value; if (str_val->len > column_type.len) { (*ss) << "the length of input is too long than schema. 
" - << "column_name: " << slot_desc->col_name() << "; " - << "input_str: [" << std::string(str_val->ptr, str_val->len) << "] " - << "schema length: " << column_type.len << "; " - << "actual length: " << str_val->len << "; "; + << "column_name: " << slot_desc->col_name() << "; " + << "input_str: [" << std::string(str_val->ptr, str_val->len) << "] " + << "schema length: " << column_type.len << "; " + << "actual length: " << str_val->len << "; "; return false; } StringValue* dst_val = (StringValue*)tuple->get_slot(slot_desc->tuple_offset()); @@ -194,8 +188,8 @@ bool OlapRewriteNode::copy_one_row(TupleRow* src_row, Tuple* tuple, return false; } } else { - *reinterpret_cast(dst_val) = - *reinterpret_cast(dec_val); + *reinterpret_cast(dst_val) = + *reinterpret_cast(dec_val); } if (*dst_val > _max_decimalv2_val[i]) { dst_val->to_max_decimal(column_type.precision, column_type.scale); @@ -230,7 +224,7 @@ bool OlapRewriteNode::copy_rows(RuntimeState* state, RowBatch* output_batch) { TupleRow* src_row = _child_row_batch->get_row(_child_row_idx); std::stringstream ss; - if (copy_one_row(src_row, tuple, pool, &ss)) { + if (copy_one_row(src_row, tuple, pool, &ss)) { TupleRow* dst_row = output_batch->get_row(dst_row_idx); dst_row->set_tuple(0, tuple); tuple = nullptr; @@ -269,4 +263,4 @@ Status OlapRewriteNode::close(RuntimeState* state) { Expr::close(_columns, state); return ExecNode::close(state); } -} +} // namespace doris diff --git a/be/src/exec/olap_rewrite_node.h b/be/src/exec/olap_rewrite_node.h index d6b2681bcfa14e..0b35ab295dcaee 100644 --- a/be/src/exec/olap_rewrite_node.h +++ b/be/src/exec/olap_rewrite_node.h @@ -34,7 +34,7 @@ class OlapRewriteNode : public ExecNode { OlapRewriteNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr); - virtual ~OlapRewriteNode() { } + virtual ~OlapRewriteNode() {} virtual Status prepare(RuntimeState* state); virtual Status 
open(RuntimeState* state); virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); @@ -66,6 +66,6 @@ class OlapRewriteNode : public ExecNode { std::vector _max_decimalv2_val; }; -} +} // namespace doris #endif diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index 4305fc53de930e..03a9dd0bc24a90 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -19,53 +19,51 @@ #include #include -#include +#include #include -#include +#include #include +#include +#include "agent/cgroups_mgr.h" #include "common/logging.h" -#include "exprs/expr.h" +#include "common/resource_tls.h" #include "exprs/binary_predicate.h" +#include "exprs/expr.h" #include "exprs/in_predicate.h" #include "gen_cpp/PlanNodes_types.h" #include "runtime/exec_env.h" -#include "runtime/runtime_state.h" #include "runtime/row_batch.h" +#include "runtime/runtime_state.h" #include "runtime/string_value.h" #include "runtime/tuple_row.h" -#include "util/runtime_profile.h" #include "util/debug_util.h" #include "util/priority_thread_pool.hpp" -#include "agent/cgroups_mgr.h" -#include "common/resource_tls.h" -#include +#include "util/runtime_profile.h" namespace doris { #define DS_SUCCESS(x) ((x) >= 0) -OlapScanNode::OlapScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs): - ScanNode(pool, tnode, descs), - _tuple_id(tnode.olap_scan_node.tuple_id), - _olap_scan_node(tnode.olap_scan_node), - _tuple_desc(NULL), - _tuple_idx(0), - _eos(false), - _scanner_pool(new ObjectPool()), - _max_materialized_row_batches(config::doris_scanner_queue_size), - _start(false), - _scanner_done(false), - _transfer_done(false), - _status(Status::OK()), - _resource_info(nullptr), - _buffered_bytes(0), - _running_thread(0), - _eval_conjuncts_fn(nullptr) { -} - -OlapScanNode::~OlapScanNode() { -} +OlapScanNode::OlapScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ScanNode(pool, tnode, descs), + 
_tuple_id(tnode.olap_scan_node.tuple_id), + _olap_scan_node(tnode.olap_scan_node), + _tuple_desc(NULL), + _tuple_idx(0), + _eos(false), + _scanner_pool(new ObjectPool()), + _max_materialized_row_batches(config::doris_scanner_queue_size), + _start(false), + _scanner_done(false), + _transfer_done(false), + _status(Status::OK()), + _resource_info(nullptr), + _buffered_bytes(0), + _running_thread(0), + _eval_conjuncts_fn(nullptr) {} + +OlapScanNode::~OlapScanNode() {} Status OlapScanNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(ExecNode::init(tnode, state)); @@ -87,7 +85,6 @@ Status OlapScanNode::init(const TPlanNode& tnode, RuntimeState* state) { return Status::OK(); } - void OlapScanNode::init_scan_profile() { _scanner_profile.reset(new RuntimeProfile("OlapScanner")); runtime_profile()->add_child(_scanner_profile.get(), true, NULL); @@ -101,7 +98,8 @@ void OlapScanNode::_init_counter(RuntimeState* state) { _reader_init_timer = ADD_TIMER(_scanner_profile, "ReaderInitTime"); _read_compressed_counter = ADD_COUNTER(_segment_profile, "CompressedBytesRead", TUnit::BYTES); - _read_uncompressed_counter = ADD_COUNTER(_segment_profile, "UncompressedBytesRead", TUnit::BYTES); + _read_uncompressed_counter = + ADD_COUNTER(_segment_profile, "UncompressedBytesRead", TUnit::BYTES); _block_load_timer = ADD_TIMER(_segment_profile, "BlockLoadTime"); _block_load_counter = ADD_COUNTER(_segment_profile, "BlocksLoad", TUnit::UNIT); _block_fetch_timer = ADD_TIMER(_scanner_profile, "BlockFetchTime"); @@ -116,8 +114,10 @@ void OlapScanNode::_init_counter(RuntimeState* state) { _stats_filtered_counter = ADD_COUNTER(_segment_profile, "RowsStatsFiltered", TUnit::UNIT); _bf_filtered_counter = ADD_COUNTER(_segment_profile, "RowsBloomFilterFiltered", TUnit::UNIT); _del_filtered_counter = ADD_COUNTER(_scanner_profile, "RowsDelFiltered", TUnit::UNIT); - _conditions_filtered_counter = ADD_COUNTER(_segment_profile, "RowsConditionsFiltered", TUnit::UNIT); - 
_key_range_filtered_counter = ADD_COUNTER(_segment_profile, "RowsKeyRangeFiltered", TUnit::UNIT); + _conditions_filtered_counter = + ADD_COUNTER(_segment_profile, "RowsConditionsFiltered", TUnit::UNIT); + _key_range_filtered_counter = + ADD_COUNTER(_segment_profile, "RowsKeyRangeFiltered", TUnit::UNIT); _io_timer = ADD_TIMER(_segment_profile, "IOTimer"); _decompressor_timer = ADD_TIMER(_segment_profile, "DecompressorTimer"); @@ -128,7 +128,8 @@ void OlapScanNode::_init_counter(RuntimeState* state) { _total_pages_num_counter = ADD_COUNTER(_segment_profile, "TotalPagesNum", TUnit::UNIT); _cached_pages_num_counter = ADD_COUNTER(_segment_profile, "CachedPagesNum", TUnit::UNIT); - _bitmap_index_filter_counter = ADD_COUNTER(_segment_profile, "RowsBitmapIndexFiltered", TUnit::UNIT); + _bitmap_index_filter_counter = + ADD_COUNTER(_segment_profile, "RowsBitmapIndexFiltered", TUnit::UNIT); _bitmap_index_filter_timer = ADD_TIMER(_segment_profile, "BitmapIndexFilterTimer"); _num_scanners = ADD_COUNTER(_runtime_profile, "NumScanners", TUnit::UNIT); @@ -142,10 +143,9 @@ Status OlapScanNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ScanNode::prepare(state)); // create scanner profile // create timer - _tablet_counter = - ADD_COUNTER(runtime_profile(), "TabletCount ", TUnit::UNIT); + _tablet_counter = ADD_COUNTER(runtime_profile(), "TabletCount ", TUnit::UNIT); _rows_pushed_cond_filtered_counter = - ADD_COUNTER(_scanner_profile, "RowsPushedCondFiltered", TUnit::UNIT); + ADD_COUNTER(_scanner_profile, "RowsPushedCondFiltered", TUnit::UNIT); _init_counter(state); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); if (_tuple_desc == NULL) { @@ -176,7 +176,7 @@ Status OlapScanNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(ExecNode::open(state)); - + for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) { // if conjunct is constant, compute direct and set eos = true @@ 
-278,7 +278,7 @@ Status OlapScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eo for (int i = 0; i < row_batch->num_rows(); ++i) { TupleRow* row = row_batch->get_row(i); VLOG_ROW << "OlapScanNode output row: " - << Tuple::to_string(row->get_tuple(0), *_tuple_desc); + << Tuple::to_string(row->get_tuple(0), *_tuple_desc); } } __sync_fetch_and_sub(&_buffered_bytes, @@ -397,37 +397,33 @@ Status OlapScanNode::normalize_conjuncts() { // TYPE_TINYINT use int32_t to present // because it's easy to convert to string for build Olap fetch Query case TYPE_TINYINT: { - ColumnValueRange range(slots[slot_idx]->col_name(), - slots[slot_idx]->type().type, - std::numeric_limits::min(), - std::numeric_limits::max()); + ColumnValueRange range( + slots[slot_idx]->col_name(), slots[slot_idx]->type().type, + std::numeric_limits::min(), std::numeric_limits::max()); normalize_predicate(range, slots[slot_idx]); break; } case TYPE_SMALLINT: { - ColumnValueRange range(slots[slot_idx]->col_name(), - slots[slot_idx]->type().type, - std::numeric_limits::min(), - std::numeric_limits::max()); + ColumnValueRange range( + slots[slot_idx]->col_name(), slots[slot_idx]->type().type, + std::numeric_limits::min(), std::numeric_limits::max()); normalize_predicate(range, slots[slot_idx]); break; } case TYPE_INT: { - ColumnValueRange range(slots[slot_idx]->col_name(), - slots[slot_idx]->type().type, - std::numeric_limits::min(), - std::numeric_limits::max()); + ColumnValueRange range( + slots[slot_idx]->col_name(), slots[slot_idx]->type().type, + std::numeric_limits::min(), std::numeric_limits::max()); normalize_predicate(range, slots[slot_idx]); break; } case TYPE_BIGINT: { - ColumnValueRange range(slots[slot_idx]->col_name(), - slots[slot_idx]->type().type, - std::numeric_limits::min(), - std::numeric_limits::max()); + ColumnValueRange range( + slots[slot_idx]->col_name(), slots[slot_idx]->type().type, + std::numeric_limits::min(), std::numeric_limits::max()); normalize_predicate(range, 
slots[slot_idx]); break; } @@ -436,22 +432,19 @@ Status OlapScanNode::normalize_conjuncts() { __int128 min = MIN_INT128; __int128 max = MAX_INT128; ColumnValueRange<__int128> range(slots[slot_idx]->col_name(), - slots[slot_idx]->type().type, - min, - max); + slots[slot_idx]->type().type, min, max); normalize_predicate(range, slots[slot_idx]); break; } case TYPE_CHAR: - case TYPE_VARCHAR: + case TYPE_VARCHAR: case TYPE_HLL: { static char min_char = 0x00; static char max_char = 0xff; - ColumnValueRange range(slots[slot_idx]->col_name(), - slots[slot_idx]->type().type, - StringValue(&min_char, 0), - StringValue(&max_char, 1)); + ColumnValueRange range( + slots[slot_idx]->col_name(), slots[slot_idx]->type().type, + StringValue(&min_char, 0), StringValue(&max_char, 1)); normalize_predicate(range, slots[slot_idx]); break; } @@ -461,8 +454,7 @@ Status OlapScanNode::normalize_conjuncts() { DateTimeValue max_value = DateTimeValue::datetime_max_value(); DateTimeValue min_value = DateTimeValue::datetime_min_value(); ColumnValueRange range(slots[slot_idx]->col_name(), - slots[slot_idx]->type().type, - min_value, + slots[slot_idx]->type().type, min_value, max_value); normalize_predicate(range, slots[slot_idx]); break; @@ -472,9 +464,7 @@ Status OlapScanNode::normalize_conjuncts() { DecimalValue min = DecimalValue::get_min_decimal(); DecimalValue max = DecimalValue::get_max_decimal(); ColumnValueRange range(slots[slot_idx]->col_name(), - slots[slot_idx]->type().type, - min, - max); + slots[slot_idx]->type().type, min, max); normalize_predicate(range, slots[slot_idx]); break; } @@ -483,25 +473,20 @@ Status OlapScanNode::normalize_conjuncts() { DecimalV2Value min = DecimalV2Value::get_min_decimal(); DecimalV2Value max = DecimalV2Value::get_max_decimal(); ColumnValueRange range(slots[slot_idx]->col_name(), - slots[slot_idx]->type().type, - min, - max); + slots[slot_idx]->type().type, min, max); normalize_predicate(range, slots[slot_idx]); break; } case TYPE_BOOLEAN: { - 
ColumnValueRange range(slots[slot_idx]->col_name(), - slots[slot_idx]->type().type, - false, - true); + ColumnValueRange range(slots[slot_idx]->col_name(), slots[slot_idx]->type().type, + false, true); normalize_predicate(range, slots[slot_idx]); break; } default: { - VLOG(2) << "Unsupported Normalize Slot [ColName=" - << slots[slot_idx]->col_name() << "]"; + VLOG(2) << "Unsupported Normalize Slot [ColName=" << slots[slot_idx]->col_name() << "]"; break; } } @@ -524,7 +509,7 @@ Status OlapScanNode::build_olap_filters() { } for (const auto& filter : new_filters) { - _olap_filter.push_back(filter); + _olap_filter.push_back(filter); } } @@ -539,7 +524,8 @@ Status OlapScanNode::build_scan_key() { // 1. construct scan key except last olap engine short key _scan_keys.set_is_convertible(limit() == -1); - for (int column_index = 0; column_index < column_names.size() && !_scan_keys.has_range_value(); ++column_index) { + for (int column_index = 0; column_index < column_names.size() && !_scan_keys.has_range_value(); + ++column_index) { auto column_range_iter = _column_value_ranges.find(column_names[column_index]); if (_column_value_ranges.end() == column_range_iter) { break; @@ -554,23 +540,20 @@ Status OlapScanNode::build_scan_key() { return Status::OK(); } -static Status get_hints( - const TPaloScanRange& scan_range, - int block_row_count, - bool is_begin_include, - bool is_end_include, - const std::vector>& scan_key_range, - std::vector>* sub_scan_range, - RuntimeProfile* profile) { +static Status get_hints(const TPaloScanRange& scan_range, int block_row_count, + bool is_begin_include, bool is_end_include, + const std::vector>& scan_key_range, + std::vector>* sub_scan_range, + RuntimeProfile* profile) { auto tablet_id = scan_range.tablet_id; int32_t schema_hash = strtoul(scan_range.schema_hash.c_str(), NULL, 10); std::string err; TabletSharedPtr table = StorageEngine::instance()->tablet_manager()->get_tablet( - tablet_id, schema_hash, true, &err); + tablet_id, schema_hash, 
true, &err); if (table == nullptr) { std::stringstream ss; - ss << "failed to get tablet: " << tablet_id << " with schema hash: " - << schema_hash << ", reason: " << err; + ss << "failed to get tablet: " << tablet_id << " with schema hash: " << schema_hash + << ", reason: " << err; LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } @@ -579,16 +562,15 @@ static Status get_hints( std::vector> ranges; bool have_valid_range = false; for (auto& key_range : scan_key_range) { - if (key_range->begin_scan_range.size() == 1 - && key_range->begin_scan_range.get_value(0) == NEGATIVE_INFINITY) { + if (key_range->begin_scan_range.size() == 1 && + key_range->begin_scan_range.get_value(0) == NEGATIVE_INFINITY) { continue; } SCOPED_TIMER(show_hints_timer); - + OLAPStatus res = OLAP_SUCCESS; std::vector range; - res = table->split_range(key_range->begin_scan_range, - key_range->end_scan_range, + res = table->split_range(key_range->begin_scan_range, key_range->end_scan_range, block_row_count, &range); if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("fail to show hints by split range. 
[res=%d]", res); @@ -635,7 +617,6 @@ static Status get_hints( return Status::OK(); } - Status OlapScanNode::start_scan_thread(RuntimeState* state) { if (_scan_ranges.empty()) { _transfer_done = true; @@ -664,14 +645,9 @@ Status OlapScanNode::start_scan_thread(RuntimeState* state) { std::vector>* ranges = &cond_ranges; std::vector> split_ranges; if (need_split) { - auto st = get_hints( - *scan_range, - config::doris_scan_range_row_count, - _scan_keys.begin_include(), - _scan_keys.end_include(), - cond_ranges, - &split_ranges, - _runtime_profile.get()); + auto st = get_hints(*scan_range, config::doris_scan_range_row_count, + _scan_keys.begin_include(), _scan_keys.end_include(), cond_ranges, + &split_ranges, _runtime_profile.get()); if (st.ok()) { ranges = &split_ranges; } @@ -683,20 +659,19 @@ Status OlapScanNode::start_scan_thread(RuntimeState* state) { std::vector scanner_ranges; scanner_ranges.push_back((*ranges)[i].get()); ++i; - for (int j = 1; - i < num_ranges && - j < ranges_per_scanner && - (*ranges)[i]->end_include == (*ranges)[i - 1]->end_include; + for (int j = 1; i < num_ranges && j < ranges_per_scanner && + (*ranges)[i]->end_include == (*ranges)[i - 1]->end_include; ++j, ++i) { scanner_ranges.push_back((*ranges)[i].get()); } - OlapScanner* scanner = new OlapScanner( - state, this, _olap_scan_node.is_preaggregation, _need_agg_finalize, *scan_range, scanner_ranges); + OlapScanner* scanner = new OlapScanner(state, this, _olap_scan_node.is_preaggregation, + _need_agg_finalize, *scan_range, scanner_ranges); // add scanner to pool before doing prepare. // so that scanner can be automatically deconstructed if prepare failed. 
_scanner_pool->add(scanner); - RETURN_IF_ERROR(scanner->prepare(*scan_range, scanner_ranges, _olap_filter, _is_null_vector)); - + RETURN_IF_ERROR( + scanner->prepare(*scan_range, scanner_ranges, _olap_filter, _is_null_vector)); + _olap_scanners.push_back(scanner); disk_set.insert(scanner->scan_disk()); } @@ -710,14 +685,12 @@ Status OlapScanNode::start_scan_thread(RuntimeState* state) { _progress = ProgressUpdater(ss.str(), _olap_scanners.size(), 1); _progress.set_logging_level(1); - _transfer_thread.add_thread( - new boost::thread( - &OlapScanNode::transfer_thread, this, state)); + _transfer_thread.add_thread(new boost::thread(&OlapScanNode::transfer_thread, this, state)); return Status::OK(); } -template +template Status OlapScanNode::normalize_predicate(ColumnValueRange& range, SlotDescriptor* slot) { // 1. Normalize InPredicate, add to ColumnValueRange RETURN_IF_ERROR(normalize_in_and_eq_predicate(slot, &range)); @@ -745,8 +718,9 @@ static bool ignore_cast(SlotDescriptor* slot, Expr* expr) { // It will only handle the InPredicate and eq BinaryPredicate in _conjunct_ctxs. // It will try to push down conditions of that column as much as possible, // But if the number of conditions exceeds the limit, none of conditions will be pushed down. -template -Status OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot, ColumnValueRange* range) { +template +Status OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot, + ColumnValueRange* range) { bool meet_eq_binary = false; for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) { // 1. 
Normalize in conjuncts like 'where col in (v1, v2, v3)' @@ -780,8 +754,7 @@ Status OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot, ColumnV } } - VLOG(1) << slot->col_name() << " fixed_values add num: " - << pred->hybrid_set()->size(); + VLOG(1) << slot->col_name() << " fixed_values add num: " << pred->hybrid_set()->size(); // if there are too many elements in InPredicate, exceed the limit, // we will not push any condition of this column to storage engine. @@ -790,8 +763,8 @@ Status OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot, ColumnV // ATTN: This is just an experience value. You may need to try // different thresholds to improve performance. if (pred->hybrid_set()->size() > _max_pushdown_conditions_per_column) { - VLOG(3) << "Predicate value num " << pred->hybrid_set()->size() - << " exceed limit " << _max_pushdown_conditions_per_column; + VLOG(3) << "Predicate value num " << pred->hybrid_set()->size() << " exceed limit " + << _max_pushdown_conditions_per_column; continue; } @@ -813,7 +786,7 @@ Status OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot, ColumnV } case TYPE_DATE: { DateTimeValue date_value = - *reinterpret_cast(iter->get_value()); + *reinterpret_cast(iter->get_value()); date_value.cast_to_date(); range->add_fixed_value(*reinterpret_cast(&date_value)); break; @@ -828,7 +801,8 @@ Status OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot, ColumnV case TYPE_INT: case TYPE_BIGINT: case TYPE_DATETIME: { - range->add_fixed_value(*reinterpret_cast(const_cast(iter->get_value()))); + range->add_fixed_value( + *reinterpret_cast(const_cast(iter->get_value()))); break; } case TYPE_BOOLEAN: { @@ -842,19 +816,18 @@ Status OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot, ColumnV } iter->next(); } - + } // end of handle in predicate // 2. 
Normalize eq conjuncts like 'where col = value' - if (TExprNodeType::BINARY_PRED == _conjunct_ctxs[conj_idx]->root()->node_type() - && FILTER_IN == to_olap_filter_type(_conjunct_ctxs[conj_idx]->root()->op(), false)) { - + if (TExprNodeType::BINARY_PRED == _conjunct_ctxs[conj_idx]->root()->node_type() && + FILTER_IN == to_olap_filter_type(_conjunct_ctxs[conj_idx]->root()->op(), false)) { Expr* pred = _conjunct_ctxs[conj_idx]->root(); DCHECK(pred->get_num_children() == 2); for (int child_idx = 0; child_idx < 2; ++child_idx) { - if (Expr::type_without_cast(pred->get_child(child_idx)) - != TExprNodeType::SLOT_REF) { + if (Expr::type_without_cast(pred->get_child(child_idx)) != + TExprNodeType::SLOT_REF) { continue; } @@ -893,46 +866,46 @@ Status OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot, ColumnV // because for AND compound predicates, it can overwrite previous conditions range->clear(); switch (slot->type().type) { - case TYPE_TINYINT: { - int32_t v = *reinterpret_cast(value); - range->add_fixed_value(*reinterpret_cast(&v)); - break; - } - case TYPE_DATE: { - DateTimeValue date_value = - *reinterpret_cast(value); - date_value.cast_to_date(); - range->add_fixed_value(*reinterpret_cast(&date_value)); - break; - } - case TYPE_DECIMAL: - case TYPE_DECIMALV2: - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_DATETIME: - case TYPE_SMALLINT: - case TYPE_INT: - case TYPE_BIGINT: - case TYPE_LARGEINT: { - range->add_fixed_value(*reinterpret_cast(value)); - break; - } - case TYPE_BOOLEAN: { - bool v = *reinterpret_cast(value); - range->add_fixed_value(*reinterpret_cast(&v)); - break; - } - default: { - LOG(WARNING) << "Normalize filter fail, Unsupported Primitive type. 
[type=" - << expr->type() << "]"; - return Status::InternalError("Normalize filter fail, Unsupported Primitive type"); - } + case TYPE_TINYINT: { + int32_t v = *reinterpret_cast(value); + range->add_fixed_value(*reinterpret_cast(&v)); + break; + } + case TYPE_DATE: { + DateTimeValue date_value = *reinterpret_cast(value); + date_value.cast_to_date(); + range->add_fixed_value(*reinterpret_cast(&date_value)); + break; + } + case TYPE_DECIMAL: + case TYPE_DECIMALV2: + case TYPE_CHAR: + case TYPE_VARCHAR: + case TYPE_HLL: + case TYPE_DATETIME: + case TYPE_SMALLINT: + case TYPE_INT: + case TYPE_BIGINT: + case TYPE_LARGEINT: { + range->add_fixed_value(*reinterpret_cast(value)); + break; + } + case TYPE_BOOLEAN: { + bool v = *reinterpret_cast(value); + range->add_fixed_value(*reinterpret_cast(&v)); + break; + } + default: { + LOG(WARNING) << "Normalize filter fail, Unsupported Primitive type. [type=" + << expr->type() << "]"; + return Status::InternalError( + "Normalize filter fail, Unsupported Primitive type"); + } } meet_eq_binary = true; } // end for each binary predicate child - } // end of handling eq binary predicate + } // end of handling eq binary predicate if (range->get_fixed_value_size() > 0) { // this columns already meet some eq predicates(IN or Binary), @@ -953,8 +926,7 @@ Status OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot, ColumnV // So the strategy is to use the BinaryPredicate as much as possible. 
break; } - } - + } } if (range->get_fixed_value_size() > _max_pushdown_conditions_per_column) { @@ -964,7 +936,8 @@ Status OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot, ColumnV return Status::OK(); } -void OlapScanNode::construct_is_null_pred_in_where_pred(Expr* expr, SlotDescriptor* slot, const std::string& is_null_str) { +void OlapScanNode::construct_is_null_pred_in_where_pred(Expr* expr, SlotDescriptor* slot, + const std::string& is_null_str) { if (expr->node_type() != TExprNodeType::SLOT_REF) { return; } @@ -985,18 +958,19 @@ void OlapScanNode::construct_is_null_pred_in_where_pred(Expr* expr, SlotDescript return; } -template -Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot, ColumnValueRange* range) { +template +Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot, + ColumnValueRange* range) { for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) { - Expr *root_expr = _conjunct_ctxs[conj_idx]->root(); - if (TExprNodeType::BINARY_PRED != root_expr->node_type() - || FILTER_IN == to_olap_filter_type(root_expr->op(), false) - || FILTER_NOT_IN == to_olap_filter_type(root_expr->op(), false)) { + Expr* root_expr = _conjunct_ctxs[conj_idx]->root(); + if (TExprNodeType::BINARY_PRED != root_expr->node_type() || + FILTER_IN == to_olap_filter_type(root_expr->op(), false) || + FILTER_NOT_IN == to_olap_filter_type(root_expr->op(), false)) { if (TExprNodeType::FUNCTION_CALL == root_expr->node_type()) { std::string is_null_str; if (root_expr->is_null_scalar_function(is_null_str)) { - construct_is_null_pred_in_where_pred(root_expr->get_child(0), - slot, is_null_str); + construct_is_null_pred_in_where_pred(root_expr->get_child(0), slot, + is_null_str); } } continue; @@ -1039,7 +1013,7 @@ Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot, Colu case TYPE_TINYINT: { int32_t v = *reinterpret_cast(value); range->add_range(to_olap_filter_type(pred->op(), child_idx), - 
*reinterpret_cast(&v)); + *reinterpret_cast(&v)); break; } @@ -1061,7 +1035,7 @@ Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot, Colu case TYPE_BIGINT: case TYPE_LARGEINT: { range->add_range(to_olap_filter_type(pred->op(), child_idx), - *reinterpret_cast(value)); + *reinterpret_cast(value)); break; } case TYPE_BOOLEAN: { @@ -1074,12 +1048,13 @@ Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot, Colu default: { LOG(WARNING) << "Normalize filter fail, Unsupported Primitive type. [type=" << expr->type() << "]"; - return Status::InternalError("Normalize filter fail, Unsupported Primitive type"); + return Status::InternalError( + "Normalize filter fail, Unsupported Primitive type"); } } - VLOG(1) << slot->col_name() << " op: " - << static_cast(to_olap_filter_type(pred->op(), child_idx)) + VLOG(1) << slot->col_name() + << " op: " << static_cast(to_olap_filter_type(pred->op(), child_idx)) << " value: " << *reinterpret_cast(value); } } @@ -1189,15 +1164,13 @@ void OlapScanNode::transfer_thread(RuntimeState* state) { // scanner_row_num = 16k // 16k * 10 * 12 * 8 = 15M(>2s) --> nice=10 // 16k * 20 * 22 * 8 = 55M(>6s) --> nice=0 - while (_nice > 0 - && _total_assign_num > (22 - _nice) * (20 - _nice) * 6) { + while (_nice > 0 && _total_assign_num > (22 - _nice) * (20 - _nice) * 6) { --_nice; } // 2 wait when all scanner are running & no result in queue - while (UNLIKELY(_running_thread == assigned_thread_num - && _scan_row_batches.empty() - && !_scanner_done)) { + while (UNLIKELY(_running_thread == assigned_thread_num && _scan_row_batches.empty() && + !_scanner_done)) { _scan_batch_added_cv.wait(l); } @@ -1268,8 +1241,8 @@ void OlapScanNode::scanner_thread(OlapScanner* scanner) { LOG(INFO) << "Scan thread cancelled, cause query done, maybe reach limit."; break; } - RowBatch *row_batch = new RowBatch( - this->row_desc(), state->batch_size(), _runtime_state->fragment_mem_tracker().get()); + RowBatch* row_batch = new 
RowBatch(this->row_desc(), state->batch_size(), + _runtime_state->fragment_mem_tracker().get()); row_batch->set_scanner_id(scanner->id()); status = scanner->get_batch(_runtime_state, row_batch, &eos); if (!status.ok()) { @@ -1341,9 +1314,8 @@ Status OlapScanNode::add_one_batch(RowBatchInterface* row_batch) { { std::unique_lock l(_row_batches_lock); - while (UNLIKELY(_materialized_row_batches.size() - >= _max_materialized_row_batches - && !_transfer_done)) { + while (UNLIKELY(_materialized_row_batches.size() >= _max_materialized_row_batches && + !_transfer_done)) { _row_batch_consumed_cv.wait(l); } @@ -1355,9 +1327,6 @@ Status OlapScanNode::add_one_batch(RowBatchInterface* row_batch) { return Status::OK(); } -void OlapScanNode::debug_string( - int /* indentation_level */, - std::stringstream* /* out */) const { -} +void OlapScanNode::debug_string(int /* indentation_level */, std::stringstream* /* out */) const {} } // namespace doris diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h index 59289351883d7f..811787cdae79ce 100644 --- a/be/src/exec/olap_scan_node.h +++ b/be/src/exec/olap_scan_node.h @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef DORIS_BE_SRC_QUERY_EXEC_OLAP_SCAN_NODE_H -#define DORIS_BE_SRC_QUERY_EXEC_OLAP_SCAN_NODE_H +#ifndef DORIS_BE_SRC_QUERY_EXEC_OLAP_SCAN_NODE_H +#define DORIS_BE_SRC_QUERY_EXEC_OLAP_SCAN_NODE_H -#include #include +#include #include #include @@ -55,9 +55,8 @@ class OlapScanNode : public ScanNode { Status collect_query_statistics(QueryStatistics* statistics) override; virtual Status close(RuntimeState* state); virtual Status set_scan_ranges(const std::vector& scan_ranges); - inline void set_no_agg_finalize() { - _need_agg_finalize = false; - } + inline void set_no_agg_finalize() { _need_agg_finalize = false; } + protected: typedef struct { Tuple* tuple; @@ -65,7 +64,7 @@ class OlapScanNode : public ScanNode { } HeapType; class IsFixedValueRangeVisitor : public boost::static_visitor { public: - template + template bool operator()(T& v) const { return v.is_fixed_value_range(); } @@ -73,7 +72,7 @@ class OlapScanNode : public ScanNode { class GetFixedValueSizeVisitor : public boost::static_visitor { public: - template + template size_t operator()(T& v) const { return v.get_fixed_value_size(); } @@ -82,12 +81,12 @@ class OlapScanNode : public ScanNode { class ExtendScanKeyVisitor : public boost::static_visitor { public: ExtendScanKeyVisitor(OlapScanKeys& scan_keys, int32_t max_scan_key_num) - : _scan_keys(scan_keys), - _max_scan_key_num(max_scan_key_num) { } - template + : _scan_keys(scan_keys), _max_scan_key_num(max_scan_key_num) {} + template Status operator()(T& v) { return _scan_keys.extend_scan_key(v, _max_scan_key_num); } + private: OlapScanKeys& _scan_keys; int32_t _max_scan_key_num; @@ -97,7 +96,7 @@ class OlapScanNode : public ScanNode { class ToOlapFilterVisitor : public boost::static_visitor { public: - template + template void operator()(T& v, P& v2) const { v.to_olap_filter(v2); } @@ -112,6 +111,7 @@ class OlapScanNode : public ScanNode { bool operator()(const HeapType& lhs, const HeapType& rhs) const { return 
(*_compute_fn)(lhs.tuple->get_slot(_offset), rhs.tuple->get_slot(_offset)); } + private: CompareLargeFunc _compute_fn; int _offset; @@ -139,13 +139,13 @@ class OlapScanNode : public ScanNode { Status build_scan_key(); Status start_scan_thread(RuntimeState* state); - template + template Status normalize_predicate(ColumnValueRange& range, SlotDescriptor* slot); - template + template Status normalize_in_and_eq_predicate(SlotDescriptor* slot, ColumnValueRange* range); - template + template Status normalize_noneq_binary_predicate(SlotDescriptor* slot, ColumnValueRange* range); void transfer_thread(RuntimeState* state); @@ -158,11 +158,12 @@ class OlapScanNode : public ScanNode { private: void _init_counter(RuntimeState* state); - // OLAP_SCAN_NODE profile layering: OLAP_SCAN_NODE, OlapScanner, and SegmentIterator + // OLAP_SCAN_NODE profile layering: OLAP_SCAN_NODE, OlapScanner, and SegmentIterator // according to the calling relationship void init_scan_profile(); - void construct_is_null_pred_in_where_pred(Expr* expr, SlotDescriptor* slot, const std::string& is_null_str); + void construct_is_null_pred_in_where_pred(Expr* expr, SlotDescriptor* slot, + const std::string& is_null_str); friend class OlapScanner; diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index bbc4b343818594..4cb7ca07415206 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -15,42 +15,39 @@ // specific language governing permissions and limitations // under the License. 
+#include "olap_scanner.h" + #include #include #include "gen_cpp/PaloInternalService_types.h" -#include "olap_scanner.h" +#include "olap/field.h" #include "olap_scan_node.h" #include "olap_utils.h" -#include "olap/field.h" -#include "service/backend_options.h" #include "runtime/descriptors.h" -#include "runtime/runtime_state.h" #include "runtime/mem_pool.h" #include "runtime/mem_tracker.h" +#include "runtime/runtime_state.h" +#include "service/backend_options.h" +#include "util/doris_metrics.h" #include "util/mem_util.hpp" #include "util/network_util.h" -#include "util/doris_metrics.h" namespace doris { -OlapScanner::OlapScanner( - RuntimeState* runtime_state, - OlapScanNode* parent, - bool aggregation, - bool need_agg_finalize, - const TPaloScanRange& scan_range, - const std::vector& key_ranges) - : _runtime_state(runtime_state), - _parent(parent), - _tuple_desc(parent->_tuple_desc), - _profile(parent->runtime_profile()), - _string_slots(parent->_string_slots), - _is_open(false), - _aggregation(aggregation), - _need_agg_finalize(need_agg_finalize), - _tuple_idx(parent->_tuple_idx), - _direct_conjunct_size(parent->_direct_conjunct_size) { +OlapScanner::OlapScanner(RuntimeState* runtime_state, OlapScanNode* parent, bool aggregation, + bool need_agg_finalize, const TPaloScanRange& scan_range, + const std::vector& key_ranges) + : _runtime_state(runtime_state), + _parent(parent), + _tuple_desc(parent->_tuple_desc), + _profile(parent->runtime_profile()), + _string_slots(parent->_string_slots), + _is_open(false), + _aggregation(aggregation), + _need_agg_finalize(need_agg_finalize), + _tuple_idx(parent->_tuple_idx), + _direct_conjunct_size(parent->_direct_conjunct_size) { _reader.reset(new Reader()); DCHECK(_reader.get() != NULL); @@ -58,26 +55,24 @@ OlapScanner::OlapScanner( _rows_pushed_cond_filtered_counter = parent->_rows_pushed_cond_filtered_counter; } -OlapScanner::~OlapScanner() { -} +OlapScanner::~OlapScanner() {} -Status OlapScanner::prepare( - const 
TPaloScanRange& scan_range, const std::vector& key_ranges, - const std::vector& filters, const std::vector& is_nulls) { +Status OlapScanner::prepare(const TPaloScanRange& scan_range, + const std::vector& key_ranges, + const std::vector& filters, + const std::vector& is_nulls) { // Get olap table TTabletId tablet_id = scan_range.tablet_id; - SchemaHash schema_hash = - strtoul(scan_range.schema_hash.c_str(), nullptr, 10); - _version = - strtoul(scan_range.version.c_str(), nullptr, 10); + SchemaHash schema_hash = strtoul(scan_range.schema_hash.c_str(), nullptr, 10); + _version = strtoul(scan_range.version.c_str(), nullptr, 10); { std::string err; - _tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash, true, &err); + _tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash, + true, &err); if (_tablet.get() == nullptr) { std::stringstream ss; ss << "failed to get tablet. tablet_id=" << tablet_id - << ", with schema_hash=" << schema_hash - << ", reason=" << err; + << ", with schema_hash=" << schema_hash << ", reason=" << err; LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } @@ -95,12 +90,14 @@ Status OlapScanner::prepare( // to prevent this case: when there are lots of olap scanners to run for example 10000 // the rowsets maybe compacted when the last olap scanner starts Version rd_version(0, _version); - OLAPStatus acquire_reader_st = _tablet->capture_rs_readers(rd_version, &_params.rs_readers); + OLAPStatus acquire_reader_st = + _tablet->capture_rs_readers(rd_version, &_params.rs_readers); if (acquire_reader_st != OLAP_SUCCESS) { LOG(WARNING) << "fail to init reader.res=" << acquire_reader_st; std::stringstream ss; ss << "failed to initialize storage reader. 
tablet=" << _tablet->full_name() - << ", res=" << acquire_reader_st << ", backend=" << BackendOptions::get_localhost(); + << ", res=" << acquire_reader_st + << ", backend=" << BackendOptions::get_localhost(); return Status::InternalError(ss.str().c_str()); } } @@ -133,10 +130,9 @@ Status OlapScanner::open() { } // it will be called under tablet read lock because capture rs readers need -Status OlapScanner::_init_params( - const std::vector& key_ranges, - const std::vector& filters, - const std::vector& is_nulls) { +Status OlapScanner::_init_params(const std::vector& key_ranges, + const std::vector& filters, + const std::vector& is_nulls) { RETURN_IF_ERROR(_init_return_columns()); _params.tablet = _tablet; @@ -154,7 +150,7 @@ Status OlapScanner::_init_params( // Range for (auto key_range : key_ranges) { if (key_range->begin_scan_range.size() == 1 && - key_range->begin_scan_range.get_value(0) == NEGATIVE_INFINITY) { + key_range->begin_scan_range.get_value(0) == NEGATIVE_INFINITY) { continue; } @@ -212,7 +208,7 @@ Status OlapScanner::_init_return_columns() { int32_t index = _tablet->field_index(slot->col_name()); if (index < 0) { std::stringstream ss; - ss << "field name is invalid. field=" << slot->col_name(); + ss << "field name is invalid. 
field=" << slot->col_name(); LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } @@ -223,7 +219,8 @@ Status OlapScanner::_init_return_columns() { if (_tablet->tablet_schema().has_sequence_col()) { bool has_replace_col = false; for (auto col : _return_columns) { - if (_tablet->tablet_schema().column(col).aggregation() == FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE) { + if (_tablet->tablet_schema().column(col).aggregation() == + FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE) { has_replace_col = true; break; } @@ -239,13 +236,12 @@ Status OlapScanner::_init_return_columns() { return Status::OK(); } -Status OlapScanner::get_batch( - RuntimeState* state, RowBatch* batch, bool* eof) { +Status OlapScanner::get_batch(RuntimeState* state, RowBatch* batch, bool* eof) { // 2. Allocate Row's Tuple buf - uint8_t *tuple_buf = batch->tuple_data_pool()->allocate( - state->batch_size() * _tuple_desc->byte_size()); + uint8_t* tuple_buf = + batch->tuple_data_pool()->allocate(state->batch_size() * _tuple_desc->byte_size()); bzero(tuple_buf, state->batch_size() * _tuple_desc->byte_size()); - Tuple *tuple = reinterpret_cast(tuple_buf); + Tuple* tuple = reinterpret_cast(tuple_buf); auto tracker = MemTracker::CreateTracker(state->fragment_mem_tracker()->limit(), "OlapScanner"); std::unique_ptr mem_pool(new MemPool(tracker.get())); @@ -260,12 +256,13 @@ Status OlapScanner::get_batch( break; } // Read one row from reader - auto res = _reader->next_row_with_aggregation(&_read_row_cursor, mem_pool.get(), batch->agg_object_pool(), eof); + auto res = _reader->next_row_with_aggregation(&_read_row_cursor, mem_pool.get(), + batch->agg_object_pool(), eof); if (res != OLAP_SUCCESS) { std::stringstream ss; ss << "Internal Error: read storage fail. 
res=" << res - << ", tablet=" << _tablet->full_name() - << ", backend=" << BackendOptions::get_localhost(); + << ", tablet=" << _tablet->full_name() + << ", backend=" << BackendOptions::get_localhost(); return Status::InternalError(ss.str()); } // If we reach end of this scanner, break @@ -307,9 +304,9 @@ Status OlapScanner::get_batch( // 3.5.2 Using pushdown conjuncts to filter data if (_use_pushdown_conjuncts) { - if (!ExecNode::eval_conjuncts( - &_conjunct_ctxs[_direct_conjunct_size], - _conjunct_ctxs.size() - _direct_conjunct_size, row)) { + if (!ExecNode::eval_conjuncts(&_conjunct_ctxs[_direct_conjunct_size], + _conjunct_ctxs.size() - _direct_conjunct_size, + row)) { // check pushdown conjuncts fail then clear tuple for reuse // make sure to reset null indicators since we're overwriting // the tuple assembled for the previous row @@ -347,14 +344,16 @@ Status OlapScanner::get_batch( if (_use_pushdown_conjuncts) { // check this rate after if (_num_rows_read > 32768) { - int32_t pushdown_return_rate - = _num_rows_read * 100 / (_num_rows_read + _num_rows_pushed_cond_filtered); - if (pushdown_return_rate > config::doris_max_pushdown_conjuncts_return_rate) { + int32_t pushdown_return_rate = + _num_rows_read * 100 / + (_num_rows_read + _num_rows_pushed_cond_filtered); + if (pushdown_return_rate > + config::doris_max_pushdown_conjuncts_return_rate) { _use_pushdown_conjuncts = false; VLOG(2) << "Stop Using PushDown Conjuncts. 
" - << "PushDownReturnRate: " << pushdown_return_rate << "%" - << " MaxPushDownReturnRate: " - << config::doris_max_pushdown_conjuncts_return_rate << "%"; + << "PushDownReturnRate: " << pushdown_return_rate << "%" + << " MaxPushDownReturnRate: " + << config::doris_max_pushdown_conjuncts_return_rate << "%"; } } } @@ -383,7 +382,7 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) { switch (slot_desc->type().type) { case TYPE_CHAR: { Slice* slice = reinterpret_cast(ptr); - StringValue *slot = tuple->get_string_slot(slot_desc->tuple_offset()); + StringValue* slot = tuple->get_string_slot(slot_desc->tuple_offset()); slot->ptr = slice->data; slot->len = strnlen(slot->ptr, slice->size); break; @@ -392,13 +391,13 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) { case TYPE_OBJECT: case TYPE_HLL: { Slice* slice = reinterpret_cast(ptr); - StringValue *slot = tuple->get_string_slot(slot_desc->tuple_offset()); + StringValue* slot = tuple->get_string_slot(slot_desc->tuple_offset()); slot->ptr = slice->data; slot->len = slice->size; break; } case TYPE_DECIMAL: { - DecimalValue *slot = tuple->get_decimal_slot(slot_desc->tuple_offset()); + DecimalValue* slot = tuple->get_decimal_slot(slot_desc->tuple_offset()); // TODO(lingbin): should remove this assign, use set member function int64_t int_value = *(int64_t*)(ptr); @@ -407,7 +406,7 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) { break; } case TYPE_DECIMALV2: { - DecimalV2Value *slot = tuple->get_decimalv2_slot(slot_desc->tuple_offset()); + DecimalV2Value* slot = tuple->get_decimalv2_slot(slot_desc->tuple_offset()); int64_t int_value = *(int64_t*)(ptr); int32_t frac_value = *(int32_t*)(ptr + sizeof(int64_t)); @@ -417,7 +416,7 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) { break; } case TYPE_DATETIME: { - DateTimeValue *slot = tuple->get_datetime_slot(slot_desc->tuple_offset()); + DateTimeValue* slot = tuple->get_datetime_slot(slot_desc->tuple_offset()); uint64_t value = *reinterpret_cast(ptr); 
if (!slot->from_olap_datetime(value)) { tuple->set_null(slot_desc->null_indicator_offset()); @@ -425,7 +424,7 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) { break; } case TYPE_DATE: { - DateTimeValue *slot = tuple->get_datetime_slot(slot_desc->tuple_offset()); + DateTimeValue* slot = tuple->get_datetime_slot(slot_desc->tuple_offset()); uint64_t value = 0; value = *(unsigned char*)(ptr + 2); value <<= 8; @@ -438,7 +437,7 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) { break; } default: { - void *slot = tuple->get_slot(slot_desc->tuple_offset()); + void* slot = tuple->get_slot(slot_desc->tuple_offset()); memory_copy(slot, ptr, len); break; } @@ -476,8 +475,9 @@ void OlapScanner::update_counter() { COUNTER_UPDATE(_parent->_stats_filtered_counter, _reader->stats().rows_stats_filtered); COUNTER_UPDATE(_parent->_bf_filtered_counter, _reader->stats().rows_bf_filtered); COUNTER_UPDATE(_parent->_del_filtered_counter, _reader->stats().rows_del_filtered); - COUNTER_UPDATE(_parent->_conditions_filtered_counter, _reader->stats().rows_conditions_filtered); - + COUNTER_UPDATE(_parent->_conditions_filtered_counter, + _reader->stats().rows_conditions_filtered); + COUNTER_UPDATE(_parent->_key_range_filtered_counter, _reader->stats().rows_key_range_filtered); COUNTER_UPDATE(_parent->_index_load_timer, _reader->stats().index_load_ns); @@ -485,7 +485,8 @@ void OlapScanner::update_counter() { COUNTER_UPDATE(_parent->_total_pages_num_counter, _reader->stats().total_pages_num); COUNTER_UPDATE(_parent->_cached_pages_num_counter, _reader->stats().cached_pages_num); - COUNTER_UPDATE(_parent->_bitmap_index_filter_counter, _reader->stats().rows_bitmap_index_filtered); + COUNTER_UPDATE(_parent->_bitmap_index_filter_counter, + _reader->stats().rows_bitmap_index_filtered); COUNTER_UPDATE(_parent->_bitmap_index_filter_timer, _reader->stats().bitmap_index_filter_timer); COUNTER_UPDATE(_parent->_block_seek_counter, _reader->stats().block_seek_num); diff --git 
a/be/src/exec/olap_scanner.h b/be/src/exec/olap_scanner.h index b6ca6d84bb90b1..7f5e432c6c915a 100644 --- a/be/src/exec/olap_scanner.h +++ b/be/src/exec/olap_scanner.h @@ -19,26 +19,25 @@ #define DORIS_BE_SRC_QUERY_EXEC_OLAP_SCANNER_H #include -#include -#include #include +#include #include +#include #include "common/status.h" -#include "exec/olap_common.h" #include "exec/exec_node.h" +#include "exec/olap_common.h" #include "exprs/expr.h" #include "gen_cpp/PaloInternalService_types.h" #include "gen_cpp/PlanNodes_types.h" -#include "runtime/descriptors.h" -#include "runtime/tuple.h" -#include "runtime/vectorized_row_batch.h" - #include "olap/delete_handler.h" -#include "olap/rowset/column_data.h" #include "olap/olap_cond.h" -#include "olap/storage_engine.h" #include "olap/reader.h" +#include "olap/rowset/column_data.h" +#include "olap/storage_engine.h" +#include "runtime/descriptors.h" +#include "runtime/tuple.h" +#include "runtime/vectorized_row_batch.h" namespace doris { @@ -49,21 +48,14 @@ class Field; class OlapScanner { public: - OlapScanner( - RuntimeState* runtime_state, - OlapScanNode* parent, - bool aggregation, - bool need_agg_finalize, - const TPaloScanRange& scan_range, - const std::vector& key_ranges); + OlapScanner(RuntimeState* runtime_state, OlapScanNode* parent, bool aggregation, + bool need_agg_finalize, const TPaloScanRange& scan_range, + const std::vector& key_ranges); ~OlapScanner(); - Status prepare( - const TPaloScanRange& scan_range, - const std::vector& key_ranges, - const std::vector& filters, - const std::vector& is_nulls); + Status prepare(const TPaloScanRange& scan_range, const std::vector& key_ranges, + const std::vector& filters, const std::vector& is_nulls); Status open(); @@ -71,13 +63,9 @@ class OlapScanner { Status close(RuntimeState* state); - RuntimeState* runtime_state() { - return _runtime_state; - } + RuntimeState* runtime_state() { return _runtime_state; } - std::vector* conjunct_ctxs() { - return &_conjunct_ctxs; - } + 
std::vector* conjunct_ctxs() { return &_conjunct_ctxs; } int id() const { return _id; } void set_id(int id) { _id = id; } @@ -88,14 +76,12 @@ class OlapScanner { void update_counter(); - const std::string& scan_disk() const { - return _tablet->data_dir()->path(); - } + const std::string& scan_disk() const { return _tablet->data_dir()->path(); } + private: - Status _init_params( - const std::vector& key_ranges, - const std::vector& filters, - const std::vector& is_nulls); + Status _init_params(const std::vector& key_ranges, + const std::vector& filters, + const std::vector& is_nulls); Status _init_return_columns(); void _convert_row_to_tuple(Tuple* tuple); @@ -104,7 +90,7 @@ class OlapScanner { RuntimeState* _runtime_state; OlapScanNode* _parent; - const TupleDescriptor* _tuple_desc; /**< tuple descriptor */ + const TupleDescriptor* _tuple_desc; /**< tuple descriptor */ RuntimeProfile* _profile; const std::vector& _string_slots; diff --git a/be/src/exec/olap_utils.h b/be/src/exec/olap_utils.h index 8a73c5b26b3f24..f702df9de210e9 100644 --- a/be/src/exec/olap_utils.h +++ b/be/src/exec/olap_utils.h @@ -15,22 +15,22 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef DORIS_BE_SRC_QUERY_EXEC_OLAP_UTILS_H -#define DORIS_BE_SRC_QUERY_EXEC_OLAP_UTILS_H +#ifndef DORIS_BE_SRC_QUERY_EXEC_OLAP_UTILS_H +#define DORIS_BE_SRC_QUERY_EXEC_OLAP_UTILS_H #include #include "common/logging.h" #include "gen_cpp/Opcodes_types.h" -#include "runtime/primitive_type.h" -#include "runtime/datetime_value.h" #include "olap/tuple.h" +#include "runtime/datetime_value.h" +#include "runtime/primitive_type.h" namespace doris { typedef bool (*CompareLargeFunc)(const void*, const void*); -template +template inline bool compare_large(const void* lhs, const void* rhs) { return *reinterpret_cast(lhs) > *reinterpret_cast(rhs); } @@ -89,13 +89,12 @@ typedef struct OlapScanRange { begin_scan_range.add_value(NEGATIVE_INFINITY); end_scan_range.add_value(POSITIVE_INFINITY); } - OlapScanRange( - bool begin, - bool end, - std::vector& begin_range, - std::vector& end_range) - : begin_include(begin), end_include(end), - begin_scan_range(begin_range), end_scan_range(end_range) { } + OlapScanRange(bool begin, bool end, std::vector& begin_range, + std::vector& end_range) + : begin_include(begin), + end_include(end), + begin_scan_range(begin_range), + end_scan_range(end_range) {} bool begin_include; bool end_include; @@ -103,33 +102,23 @@ typedef struct OlapScanRange { OlapTuple end_scan_range; } OlapScanRange; -static char encoding_table[] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', - 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', - 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', - 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', - 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', - 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '0', '1', '2', '3', - '4', '5', '6', '7', '8', '9', '+', '/' -}; +static char encoding_table[] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 
'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'}; static int mod_table[] = {0, 2, 1}; static const char base64_pad = '='; -inline size_t base64_encode( - const char* data, - size_t length, - char* encoded_data) { - - size_t output_length = (size_t)(4.0 * ceil((double) length / 3.0)); +inline size_t base64_encode(const char* data, size_t length, char* encoded_data) { + size_t output_length = (size_t)(4.0 * ceil((double)length / 3.0)); if (encoded_data == NULL) { return 0; } for (uint32_t i = 0, j = 0; i < length;) { - uint32_t octet_a = i < length ? (unsigned char)data[i++] : 0; uint32_t octet_b = i < length ? (unsigned char)data[i++] : 0; uint32_t octet_c = i < length ? (unsigned char)data[i++] : 0; @@ -202,8 +191,8 @@ inline SQLFilterOp to_olap_filter_type(TExprOpcode::type type, bool opposite) { switch (type) { case TExprOpcode::LT: case TExprOpcode::LE: - // NOTE: Datetime may be truncated to a date column, so we convert LT to LE - // for example: '2010-01-01 00:00:01' will be truncate to '2010-01-01' + // NOTE: Datetime may be truncated to a date column, so we convert LT to LE + // for example: '2010-01-01 00:00:01' will be truncate to '2010-01-01' return opposite ? 
FILTER_LARGER_OR_EQUAL : FILTER_LESS_OR_EQUAL; case TExprOpcode::GT: @@ -229,4 +218,3 @@ inline SQLFilterOp to_olap_filter_type(TExprOpcode::type type, bool opposite) { } // namespace doris #endif - diff --git a/be/src/exec/orc_scanner.cpp b/be/src/exec/orc_scanner.cpp index 30954526b5dbd6..bdd7eb4bc60c18 100644 --- a/be/src/exec/orc_scanner.cpp +++ b/be/src/exec/orc_scanner.cpp @@ -32,21 +32,21 @@ // https://github.com/apache/orc/blob/84353fbfc447b06e0924024a8e03c1aaebd3e7a5/c%2B%2B/src/Timezone.hh#L104-L109 namespace orc { -class TimezoneError: public std::runtime_error { +class TimezoneError : public std::runtime_error { public: TimezoneError(const std::string& what); TimezoneError(const TimezoneError&); virtual ~TimezoneError() noexcept; }; -} +} // namespace orc namespace doris { class ORCFileStream : public orc::InputStream { public: - ORCFileStream(FileReader* file, std::string filename) : _file(file), _filename(std::move(filename)) { - } + ORCFileStream(FileReader* file, std::string filename) + : _file(file), _filename(std::move(filename)) {} ~ORCFileStream() override { if (_file != nullptr) { @@ -59,17 +59,13 @@ class ORCFileStream : public orc::InputStream { /** * Get the total length of the file in bytes. */ - uint64_t getLength() const override { - return _file->size(); - } + uint64_t getLength() const override { return _file->size(); } /** * Get the natural size for reads. 
* @return the number of bytes that should be read at once */ - uint64_t getNaturalReadSize() const override { - return 128 * 1024; - } + uint64_t getNaturalReadSize() const override { return 128 * 1024; } /** * Read length bytes from the file starting at offset into @@ -93,44 +89,43 @@ class ORCFileStream : public orc::InputStream { if (reads == 0) { break; } - bytes_read += reads;// total read bytes + bytes_read += reads; // total read bytes offset += reads; - buf = (char*) buf + reads; + buf = (char*)buf + reads; } if (length != bytes_read) { - throw orc::ParseError("Short read of " + _filename - + ". expected :" + std::to_string(length) + ", actual : " + std::to_string(bytes_read)); + throw orc::ParseError("Short read of " + _filename + + ". expected :" + std::to_string(length) + + ", actual : " + std::to_string(bytes_read)); } } /** * Get the name of the stream for error messages. */ - const std::string& getName() const override { - return _filename; - } + const std::string& getName() const override { return _filename; } private: FileReader* _file; std::string _filename; }; -ORCScanner::ORCScanner(RuntimeState* state, - RuntimeProfile* profile, - const TBrokerScanRangeParams& params, - const std::vector& ranges, - const std::vector& broker_addresses, - ScannerCounter* counter) : BaseScanner(state, profile, params, counter), - _ranges(ranges), - _broker_addresses(broker_addresses), - // _splittable(params.splittable), - _next_range(0), - _cur_file_eof(true), - _scanner_eof(false), - _total_groups(0), - _current_group(0), - _rows_of_group(0), - _current_line_of_group(0) {} +ORCScanner::ORCScanner(RuntimeState* state, RuntimeProfile* profile, + const TBrokerScanRangeParams& params, + const std::vector& ranges, + const std::vector& broker_addresses, + ScannerCounter* counter) + : BaseScanner(state, profile, params, counter), + _ranges(ranges), + _broker_addresses(broker_addresses), + // _splittable(params.splittable), + _next_range(0), + _cur_file_eof(true), + 
_scanner_eof(false), + _total_groups(0), + _current_group(0), + _rows_of_group(0), + _current_line_of_group(0) {} ORCScanner::~ORCScanner() { close(); @@ -141,8 +136,9 @@ Status ORCScanner::open() { if (!_ranges.empty()) { std::list include_cols; TBrokerRangeDesc range = _ranges[0]; - _num_of_columns_from_file = - range.__isset.num_of_columns_from_file ? range.num_of_columns_from_file : _src_slot_descs.size(); + _num_of_columns_from_file = range.__isset.num_of_columns_from_file + ? range.num_of_columns_from_file + : _src_slot_descs.size(); for (int i = 0; i < _num_of_columns_from_file; i++) { auto slot_desc = _src_slot_descs.at(i); include_cols.push_back(slot_desc->col_name()); @@ -180,15 +176,17 @@ Status ORCScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { ++_current_group; } - const std::vector &batch_vec = ((orc::StructVectorBatch*) _batch.get())->fields; + const std::vector& batch_vec = + ((orc::StructVectorBatch*)_batch.get())->fields; for (int column_ipos = 0; column_ipos < _num_of_columns_from_file; ++column_ipos) { auto slot_desc = _src_slot_descs[column_ipos]; - orc::ColumnVectorBatch *cvb = batch_vec[_position_in_orc_original[column_ipos]]; + orc::ColumnVectorBatch* cvb = batch_vec[_position_in_orc_original[column_ipos]]; if (cvb->hasNulls && !cvb->notNull[_current_line_of_group]) { if (!slot_desc->is_nullable()) { std::stringstream str_error; - str_error << "The field name(" << slot_desc->col_name() << ") is not nullable "; + str_error << "The field name(" << slot_desc->col_name() + << ") is not nullable "; LOG(WARNING) << str_error.str(); return Status::InternalError(str_error.str()); } @@ -202,132 +200,144 @@ Status ORCScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { void* slot = _src_tuple->get_slot(slot_desc->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); - switch (_row_reader->getSelectedType().getSubtype(_position_in_orc_original[column_ipos])->getKind()) { - case orc::BOOLEAN: { - int64_t value = 
((orc::LongVectorBatch*) cvb)->data[_current_line_of_group]; - if (value == 0) { - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(5)); - memcpy(str_slot->ptr, "false", 5); - str_slot->len = 5; - } else { - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(4)); - memcpy(str_slot->ptr, "true", 4); - str_slot->len = 4; - } - break; - } - case orc::BYTE: - case orc::INT: - case orc::SHORT: - case orc::LONG: { - int64_t value = ((orc::LongVectorBatch*) cvb)->data[_current_line_of_group]; - wbytes = sprintf((char*) tmp_buf, "%ld", value); - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(wbytes)); - memcpy(str_slot->ptr, tmp_buf, wbytes); - str_slot->len = wbytes; - break; + switch (_row_reader->getSelectedType() + .getSubtype(_position_in_orc_original[column_ipos]) + ->getKind()) { + case orc::BOOLEAN: { + int64_t value = ((orc::LongVectorBatch*)cvb)->data[_current_line_of_group]; + if (value == 0) { + str_slot->ptr = reinterpret_cast(tuple_pool->allocate(5)); + memcpy(str_slot->ptr, "false", 5); + str_slot->len = 5; + } else { + str_slot->ptr = reinterpret_cast(tuple_pool->allocate(4)); + memcpy(str_slot->ptr, "true", 4); + str_slot->len = 4; } - case orc::FLOAT: - case orc::DOUBLE: { - double value = ((orc::DoubleVectorBatch*) cvb)->data[_current_line_of_group]; - wbytes = sprintf((char*) tmp_buf, "%f", value); - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(wbytes)); - memcpy(str_slot->ptr, tmp_buf, wbytes); - str_slot->len = wbytes; - break; - } - case orc::BINARY: - case orc::CHAR: - case orc::VARCHAR: - case orc::STRING: { - char *value = ((orc::StringVectorBatch*) cvb)->data[_current_line_of_group]; - wbytes = ((orc::StringVectorBatch*) cvb)->length[_current_line_of_group]; - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(wbytes)); - memcpy(str_slot->ptr, value, wbytes); - str_slot->len = wbytes; - break; + break; + } + case orc::BYTE: + case orc::INT: + case orc::SHORT: + case orc::LONG: { + int64_t value = 
((orc::LongVectorBatch*)cvb)->data[_current_line_of_group]; + wbytes = sprintf((char*)tmp_buf, "%ld", value); + str_slot->ptr = reinterpret_cast(tuple_pool->allocate(wbytes)); + memcpy(str_slot->ptr, tmp_buf, wbytes); + str_slot->len = wbytes; + break; + } + case orc::FLOAT: + case orc::DOUBLE: { + double value = ((orc::DoubleVectorBatch*)cvb)->data[_current_line_of_group]; + wbytes = sprintf((char*)tmp_buf, "%f", value); + str_slot->ptr = reinterpret_cast(tuple_pool->allocate(wbytes)); + memcpy(str_slot->ptr, tmp_buf, wbytes); + str_slot->len = wbytes; + break; + } + case orc::BINARY: + case orc::CHAR: + case orc::VARCHAR: + case orc::STRING: { + char* value = ((orc::StringVectorBatch*)cvb)->data[_current_line_of_group]; + wbytes = ((orc::StringVectorBatch*)cvb)->length[_current_line_of_group]; + str_slot->ptr = reinterpret_cast(tuple_pool->allocate(wbytes)); + memcpy(str_slot->ptr, value, wbytes); + str_slot->len = wbytes; + break; + } + case orc::DECIMAL: { + int precision = ((orc::Decimal64VectorBatch*)cvb)->precision; + int scale = ((orc::Decimal64VectorBatch*)cvb)->scale; + + //Decimal64VectorBatch handles decimal columns with precision no greater than 18. + //Decimal128VectorBatch handles the others. + std::string decimal_str; + if (precision <= 18) { + decimal_str = std::to_string(((orc::Decimal64VectorBatch*)cvb) + ->values[_current_line_of_group]); + } else { + decimal_str = ((orc::Decimal128VectorBatch*)cvb) + ->values[_current_line_of_group] + .toString(); } - case orc::DECIMAL: { - int precision = ((orc::Decimal64VectorBatch*) cvb)->precision; - int scale = ((orc::Decimal64VectorBatch*) cvb)->scale; - - //Decimal64VectorBatch handles decimal columns with precision no greater than 18. - //Decimal128VectorBatch handles the others. 
- std::string decimal_str; - if (precision <= 18) { - decimal_str = std::to_string(((orc::Decimal64VectorBatch*) cvb)->values[_current_line_of_group]); - } else { - decimal_str = ((orc::Decimal128VectorBatch*) cvb)->values[_current_line_of_group].toString(); + + int negative = decimal_str[0] == '-' ? 1 : 0; + int decimal_scale_length = decimal_str.size() - negative; + + std::string v; + if (decimal_scale_length <= scale) { + // decimal(5,2) : the integer of 0.01 is 1, so we should fill 0 befor integer + v = std::string(negative ? "-0." : "0."); + int fill_zero = scale - decimal_scale_length; + while (fill_zero--) { + v += "0"; } - - int negative = decimal_str[0] == '-' ? 1 : 0; - int decimal_scale_length = decimal_str.size() - negative; - - std::string v; - if (decimal_scale_length <= scale) { - // decimal(5,2) : the integer of 0.01 is 1, so we should fill 0 befor integer - v = std::string(negative ? "-0." : "0."); - int fill_zero = scale - decimal_scale_length; - while (fill_zero--) { - v += "0"; - } - if (negative) { - v += decimal_str.substr(1, decimal_str.length()); - } else { - v += decimal_str; - } + if (negative) { + v += decimal_str.substr(1, decimal_str.length()); } else { - //Orc api will fill in 0 at the end, so size must greater than scale - v = decimal_str.substr(0, decimal_str.size() - scale) + "." + decimal_str.substr(decimal_str.size() - scale); + v += decimal_str; } - - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(v.size())); - memcpy(str_slot->ptr, v.c_str(), v.size()); - str_slot->len = v.size(); - break; + } else { + //Orc api will fill in 0 at the end, so size must greater than scale + v = decimal_str.substr(0, decimal_str.size() - scale) + "." + + decimal_str.substr(decimal_str.size() - scale); } - case orc::DATE: { - //Date columns record the number of days since the UNIX epoch (1/1/1970 in UTC). 
- int64_t timestamp = ((orc::LongVectorBatch*) cvb)->data[_current_line_of_group] * 24 * 60 * 60; - DateTimeValue dtv; - if (!dtv.from_unixtime(timestamp, "UTC")) { - std::stringstream str_error; - str_error << "Parse timestamp (" + std::to_string(timestamp) + ") error"; - LOG(WARNING) << str_error.str(); - return Status::InternalError(str_error.str()); - } - dtv.cast_to_date(); - char* buf_end = dtv.to_string((char*) tmp_buf); - wbytes = buf_end - (char*) tmp_buf -1; - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(wbytes)); - memcpy(str_slot->ptr, tmp_buf, wbytes); - str_slot->len = wbytes; - break; - } - case orc::TIMESTAMP: { - //The time zone of orc's timestamp is stored inside orc's stripe information, - //so the timestamp obtained here is an offset timestamp, so parse timestamp with UTC is actual datetime literal. - int64_t timestamp = ((orc::TimestampVectorBatch*) cvb)->data[_current_line_of_group]; - DateTimeValue dtv; - if (!dtv.from_unixtime(timestamp, "UTC")) { - std::stringstream str_error; - str_error << "Parse timestamp (" + std::to_string(timestamp) + ") error"; - LOG(WARNING) << str_error.str(); - return Status::InternalError(str_error.str()); - } - char* buf_end = dtv.to_string((char*) tmp_buf); - wbytes = buf_end - (char*) tmp_buf -1; - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(wbytes)); - memcpy(str_slot->ptr, tmp_buf, wbytes); - str_slot->len = wbytes; - break; + + str_slot->ptr = reinterpret_cast(tuple_pool->allocate(v.size())); + memcpy(str_slot->ptr, v.c_str(), v.size()); + str_slot->len = v.size(); + break; + } + case orc::DATE: { + //Date columns record the number of days since the UNIX epoch (1/1/1970 in UTC). 
+ int64_t timestamp = + ((orc::LongVectorBatch*)cvb)->data[_current_line_of_group] * 24 * + 60 * 60; + DateTimeValue dtv; + if (!dtv.from_unixtime(timestamp, "UTC")) { + std::stringstream str_error; + str_error + << "Parse timestamp (" + std::to_string(timestamp) + ") error"; + LOG(WARNING) << str_error.str(); + return Status::InternalError(str_error.str()); } - default: { + dtv.cast_to_date(); + char* buf_end = dtv.to_string((char*)tmp_buf); + wbytes = buf_end - (char*)tmp_buf - 1; + str_slot->ptr = reinterpret_cast(tuple_pool->allocate(wbytes)); + memcpy(str_slot->ptr, tmp_buf, wbytes); + str_slot->len = wbytes; + break; + } + case orc::TIMESTAMP: { + //The time zone of orc's timestamp is stored inside orc's stripe information, + //so the timestamp obtained here is an offset timestamp, so parse timestamp with UTC is actual datetime literal. + int64_t timestamp = + ((orc::TimestampVectorBatch*)cvb)->data[_current_line_of_group]; + DateTimeValue dtv; + if (!dtv.from_unixtime(timestamp, "UTC")) { std::stringstream str_error; - str_error << "The field name(" << slot_desc->col_name() << ") type not support. "; + str_error + << "Parse timestamp (" + std::to_string(timestamp) + ") error"; LOG(WARNING) << str_error.str(); return Status::InternalError(str_error.str()); } + char* buf_end = dtv.to_string((char*)tmp_buf); + wbytes = buf_end - (char*)tmp_buf - 1; + str_slot->ptr = reinterpret_cast(tuple_pool->allocate(wbytes)); + memcpy(str_slot->ptr, tmp_buf, wbytes); + str_slot->len = wbytes; + break; + } + default: { + std::stringstream str_error; + str_error << "The field name(" << slot_desc->col_name() + << ") type not support. 
"; + LOG(WARNING) << str_error.str(); + return Status::InternalError(str_error.str()); + } } } } @@ -336,13 +346,14 @@ Status ORCScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { // range of current file const TBrokerRangeDesc& range = _ranges.at(_next_range - 1); if (range.__isset.num_of_columns_from_file) { - fill_slots_of_columns_from_path(range.num_of_columns_from_file, range.columns_from_path); + fill_slots_of_columns_from_path(range.num_of_columns_from_file, + range.columns_from_path); } COUNTER_UPDATE(_rows_read_counter, 1); SCOPED_TIMER(_materialize_timer); if (fill_dest_tuple(tuple, tuple_pool)) { break; // get one line, break from while - } // else skip this line and continue get_next to return + } // else skip this line and continue get_next to return } return Status::OK(); } catch (orc::ParseError& e) { @@ -372,23 +383,26 @@ Status ORCScanner::open_next_reader() { const TBrokerRangeDesc& range = _ranges[_next_range++]; std::unique_ptr file_reader; switch (range.file_type) { - case TFileType::FILE_LOCAL: { - file_reader.reset(new LocalFileReader(range.path, range.start_offset)); - break; - } - case TFileType::FILE_BROKER: { - int64_t file_size = 0; - // for compatibility - if (range.__isset.file_size) { file_size = range.file_size; } - file_reader.reset(new BrokerReader(_state->exec_env(), _broker_addresses, _params.properties, - range.path, range.start_offset, file_size)); - break; - } - default: { - std::stringstream ss; - ss << "Unknown file type, type=" << range.file_type; - return Status::InternalError(ss.str()); + case TFileType::FILE_LOCAL: { + file_reader.reset(new LocalFileReader(range.path, range.start_offset)); + break; + } + case TFileType::FILE_BROKER: { + int64_t file_size = 0; + // for compatibility + if (range.__isset.file_size) { + file_size = range.file_size; } + file_reader.reset(new BrokerReader(_state->exec_env(), _broker_addresses, + _params.properties, range.path, range.start_offset, + file_size)); + break; + } + 
default: { + std::stringstream ss; + ss << "Unknown file type, type=" << range.file_type; + return Status::InternalError(ss.str()); + } } RETURN_IF_ERROR(file_reader->open()); if (file_reader->size() == 0) { @@ -413,7 +427,8 @@ Status ORCScanner::open_next_reader() { auto include_cols = _row_reader_options.getIncludeNames(); for (int i = 0; i < _row_reader->getSelectedType().getSubtypeCount(); ++i) { //include columns must in reader field, otherwise createRowReader will throw exception - auto pos = std::find(include_cols.begin(), include_cols.end(), _row_reader->getSelectedType().getFieldName(i)); + auto pos = std::find(include_cols.begin(), include_cols.end(), + _row_reader->getSelectedType().getFieldName(i)); _position_in_orc_original.at(std::distance(include_cols.begin(), pos)) = orc_index++; } return Status::OK(); @@ -426,4 +441,4 @@ void ORCScanner::close() { _row_reader.reset(nullptr); } -} +} // namespace doris diff --git a/be/src/exec/orc_scanner.h b/be/src/exec/orc_scanner.h index 85a5e883c7a00b..e1eb21de2c8da6 100644 --- a/be/src/exec/orc_scanner.h +++ b/be/src/exec/orc_scanner.h @@ -27,9 +27,7 @@ namespace doris { // Broker scanner convert the data read from broker to doris's tuple. class ORCScanner : public BaseScanner { public: - ORCScanner(RuntimeState* state, - RuntimeProfile* profile, - const TBrokerScanRangeParams& params, + ORCScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params, const std::vector& ranges, const std::vector& broker_addresses, ScannerCounter* counter); @@ -74,5 +72,5 @@ class ORCScanner : public BaseScanner { int64_t _current_line_of_group; }; -} +} // namespace doris #endif //ORC_SCANNER_H diff --git a/be/src/exec/parquet_reader.cpp b/be/src/exec/parquet_reader.cpp index 0b38c160109299..27a486d40be58d 100644 --- a/be/src/exec/parquet_reader.cpp +++ b/be/src/exec/parquet_reader.cpp @@ -15,27 +15,34 @@ // specific language governing permissions and limitations // under the License. 
#include "exec/parquet_reader.h" -#include -#include + #include -#include "exec/file_reader.h" +#include +#include + #include "common/logging.h" +#include "exec/file_reader.h" #include "gen_cpp/PaloBrokerService_types.h" #include "gen_cpp/TPaloBrokerService.h" #include "runtime/broker_mgr.h" #include "runtime/client_cache.h" -#include "runtime/exec_env.h" -#include "util/thrift_util.h" -#include "runtime/tuple.h" #include "runtime/descriptors.h" +#include "runtime/exec_env.h" #include "runtime/mem_pool.h" +#include "runtime/tuple.h" +#include "util/thrift_util.h" namespace doris { // Broker -ParquetReaderWrap::ParquetReaderWrap(FileReader *file_reader, int32_t num_of_columns_from_file) : - _num_of_columns_from_file(num_of_columns_from_file), _total_groups(0), _current_group(0), _rows_of_group(0), _current_line_of_group(0), _current_line_of_batch(0) { +ParquetReaderWrap::ParquetReaderWrap(FileReader* file_reader, int32_t num_of_columns_from_file) + : _num_of_columns_from_file(num_of_columns_from_file), + _total_groups(0), + _current_group(0), + _rows_of_group(0), + _current_line_of_group(0), + _current_line_of_batch(0) { _parquet = std::shared_ptr(new ParquetFile(file_reader)); _properties = parquet::ReaderProperties(); _properties.enable_buffered_stream(); @@ -45,12 +52,13 @@ ParquetReaderWrap::ParquetReaderWrap(FileReader *file_reader, int32_t num_of_col ParquetReaderWrap::~ParquetReaderWrap() { close(); } -Status ParquetReaderWrap::init_parquet_reader(const std::vector& tuple_slot_descs, const std::string& timezone) { +Status ParquetReaderWrap::init_parquet_reader(const std::vector& tuple_slot_descs, + const std::string& timezone) { try { // new file reader for parquet file - auto st = parquet::arrow::FileReader::Make(arrow::default_memory_pool(), - parquet::ParquetFileReader::Open(_parquet, _properties), - &_reader); + auto st = parquet::arrow::FileReader::Make( + arrow::default_memory_pool(), + parquet::ParquetFileReader::Open(_parquet, _properties), &_reader); 
if (!st.ok()) { LOG(WARNING) << "failed to create parquet file reader, errmsg=" << st.ToString(); return Status::InternalError("Failed to create file reader"); @@ -65,22 +73,23 @@ Status ParquetReaderWrap::init_parquet_reader(const std::vector _rows_of_group = _file_metadata->RowGroup(0)->num_rows(); // map - auto *schemaDescriptor = _file_metadata->schema(); + auto* schemaDescriptor = _file_metadata->schema(); for (int i = 0; i < _file_metadata->num_columns(); ++i) { // Get the Column Reader for the boolean column if (schemaDescriptor->Column(i)->max_definition_level() > 1) { _map_column.emplace(schemaDescriptor->Column(i)->path()->ToDotVector()[0], i); } else { _map_column.emplace(schemaDescriptor->Column(i)->name(), i); - } + } } - + _timezone = timezone; - if (_current_line_of_group == 0) {// the first read + if (_current_line_of_group == 0) { // the first read RETURN_IF_ERROR(column_indices(tuple_slot_descs)); // read batch - arrow::Status status = _reader->GetRecordBatchReader({_current_group}, _parquet_column_ids, &_rb_batch); + arrow::Status status = _reader->GetRecordBatchReader({_current_group}, + _parquet_column_ids, &_rb_batch); if (!status.ok()) { LOG(WARNING) << "Get RecordBatch Failed. " << status.ToString(); return Status::InternalError(status.ToString()); @@ -94,7 +103,7 @@ Status ParquetReaderWrap::init_parquet_reader(const std::vector //save column type std::shared_ptr field_schema = _batch->schema(); for (int i = 0; i < _parquet_column_ids.size(); i++) { - std::shared_ptr field = field_schema->field(i); + std::shared_ptr field = field_schema->field(i); if (!field) { LOG(WARNING) << "Get field schema failed. 
Column order:" << i; return Status::InternalError(status.ToString()); @@ -120,7 +129,8 @@ Status ParquetReaderWrap::size(int64_t* size) { return Status::OK(); } -inline void ParquetReaderWrap::fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, const uint8_t* value, int32_t len) { +inline void ParquetReaderWrap::fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, + const uint8_t* value, int32_t len) { tuple->set_not_null(slot_desc->null_indicator_offset()); void* slot = tuple->get_slot(slot_desc->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); @@ -130,8 +140,7 @@ inline void ParquetReaderWrap::fill_slot(Tuple* tuple, SlotDescriptor* slot_desc return; } -Status ParquetReaderWrap::column_indices(const std::vector& tuple_slot_descs) -{ +Status ParquetReaderWrap::column_indices(const std::vector& tuple_slot_descs) { _parquet_column_ids.clear(); for (int i = 0; i < _num_of_columns_from_file; i++) { auto slot_desc = tuple_slot_descs.at(i); @@ -152,7 +161,8 @@ Status ParquetReaderWrap::column_indices(const std::vector& tup inline Status ParquetReaderWrap::set_field_null(Tuple* tuple, const SlotDescriptor* slot_desc) { if (!slot_desc->is_nullable()) { std::stringstream str_error; - str_error << "The field name("<< slot_desc->col_name() <<") is not allowed null, but Parquet field is NULL."; + str_error << "The field name(" << slot_desc->col_name() + << ") is not allowed null, but Parquet field is NULL."; LOG(WARNING) << str_error.str(); return Status::RuntimeError(str_error.str()); } @@ -160,21 +170,25 @@ inline Status ParquetReaderWrap::set_field_null(Tuple* tuple, const SlotDescript return Status::OK(); } -Status ParquetReaderWrap::read_record_batch(const std::vector& tuple_slot_descs, bool* eof) { - if (_current_line_of_group >= _rows_of_group) {// read next row group - VLOG(7) << "read_record_batch, current group id:" << _current_group << " current line of group:" - << _current_line_of_group << " is larger than rows 
group size:" - << _rows_of_group << ". start to read next row group"; +Status ParquetReaderWrap::read_record_batch(const std::vector& tuple_slot_descs, + bool* eof) { + if (_current_line_of_group >= _rows_of_group) { // read next row group + VLOG(7) << "read_record_batch, current group id:" << _current_group + << " current line of group:" << _current_line_of_group + << " is larger than rows group size:" << _rows_of_group + << ". start to read next row group"; _current_group++; - if (_current_group >= _total_groups) {// read completed. + if (_current_group >= _total_groups) { // read completed. _parquet_column_ids.clear(); *eof = true; return Status::OK(); } _current_line_of_group = 0; - _rows_of_group = _file_metadata->RowGroup(_current_group)->num_rows(); //get rows of the current row group + _rows_of_group = _file_metadata->RowGroup(_current_group) + ->num_rows(); //get rows of the current row group // read batch - arrow::Status status = _reader->GetRecordBatchReader({_current_group}, _parquet_column_ids, &_rb_batch); + arrow::Status status = + _reader->GetRecordBatchReader({_current_group}, _parquet_column_ids, &_rb_batch); if (!status.ok()) { return Status::InternalError("Get RecordBatchReader Failed."); } @@ -184,9 +198,10 @@ Status ParquetReaderWrap::read_record_batch(const std::vector& } _current_line_of_batch = 0; } else if (_current_line_of_batch >= _batch->num_rows()) { - VLOG(7) << "read_record_batch, current group id:" << _current_group << " current line of batch:" - << _current_line_of_batch << " is larger than batch size:" - << _batch->num_rows() << ". start to read next batch"; + VLOG(7) << "read_record_batch, current group id:" << _current_group + << " current line of batch:" << _current_line_of_batch + << " is larger than batch size:" << _batch->num_rows() + << ". 
start to read next batch"; arrow::Status status = _rb_batch->ReadNext(&_batch); if (!status.ok()) { return Status::InternalError("Read Batch Error With Libarrow."); @@ -196,29 +211,30 @@ Status ParquetReaderWrap::read_record_batch(const std::vector& return Status::OK(); } -Status ParquetReaderWrap::handle_timestamp(const std::shared_ptr& ts_array, uint8_t *buf, int32_t *wbytes) { +Status ParquetReaderWrap::handle_timestamp(const std::shared_ptr& ts_array, + uint8_t* buf, int32_t* wbytes) { const auto type = std::dynamic_pointer_cast(ts_array->type()); // Doris only supports seconds int64_t timestamp = 0; switch (type->unit()) { - case arrow::TimeUnit::type::NANO: {// INT96 - timestamp = ts_array->Value(_current_line_of_batch) / 1000000000L; // convert to Second - break; - } - case arrow::TimeUnit::type::SECOND: { - timestamp = ts_array->Value(_current_line_of_batch); - break; - } - case arrow::TimeUnit::type::MILLI: { - timestamp = ts_array->Value(_current_line_of_batch) / 1000; // convert to Second - break; - } - case arrow::TimeUnit::type::MICRO: { - timestamp = ts_array->Value(_current_line_of_batch) / 1000000; // convert to Second - break; - } - default: - return Status::InternalError("Invalid Time Type."); + case arrow::TimeUnit::type::NANO: { // INT96 + timestamp = ts_array->Value(_current_line_of_batch) / 1000000000L; // convert to Second + break; + } + case arrow::TimeUnit::type::SECOND: { + timestamp = ts_array->Value(_current_line_of_batch); + break; + } + case arrow::TimeUnit::type::MILLI: { + timestamp = ts_array->Value(_current_line_of_batch) / 1000; // convert to Second + break; + } + case arrow::TimeUnit::type::MICRO: { + timestamp = ts_array->Value(_current_line_of_batch) / 1000000; // convert to Second + break; + } + default: + return Status::InternalError("Invalid Time Type."); } DateTimeValue dtv; @@ -226,254 +242,280 @@ Status ParquetReaderWrap::handle_timestamp(const std::shared_ptr& tuple_slot_descs, MemPool* mem_pool, bool* eof) { +Status 
ParquetReaderWrap::read(Tuple* tuple, const std::vector& tuple_slot_descs, + MemPool* mem_pool, bool* eof) { uint8_t tmp_buf[128] = {0}; int32_t wbytes = 0; - const uint8_t *value = nullptr; + const uint8_t* value = nullptr; int column_index = 0; try { size_t slots = _parquet_column_ids.size(); for (size_t i = 0; i < slots; ++i) { auto slot_desc = tuple_slot_descs[i]; - column_index = i;// column index in batch record + column_index = i; // column index in batch record switch (_parquet_column_type[i]) { - case arrow::Type::type::STRING: { - auto str_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (str_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - value = str_array->GetValue(_current_line_of_batch, &wbytes); - fill_slot(tuple, slot_desc, mem_pool, value, wbytes); - } - break; - } - case arrow::Type::type::INT32: { - auto int32_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (int32_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - int32_t value = int32_array->Value(_current_line_of_batch); - wbytes = sprintf((char*)tmp_buf, "%d", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; + case arrow::Type::type::STRING: { + auto str_array = + std::dynamic_pointer_cast(_batch->column(column_index)); + if (str_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + value = str_array->GetValue(_current_line_of_batch, &wbytes); + fill_slot(tuple, slot_desc, mem_pool, value, wbytes); } - case arrow::Type::type::INT64: { - auto int64_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (int64_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - int64_t value = int64_array->Value(_current_line_of_batch); - wbytes = sprintf((char*)tmp_buf, "%ld", value); - fill_slot(tuple, 
slot_desc, mem_pool, tmp_buf, wbytes); - } - break; + break; + } + case arrow::Type::type::INT32: { + auto int32_array = + std::dynamic_pointer_cast(_batch->column(column_index)); + if (int32_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + int32_t value = int32_array->Value(_current_line_of_batch); + wbytes = sprintf((char*)tmp_buf, "%d", value); + fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); } - case arrow::Type::type::UINT32: { - auto uint32_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (uint32_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - uint32_t value = uint32_array->Value(_current_line_of_batch); - wbytes = sprintf((char*)tmp_buf, "%u", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; + break; + } + case arrow::Type::type::INT64: { + auto int64_array = + std::dynamic_pointer_cast(_batch->column(column_index)); + if (int64_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + int64_t value = int64_array->Value(_current_line_of_batch); + wbytes = sprintf((char*)tmp_buf, "%ld", value); + fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); } - case arrow::Type::type::UINT64: { - auto uint64_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (uint64_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - uint64_t value = uint64_array->Value(_current_line_of_batch); - wbytes = sprintf((char*)tmp_buf, "%lu", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; + break; + } + case arrow::Type::type::UINT32: { + auto uint32_array = + std::dynamic_pointer_cast(_batch->column(column_index)); + if (uint32_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + uint32_t value = 
uint32_array->Value(_current_line_of_batch); + wbytes = sprintf((char*)tmp_buf, "%u", value); + fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); } - case arrow::Type::type::BINARY: { - auto str_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (str_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - value = str_array->GetValue(_current_line_of_batch, &wbytes); - fill_slot(tuple, slot_desc, mem_pool, value, wbytes); - } - break; + break; + } + case arrow::Type::type::UINT64: { + auto uint64_array = + std::dynamic_pointer_cast(_batch->column(column_index)); + if (uint64_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + uint64_t value = uint64_array->Value(_current_line_of_batch); + wbytes = sprintf((char*)tmp_buf, "%lu", value); + fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); } - case arrow::Type::type::FIXED_SIZE_BINARY: { - auto fixed_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (fixed_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - std::string value = fixed_array->GetString(_current_line_of_batch); - fill_slot(tuple, slot_desc, mem_pool, (uint8_t*)value.c_str(), value.length()); - } - break; + break; + } + case arrow::Type::type::BINARY: { + auto str_array = + std::dynamic_pointer_cast(_batch->column(column_index)); + if (str_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + value = str_array->GetValue(_current_line_of_batch, &wbytes); + fill_slot(tuple, slot_desc, mem_pool, value, wbytes); } - case arrow::Type::type::BOOL: { - auto boolean_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (boolean_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - bool value = 
boolean_array->Value(_current_line_of_batch); - if (value) { - fill_slot(tuple, slot_desc, mem_pool, (uint8_t*)"true", 4); - } else { - fill_slot(tuple, slot_desc, mem_pool, (uint8_t*)"false", 5); - } - } - break; + break; + } + case arrow::Type::type::FIXED_SIZE_BINARY: { + auto fixed_array = std::dynamic_pointer_cast( + _batch->column(column_index)); + if (fixed_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + std::string value = fixed_array->GetString(_current_line_of_batch); + fill_slot(tuple, slot_desc, mem_pool, (uint8_t*)value.c_str(), value.length()); } - case arrow::Type::type::UINT8: { - auto uint8_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (uint8_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + break; + } + case arrow::Type::type::BOOL: { + auto boolean_array = std::dynamic_pointer_cast( + _batch->column(column_index)); + if (boolean_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + bool value = boolean_array->Value(_current_line_of_batch); + if (value) { + fill_slot(tuple, slot_desc, mem_pool, (uint8_t*)"true", 4); } else { - uint8_t value = uint8_array->Value(_current_line_of_batch); - wbytes = sprintf((char*)tmp_buf, "%d", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); + fill_slot(tuple, slot_desc, mem_pool, (uint8_t*)"false", 5); } - break; } - case arrow::Type::type::INT8: { - auto int8_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (int8_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - int8_t value = int8_array->Value(_current_line_of_batch); - wbytes = sprintf((char*)tmp_buf, "%d", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; + break; + } + case arrow::Type::type::UINT8: { + auto uint8_array = + 
std::dynamic_pointer_cast(_batch->column(column_index)); + if (uint8_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + uint8_t value = uint8_array->Value(_current_line_of_batch); + wbytes = sprintf((char*)tmp_buf, "%d", value); + fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); } - case arrow::Type::type::UINT16: { - auto uint16_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (uint16_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - uint16_t value = uint16_array->Value(_current_line_of_batch); - wbytes = sprintf((char*)tmp_buf, "%d", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; + break; + } + case arrow::Type::type::INT8: { + auto int8_array = + std::dynamic_pointer_cast(_batch->column(column_index)); + if (int8_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + int8_t value = int8_array->Value(_current_line_of_batch); + wbytes = sprintf((char*)tmp_buf, "%d", value); + fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); } - case arrow::Type::type::INT16: { - auto int16_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (int16_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - int16_t value = int16_array->Value(_current_line_of_batch); - wbytes = sprintf((char*)tmp_buf, "%d", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; + break; + } + case arrow::Type::type::UINT16: { + auto uint16_array = + std::dynamic_pointer_cast(_batch->column(column_index)); + if (uint16_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + uint16_t value = uint16_array->Value(_current_line_of_batch); + wbytes = sprintf((char*)tmp_buf, "%d", value); + fill_slot(tuple, slot_desc, mem_pool, tmp_buf, 
wbytes); } - case arrow::Type::type::HALF_FLOAT: { - auto half_float_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (half_float_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - float value = half_float_array->Value(_current_line_of_batch); - wbytes = sprintf((char*)tmp_buf, "%f", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; + break; + } + case arrow::Type::type::INT16: { + auto int16_array = + std::dynamic_pointer_cast(_batch->column(column_index)); + if (int16_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + int16_t value = int16_array->Value(_current_line_of_batch); + wbytes = sprintf((char*)tmp_buf, "%d", value); + fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); } - case arrow::Type::type::FLOAT: { - auto float_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (float_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - float value = float_array->Value(_current_line_of_batch); - wbytes = sprintf((char*)tmp_buf, "%f", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; + break; + } + case arrow::Type::type::HALF_FLOAT: { + auto half_float_array = std::dynamic_pointer_cast( + _batch->column(column_index)); + if (half_float_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + float value = half_float_array->Value(_current_line_of_batch); + wbytes = sprintf((char*)tmp_buf, "%f", value); + fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); } - case arrow::Type::type::DOUBLE: { - auto double_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (double_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - float value = double_array->Value(_current_line_of_batch); - 
wbytes = sprintf((char*)tmp_buf, "%f", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; + break; + } + case arrow::Type::type::FLOAT: { + auto float_array = + std::dynamic_pointer_cast(_batch->column(column_index)); + if (float_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + float value = float_array->Value(_current_line_of_batch); + wbytes = sprintf((char*)tmp_buf, "%f", value); + fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); } - case arrow::Type::type::TIMESTAMP: { - auto ts_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (ts_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - RETURN_IF_ERROR(handle_timestamp(ts_array, tmp_buf, &wbytes));// convert timestamp to string time - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; + break; + } + case arrow::Type::type::DOUBLE: { + auto double_array = + std::dynamic_pointer_cast(_batch->column(column_index)); + if (double_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + float value = double_array->Value(_current_line_of_batch); + wbytes = sprintf((char*)tmp_buf, "%f", value); + fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); } - case arrow::Type::type::DECIMAL: { - auto decimal_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (decimal_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - std::string value = decimal_array->FormatValue(_current_line_of_batch); - fill_slot(tuple, slot_desc, mem_pool, (const uint8_t*)value.c_str(), value.length()); - } - break; + break; + } + case arrow::Type::type::TIMESTAMP: { + auto ts_array = std::dynamic_pointer_cast( + _batch->column(column_index)); + if (ts_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } 
else { + RETURN_IF_ERROR(handle_timestamp(ts_array, tmp_buf, + &wbytes)); // convert timestamp to string time + fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); } - case arrow::Type::type::DATE32: { - auto ts_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (ts_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - time_t timestamp = (time_t)((int64_t)ts_array->Value(_current_line_of_batch) * 24 * 60 * 60); - struct tm local; - localtime_r(×tamp, &local); - char* to = reinterpret_cast(&tmp_buf); - wbytes = (uint32_t)strftime(to, 64, "%Y-%m-%d", &local); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; + break; + } + case arrow::Type::type::DECIMAL: { + auto decimal_array = std::dynamic_pointer_cast( + _batch->column(column_index)); + if (decimal_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + std::string value = decimal_array->FormatValue(_current_line_of_batch); + fill_slot(tuple, slot_desc, mem_pool, (const uint8_t*)value.c_str(), + value.length()); } - case arrow::Type::type::DATE64: { - auto ts_array = std::dynamic_pointer_cast(_batch->column(column_index)); - if (ts_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - // convert milliseconds to seconds - time_t timestamp = (time_t)((int64_t)ts_array->Value(_current_line_of_batch) / 1000); - struct tm local; - localtime_r(×tamp, &local); - char* to = reinterpret_cast(&tmp_buf); - wbytes = (uint32_t)strftime(to, 64, "%Y-%m-%d %H:%M:%S", &local); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; + break; + } + case arrow::Type::type::DATE32: { + auto ts_array = + std::dynamic_pointer_cast(_batch->column(column_index)); + if (ts_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + time_t timestamp = 
(time_t)((int64_t)ts_array->Value(_current_line_of_batch) * + 24 * 60 * 60); + struct tm local; + localtime_r(×tamp, &local); + char* to = reinterpret_cast(&tmp_buf); + wbytes = (uint32_t)strftime(to, 64, "%Y-%m-%d", &local); + fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); } - default: { - // other type not support. - std::stringstream str_error; - str_error << "The field name("<< slot_desc->col_name() <<"), type("<< _parquet_column_type[i] << - ") not support. RowGroup: " << _current_group - << ", Row: " << _current_line_of_group << ", ColumnIndex:" << column_index; - LOG(WARNING) << str_error.str(); - return Status::InternalError(str_error.str()); + break; + } + case arrow::Type::type::DATE64: { + auto ts_array = + std::dynamic_pointer_cast(_batch->column(column_index)); + if (ts_array->IsNull(_current_line_of_batch)) { + RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); + } else { + // convert milliseconds to seconds + time_t timestamp = + (time_t)((int64_t)ts_array->Value(_current_line_of_batch) / 1000); + struct tm local; + localtime_r(×tamp, &local); + char* to = reinterpret_cast(&tmp_buf); + wbytes = (uint32_t)strftime(to, 64, "%Y-%m-%d %H:%M:%S", &local); + fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); } + break; + } + default: { + // other type not support. + std::stringstream str_error; + str_error << "The field name(" << slot_desc->col_name() << "), type(" + << _parquet_column_type[i] + << ") not support. 
RowGroup: " << _current_group + << ", Row: " << _current_line_of_group + << ", ColumnIndex:" << column_index; + LOG(WARNING) << str_error.str(); + return Status::InternalError(str_error.str()); + } } } } catch (parquet::ParquetException& e) { std::stringstream str_error; - str_error << e.what() << " RowGroup:" << _current_group << ", Row:" << _current_line_of_group - << ", ColumnIndex " << column_index; + str_error << e.what() << " RowGroup:" << _current_group + << ", Row:" << _current_line_of_group << ", ColumnIndex " << column_index; LOG(WARNING) << str_error.str(); return Status::InternalError(str_error.str()); } @@ -484,9 +526,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector& return read_record_batch(tuple_slot_descs, eof); } -ParquetFile::ParquetFile(FileReader *file): _file(file) { - -} +ParquetFile::ParquetFile(FileReader* file) : _file(file) {} ParquetFile::~ParquetFile() { Close(); @@ -513,7 +553,8 @@ arrow::Status ParquetFile::Read(int64_t nbytes, int64_t* bytes_read, void* buffe return ReadAt(_pos, nbytes, bytes_read, buffer); } -arrow::Status ParquetFile::ReadAt(int64_t position, int64_t nbytes, int64_t* bytes_read, void* out) { +arrow::Status ParquetFile::ReadAt(int64_t position, int64_t nbytes, int64_t* bytes_read, + void* out) { int64_t reads = 0; _pos = position; while (nbytes > 0) { @@ -525,8 +566,8 @@ arrow::Status ParquetFile::ReadAt(int64_t position, int64_t nbytes, int64_t* byt if (reads == 0) { break; } - *bytes_read += reads;// total read bytes - nbytes -= reads; // remained bytes + *bytes_read += reads; // total read bytes + nbytes -= reads; // remained bytes _pos += reads; out = (char*)out + reads; } @@ -544,7 +585,6 @@ arrow::Status ParquetFile::Seek(int64_t position) { return arrow::Status::OK(); } - arrow::Status ParquetFile::Tell(int64_t* position) const { *position = _pos; return arrow::Status::OK(); @@ -564,4 +604,4 @@ arrow::Status ParquetFile::Read(int64_t nbytes, std::shared_ptr* return arrow::Status::OK(); } -} 
+} // namespace doris diff --git a/be/src/exec/parquet_reader.h b/be/src/exec/parquet_reader.h index 7a3fcc45ab742f..e5366e00da9346 100644 --- a/be/src/exec/parquet_reader.h +++ b/be/src/exec/parquet_reader.h @@ -17,25 +17,25 @@ #pragma once -#include - -#include -#include #include +#include #include #include #include -#include #include #include #include #include #include +#include + +#include +#include #include "common/status.h" -#include "gen_cpp/Types_types.h" #include "gen_cpp/PaloBrokerService_types.h" #include "gen_cpp/PlanNodes_types.h" +#include "gen_cpp/Types_types.h" namespace doris { @@ -50,40 +50,44 @@ class FileReader; class ParquetFile : public arrow::io::RandomAccessFile { public: - ParquetFile(FileReader *file); + ParquetFile(FileReader* file); virtual ~ParquetFile(); arrow::Status Read(int64_t nbytes, int64_t* bytes_read, void* buffer) override; - arrow::Status ReadAt(int64_t position, int64_t nbytes, int64_t* bytes_read, - void* out) override; + arrow::Status ReadAt(int64_t position, int64_t nbytes, int64_t* bytes_read, void* out) override; arrow::Status GetSize(int64_t* size) override; arrow::Status Seek(int64_t position) override; arrow::Status Read(int64_t nbytes, std::shared_ptr* out) override; arrow::Status Tell(int64_t* position) const override; arrow::Status Close() override; bool closed() const override; + private: - FileReader *_file; + FileReader* _file; int64_t _pos = 0; }; // Reader of broker parquet file class ParquetReaderWrap { public: - ParquetReaderWrap(FileReader *file_reader, int32_t num_of_columns_from_file); + ParquetReaderWrap(FileReader* file_reader, int32_t num_of_columns_from_file); virtual ~ParquetReaderWrap(); - // Read - Status read(Tuple* tuple, const std::vector& tuple_slot_descs, MemPool* mem_pool, bool* eof); + // Read + Status read(Tuple* tuple, const std::vector& tuple_slot_descs, + MemPool* mem_pool, bool* eof); void close(); Status size(int64_t* size); - Status init_parquet_reader(const std::vector& 
tuple_slot_descs, const std::string& timezone); + Status init_parquet_reader(const std::vector& tuple_slot_descs, + const std::string& timezone); private: - void fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, const uint8_t* value, int32_t len); + void fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, const uint8_t* value, + int32_t len); Status column_indices(const std::vector& tuple_slot_descs); Status set_field_null(Tuple* tuple, const SlotDescriptor* slot_desc); Status read_record_batch(const std::vector& tuple_slot_descs, bool* eof); - Status handle_timestamp(const std::shared_ptr& ts_array, uint8_t *buf, int32_t *wbtyes); + Status handle_timestamp(const std::shared_ptr& ts_array, uint8_t* buf, + int32_t* wbtyes); private: const int32_t _num_of_columns_from_file; @@ -104,9 +108,8 @@ class ParquetReaderWrap { int _rows_of_group; // rows in a group. int _current_line_of_group; int _current_line_of_batch; - + std::string _timezone; }; -} - +} // namespace doris diff --git a/be/src/exec/parquet_scanner.cpp b/be/src/exec/parquet_scanner.cpp index adad61e9b0e134..119e162d0b77d7 100644 --- a/be/src/exec/parquet_scanner.cpp +++ b/be/src/exec/parquet_scanner.cpp @@ -16,39 +16,37 @@ // under the License. 
#include "exec/parquet_scanner.h" + +#include "exec/broker_reader.h" +#include "exec/buffered_reader.h" +#include "exec/decompressor.h" +#include "exec/local_file_reader.h" +#include "exec/parquet_reader.h" +#include "exec/text_converter.h" +#include "exec/text_converter.hpp" +#include "exprs/expr.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/raw_value.h" #include "runtime/stream_load/load_stream_mgr.h" #include "runtime/stream_load/stream_load_pipe.h" #include "runtime/tuple.h" -#include "exec/parquet_reader.h" -#include "exprs/expr.h" -#include "exec/text_converter.h" -#include "exec/text_converter.hpp" -#include "exec/local_file_reader.h" -#include "exec/broker_reader.h" -#include "exec/buffered_reader.h" -#include "exec/decompressor.h" -#include "exec/parquet_reader.h" namespace doris { - -ParquetScanner::ParquetScanner(RuntimeState* state, - RuntimeProfile* profile, - const TBrokerScanRangeParams& params, - const std::vector& ranges, - const std::vector& broker_addresses, - ScannerCounter* counter) : BaseScanner(state, profile, params, counter), - _ranges(ranges), - _broker_addresses(broker_addresses), - // _splittable(params.splittable), - _cur_file_reader(nullptr), - _next_range(0), - _cur_file_eof(false), - _scanner_eof(false) { -} +ParquetScanner::ParquetScanner(RuntimeState* state, RuntimeProfile* profile, + const TBrokerScanRangeParams& params, + const std::vector& ranges, + const std::vector& broker_addresses, + ScannerCounter* counter) + : BaseScanner(state, profile, params, counter), + _ranges(ranges), + _broker_addresses(broker_addresses), + // _splittable(params.splittable), + _cur_file_reader(nullptr), + _next_range(0), + _cur_file_eof(false), + _scanner_eof(false) {} ParquetScanner::~ParquetScanner() { close(); @@ -70,17 +68,19 @@ Status ParquetScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { } _cur_file_eof = false; } - RETURN_IF_ERROR(_cur_file_reader->read(_src_tuple, _src_slot_descs, 
tuple_pool, &_cur_file_eof)); + RETURN_IF_ERROR( + _cur_file_reader->read(_src_tuple, _src_slot_descs, tuple_pool, &_cur_file_eof)); // range of current file const TBrokerRangeDesc& range = _ranges.at(_next_range - 1); if (range.__isset.num_of_columns_from_file) { - fill_slots_of_columns_from_path(range.num_of_columns_from_file, range.columns_from_path); + fill_slots_of_columns_from_path(range.num_of_columns_from_file, + range.columns_from_path); } COUNTER_UPDATE(_rows_read_counter, 1); SCOPED_TIMER(_materialize_timer); if (fill_dest_tuple(tuple, tuple_pool)) { - break;// break if true + break; // break if true } } if (_scanner_eof) { @@ -111,18 +111,21 @@ Status ParquetScanner::open_next_reader() { const TBrokerRangeDesc& range = _ranges[_next_range++]; std::unique_ptr file_reader; switch (range.file_type) { - case TFileType::FILE_LOCAL: { - file_reader.reset(new LocalFileReader(range.path, range.start_offset)); - break; - } - case TFileType::FILE_BROKER: { - int64_t file_size = 0; - // for compatibility - if (range.__isset.file_size) { file_size = range.file_size; } - file_reader.reset(new BufferedReader(new BrokerReader(_state->exec_env(), _broker_addresses, _params.properties, - range.path, range.start_offset, file_size))); - break; + case TFileType::FILE_LOCAL: { + file_reader.reset(new LocalFileReader(range.path, range.start_offset)); + break; + } + case TFileType::FILE_BROKER: { + int64_t file_size = 0; + // for compatibility + if (range.__isset.file_size) { + file_size = range.file_size; } + file_reader.reset(new BufferedReader( + new BrokerReader(_state->exec_env(), _broker_addresses, _params.properties, + range.path, range.start_offset, file_size))); + break; + } #if 0 case TFileType::FILE_STREAM: { @@ -134,11 +137,11 @@ Status ParquetScanner::open_next_reader() { break; } #endif - default: { - std::stringstream ss; - ss << "Unknown file type, type=" << range.file_type; - return Status::InternalError(ss.str()); - } + default: { + std::stringstream ss; + 
ss << "Unknown file type, type=" << range.file_type; + return Status::InternalError(ss.str()); + } } RETURN_IF_ERROR(file_reader->open()); if (file_reader->size() == 0) { @@ -146,13 +149,14 @@ Status ParquetScanner::open_next_reader() { continue; } if (range.__isset.num_of_columns_from_file) { - _cur_file_reader = new ParquetReaderWrap(file_reader.release(), range.num_of_columns_from_file); + _cur_file_reader = + new ParquetReaderWrap(file_reader.release(), range.num_of_columns_from_file); } else { _cur_file_reader = new ParquetReaderWrap(file_reader.release(), _src_slot_descs.size()); } Status status = _cur_file_reader->init_parquet_reader(_src_slot_descs, _state->timezone()); - + if (status.is_end_of_file()) { continue; } else { @@ -179,4 +183,4 @@ void ParquetScanner::close() { } } -} +} // namespace doris diff --git a/be/src/exec/parquet_scanner.h b/be/src/exec/parquet_scanner.h index 09d92ff8b76af7..3e7bdfffd6c32a 100644 --- a/be/src/exec/parquet_scanner.h +++ b/be/src/exec/parquet_scanner.h @@ -17,19 +17,19 @@ #pragma once -#include -#include -#include #include +#include #include +#include +#include -#include "exec/base_scanner.h" #include "common/status.h" +#include "exec/base_scanner.h" #include "gen_cpp/PlanNodes_types.h" #include "gen_cpp/Types_types.h" #include "runtime/mem_pool.h" -#include "util/slice.h" #include "util/runtime_profile.h" +#include "util/slice.h" namespace doris { @@ -48,13 +48,10 @@ class StreamLoadPipe; // Broker scanner convert the data read from broker to doris's tuple. 
class ParquetScanner : public BaseScanner { public: - ParquetScanner( - RuntimeState* state, - RuntimeProfile* profile, - const TBrokerScanRangeParams& params, - const std::vector& ranges, - const std::vector& broker_addresses, - ScannerCounter* counter); + ParquetScanner(RuntimeState* state, RuntimeProfile* profile, + const TBrokerScanRangeParams& params, + const std::vector& ranges, + const std::vector& broker_addresses, ScannerCounter* counter); ~ParquetScanner(); // Open this scanner, will initialize information need to @@ -85,4 +82,4 @@ class ParquetScanner : public BaseScanner { std::shared_ptr _stream_load_pipe; }; -} +} // namespace doris diff --git a/be/src/exec/parquet_writer.cpp b/be/src/exec/parquet_writer.cpp index 6c9f4033166316..474cd41a6d9b24 100644 --- a/be/src/exec/parquet_writer.cpp +++ b/be/src/exec/parquet_writer.cpp @@ -17,26 +17,26 @@ #include "exec/parquet_writer.h" -#include -#include #include +#include +#include -#include "exec/file_writer.h" #include "common/logging.h" +#include "exec/file_writer.h" #include "gen_cpp/PaloBrokerService_types.h" #include "gen_cpp/TPaloBrokerService.h" #include "runtime/broker_mgr.h" #include "runtime/client_cache.h" -#include "runtime/exec_env.h" -#include "runtime/tuple.h" #include "runtime/descriptors.h" +#include "runtime/exec_env.h" #include "runtime/mem_pool.h" +#include "runtime/tuple.h" #include "util/thrift_util.h" namespace doris { /// ParquetOutputStream -ParquetOutputStream::ParquetOutputStream(FileWriter* file_writer): _file_writer(file_writer) { +ParquetOutputStream::ParquetOutputStream(FileWriter* file_writer) : _file_writer(file_writer) { set_mode(arrow::io::FileMode::WRITE); } @@ -69,8 +69,9 @@ arrow::Status ParquetOutputStream::Close() { } /// ParquetWriterWrapper -ParquetWriterWrapper::ParquetWriterWrapper(FileWriter *file_writer, const std::vector& output_expr_ctxs) : - _output_expr_ctxs(output_expr_ctxs) { +ParquetWriterWrapper::ParquetWriterWrapper(FileWriter* file_writer, + const 
std::vector& output_expr_ctxs) + : _output_expr_ctxs(output_expr_ctxs) { // TODO(cmy): implement _outstream = new ParquetOutputStream(file_writer); } @@ -88,4 +89,4 @@ ParquetWriterWrapper::~ParquetWriterWrapper() { close(); } -} // end namespace +} // namespace doris diff --git a/be/src/exec/parquet_writer.h b/be/src/exec/parquet_writer.h index 5147d2105a8044..7499e81d29c1ce 100644 --- a/be/src/exec/parquet_writer.h +++ b/be/src/exec/parquet_writer.h @@ -17,25 +17,25 @@ #pragma once -#include - -#include -#include #include +#include #include #include #include -#include #include #include #include #include #include +#include + +#include +#include #include "common/status.h" -#include "gen_cpp/Types_types.h" #include "gen_cpp/PaloBrokerService_types.h" #include "gen_cpp/PlanNodes_types.h" +#include "gen_cpp/Types_types.h" namespace doris { @@ -53,19 +53,19 @@ class ParquetOutputStream : public arrow::io::OutputStream { arrow::Status Tell(int64_t* position) const override; arrow::Status Close() override; - bool closed() const override { - return _is_closed; - } + bool closed() const override { return _is_closed; } + private: FileWriter* _file_writer; // not owned - int64_t _cur_pos; // current write position + int64_t _cur_pos; // current write position bool _is_closed = false; }; // a wrapper of parquet output stream class ParquetWriterWrapper { public: - ParquetWriterWrapper(FileWriter *file_writer, const std::vector& output_expr_ctxs); + ParquetWriterWrapper(FileWriter* file_writer, + const std::vector& output_expr_ctxs); virtual ~ParquetWriterWrapper(); Status write(const RowBatch& row_batch); @@ -77,5 +77,4 @@ class ParquetWriterWrapper { const std::vector& _output_expr_ctxs; }; -} - +} // namespace doris diff --git a/be/src/exec/partitioned_aggregation_node.cc b/be/src/exec/partitioned_aggregation_node.cc index feab536c8c3860..35cf67be4379f9 100644 --- a/be/src/exec/partitioned_aggregation_node.cc +++ b/be/src/exec/partitioned_aggregation_node.cc @@ -18,17 +18,20 
@@ #include "exec/partitioned_aggregation_node.h" #include + #include #include #include #include "exec/partitioned_hash_table.h" #include "exec/partitioned_hash_table.inline.h" -#include "exprs/new_agg_fn_evaluator.h" #include "exprs/anyval_util.h" #include "exprs/expr_context.h" +#include "exprs/new_agg_fn_evaluator.h" // #include "exprs/scalar_expr_evaluator.h" #include "exprs/slot_ref.h" +#include "gen_cpp/Exprs_types.h" +#include "gen_cpp/PlanNodes_types.h" #include "gutil/strings/substitute.h" #include "runtime/buffered_tuple_stream3.inline.h" #include "runtime/descriptors.h" @@ -39,15 +42,10 @@ #include "runtime/row_batch.h" #include "runtime/runtime_state.h" #include "runtime/string_value.h" -#include "runtime/tuple_row.h" #include "runtime/tuple.h" +#include "runtime/tuple_row.h" #include "udf/udf_internal.h" -#include "gen_cpp/Exprs_types.h" -#include "gen_cpp/PlanNodes_types.h" - - - using namespace strings; namespace doris { @@ -72,70 +70,69 @@ namespace doris { /// is in a random order. This means that we assume that the reduction factor will /// increase over time. struct StreamingHtMinReductionEntry { - // Use 'streaming_ht_min_reduction' if the total size of hash table bucket directories in - // bytes is greater than this threshold. - int min_ht_mem; - // The minimum reduction factor to expand the hash tables. - double streaming_ht_min_reduction; + // Use 'streaming_ht_min_reduction' if the total size of hash table bucket directories in + // bytes is greater than this threshold. + int min_ht_mem; + // The minimum reduction factor to expand the hash tables. + double streaming_ht_min_reduction; }; // TODO: experimentally tune these values and also programmatically get the cache size // of the machine that we're running on. static const StreamingHtMinReductionEntry STREAMING_HT_MIN_REDUCTION[] = { - // Expand up to L2 cache always. - {0, 0.0}, - // Expand into L3 cache if we look like we're getting some reduction. 
- {256 * 1024, 1.1}, - // Expand into main memory if we're getting a significant reduction. - {2 * 1024 * 1024, 2.0}, + // Expand up to L2 cache always. + {0, 0.0}, + // Expand into L3 cache if we look like we're getting some reduction. + {256 * 1024, 1.1}, + // Expand into main memory if we're getting a significant reduction. + {2 * 1024 * 1024, 2.0}, }; static const int STREAMING_HT_MIN_REDUCTION_SIZE = - sizeof(STREAMING_HT_MIN_REDUCTION) / sizeof(STREAMING_HT_MIN_REDUCTION[0]); - -PartitionedAggregationNode::PartitionedAggregationNode( - ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), - intermediate_tuple_id_(tnode.agg_node.intermediate_tuple_id), - intermediate_tuple_desc_(descs.get_tuple_descriptor(intermediate_tuple_id_)), - intermediate_row_desc_(intermediate_tuple_desc_, false), - output_tuple_id_(tnode.agg_node.output_tuple_id), - output_tuple_desc_(descs.get_tuple_descriptor(output_tuple_id_)), - needs_finalize_(tnode.agg_node.need_finalize), - needs_serialize_(false), - output_partition_(NULL), - process_batch_no_grouping_fn_(NULL), - process_batch_fn_(NULL), - process_batch_streaming_fn_(NULL), - build_timer_(NULL), - ht_resize_timer_(NULL), - ht_resize_counter_(NULL), - get_results_timer_(NULL), - num_hash_buckets_(NULL), - num_hash_filled_buckets_(NULL), - num_hash_probe_(NULL), - num_hash_failed_probe_(NULL), - num_hash_travel_length_(NULL), - num_hash_collisions_(NULL), - partitions_created_(NULL), - max_partition_level_(NULL), - num_row_repartitioned_(NULL), - num_repartitions_(NULL), - num_spilled_partitions_(NULL), - largest_partition_percent_(NULL), - streaming_timer_(NULL), - num_processed_rows_(NULL), - num_passthrough_rows_(NULL), - preagg_estimated_reduction_(NULL), - preagg_streaming_ht_min_reduction_(NULL), -// estimated_input_cardinality_(tnode.agg_node.estimated_input_cardinality), - singleton_output_tuple_(NULL), - singleton_output_tuple_returned_(true), - partition_eos_(false), - 
child_eos_(false), - partition_pool_(new ObjectPool()) { - - DCHECK_EQ(PARTITION_FANOUT, 1 << NUM_PARTITIONING_BITS); + sizeof(STREAMING_HT_MIN_REDUCTION) / sizeof(STREAMING_HT_MIN_REDUCTION[0]); + +PartitionedAggregationNode::PartitionedAggregationNode(ObjectPool* pool, const TPlanNode& tnode, + const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), + intermediate_tuple_id_(tnode.agg_node.intermediate_tuple_id), + intermediate_tuple_desc_(descs.get_tuple_descriptor(intermediate_tuple_id_)), + intermediate_row_desc_(intermediate_tuple_desc_, false), + output_tuple_id_(tnode.agg_node.output_tuple_id), + output_tuple_desc_(descs.get_tuple_descriptor(output_tuple_id_)), + needs_finalize_(tnode.agg_node.need_finalize), + needs_serialize_(false), + output_partition_(NULL), + process_batch_no_grouping_fn_(NULL), + process_batch_fn_(NULL), + process_batch_streaming_fn_(NULL), + build_timer_(NULL), + ht_resize_timer_(NULL), + ht_resize_counter_(NULL), + get_results_timer_(NULL), + num_hash_buckets_(NULL), + num_hash_filled_buckets_(NULL), + num_hash_probe_(NULL), + num_hash_failed_probe_(NULL), + num_hash_travel_length_(NULL), + num_hash_collisions_(NULL), + partitions_created_(NULL), + max_partition_level_(NULL), + num_row_repartitioned_(NULL), + num_repartitions_(NULL), + num_spilled_partitions_(NULL), + largest_partition_percent_(NULL), + streaming_timer_(NULL), + num_processed_rows_(NULL), + num_passthrough_rows_(NULL), + preagg_estimated_reduction_(NULL), + preagg_streaming_ht_min_reduction_(NULL), + // estimated_input_cardinality_(tnode.agg_node.estimated_input_cardinality), + singleton_output_tuple_(NULL), + singleton_output_tuple_returned_(true), + partition_eos_(false), + child_eos_(false), + partition_pool_(new ObjectPool()) { + DCHECK_EQ(PARTITION_FANOUT, 1 << NUM_PARTITIONING_BITS); if (tnode.agg_node.__isset.use_streaming_preaggregation) { is_streaming_preagg_ = tnode.agg_node.use_streaming_preaggregation; @@ -150,1329 +147,1311 @@ 
PartitionedAggregationNode::PartitionedAggregationNode( } Status PartitionedAggregationNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::init(tnode)); - DCHECK(intermediate_tuple_desc_ != nullptr); - DCHECK(output_tuple_desc_ != nullptr); - DCHECK_EQ(intermediate_tuple_desc_->slots().size(), output_tuple_desc_->slots().size()); - - const RowDescriptor& row_desc = child(0)->row_desc(); - RETURN_IF_ERROR(Expr::create(tnode.agg_node.grouping_exprs, row_desc, - state, &grouping_exprs_, mem_tracker())); - // Construct build exprs from intermediate_row_desc_ - for (int i = 0; i < grouping_exprs_.size(); ++i) { - SlotDescriptor* desc = intermediate_tuple_desc_->slots()[i]; - //DCHECK(desc->type().type == TYPE_NULL || desc->type() == grouping_exprs_[i]->type()); - // Hack to avoid TYPE_NULL SlotRefs. - SlotRef* build_expr = _pool->add(desc->type().type != TYPE_NULL ? - new SlotRef(desc) : new SlotRef(desc, TYPE_BOOLEAN)); - build_exprs_.push_back(build_expr); - // TODO chenhao - RETURN_IF_ERROR(build_expr->prepare(state, intermediate_row_desc_, nullptr)); - if (build_expr->type().is_var_len_string_type()) string_grouping_exprs_.push_back(i); - } - - int j = grouping_exprs_.size(); - for (int i = 0; i < tnode.agg_node.aggregate_functions.size(); ++i, ++j) { - SlotDescriptor* intermediate_slot_desc = intermediate_tuple_desc_->slots()[j]; - SlotDescriptor* output_slot_desc = output_tuple_desc_->slots()[j]; - AggFn* agg_fn; - RETURN_IF_ERROR(AggFn::Create(tnode.agg_node.aggregate_functions[i], row_desc, - *intermediate_slot_desc, *output_slot_desc, state, &agg_fn)); - agg_fns_.push_back(agg_fn); - needs_serialize_ |= agg_fn->SupportsSerialize(); - } - return Status::OK(); -} + RETURN_IF_ERROR(ExecNode::init(tnode)); + DCHECK(intermediate_tuple_desc_ != nullptr); + DCHECK(output_tuple_desc_ != nullptr); + DCHECK_EQ(intermediate_tuple_desc_->slots().size(), output_tuple_desc_->slots().size()); + + const RowDescriptor& row_desc = 
child(0)->row_desc(); + RETURN_IF_ERROR(Expr::create(tnode.agg_node.grouping_exprs, row_desc, state, &grouping_exprs_, + mem_tracker())); + // Construct build exprs from intermediate_row_desc_ + for (int i = 0; i < grouping_exprs_.size(); ++i) { + SlotDescriptor* desc = intermediate_tuple_desc_->slots()[i]; + //DCHECK(desc->type().type == TYPE_NULL || desc->type() == grouping_exprs_[i]->type()); + // Hack to avoid TYPE_NULL SlotRefs. + SlotRef* build_expr = + _pool->add(desc->type().type != TYPE_NULL ? new SlotRef(desc) + : new SlotRef(desc, TYPE_BOOLEAN)); + build_exprs_.push_back(build_expr); + // TODO chenhao + RETURN_IF_ERROR(build_expr->prepare(state, intermediate_row_desc_, nullptr)); + if (build_expr->type().is_var_len_string_type()) string_grouping_exprs_.push_back(i); + } -Status PartitionedAggregationNode::prepare(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - - RETURN_IF_ERROR(ExecNode::prepare(state)); - state_ = state; - - mem_pool_.reset(new MemPool(mem_tracker().get())); - agg_fn_pool_.reset(new MemPool(expr_mem_tracker().get())); - - ht_resize_timer_ = ADD_TIMER(runtime_profile(), "HTResizeTime"); - get_results_timer_ = ADD_TIMER(runtime_profile(), "GetResultsTime"); - num_processed_rows_ = - ADD_COUNTER(runtime_profile(), "RowsProcessed", TUnit::UNIT); - num_hash_buckets_ = - ADD_COUNTER(runtime_profile(), "HashBuckets", TUnit::UNIT); - num_hash_filled_buckets_ = - ADD_COUNTER(runtime_profile(), "HashFilledBuckets", TUnit::UNIT); - num_hash_probe_ = - ADD_COUNTER(runtime_profile(), "HashProbe", TUnit::UNIT); - num_hash_failed_probe_ = - ADD_COUNTER(runtime_profile(), "HashFailedProbe", TUnit::UNIT); - num_hash_travel_length_ = - ADD_COUNTER(runtime_profile(), "HashTravelLength", TUnit::UNIT); - num_hash_collisions_ = - ADD_COUNTER(runtime_profile(), "HashCollisions", TUnit::UNIT); - ht_resize_counter_ = - ADD_COUNTER(runtime_profile(), "HTResize", TUnit::UNIT); - partitions_created_ = - 
ADD_COUNTER(runtime_profile(), "PartitionsCreated", TUnit::UNIT); - largest_partition_percent_ = - runtime_profile()->AddHighWaterMarkCounter("LargestPartitionPercent", TUnit::UNIT); - - if (config::enable_quadratic_probing) { - runtime_profile()->add_info_string("Probe Method", "HashTable Quadratic Probing"); - } else { - runtime_profile()->add_info_string("Probe Method", "HashTable Linear Probing"); - } - - if (is_streaming_preagg_) { - runtime_profile()->append_exec_option("Streaming Preaggregation"); - streaming_timer_ = ADD_TIMER(runtime_profile(), "StreamingTime"); - num_passthrough_rows_ = - ADD_COUNTER(runtime_profile(), "RowsPassedThrough", TUnit::UNIT); - preagg_estimated_reduction_ = ADD_COUNTER( - runtime_profile(), "ReductionFactorEstimate", TUnit::DOUBLE_VALUE); - preagg_streaming_ht_min_reduction_ = ADD_COUNTER( - runtime_profile(), "ReductionFactorThresholdToExpand", TUnit::DOUBLE_VALUE); - } else { - build_timer_ = ADD_TIMER(runtime_profile(), "BuildTime"); - num_row_repartitioned_ = - ADD_COUNTER(runtime_profile(), "RowsRepartitioned", TUnit::UNIT); - num_repartitions_ = - ADD_COUNTER(runtime_profile(), "NumRepartitions", TUnit::UNIT); - num_spilled_partitions_ = - ADD_COUNTER(runtime_profile(), "SpilledPartitions", TUnit::UNIT); - max_partition_level_ = runtime_profile()->AddHighWaterMarkCounter( - "MaxPartitionLevel", TUnit::UNIT); - } - // TODO chenhao - const RowDescriptor& row_desc = child(0)->row_desc(); - RETURN_IF_ERROR(NewAggFnEvaluator::Create(agg_fns_, state, _pool, agg_fn_pool_.get(), - &agg_fn_evals_, expr_mem_tracker(), row_desc)); - - expr_results_pool_.reset(new MemPool(expr_mem_tracker().get())); - if (!grouping_exprs_.empty()) { - RowDescriptor build_row_desc(intermediate_tuple_desc_, false); - RETURN_IF_ERROR(PartitionedHashTableCtx::Create(_pool, state, build_exprs_, - grouping_exprs_, true, vector(build_exprs_.size(), true), - state->fragment_hash_seed(), MAX_PARTITION_DEPTH, 1, expr_mem_pool(), - expr_results_pool_.get(), 
expr_mem_tracker(), build_row_desc, row_desc, &ht_ctx_)); - } - // AddCodegenDisabledMessage(state); - return Status::OK(); + int j = grouping_exprs_.size(); + for (int i = 0; i < tnode.agg_node.aggregate_functions.size(); ++i, ++j) { + SlotDescriptor* intermediate_slot_desc = intermediate_tuple_desc_->slots()[j]; + SlotDescriptor* output_slot_desc = output_tuple_desc_->slots()[j]; + AggFn* agg_fn; + RETURN_IF_ERROR(AggFn::Create(tnode.agg_node.aggregate_functions[i], row_desc, + *intermediate_slot_desc, *output_slot_desc, state, &agg_fn)); + agg_fns_.push_back(agg_fn); + needs_serialize_ |= agg_fn->SupportsSerialize(); + } + return Status::OK(); } -Status PartitionedAggregationNode::open(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - // Open the child before consuming resources in this node. - RETURN_IF_ERROR(child(0)->open(state)); - RETURN_IF_ERROR(ExecNode::open(state)); - - // Claim reservation after the child has been opened to reduce the peak reservation - // requirement. - if (!_buffer_pool_client.is_registered() && !grouping_exprs_.empty()) { - DCHECK_GE(_resource_profile.min_reservation, MinReservation()); - RETURN_IF_ERROR(claim_buffer_reservation(state)); - } - - if (ht_ctx_.get() != nullptr) RETURN_IF_ERROR(ht_ctx_->Open(state)); - RETURN_IF_ERROR(NewAggFnEvaluator::Open(agg_fn_evals_, state)); - if (grouping_exprs_.empty()) { - // Create the single output tuple for this non-grouping agg. This must happen after - // opening the aggregate evaluators. - singleton_output_tuple_ = - ConstructSingletonOutputTuple(agg_fn_evals_, mem_pool_.get()); - // Check for failures during NewAggFnEvaluator::Init(). - RETURN_IF_ERROR(state_->query_status()); - singleton_output_tuple_returned_ = false; - } else { - if (ht_allocator_ == nullptr) { - // Allocate 'serialize_stream_' and 'ht_allocator_' on the first Open() call. 
- ht_allocator_.reset(new Suballocator(state_->exec_env()->buffer_pool(), - &_buffer_pool_client, _resource_profile.spillable_buffer_size)); - - if (!is_streaming_preagg_ && needs_serialize_) { - serialize_stream_.reset(new BufferedTupleStream3(state, &intermediate_row_desc_, - &_buffer_pool_client, _resource_profile.spillable_buffer_size, - _resource_profile.max_row_buffer_size)); - RETURN_IF_ERROR(serialize_stream_->Init(id(), false)); - bool got_buffer; - // Reserve the memory for 'serialize_stream_' so we don't need to scrounge up - // another buffer during spilling. - RETURN_IF_ERROR(serialize_stream_->PrepareForWrite(&got_buffer)); - DCHECK(got_buffer) - << "Accounted in min reservation" << _buffer_pool_client.DebugString(); - DCHECK(serialize_stream_->has_write_iterator()); - } +Status PartitionedAggregationNode::prepare(RuntimeState* state) { + SCOPED_TIMER(_runtime_profile->total_time_counter()); + + RETURN_IF_ERROR(ExecNode::prepare(state)); + state_ = state; + + mem_pool_.reset(new MemPool(mem_tracker().get())); + agg_fn_pool_.reset(new MemPool(expr_mem_tracker().get())); + + ht_resize_timer_ = ADD_TIMER(runtime_profile(), "HTResizeTime"); + get_results_timer_ = ADD_TIMER(runtime_profile(), "GetResultsTime"); + num_processed_rows_ = ADD_COUNTER(runtime_profile(), "RowsProcessed", TUnit::UNIT); + num_hash_buckets_ = ADD_COUNTER(runtime_profile(), "HashBuckets", TUnit::UNIT); + num_hash_filled_buckets_ = ADD_COUNTER(runtime_profile(), "HashFilledBuckets", TUnit::UNIT); + num_hash_probe_ = ADD_COUNTER(runtime_profile(), "HashProbe", TUnit::UNIT); + num_hash_failed_probe_ = ADD_COUNTER(runtime_profile(), "HashFailedProbe", TUnit::UNIT); + num_hash_travel_length_ = ADD_COUNTER(runtime_profile(), "HashTravelLength", TUnit::UNIT); + num_hash_collisions_ = ADD_COUNTER(runtime_profile(), "HashCollisions", TUnit::UNIT); + ht_resize_counter_ = ADD_COUNTER(runtime_profile(), "HTResize", TUnit::UNIT); + partitions_created_ = ADD_COUNTER(runtime_profile(), 
"PartitionsCreated", TUnit::UNIT); + largest_partition_percent_ = + runtime_profile()->AddHighWaterMarkCounter("LargestPartitionPercent", TUnit::UNIT); + + if (config::enable_quadratic_probing) { + runtime_profile()->add_info_string("Probe Method", "HashTable Quadratic Probing"); + } else { + runtime_profile()->add_info_string("Probe Method", "HashTable Linear Probing"); } - RETURN_IF_ERROR(CreateHashPartitions(0)); - } - // Streaming preaggregations do all processing in GetNext(). - if (is_streaming_preagg_) return Status::OK(); + if (is_streaming_preagg_) { + runtime_profile()->append_exec_option("Streaming Preaggregation"); + streaming_timer_ = ADD_TIMER(runtime_profile(), "StreamingTime"); + num_passthrough_rows_ = ADD_COUNTER(runtime_profile(), "RowsPassedThrough", TUnit::UNIT); + preagg_estimated_reduction_ = + ADD_COUNTER(runtime_profile(), "ReductionFactorEstimate", TUnit::DOUBLE_VALUE); + preagg_streaming_ht_min_reduction_ = ADD_COUNTER( + runtime_profile(), "ReductionFactorThresholdToExpand", TUnit::DOUBLE_VALUE); + } else { + build_timer_ = ADD_TIMER(runtime_profile(), "BuildTime"); + num_row_repartitioned_ = ADD_COUNTER(runtime_profile(), "RowsRepartitioned", TUnit::UNIT); + num_repartitions_ = ADD_COUNTER(runtime_profile(), "NumRepartitions", TUnit::UNIT); + num_spilled_partitions_ = ADD_COUNTER(runtime_profile(), "SpilledPartitions", TUnit::UNIT); + max_partition_level_ = + runtime_profile()->AddHighWaterMarkCounter("MaxPartitionLevel", TUnit::UNIT); + } + // TODO chenhao + const RowDescriptor& row_desc = child(0)->row_desc(); + RETURN_IF_ERROR(NewAggFnEvaluator::Create(agg_fns_, state, _pool, agg_fn_pool_.get(), + &agg_fn_evals_, expr_mem_tracker(), row_desc)); + + expr_results_pool_.reset(new MemPool(expr_mem_tracker().get())); + if (!grouping_exprs_.empty()) { + RowDescriptor build_row_desc(intermediate_tuple_desc_, false); + RETURN_IF_ERROR(PartitionedHashTableCtx::Create( + _pool, state, build_exprs_, grouping_exprs_, true, + 
vector(build_exprs_.size(), true), state->fragment_hash_seed(), + MAX_PARTITION_DEPTH, 1, expr_mem_pool(), expr_results_pool_.get(), + expr_mem_tracker(), build_row_desc, row_desc, &ht_ctx_)); + } + // AddCodegenDisabledMessage(state); + return Status::OK(); +} - RowBatch batch(child(0)->row_desc(), state->batch_size(), mem_tracker().get()); - // Read all the rows from the child and process them. - bool eos = false; - do { - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(state->check_query_state( - "New partitioned aggregation, while getting next from child 0.")); - RETURN_IF_ERROR(_children[0]->get_next(state, &batch, &eos)); - if (UNLIKELY(VLOG_ROW_IS_ON)) { - for (int i = 0; i < batch.num_rows(); ++i) { - TupleRow* row = batch.get_row(i); - VLOG_ROW << "input row: " << row->to_string(_children[0]->row_desc()); - } +Status PartitionedAggregationNode::open(RuntimeState* state) { + SCOPED_TIMER(_runtime_profile->total_time_counter()); + // Open the child before consuming resources in this node. + RETURN_IF_ERROR(child(0)->open(state)); + RETURN_IF_ERROR(ExecNode::open(state)); + + // Claim reservation after the child has been opened to reduce the peak reservation + // requirement. + if (!_buffer_pool_client.is_registered() && !grouping_exprs_.empty()) { + DCHECK_GE(_resource_profile.min_reservation, MinReservation()); + RETURN_IF_ERROR(claim_buffer_reservation(state)); } - SCOPED_TIMER(build_timer_); + if (ht_ctx_.get() != nullptr) RETURN_IF_ERROR(ht_ctx_->Open(state)); + RETURN_IF_ERROR(NewAggFnEvaluator::Open(agg_fn_evals_, state)); if (grouping_exprs_.empty()) { - if (process_batch_no_grouping_fn_ != NULL) { - RETURN_IF_ERROR(process_batch_no_grouping_fn_(this, &batch)); - } else { - RETURN_IF_ERROR(ProcessBatchNoGrouping(&batch)); - } + // Create the single output tuple for this non-grouping agg. This must happen after + // opening the aggregate evaluators. 
+ singleton_output_tuple_ = ConstructSingletonOutputTuple(agg_fn_evals_, mem_pool_.get()); + // Check for failures during NewAggFnEvaluator::Init(). + RETURN_IF_ERROR(state_->query_status()); + singleton_output_tuple_returned_ = false; } else { - // There is grouping, so we will do partitioned aggregation. - if (process_batch_fn_ != NULL) { - RETURN_IF_ERROR(process_batch_fn_(this, &batch, ht_ctx_.get())); - } else { - RETURN_IF_ERROR(ProcessBatch(&batch, ht_ctx_.get())); - } + if (ht_allocator_ == nullptr) { + // Allocate 'serialize_stream_' and 'ht_allocator_' on the first Open() call. + ht_allocator_.reset(new Suballocator(state_->exec_env()->buffer_pool(), + &_buffer_pool_client, + _resource_profile.spillable_buffer_size)); + + if (!is_streaming_preagg_ && needs_serialize_) { + serialize_stream_.reset(new BufferedTupleStream3( + state, &intermediate_row_desc_, &_buffer_pool_client, + _resource_profile.spillable_buffer_size, + _resource_profile.max_row_buffer_size)); + RETURN_IF_ERROR(serialize_stream_->Init(id(), false)); + bool got_buffer; + // Reserve the memory for 'serialize_stream_' so we don't need to scrounge up + // another buffer during spilling. + RETURN_IF_ERROR(serialize_stream_->PrepareForWrite(&got_buffer)); + DCHECK(got_buffer) + << "Accounted in min reservation" << _buffer_pool_client.DebugString(); + DCHECK(serialize_stream_->has_write_iterator()); + } + } + RETURN_IF_ERROR(CreateHashPartitions(0)); + } + + // Streaming preaggregations do all processing in GetNext(). + if (is_streaming_preagg_) return Status::OK(); + + RowBatch batch(child(0)->row_desc(), state->batch_size(), mem_tracker().get()); + // Read all the rows from the child and process them. 
+ bool eos = false; + do { + RETURN_IF_CANCELLED(state); + RETURN_IF_ERROR(state->check_query_state( + "New partitioned aggregation, while getting next from child 0.")); + RETURN_IF_ERROR(_children[0]->get_next(state, &batch, &eos)); + if (UNLIKELY(VLOG_ROW_IS_ON)) { + for (int i = 0; i < batch.num_rows(); ++i) { + TupleRow* row = batch.get_row(i); + VLOG_ROW << "input row: " << row->to_string(_children[0]->row_desc()); + } + } + + SCOPED_TIMER(build_timer_); + if (grouping_exprs_.empty()) { + if (process_batch_no_grouping_fn_ != NULL) { + RETURN_IF_ERROR(process_batch_no_grouping_fn_(this, &batch)); + } else { + RETURN_IF_ERROR(ProcessBatchNoGrouping(&batch)); + } + } else { + // There is grouping, so we will do partitioned aggregation. + if (process_batch_fn_ != NULL) { + RETURN_IF_ERROR(process_batch_fn_(this, &batch, ht_ctx_.get())); + } else { + RETURN_IF_ERROR(ProcessBatch(&batch, ht_ctx_.get())); + } + } + batch.reset(); + } while (!eos); + + // The child can be closed at this point in most cases because we have consumed all of + // the input from the child and transfered ownership of the resources we need. The + // exception is if we are inside a subplan expecting to call Open()/GetNext() on the + // child again, + if (!is_in_subplan()) child(0)->close(state); + child_eos_ = true; + + // Done consuming child(0)'s input. Move all the partitions in hash_partitions_ + // to spilled_partitions_ or aggregated_partitions_. We'll finish the processing in + // GetNext(). + if (!grouping_exprs_.empty()) { + RETURN_IF_ERROR(MoveHashPartitions(child(0)->rows_returned())); } - batch.reset(); - } while (!eos); - - // The child can be closed at this point in most cases because we have consumed all of - // the input from the child and transfered ownership of the resources we need. 
The - // exception is if we are inside a subplan expecting to call Open()/GetNext() on the - // child again, - if (!is_in_subplan()) child(0)->close(state); - child_eos_ = true; - - // Done consuming child(0)'s input. Move all the partitions in hash_partitions_ - // to spilled_partitions_ or aggregated_partitions_. We'll finish the processing in - // GetNext(). - if (!grouping_exprs_.empty()) { - RETURN_IF_ERROR(MoveHashPartitions(child(0)->rows_returned())); - } - return Status::OK(); + return Status::OK(); } -Status PartitionedAggregationNode::get_next(RuntimeState* state, RowBatch* row_batch, - bool* eos) { - int first_row_idx = row_batch->num_rows(); - RETURN_IF_ERROR(GetNextInternal(state, row_batch, eos)); - RETURN_IF_ERROR(HandleOutputStrings(row_batch, first_row_idx)); - return Status::OK(); +Status PartitionedAggregationNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { + int first_row_idx = row_batch->num_rows(); + RETURN_IF_ERROR(GetNextInternal(state, row_batch, eos)); + RETURN_IF_ERROR(HandleOutputStrings(row_batch, first_row_idx)); + return Status::OK(); } -Status PartitionedAggregationNode::HandleOutputStrings(RowBatch* row_batch, - int first_row_idx) { - if (!needs_finalize_ && !needs_serialize_) return Status::OK(); - // String data returned by Serialize() or Finalize() is from local expr allocations in - // the agg function contexts, and will be freed on the next GetNext() call by - // FreeLocalAllocations(). The data either needs to be copied out now or sent up the - // plan and copied out by a blocking ancestor. (See IMPALA-3311) - for (const AggFn* agg_fn : agg_fns_) { - const SlotDescriptor& slot_desc = agg_fn->output_slot_desc(); - DCHECK(!slot_desc.type().is_collection_type()) << "producing collections NYI"; - if (!slot_desc.type().is_var_len_string_type()) continue; - if (is_in_subplan()) { - // Copy string data to the row batch's pool. 
This is more efficient than - // MarkNeedsDeepCopy() in a subplan since we are likely producing many small - // batches. - RETURN_IF_ERROR(CopyStringData(slot_desc, row_batch, - first_row_idx, row_batch->tuple_data_pool())); - } else { - row_batch->mark_needs_deep_copy(); - break; +Status PartitionedAggregationNode::HandleOutputStrings(RowBatch* row_batch, int first_row_idx) { + if (!needs_finalize_ && !needs_serialize_) return Status::OK(); + // String data returned by Serialize() or Finalize() is from local expr allocations in + // the agg function contexts, and will be freed on the next GetNext() call by + // FreeLocalAllocations(). The data either needs to be copied out now or sent up the + // plan and copied out by a blocking ancestor. (See IMPALA-3311) + for (const AggFn* agg_fn : agg_fns_) { + const SlotDescriptor& slot_desc = agg_fn->output_slot_desc(); + DCHECK(!slot_desc.type().is_collection_type()) << "producing collections NYI"; + if (!slot_desc.type().is_var_len_string_type()) continue; + if (is_in_subplan()) { + // Copy string data to the row batch's pool. This is more efficient than + // MarkNeedsDeepCopy() in a subplan since we are likely producing many small + // batches. 
+ RETURN_IF_ERROR(CopyStringData(slot_desc, row_batch, first_row_idx, + row_batch->tuple_data_pool())); + } else { + row_batch->mark_needs_deep_copy(); + break; + } } - } - return Status::OK(); + return Status::OK(); } Status PartitionedAggregationNode::CopyStringData(const SlotDescriptor& slot_desc, - RowBatch* row_batch, int first_row_idx, MemPool* pool) { - DCHECK(slot_desc.type().is_var_len_string_type()); - DCHECK_EQ(row_batch->row_desc().tuple_descriptors().size(), 1); - FOREACH_ROW(row_batch, first_row_idx, batch_iter) { - Tuple* tuple = batch_iter.get()->get_tuple(0); - StringValue* sv = reinterpret_cast( - tuple->get_slot(slot_desc.tuple_offset())); - if (sv == NULL || sv->len == 0) continue; - char* new_ptr = reinterpret_cast(pool->try_allocate(sv->len)); - if (UNLIKELY(new_ptr == NULL)) { - string details = Substitute("Cannot perform aggregation at node with id $0." - " Failed to allocate $1 output bytes.", _id, sv->len); - return pool->mem_tracker()->MemLimitExceeded(state_, details, sv->len); + RowBatch* row_batch, int first_row_idx, + MemPool* pool) { + DCHECK(slot_desc.type().is_var_len_string_type()); + DCHECK_EQ(row_batch->row_desc().tuple_descriptors().size(), 1); + FOREACH_ROW(row_batch, first_row_idx, batch_iter) { + Tuple* tuple = batch_iter.get()->get_tuple(0); + StringValue* sv = reinterpret_cast(tuple->get_slot(slot_desc.tuple_offset())); + if (sv == NULL || sv->len == 0) continue; + char* new_ptr = reinterpret_cast(pool->try_allocate(sv->len)); + if (UNLIKELY(new_ptr == NULL)) { + string details = Substitute( + "Cannot perform aggregation at node with id $0." 
+ " Failed to allocate $1 output bytes.", + _id, sv->len); + return pool->mem_tracker()->MemLimitExceeded(state_, details, sv->len); + } + memcpy(new_ptr, sv->ptr, sv->len); + sv->ptr = new_ptr; } - memcpy(new_ptr, sv->ptr, sv->len); - sv->ptr = new_ptr; - } - return Status::OK(); -} - -Status PartitionedAggregationNode::GetNextInternal(RuntimeState* state, - RowBatch* row_batch, bool* eos) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT)); - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(state->check_query_state("New partitioned aggregation, while getting next.")); - // clear tmp expr result alocations - expr_results_pool_->clear(); - - if (reached_limit()) { - *eos = true; - return Status::OK(); - } - - if (grouping_exprs_.empty()) { - // There was no grouping, so evaluate the conjuncts and return the single result row. - // We allow calling GetNext() after eos, so don't return this row again. - if (!singleton_output_tuple_returned_) GetSingletonOutput(row_batch); - singleton_output_tuple_returned_ = true; - *eos = true; return Status::OK(); - } - - if (!child_eos_) { - // For streaming preaggregations, we process rows from the child as we go. 
- DCHECK(is_streaming_preagg_); - RETURN_IF_ERROR(GetRowsStreaming(state, row_batch)); - } else if (!partition_eos_) { - RETURN_IF_ERROR(GetRowsFromPartition(state, row_batch)); - } - - *eos = partition_eos_ && child_eos_; - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - return Status::OK(); } -void PartitionedAggregationNode::GetSingletonOutput(RowBatch* row_batch) { - DCHECK(grouping_exprs_.empty()); - int row_idx = row_batch->add_row(); - TupleRow* row = row_batch->get_row(row_idx); - Tuple* output_tuple = GetOutputTuple(agg_fn_evals_, - singleton_output_tuple_, row_batch->tuple_data_pool()); - row->set_tuple(0, output_tuple); - if (ExecNode::eval_conjuncts( - _conjunct_ctxs.data(), _conjunct_ctxs.size(), row)) { - row_batch->commit_last_row(); - ++_num_rows_returned; - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - } - // Keep the current chunk to amortize the memory allocation over a series - // of Reset()/Open()/GetNext()* calls. - row_batch->tuple_data_pool()->acquire_data(mem_pool_.get(), true); - // This node no longer owns the memory for singleton_output_tuple_. - singleton_output_tuple_ = NULL; -} +Status PartitionedAggregationNode::GetNextInternal(RuntimeState* state, RowBatch* row_batch, + bool* eos) { + SCOPED_TIMER(_runtime_profile->total_time_counter()); + RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT)); + RETURN_IF_CANCELLED(state); + RETURN_IF_ERROR(state->check_query_state("New partitioned aggregation, while getting next.")); + // clear tmp expr result alocations + expr_results_pool_->clear(); -Status PartitionedAggregationNode::GetRowsFromPartition(RuntimeState* state, - RowBatch* row_batch) { - DCHECK(!row_batch->at_capacity()); - if (output_iterator_.AtEnd()) { - // Done with this partition, move onto the next one. 
- if (output_partition_ != NULL) { - output_partition_->Close(false); - output_partition_ = NULL; + if (reached_limit()) { + *eos = true; + return Status::OK(); } - if (aggregated_partitions_.empty() && spilled_partitions_.empty()) { - // No more partitions, all done. - partition_eos_ = true; - return Status::OK(); + + if (grouping_exprs_.empty()) { + // There was no grouping, so evaluate the conjuncts and return the single result row. + // We allow calling GetNext() after eos, so don't return this row again. + if (!singleton_output_tuple_returned_) GetSingletonOutput(row_batch); + singleton_output_tuple_returned_ = true; + *eos = true; + return Status::OK(); } - // Process next partition. - RETURN_IF_ERROR(NextPartition()); - DCHECK(output_partition_ != NULL); - } - - SCOPED_TIMER(get_results_timer_); - int count = 0; - const int N = BitUtil::next_power_of_two(state->batch_size()); - // Keeping returning rows from the current partition. - while (!output_iterator_.AtEnd()) { - // This loop can go on for a long time if the conjuncts are very selective. Do query - // maintenance every N iterations. - if ((count++ & (N - 1)) == 0) { - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(state->check_query_state( - "New partitioned aggregation, while getting rows from partition.")); + + if (!child_eos_) { + // For streaming preaggregations, we process rows from the child as we go. 
+ DCHECK(is_streaming_preagg_); + RETURN_IF_ERROR(GetRowsStreaming(state, row_batch)); + } else if (!partition_eos_) { + RETURN_IF_ERROR(GetRowsFromPartition(state, row_batch)); } + *eos = partition_eos_ && child_eos_; + COUNTER_SET(_rows_returned_counter, _num_rows_returned); + return Status::OK(); +} + +void PartitionedAggregationNode::GetSingletonOutput(RowBatch* row_batch) { + DCHECK(grouping_exprs_.empty()); int row_idx = row_batch->add_row(); TupleRow* row = row_batch->get_row(row_idx); - Tuple* intermediate_tuple = output_iterator_.GetTuple(); - Tuple* output_tuple = GetOutputTuple( - output_partition_->agg_fn_evals, intermediate_tuple, row_batch->tuple_data_pool()); - output_iterator_.Next(); + Tuple* output_tuple = + GetOutputTuple(agg_fn_evals_, singleton_output_tuple_, row_batch->tuple_data_pool()); row->set_tuple(0, output_tuple); - // TODO chenhao - // DCHECK_EQ(_conjunct_ctxs.size(), _conjuncts.size()); if (ExecNode::eval_conjuncts(_conjunct_ctxs.data(), _conjunct_ctxs.size(), row)) { - row_batch->commit_last_row(); - ++_num_rows_returned; - if (reached_limit() || row_batch->at_capacity()) { - break; - } + row_batch->commit_last_row(); + ++_num_rows_returned; + COUNTER_SET(_rows_returned_counter, _num_rows_returned); } - } + // Keep the current chunk to amortize the memory allocation over a series + // of Reset()/Open()/GetNext()* calls. + row_batch->tuple_data_pool()->acquire_data(mem_pool_.get(), true); + // This node no longer owns the memory for singleton_output_tuple_. 
+ singleton_output_tuple_ = NULL; +} - COUNTER_SET(num_processed_rows_, num_hash_probe_->value()); - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - partition_eos_ = reached_limit(); - if (output_iterator_.AtEnd()) row_batch->mark_needs_deep_copy(); +Status PartitionedAggregationNode::GetRowsFromPartition(RuntimeState* state, RowBatch* row_batch) { + DCHECK(!row_batch->at_capacity()); + if (output_iterator_.AtEnd()) { + // Done with this partition, move onto the next one. + if (output_partition_ != NULL) { + output_partition_->Close(false); + output_partition_ = NULL; + } + if (aggregated_partitions_.empty() && spilled_partitions_.empty()) { + // No more partitions, all done. + partition_eos_ = true; + return Status::OK(); + } + // Process next partition. + RETURN_IF_ERROR(NextPartition()); + DCHECK(output_partition_ != NULL); + } - return Status::OK(); -} + SCOPED_TIMER(get_results_timer_); + int count = 0; + const int N = BitUtil::next_power_of_two(state->batch_size()); + // Keeping returning rows from the current partition. + while (!output_iterator_.AtEnd()) { + // This loop can go on for a long time if the conjuncts are very selective. Do query + // maintenance every N iterations. 
+ if ((count++ & (N - 1)) == 0) { + RETURN_IF_CANCELLED(state); + RETURN_IF_ERROR(state->check_query_state( + "New partitioned aggregation, while getting rows from partition.")); + } -Status PartitionedAggregationNode::GetRowsStreaming(RuntimeState* state, - RowBatch* out_batch) { - DCHECK(!child_eos_); - DCHECK(is_streaming_preagg_); + int row_idx = row_batch->add_row(); + TupleRow* row = row_batch->get_row(row_idx); + Tuple* intermediate_tuple = output_iterator_.GetTuple(); + Tuple* output_tuple = GetOutputTuple(output_partition_->agg_fn_evals, intermediate_tuple, + row_batch->tuple_data_pool()); + output_iterator_.Next(); + row->set_tuple(0, output_tuple); + // TODO chenhao + // DCHECK_EQ(_conjunct_ctxs.size(), _conjuncts.size()); + if (ExecNode::eval_conjuncts(_conjunct_ctxs.data(), _conjunct_ctxs.size(), row)) { + row_batch->commit_last_row(); + ++_num_rows_returned; + if (reached_limit() || row_batch->at_capacity()) { + break; + } + } + } - if (child_batch_ == NULL) { - child_batch_.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), - mem_tracker().get())); - } + COUNTER_SET(num_processed_rows_, num_hash_probe_->value()); + COUNTER_SET(_rows_returned_counter, _num_rows_returned); + partition_eos_ = reached_limit(); + if (output_iterator_.AtEnd()) row_batch->mark_needs_deep_copy(); - do { - DCHECK_EQ(out_batch->num_rows(), 0); - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(state->check_query_state( - "New partitioned aggregation, while getting rows in streaming.")); + return Status::OK(); +} - RETURN_IF_ERROR(child(0)->get_next(state, child_batch_.get(), &child_eos_)); - SCOPED_TIMER(streaming_timer_); +Status PartitionedAggregationNode::GetRowsStreaming(RuntimeState* state, RowBatch* out_batch) { + DCHECK(!child_eos_); + DCHECK(is_streaming_preagg_); - int remaining_capacity[PARTITION_FANOUT]; - bool ht_needs_expansion = false; - for (int i = 0; i < PARTITION_FANOUT; ++i) { - PartitionedHashTable* hash_tbl = GetHashTable(i); - 
remaining_capacity[i] = hash_tbl->NumInsertsBeforeResize(); - ht_needs_expansion |= remaining_capacity[i] < child_batch_->num_rows(); + if (child_batch_ == NULL) { + child_batch_.reset( + new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); } - // Stop expanding hash tables if we're not reducing the input sufficiently. As our - // hash tables expand out of each level of cache hierarchy, every hash table lookup - // will take longer. We also may not be able to expand hash tables because of memory - // pressure. In this case HashTable::CheckAndResize() will fail. In either case we - // should always use the remaining space in the hash table to avoid wasting memory. - if (ht_needs_expansion && ShouldExpandPreaggHashTables()) { - for (int i = 0; i < PARTITION_FANOUT; ++i) { - PartitionedHashTable* ht = GetHashTable(i); - if (remaining_capacity[i] < child_batch_->num_rows()) { - SCOPED_TIMER(ht_resize_timer_); - bool resized; - RETURN_IF_ERROR( - ht->CheckAndResize(child_batch_->num_rows(), ht_ctx_.get(), &resized)); - if (resized) { - remaining_capacity[i] = ht->NumInsertsBeforeResize(); - } + do { + DCHECK_EQ(out_batch->num_rows(), 0); + RETURN_IF_CANCELLED(state); + RETURN_IF_ERROR(state->check_query_state( + "New partitioned aggregation, while getting rows in streaming.")); + + RETURN_IF_ERROR(child(0)->get_next(state, child_batch_.get(), &child_eos_)); + SCOPED_TIMER(streaming_timer_); + + int remaining_capacity[PARTITION_FANOUT]; + bool ht_needs_expansion = false; + for (int i = 0; i < PARTITION_FANOUT; ++i) { + PartitionedHashTable* hash_tbl = GetHashTable(i); + remaining_capacity[i] = hash_tbl->NumInsertsBeforeResize(); + ht_needs_expansion |= remaining_capacity[i] < child_batch_->num_rows(); } - } - } - if (process_batch_streaming_fn_ != NULL) { - RETURN_IF_ERROR(process_batch_streaming_fn_(this, needs_serialize_, - child_batch_.get(), out_batch, ht_ctx_.get(), remaining_capacity)); - } else { - 
RETURN_IF_ERROR(ProcessBatchStreaming(needs_serialize_, - child_batch_.get(), out_batch, ht_ctx_.get(), remaining_capacity)); - } + // Stop expanding hash tables if we're not reducing the input sufficiently. As our + // hash tables expand out of each level of cache hierarchy, every hash table lookup + // will take longer. We also may not be able to expand hash tables because of memory + // pressure. In this case HashTable::CheckAndResize() will fail. In either case we + // should always use the remaining space in the hash table to avoid wasting memory. + if (ht_needs_expansion && ShouldExpandPreaggHashTables()) { + for (int i = 0; i < PARTITION_FANOUT; ++i) { + PartitionedHashTable* ht = GetHashTable(i); + if (remaining_capacity[i] < child_batch_->num_rows()) { + SCOPED_TIMER(ht_resize_timer_); + bool resized; + RETURN_IF_ERROR( + ht->CheckAndResize(child_batch_->num_rows(), ht_ctx_.get(), &resized)); + if (resized) { + remaining_capacity[i] = ht->NumInsertsBeforeResize(); + } + } + } + } - child_batch_->reset(); // All rows from child_batch_ were processed. - } while (out_batch->num_rows() == 0 && !child_eos_); + if (process_batch_streaming_fn_ != NULL) { + RETURN_IF_ERROR(process_batch_streaming_fn_(this, needs_serialize_, child_batch_.get(), + out_batch, ht_ctx_.get(), + remaining_capacity)); + } else { + RETURN_IF_ERROR(ProcessBatchStreaming(needs_serialize_, child_batch_.get(), out_batch, + ht_ctx_.get(), remaining_capacity)); + } - if (child_eos_) { - child(0)->close(state); - child_batch_.reset(); - RETURN_IF_ERROR(MoveHashPartitions(child(0)->rows_returned())); - } + child_batch_->reset(); // All rows from child_batch_ were processed. 
+ } while (out_batch->num_rows() == 0 && !child_eos_); - _num_rows_returned += out_batch->num_rows(); - COUNTER_SET(num_passthrough_rows_, _num_rows_returned); - return Status::OK(); + if (child_eos_) { + child(0)->close(state); + child_batch_.reset(); + RETURN_IF_ERROR(MoveHashPartitions(child(0)->rows_returned())); + } + + _num_rows_returned += out_batch->num_rows(); + COUNTER_SET(num_passthrough_rows_, _num_rows_returned); + return Status::OK(); } bool PartitionedAggregationNode::ShouldExpandPreaggHashTables() const { - int64_t ht_mem = 0; - int64_t ht_rows = 0; - for (int i = 0; i < PARTITION_FANOUT; ++i) { - PartitionedHashTable* ht = hash_partitions_[i]->hash_tbl.get(); - ht_mem += ht->CurrentMemSize(); - ht_rows += ht->size(); - } - - // Need some rows in tables to have valid statistics. - if (ht_rows == 0) return true; - - // Find the appropriate reduction factor in our table for the current hash table sizes. - int cache_level = 0; - while (cache_level + 1 < STREAMING_HT_MIN_REDUCTION_SIZE && - ht_mem >= STREAMING_HT_MIN_REDUCTION[cache_level + 1].min_ht_mem) { - ++cache_level; - } - - // Compare the number of rows in the hash table with the number of input rows that - // were aggregated into it. Exclude passed through rows from this calculation since - // they were not in hash tables. - const int64_t input_rows = _children[0]->rows_returned(); - const int64_t aggregated_input_rows = input_rows - _num_rows_returned; - // TODO chenhao -// const int64_t expected_input_rows = estimated_input_cardinality_ - num_rows_returned_; - double current_reduction = static_cast(aggregated_input_rows) / ht_rows; - - // TODO: workaround for IMPALA-2490: subplan node rows_returned counter may be - // inaccurate, which could lead to a divide by zero below. 
- if (aggregated_input_rows <= 0) return true; - - // Extrapolate the current reduction factor (r) using the formula - // R = 1 + (N / n) * (r - 1), where R is the reduction factor over the full input data - // set, N is the number of input rows, excluding passed-through rows, and n is the - // number of rows inserted or merged into the hash tables. This is a very rough - // approximation but is good enough to be useful. - // TODO: consider collecting more statistics to better estimate reduction. -// double estimated_reduction = aggregated_input_rows >= expected_input_rows -// ? current_reduction -// : 1 + (expected_input_rows / aggregated_input_rows) * (current_reduction - 1); - double min_reduction = - STREAMING_HT_MIN_REDUCTION[cache_level].streaming_ht_min_reduction; - -// COUNTER_SET(preagg_estimated_reduction_, estimated_reduction); - COUNTER_SET(preagg_streaming_ht_min_reduction_, min_reduction); -// return estimated_reduction > min_reduction; - return current_reduction > min_reduction; -} + int64_t ht_mem = 0; + int64_t ht_rows = 0; + for (int i = 0; i < PARTITION_FANOUT; ++i) { + PartitionedHashTable* ht = hash_partitions_[i]->hash_tbl.get(); + ht_mem += ht->CurrentMemSize(); + ht_rows += ht->size(); + } + + // Need some rows in tables to have valid statistics. + if (ht_rows == 0) return true; -void PartitionedAggregationNode::CleanupHashTbl( - const vector& agg_fn_evals, PartitionedHashTable::Iterator it) { - if (!needs_finalize_ && !needs_serialize_) return; - - // Iterate through the remaining rows in the hash table and call Serialize/Finalize on - // them in order to free any memory allocated by UDAs. - if (needs_finalize_) { - // Finalize() requires a dst tuple but we don't actually need the result, - // so allocate a single dummy tuple to avoid accumulating memory. 
- Tuple* dummy_dst = NULL; - dummy_dst = Tuple::create(output_tuple_desc_->byte_size(), mem_pool_.get()); - while (!it.AtEnd()) { - Tuple* tuple = it.GetTuple(); - NewAggFnEvaluator::Finalize(agg_fn_evals, tuple, dummy_dst); - it.Next(); + // Find the appropriate reduction factor in our table for the current hash table sizes. + int cache_level = 0; + while (cache_level + 1 < STREAMING_HT_MIN_REDUCTION_SIZE && + ht_mem >= STREAMING_HT_MIN_REDUCTION[cache_level + 1].min_ht_mem) { + ++cache_level; } - } else { - while (!it.AtEnd()) { - Tuple* tuple = it.GetTuple(); - NewAggFnEvaluator::Serialize(agg_fn_evals, tuple); - it.Next(); + + // Compare the number of rows in the hash table with the number of input rows that + // were aggregated into it. Exclude passed through rows from this calculation since + // they were not in hash tables. + const int64_t input_rows = _children[0]->rows_returned(); + const int64_t aggregated_input_rows = input_rows - _num_rows_returned; + // TODO chenhao + // const int64_t expected_input_rows = estimated_input_cardinality_ - num_rows_returned_; + double current_reduction = static_cast(aggregated_input_rows) / ht_rows; + + // TODO: workaround for IMPALA-2490: subplan node rows_returned counter may be + // inaccurate, which could lead to a divide by zero below. + if (aggregated_input_rows <= 0) return true; + + // Extrapolate the current reduction factor (r) using the formula + // R = 1 + (N / n) * (r - 1), where R is the reduction factor over the full input data + // set, N is the number of input rows, excluding passed-through rows, and n is the + // number of rows inserted or merged into the hash tables. This is a very rough + // approximation but is good enough to be useful. + // TODO: consider collecting more statistics to better estimate reduction. + // double estimated_reduction = aggregated_input_rows >= expected_input_rows + // ? 
current_reduction + // : 1 + (expected_input_rows / aggregated_input_rows) * (current_reduction - 1); + double min_reduction = STREAMING_HT_MIN_REDUCTION[cache_level].streaming_ht_min_reduction; + + // COUNTER_SET(preagg_estimated_reduction_, estimated_reduction); + COUNTER_SET(preagg_streaming_ht_min_reduction_, min_reduction); + // return estimated_reduction > min_reduction; + return current_reduction > min_reduction; +} + +void PartitionedAggregationNode::CleanupHashTbl(const vector& agg_fn_evals, + PartitionedHashTable::Iterator it) { + if (!needs_finalize_ && !needs_serialize_) return; + + // Iterate through the remaining rows in the hash table and call Serialize/Finalize on + // them in order to free any memory allocated by UDAs. + if (needs_finalize_) { + // Finalize() requires a dst tuple but we don't actually need the result, + // so allocate a single dummy tuple to avoid accumulating memory. + Tuple* dummy_dst = NULL; + dummy_dst = Tuple::create(output_tuple_desc_->byte_size(), mem_pool_.get()); + while (!it.AtEnd()) { + Tuple* tuple = it.GetTuple(); + NewAggFnEvaluator::Finalize(agg_fn_evals, tuple, dummy_dst); + it.Next(); + } + } else { + while (!it.AtEnd()) { + Tuple* tuple = it.GetTuple(); + NewAggFnEvaluator::Serialize(agg_fn_evals, tuple); + it.Next(); + } } - } } Status PartitionedAggregationNode::reset(RuntimeState* state) { - DCHECK(!is_streaming_preagg_) << "Cannot reset preaggregation"; - if (!grouping_exprs_.empty()) { - child_eos_ = false; - partition_eos_ = false; - // Reset the HT and the partitions for this grouping agg. - ht_ctx_->set_level(0); - ClosePartitions(); - } - return ExecNode::reset(state); + DCHECK(!is_streaming_preagg_) << "Cannot reset preaggregation"; + if (!grouping_exprs_.empty()) { + child_eos_ = false; + partition_eos_ = false; + // Reset the HT and the partitions for this grouping agg. 
+ ht_ctx_->set_level(0); + ClosePartitions(); + } + return ExecNode::reset(state); } Status PartitionedAggregationNode::close(RuntimeState* state) { - if (is_closed()) return Status::OK(); - - if (!singleton_output_tuple_returned_) { - GetOutputTuple(agg_fn_evals_, singleton_output_tuple_, mem_pool_.get()); - } - - // Iterate through the remaining rows in the hash table and call Serialize/Finalize on - // them in order to free any memory allocated by UDAs - if (output_partition_ != NULL) { - CleanupHashTbl(output_partition_->agg_fn_evals, output_iterator_); - output_partition_->Close(false); - } - - ClosePartitions(); - child_batch_.reset(); - - // Close all the agg-fn-evaluators - NewAggFnEvaluator::Close(agg_fn_evals_, state); - - if (expr_results_pool_.get() != nullptr) { - expr_results_pool_->free_all(); - } - if (agg_fn_pool_.get() != nullptr) agg_fn_pool_->free_all(); - if (mem_pool_.get() != nullptr) mem_pool_->free_all(); - if (ht_ctx_.get() != nullptr) ht_ctx_->Close(state); - ht_ctx_.reset(); - if (serialize_stream_.get() != nullptr) { - serialize_stream_->Close(nullptr, RowBatch::FlushMode::NO_FLUSH_RESOURCES); - } - Expr::close(grouping_exprs_); - Expr::close(build_exprs_); - AggFn::Close(agg_fns_); - return ExecNode::close(state); + if (is_closed()) return Status::OK(); + + if (!singleton_output_tuple_returned_) { + GetOutputTuple(agg_fn_evals_, singleton_output_tuple_, mem_pool_.get()); + } + + // Iterate through the remaining rows in the hash table and call Serialize/Finalize on + // them in order to free any memory allocated by UDAs + if (output_partition_ != NULL) { + CleanupHashTbl(output_partition_->agg_fn_evals, output_iterator_); + output_partition_->Close(false); + } + + ClosePartitions(); + child_batch_.reset(); + + // Close all the agg-fn-evaluators + NewAggFnEvaluator::Close(agg_fn_evals_, state); + + if (expr_results_pool_.get() != nullptr) { + expr_results_pool_->free_all(); + } + if (agg_fn_pool_.get() != nullptr) 
agg_fn_pool_->free_all(); + if (mem_pool_.get() != nullptr) mem_pool_->free_all(); + if (ht_ctx_.get() != nullptr) ht_ctx_->Close(state); + ht_ctx_.reset(); + if (serialize_stream_.get() != nullptr) { + serialize_stream_->Close(nullptr, RowBatch::FlushMode::NO_FLUSH_RESOURCES); + } + Expr::close(grouping_exprs_); + Expr::close(build_exprs_); + AggFn::Close(agg_fns_); + return ExecNode::close(state); } PartitionedAggregationNode::Partition::~Partition() { - DCHECK(is_closed); + DCHECK(is_closed); } Status PartitionedAggregationNode::Partition::InitStreams() { - agg_fn_pool.reset(new MemPool(parent->expr_mem_tracker().get())); - DCHECK_EQ(agg_fn_evals.size(), 0); - NewAggFnEvaluator::ShallowClone(parent->partition_pool_.get(), agg_fn_pool.get(), - parent->agg_fn_evals_, &agg_fn_evals); - - // Varlen aggregate function results are stored outside of aggregated_row_stream because - // BufferedTupleStream3 doesn't support relocating varlen data stored in the stream. - auto agg_slot = parent->intermediate_tuple_desc_->slots().begin() + - parent->grouping_exprs_.size(); - std::set external_varlen_slots; - for (; agg_slot != parent->intermediate_tuple_desc_->slots().end(); ++agg_slot) { - if ((*agg_slot)->type().is_var_len_string_type()) { - external_varlen_slots.insert((*agg_slot)->id()); + agg_fn_pool.reset(new MemPool(parent->expr_mem_tracker().get())); + DCHECK_EQ(agg_fn_evals.size(), 0); + NewAggFnEvaluator::ShallowClone(parent->partition_pool_.get(), agg_fn_pool.get(), + parent->agg_fn_evals_, &agg_fn_evals); + + // Varlen aggregate function results are stored outside of aggregated_row_stream because + // BufferedTupleStream3 doesn't support relocating varlen data stored in the stream. 
+ auto agg_slot = + parent->intermediate_tuple_desc_->slots().begin() + parent->grouping_exprs_.size(); + std::set external_varlen_slots; + for (; agg_slot != parent->intermediate_tuple_desc_->slots().end(); ++agg_slot) { + if ((*agg_slot)->type().is_var_len_string_type()) { + external_varlen_slots.insert((*agg_slot)->id()); + } } - } - - aggregated_row_stream.reset(new BufferedTupleStream3(parent->state_, - &parent->intermediate_row_desc_, &parent->_buffer_pool_client, - parent->_resource_profile.spillable_buffer_size, - parent->_resource_profile.max_row_buffer_size, external_varlen_slots)); - RETURN_IF_ERROR( - aggregated_row_stream->Init(parent->id(), true)); - bool got_buffer; - RETURN_IF_ERROR(aggregated_row_stream->PrepareForWrite(&got_buffer)); - DCHECK(got_buffer) << "Buffer included in reservation " << parent->_id << "\n" - << parent->_buffer_pool_client.DebugString() << "\n" - << parent->DebugString(2); - - if (!parent->is_streaming_preagg_) { - unaggregated_row_stream.reset(new BufferedTupleStream3(parent->state_, - &(parent->child(0)->row_desc()), &parent->_buffer_pool_client, - parent->_resource_profile.spillable_buffer_size, - parent->_resource_profile.max_row_buffer_size)); - // This stream is only used to spill, no need to ever have this pinned. - RETURN_IF_ERROR(unaggregated_row_stream->Init(parent->id(), false)); - // Save memory by waiting until we spill to allocate the write buffer for the - // unaggregated row stream. 
- DCHECK(!unaggregated_row_stream->has_write_iterator()); - } - return Status::OK(); + + aggregated_row_stream.reset(new BufferedTupleStream3( + parent->state_, &parent->intermediate_row_desc_, &parent->_buffer_pool_client, + parent->_resource_profile.spillable_buffer_size, + parent->_resource_profile.max_row_buffer_size, external_varlen_slots)); + RETURN_IF_ERROR(aggregated_row_stream->Init(parent->id(), true)); + bool got_buffer; + RETURN_IF_ERROR(aggregated_row_stream->PrepareForWrite(&got_buffer)); + DCHECK(got_buffer) << "Buffer included in reservation " << parent->_id << "\n" + << parent->_buffer_pool_client.DebugString() << "\n" + << parent->DebugString(2); + + if (!parent->is_streaming_preagg_) { + unaggregated_row_stream.reset(new BufferedTupleStream3( + parent->state_, &(parent->child(0)->row_desc()), &parent->_buffer_pool_client, + parent->_resource_profile.spillable_buffer_size, + parent->_resource_profile.max_row_buffer_size)); + // This stream is only used to spill, no need to ever have this pinned. + RETURN_IF_ERROR(unaggregated_row_stream->Init(parent->id(), false)); + // Save memory by waiting until we spill to allocate the write buffer for the + // unaggregated row stream. + DCHECK(!unaggregated_row_stream->has_write_iterator()); + } + return Status::OK(); } Status PartitionedAggregationNode::Partition::InitHashTable(bool* got_memory) { - DCHECK(aggregated_row_stream != nullptr); - DCHECK(hash_tbl == nullptr); - // We use the upper PARTITION_FANOUT num bits to pick the partition so only the - // remaining bits can be used for the hash table. - // TODO: we could switch to 64 bit hashes and then we don't need a max size. - // It might be reasonable to limit individual hash table size for other reasons - // though. Always start with small buffers. 
- hash_tbl.reset(PartitionedHashTable::Create(parent->ht_allocator_.get(), false, 1, nullptr, - 1L << (32 - NUM_PARTITIONING_BITS), PAGG_DEFAULT_HASH_TABLE_SZ)); - // Please update the error message in CreateHashPartitions() if initial size of - // hash table changes. - return hash_tbl->Init(got_memory); + DCHECK(aggregated_row_stream != nullptr); + DCHECK(hash_tbl == nullptr); + // We use the upper PARTITION_FANOUT num bits to pick the partition so only the + // remaining bits can be used for the hash table. + // TODO: we could switch to 64 bit hashes and then we don't need a max size. + // It might be reasonable to limit individual hash table size for other reasons + // though. Always start with small buffers. + hash_tbl.reset(PartitionedHashTable::Create(parent->ht_allocator_.get(), false, 1, nullptr, + 1L << (32 - NUM_PARTITIONING_BITS), + PAGG_DEFAULT_HASH_TABLE_SZ)); + // Please update the error message in CreateHashPartitions() if initial size of + // hash table changes. + return hash_tbl->Init(got_memory); } Status PartitionedAggregationNode::Partition::SerializeStreamForSpilling() { - DCHECK(!parent->is_streaming_preagg_); - if (parent->needs_serialize_) { - // We need to do a lot more work in this case. This step effectively does a merge - // aggregation in this node. We need to serialize the intermediates, spill the - // intermediates and then feed them into the aggregate function's merge step. - // This is often used when the intermediate is a string type, meaning the current - // (before serialization) in-memory layout is not the on-disk block layout. - // The disk layout does not support mutable rows. We need to rewrite the stream - // into the on disk format. - // TODO: if it happens to not be a string, we could serialize in place. This is - // a future optimization since it is very unlikely to have a serialize phase - // for those UDAs. 
- DCHECK(parent->serialize_stream_.get() != NULL); - DCHECK(!parent->serialize_stream_->is_pinned()); - - // Serialize and copy the spilled partition's stream into the new stream. - Status status = Status::OK(); - BufferedTupleStream3* new_stream = parent->serialize_stream_.get(); - PartitionedHashTable::Iterator it = hash_tbl->Begin(parent->ht_ctx_.get()); - while (!it.AtEnd()) { - Tuple* tuple = it.GetTuple(); - it.Next(); - NewAggFnEvaluator::Serialize(agg_fn_evals, tuple); - if (UNLIKELY(!new_stream->AddRow(reinterpret_cast(&tuple), &status))) { - DCHECK(!status.ok()) << "Stream was unpinned - AddRow() only fails on error"; - // Even if we can't add to new_stream, finish up processing this agg stream to make - // clean up easier (someone has to finalize this stream and we don't want to remember - // where we are). - parent->CleanupHashTbl(agg_fn_evals, it); - hash_tbl->Close(); - hash_tbl.reset(); + DCHECK(!parent->is_streaming_preagg_); + if (parent->needs_serialize_) { + // We need to do a lot more work in this case. This step effectively does a merge + // aggregation in this node. We need to serialize the intermediates, spill the + // intermediates and then feed them into the aggregate function's merge step. + // This is often used when the intermediate is a string type, meaning the current + // (before serialization) in-memory layout is not the on-disk block layout. + // The disk layout does not support mutable rows. We need to rewrite the stream + // into the on disk format. + // TODO: if it happens to not be a string, we could serialize in place. This is + // a future optimization since it is very unlikely to have a serialize phase + // for those UDAs. + DCHECK(parent->serialize_stream_.get() != NULL); + DCHECK(!parent->serialize_stream_->is_pinned()); + + // Serialize and copy the spilled partition's stream into the new stream. 
+ Status status = Status::OK(); + BufferedTupleStream3* new_stream = parent->serialize_stream_.get(); + PartitionedHashTable::Iterator it = hash_tbl->Begin(parent->ht_ctx_.get()); + while (!it.AtEnd()) { + Tuple* tuple = it.GetTuple(); + it.Next(); + NewAggFnEvaluator::Serialize(agg_fn_evals, tuple); + if (UNLIKELY(!new_stream->AddRow(reinterpret_cast(&tuple), &status))) { + DCHECK(!status.ok()) << "Stream was unpinned - AddRow() only fails on error"; + // Even if we can't add to new_stream, finish up processing this agg stream to make + // clean up easier (someone has to finalize this stream and we don't want to remember + // where we are). + parent->CleanupHashTbl(agg_fn_evals, it); + hash_tbl->Close(); + hash_tbl.reset(); + aggregated_row_stream->Close(NULL, RowBatch::FlushMode::NO_FLUSH_RESOURCES); + return status; + } + } + aggregated_row_stream->Close(NULL, RowBatch::FlushMode::NO_FLUSH_RESOURCES); - return status; - } + aggregated_row_stream.swap(parent->serialize_stream_); + // Recreate the serialize_stream (and reserve 1 buffer) now in preparation for + // when we need to spill again. We need to have this available before we need + // to spill to make sure it is available. This should be acquirable since we just + // freed at least one buffer from this partition's (old) aggregated_row_stream. 
+ parent->serialize_stream_.reset(new BufferedTupleStream3( + parent->state_, &parent->intermediate_row_desc_, &parent->_buffer_pool_client, + parent->_resource_profile.spillable_buffer_size, + parent->_resource_profile.max_row_buffer_size)); + status = parent->serialize_stream_->Init(parent->id(), false); + if (status.ok()) { + bool got_buffer; + status = parent->serialize_stream_->PrepareForWrite(&got_buffer); + DCHECK(!status.ok() || got_buffer) << "Accounted in min reservation"; + } + if (!status.ok()) { + hash_tbl->Close(); + hash_tbl.reset(); + return status; + } + DCHECK(parent->serialize_stream_->has_write_iterator()); } + return Status::OK(); +} - aggregated_row_stream->Close(NULL, RowBatch::FlushMode::NO_FLUSH_RESOURCES); - aggregated_row_stream.swap(parent->serialize_stream_); - // Recreate the serialize_stream (and reserve 1 buffer) now in preparation for - // when we need to spill again. We need to have this available before we need - // to spill to make sure it is available. This should be acquirable since we just - // freed at least one buffer from this partition's (old) aggregated_row_stream. 
- parent->serialize_stream_.reset(new BufferedTupleStream3(parent->state_, - &parent->intermediate_row_desc_, &parent->_buffer_pool_client, - parent->_resource_profile.spillable_buffer_size, - parent->_resource_profile.max_row_buffer_size)); - status = parent->serialize_stream_->Init(parent->id(), false); - if (status.ok()) { - bool got_buffer; - status = parent->serialize_stream_->PrepareForWrite(&got_buffer); - DCHECK(!status.ok() || got_buffer) << "Accounted in min reservation"; +Status PartitionedAggregationNode::Partition::Spill(bool more_aggregate_rows) { + DCHECK(!parent->is_streaming_preagg_); + DCHECK(!is_closed); + DCHECK(!is_spilled()); + // TODO(ml): enable spill + std::stringstream msg; + msg << "New partitioned Aggregation in spill"; + LIMIT_EXCEEDED(parent->mem_tracker(), parent->state_, msg.str()); + // RETURN_IF_ERROR(parent->state_->StartSpilling(parent->mem_tracker())); + + RETURN_IF_ERROR(SerializeStreamForSpilling()); + + // Free the in-memory result data. + NewAggFnEvaluator::Close(agg_fn_evals, parent->state_); + agg_fn_evals.clear(); + + if (agg_fn_pool.get() != NULL) { + agg_fn_pool->free_all(); + agg_fn_pool.reset(); } - if (!status.ok()) { - hash_tbl->Close(); - hash_tbl.reset(); - return status; + + hash_tbl->Close(); + hash_tbl.reset(); + + // Unpin the stream to free memory, but leave a write buffer in place so we can + // continue appending rows to one of the streams in the partition. 
+ DCHECK(aggregated_row_stream->has_write_iterator()); + DCHECK(!unaggregated_row_stream->has_write_iterator()); + if (more_aggregate_rows) { + // aggregated_row_stream->UnpinStream(BufferedTupleStream3::UNPIN_ALL_EXCEPT_CURRENT); + } else { + // aggregated_row_stream->UnpinStream(BufferedTupleStream3::UNPIN_ALL); + bool got_buffer; + RETURN_IF_ERROR(unaggregated_row_stream->PrepareForWrite(&got_buffer)); + DCHECK(got_buffer) << "Accounted in min reservation" + << parent->_buffer_pool_client.DebugString(); } - DCHECK(parent->serialize_stream_->has_write_iterator()); - } - return Status::OK(); -} -Status PartitionedAggregationNode::Partition::Spill(bool more_aggregate_rows) { - DCHECK(!parent->is_streaming_preagg_); - DCHECK(!is_closed); - DCHECK(!is_spilled()); - // TODO(ml): enable spill - std::stringstream msg; - msg << "New partitioned Aggregation in spill"; - LIMIT_EXCEEDED(parent->mem_tracker(), parent->state_, msg.str()); - // RETURN_IF_ERROR(parent->state_->StartSpilling(parent->mem_tracker())); - - RETURN_IF_ERROR(SerializeStreamForSpilling()); - - // Free the in-memory result data. - NewAggFnEvaluator::Close(agg_fn_evals, parent->state_); - agg_fn_evals.clear(); - - if (agg_fn_pool.get() != NULL) { - agg_fn_pool->free_all(); - agg_fn_pool.reset(); - } - - hash_tbl->Close(); - hash_tbl.reset(); - - // Unpin the stream to free memory, but leave a write buffer in place so we can - // continue appending rows to one of the streams in the partition. 
- DCHECK(aggregated_row_stream->has_write_iterator()); - DCHECK(!unaggregated_row_stream->has_write_iterator()); - if (more_aggregate_rows) { -// aggregated_row_stream->UnpinStream(BufferedTupleStream3::UNPIN_ALL_EXCEPT_CURRENT); - } else { -// aggregated_row_stream->UnpinStream(BufferedTupleStream3::UNPIN_ALL); - bool got_buffer; - RETURN_IF_ERROR(unaggregated_row_stream->PrepareForWrite(&got_buffer)); - DCHECK(got_buffer) - << "Accounted in min reservation" << parent->_buffer_pool_client.DebugString(); - } - - COUNTER_UPDATE(parent->num_spilled_partitions_, 1); - if (parent->num_spilled_partitions_->value() == 1) { - parent->add_runtime_exec_option("Spilled"); - } - return Status::OK(); + COUNTER_UPDATE(parent->num_spilled_partitions_, 1); + if (parent->num_spilled_partitions_->value() == 1) { + parent->add_runtime_exec_option("Spilled"); + } + return Status::OK(); } void PartitionedAggregationNode::Partition::Close(bool finalize_rows) { - if (is_closed) return; - is_closed = true; - if (aggregated_row_stream.get() != NULL) { - if (finalize_rows && hash_tbl.get() != NULL) { - // We need to walk all the rows and Finalize them here so the UDA gets a chance - // to cleanup. If the hash table is gone (meaning this was spilled), the rows - // should have been finalized/serialized in Spill(). - parent->CleanupHashTbl(agg_fn_evals, hash_tbl->Begin(parent->ht_ctx_.get())); + if (is_closed) return; + is_closed = true; + if (aggregated_row_stream.get() != NULL) { + if (finalize_rows && hash_tbl.get() != NULL) { + // We need to walk all the rows and Finalize them here so the UDA gets a chance + // to cleanup. If the hash table is gone (meaning this was spilled), the rows + // should have been finalized/serialized in Spill(). 
+ parent->CleanupHashTbl(agg_fn_evals, hash_tbl->Begin(parent->ht_ctx_.get())); + } + aggregated_row_stream->Close(NULL, RowBatch::FlushMode::NO_FLUSH_RESOURCES); + } + if (hash_tbl.get() != NULL) hash_tbl->Close(); + if (unaggregated_row_stream.get() != NULL) { + unaggregated_row_stream->Close(NULL, RowBatch::FlushMode::NO_FLUSH_RESOURCES); } - aggregated_row_stream->Close(NULL, RowBatch::FlushMode::NO_FLUSH_RESOURCES); - } - if (hash_tbl.get() != NULL) hash_tbl->Close(); - if (unaggregated_row_stream.get() != NULL) { - unaggregated_row_stream->Close(NULL, RowBatch::FlushMode::NO_FLUSH_RESOURCES); - } - - for (NewAggFnEvaluator* eval : agg_fn_evals) eval->Close(parent->state_); - if (agg_fn_pool.get() != NULL) agg_fn_pool->free_all(); + + for (NewAggFnEvaluator* eval : agg_fn_evals) eval->Close(parent->state_); + if (agg_fn_pool.get() != NULL) agg_fn_pool->free_all(); } Tuple* PartitionedAggregationNode::ConstructSingletonOutputTuple( - const vector& agg_fn_evals, MemPool* pool) { - DCHECK(grouping_exprs_.empty()); - Tuple* output_tuple = Tuple::create(intermediate_tuple_desc_->byte_size(), pool); - InitAggSlots(agg_fn_evals, output_tuple); - return output_tuple; + const vector& agg_fn_evals, MemPool* pool) { + DCHECK(grouping_exprs_.empty()); + Tuple* output_tuple = Tuple::create(intermediate_tuple_desc_->byte_size(), pool); + InitAggSlots(agg_fn_evals, output_tuple); + return output_tuple; } Tuple* PartitionedAggregationNode::ConstructIntermediateTuple( - const vector& agg_fn_evals, MemPool* pool, Status* status) { - const int fixed_size = intermediate_tuple_desc_->byte_size(); - const int varlen_size = GroupingExprsVarlenSize(); - const int tuple_data_size = fixed_size + varlen_size; - uint8_t* tuple_data = pool->try_allocate(tuple_data_size); - if (UNLIKELY(tuple_data == NULL)) { - string details = Substitute("Cannot perform aggregation at node with id $0. 
Failed " - "to allocate $1 bytes for intermediate tuple.", _id, tuple_data_size); - *status = pool->mem_tracker()->MemLimitExceeded(state_, details, tuple_data_size); - return NULL; - } - memset(tuple_data, 0, fixed_size); - Tuple* intermediate_tuple = reinterpret_cast(tuple_data); - uint8_t* varlen_data = tuple_data + fixed_size; - CopyGroupingValues(intermediate_tuple, varlen_data, varlen_size); - InitAggSlots(agg_fn_evals, intermediate_tuple); - return intermediate_tuple; + const vector& agg_fn_evals, MemPool* pool, Status* status) { + const int fixed_size = intermediate_tuple_desc_->byte_size(); + const int varlen_size = GroupingExprsVarlenSize(); + const int tuple_data_size = fixed_size + varlen_size; + uint8_t* tuple_data = pool->try_allocate(tuple_data_size); + if (UNLIKELY(tuple_data == NULL)) { + string details = Substitute( + "Cannot perform aggregation at node with id $0. Failed " + "to allocate $1 bytes for intermediate tuple.", + _id, tuple_data_size); + *status = pool->mem_tracker()->MemLimitExceeded(state_, details, tuple_data_size); + return NULL; + } + memset(tuple_data, 0, fixed_size); + Tuple* intermediate_tuple = reinterpret_cast(tuple_data); + uint8_t* varlen_data = tuple_data + fixed_size; + CopyGroupingValues(intermediate_tuple, varlen_data, varlen_size); + InitAggSlots(agg_fn_evals, intermediate_tuple); + return intermediate_tuple; } Tuple* PartitionedAggregationNode::ConstructIntermediateTuple( - const vector& agg_fn_evals, BufferedTupleStream3* stream, - Status* status) { - DCHECK(stream != NULL && status != NULL); - // Allocate space for the entire tuple in the stream. 
- const int fixed_size = intermediate_tuple_desc_->byte_size(); - const int varlen_size = GroupingExprsVarlenSize(); - const int tuple_size = fixed_size + varlen_size; - uint8_t* tuple_data = stream->AddRowCustomBegin(tuple_size, status); - if (UNLIKELY(tuple_data == nullptr)) { - // If we failed to allocate and did not hit an error (indicated by a non-ok status), - // the caller of this function can try to free some space, e.g. through spilling, and - // re-attempt to allocate space for this row. - return nullptr; - } - Tuple* tuple = reinterpret_cast(tuple_data); - tuple->init(fixed_size); - uint8_t* varlen_buffer = tuple_data + fixed_size; - CopyGroupingValues(tuple, varlen_buffer, varlen_size); - InitAggSlots(agg_fn_evals, tuple); - stream->AddRowCustomEnd(tuple_size); - return tuple; + const vector& agg_fn_evals, BufferedTupleStream3* stream, + Status* status) { + DCHECK(stream != NULL && status != NULL); + // Allocate space for the entire tuple in the stream. + const int fixed_size = intermediate_tuple_desc_->byte_size(); + const int varlen_size = GroupingExprsVarlenSize(); + const int tuple_size = fixed_size + varlen_size; + uint8_t* tuple_data = stream->AddRowCustomBegin(tuple_size, status); + if (UNLIKELY(tuple_data == nullptr)) { + // If we failed to allocate and did not hit an error (indicated by a non-ok status), + // the caller of this function can try to free some space, e.g. through spilling, and + // re-attempt to allocate space for this row. + return nullptr; + } + Tuple* tuple = reinterpret_cast(tuple_data); + tuple->init(fixed_size); + uint8_t* varlen_buffer = tuple_data + fixed_size; + CopyGroupingValues(tuple, varlen_buffer, varlen_size); + InitAggSlots(agg_fn_evals, tuple); + stream->AddRowCustomEnd(tuple_size); + return tuple; } int PartitionedAggregationNode::GroupingExprsVarlenSize() { - int varlen_size = 0; - // TODO: The hash table could compute this as it hashes. 
- for (int expr_idx: string_grouping_exprs_) { - StringValue* sv = reinterpret_cast(ht_ctx_->ExprValue(expr_idx)); - // Avoid branching by multiplying length by null bit. - varlen_size += sv->len * !ht_ctx_->ExprValueNull(expr_idx); - } - return varlen_size; + int varlen_size = 0; + // TODO: The hash table could compute this as it hashes. + for (int expr_idx : string_grouping_exprs_) { + StringValue* sv = reinterpret_cast(ht_ctx_->ExprValue(expr_idx)); + // Avoid branching by multiplying length by null bit. + varlen_size += sv->len * !ht_ctx_->ExprValueNull(expr_idx); + } + return varlen_size; } // TODO: codegen this function. -void PartitionedAggregationNode::CopyGroupingValues(Tuple* intermediate_tuple, - uint8_t* buffer, int varlen_size) { - // Copy over all grouping slots (the variable length data is copied below). - for (int i = 0; i < grouping_exprs_.size(); ++i) { - SlotDescriptor* slot_desc = intermediate_tuple_desc_->slots()[i]; - if (ht_ctx_->ExprValueNull(i)) { - intermediate_tuple->set_null(slot_desc->null_indicator_offset()); - } else { - void* src = ht_ctx_->ExprValue(i); - void* dst = intermediate_tuple->get_slot(slot_desc->tuple_offset()); - memcpy(dst, src, slot_desc->slot_size()); +void PartitionedAggregationNode::CopyGroupingValues(Tuple* intermediate_tuple, uint8_t* buffer, + int varlen_size) { + // Copy over all grouping slots (the variable length data is copied below). 
+ for (int i = 0; i < grouping_exprs_.size(); ++i) { + SlotDescriptor* slot_desc = intermediate_tuple_desc_->slots()[i]; + if (ht_ctx_->ExprValueNull(i)) { + intermediate_tuple->set_null(slot_desc->null_indicator_offset()); + } else { + void* src = ht_ctx_->ExprValue(i); + void* dst = intermediate_tuple->get_slot(slot_desc->tuple_offset()); + memcpy(dst, src, slot_desc->slot_size()); + } + } + + for (int expr_idx : string_grouping_exprs_) { + if (ht_ctx_->ExprValueNull(expr_idx)) continue; + + SlotDescriptor* slot_desc = intermediate_tuple_desc_->slots()[expr_idx]; + // ptr and len were already copied to the fixed-len part of string value + StringValue* sv = reinterpret_cast( + intermediate_tuple->get_slot(slot_desc->tuple_offset())); + memcpy(buffer, sv->ptr, sv->len); + sv->ptr = reinterpret_cast(buffer); + buffer += sv->len; } - } - - for (int expr_idx: string_grouping_exprs_) { - if (ht_ctx_->ExprValueNull(expr_idx)) continue; - - SlotDescriptor* slot_desc = intermediate_tuple_desc_->slots()[expr_idx]; - // ptr and len were already copied to the fixed-len part of string value - StringValue* sv = reinterpret_cast( - intermediate_tuple->get_slot(slot_desc->tuple_offset())); - memcpy(buffer, sv->ptr, sv->len); - sv->ptr = reinterpret_cast(buffer); - buffer += sv->len; - } } // TODO: codegen this function. -void PartitionedAggregationNode::InitAggSlots( - const vector& agg_fn_evals, Tuple* intermediate_tuple) { - vector::const_iterator slot_desc = - intermediate_tuple_desc_->slots().begin() + grouping_exprs_.size(); - for (int i = 0; i < agg_fn_evals.size(); ++i, ++slot_desc) { - // To minimize branching on the UpdateTuple path, initialize the result value so that - // the Add() UDA function can ignore the NULL bit of its destination value. E.g. for - // SUM(), if we initialize the destination value to 0 (with the NULL bit set), we can - // just start adding to the destination value (rather than repeatedly checking the - // destination NULL bit. 
The codegen'd version of UpdateSlot() exploits this to - // eliminate a branch per value. - // - // For boolean and numeric types, the default values are false/0, so the nullable - // aggregate functions SUM() and AVG() produce the correct result. For MIN()/MAX(), - // initialize the value to max/min possible value for the same effect. - NewAggFnEvaluator* eval = agg_fn_evals[i]; - eval->Init(intermediate_tuple); - - DCHECK(agg_fns_[i] == &(eval->agg_fn())); - const AggFn* agg_fn = agg_fns_[i]; - const AggFn::AggregationOp agg_op = agg_fn->agg_op(); - if ((agg_op == AggFn::MIN || agg_op == AggFn::MAX) && - !agg_fn->intermediate_type().is_string_type() && - !agg_fn->intermediate_type().is_date_type()) { - ExprValue default_value; - void* default_value_ptr = NULL; - if (agg_op == AggFn::MIN) { - default_value_ptr = default_value.set_to_max((*slot_desc)->type()); - } else { - DCHECK_EQ(agg_op, AggFn::MAX); - default_value_ptr = default_value.set_to_min((*slot_desc)->type()); - } - RawValue::write(default_value_ptr, intermediate_tuple, *slot_desc, NULL); +void PartitionedAggregationNode::InitAggSlots(const vector& agg_fn_evals, + Tuple* intermediate_tuple) { + vector::const_iterator slot_desc = + intermediate_tuple_desc_->slots().begin() + grouping_exprs_.size(); + for (int i = 0; i < agg_fn_evals.size(); ++i, ++slot_desc) { + // To minimize branching on the UpdateTuple path, initialize the result value so that + // the Add() UDA function can ignore the NULL bit of its destination value. E.g. for + // SUM(), if we initialize the destination value to 0 (with the NULL bit set), we can + // just start adding to the destination value (rather than repeatedly checking the + // destination NULL bit. The codegen'd version of UpdateSlot() exploits this to + // eliminate a branch per value. + // + // For boolean and numeric types, the default values are false/0, so the nullable + // aggregate functions SUM() and AVG() produce the correct result. 
For MIN()/MAX(), + // initialize the value to max/min possible value for the same effect. + NewAggFnEvaluator* eval = agg_fn_evals[i]; + eval->Init(intermediate_tuple); + + DCHECK(agg_fns_[i] == &(eval->agg_fn())); + const AggFn* agg_fn = agg_fns_[i]; + const AggFn::AggregationOp agg_op = agg_fn->agg_op(); + if ((agg_op == AggFn::MIN || agg_op == AggFn::MAX) && + !agg_fn->intermediate_type().is_string_type() && + !agg_fn->intermediate_type().is_date_type()) { + ExprValue default_value; + void* default_value_ptr = NULL; + if (agg_op == AggFn::MIN) { + default_value_ptr = default_value.set_to_max((*slot_desc)->type()); + } else { + DCHECK_EQ(agg_op, AggFn::MAX); + default_value_ptr = default_value.set_to_min((*slot_desc)->type()); + } + RawValue::write(default_value_ptr, intermediate_tuple, *slot_desc, NULL); + } } - } } -void PartitionedAggregationNode::UpdateTuple(NewAggFnEvaluator** agg_fn_evals, - Tuple* tuple, TupleRow* row, bool is_merge) { - DCHECK(tuple != NULL || agg_fns_.empty()); - for (int i = 0; i < agg_fns_.size(); ++i) { - if (is_merge) { - agg_fn_evals[i]->Merge(row->get_tuple(0), tuple); - } else { - agg_fn_evals[i]->Add(row, tuple); +void PartitionedAggregationNode::UpdateTuple(NewAggFnEvaluator** agg_fn_evals, Tuple* tuple, + TupleRow* row, bool is_merge) { + DCHECK(tuple != NULL || agg_fns_.empty()); + for (int i = 0; i < agg_fns_.size(); ++i) { + if (is_merge) { + agg_fn_evals[i]->Merge(row->get_tuple(0), tuple); + } else { + agg_fn_evals[i]->Add(row, tuple); + } } - } } -Tuple* PartitionedAggregationNode::GetOutputTuple( - const vector& agg_fn_evals, Tuple* tuple, MemPool* pool) { - DCHECK(tuple != NULL || agg_fn_evals.empty()) << tuple; - Tuple* dst = tuple; - if (needs_finalize_ && intermediate_tuple_id_ != output_tuple_id_) { - dst = Tuple::create(output_tuple_desc_->byte_size(), pool); - } - if (needs_finalize_) { - NewAggFnEvaluator::Finalize(agg_fn_evals, tuple, dst); - } else { - NewAggFnEvaluator::Serialize(agg_fn_evals, tuple); - } - // 
Copy grouping values from tuple to dst. - // TODO: Codegen this. - if (dst != tuple) { - int num_grouping_slots = grouping_exprs_.size(); - for (int i = 0; i < num_grouping_slots; ++i) { - SlotDescriptor* src_slot_desc = intermediate_tuple_desc_->slots()[i]; - SlotDescriptor* dst_slot_desc = output_tuple_desc_->slots()[i]; - bool src_slot_null = tuple->is_null(src_slot_desc->null_indicator_offset()); - void* src_slot = NULL; - if (!src_slot_null) src_slot = tuple->get_slot(src_slot_desc->tuple_offset()); - RawValue::write(src_slot, dst, dst_slot_desc, NULL); +Tuple* PartitionedAggregationNode::GetOutputTuple(const vector& agg_fn_evals, + Tuple* tuple, MemPool* pool) { + DCHECK(tuple != NULL || agg_fn_evals.empty()) << tuple; + Tuple* dst = tuple; + if (needs_finalize_ && intermediate_tuple_id_ != output_tuple_id_) { + dst = Tuple::create(output_tuple_desc_->byte_size(), pool); } - } - return dst; + if (needs_finalize_) { + NewAggFnEvaluator::Finalize(agg_fn_evals, tuple, dst); + } else { + NewAggFnEvaluator::Serialize(agg_fn_evals, tuple); + } + // Copy grouping values from tuple to dst. + // TODO: Codegen this. + if (dst != tuple) { + int num_grouping_slots = grouping_exprs_.size(); + for (int i = 0; i < num_grouping_slots; ++i) { + SlotDescriptor* src_slot_desc = intermediate_tuple_desc_->slots()[i]; + SlotDescriptor* dst_slot_desc = output_tuple_desc_->slots()[i]; + bool src_slot_null = tuple->is_null(src_slot_desc->null_indicator_offset()); + void* src_slot = NULL; + if (!src_slot_null) src_slot = tuple->get_slot(src_slot_desc->tuple_offset()); + RawValue::write(src_slot, dst, dst_slot_desc, NULL); + } + } + return dst; } template -Status PartitionedAggregationNode::AppendSpilledRow( - Partition* partition, TupleRow* row) { - DCHECK(!is_streaming_preagg_); - DCHECK(partition->is_spilled()); - BufferedTupleStream3* stream = AGGREGATED_ROWS ? 
- partition->aggregated_row_stream.get() : - partition->unaggregated_row_stream.get(); - DCHECK(!stream->is_pinned()); - Status status; - if (LIKELY(stream->AddRow(row, &status))) return Status::OK(); - RETURN_IF_ERROR(status); - - // Keep trying to free memory by spilling until we succeed or hit an error. - // Running out of partitions to spill is treated as an error by SpillPartition(). - while (true) { - RETURN_IF_ERROR(SpillPartition(AGGREGATED_ROWS)); - if (stream->AddRow(row, &status)) return Status::OK(); +Status PartitionedAggregationNode::AppendSpilledRow(Partition* partition, TupleRow* row) { + DCHECK(!is_streaming_preagg_); + DCHECK(partition->is_spilled()); + BufferedTupleStream3* stream = AGGREGATED_ROWS ? partition->aggregated_row_stream.get() + : partition->unaggregated_row_stream.get(); + DCHECK(!stream->is_pinned()); + Status status; + if (LIKELY(stream->AddRow(row, &status))) return Status::OK(); RETURN_IF_ERROR(status); - } + + // Keep trying to free memory by spilling until we succeed or hit an error. + // Running out of partitions to spill is treated as an error by SpillPartition(). 
+ while (true) { + RETURN_IF_ERROR(SpillPartition(AGGREGATED_ROWS)); + if (stream->AddRow(row, &status)) return Status::OK(); + RETURN_IF_ERROR(status); + } } string PartitionedAggregationNode::DebugString(int indentation_level) const { - stringstream ss; - DebugString(indentation_level, &ss); - return ss.str(); + stringstream ss; + DebugString(indentation_level, &ss); + return ss.str(); } -void PartitionedAggregationNode::DebugString(int indentation_level, - stringstream* out) const { - *out << string(indentation_level * 2, ' '); - *out << "PartitionedAggregationNode(" - << "intermediate_tuple_id=" << intermediate_tuple_id_ - << " output_tuple_id=" << output_tuple_id_ - << " needs_finalize=" << needs_finalize_ - << " grouping_exprs=" << Expr::debug_string(grouping_exprs_) - << " agg_exprs=" << AggFn::DebugString(agg_fns_); - ExecNode::debug_string(indentation_level, out); - *out << ")"; +void PartitionedAggregationNode::DebugString(int indentation_level, stringstream* out) const { + *out << string(indentation_level * 2, ' '); + *out << "PartitionedAggregationNode(" + << "intermediate_tuple_id=" << intermediate_tuple_id_ + << " output_tuple_id=" << output_tuple_id_ << " needs_finalize=" << needs_finalize_ + << " grouping_exprs=" << Expr::debug_string(grouping_exprs_) + << " agg_exprs=" << AggFn::DebugString(agg_fns_); + ExecNode::debug_string(indentation_level, out); + *out << ")"; } -Status PartitionedAggregationNode::CreateHashPartitions( - int level, int single_partition_idx) { - if (is_streaming_preagg_) DCHECK_EQ(level, 0); - if (UNLIKELY(level >= MAX_PARTITION_DEPTH)) { +Status PartitionedAggregationNode::CreateHashPartitions(int level, int single_partition_idx) { + if (is_streaming_preagg_) DCHECK_EQ(level, 0); + if (UNLIKELY(level >= MAX_PARTITION_DEPTH)) { stringstream error_msg; - error_msg << "Cannot perform aggregation at hash aggregation node with id " - << _id << '.' 
- << " The input data was partitioned the maximum number of " - << MAX_PARTITION_DEPTH << " times." - << " This could mean there is significant skew in the data or the memory limit is" - << " set too low."; + error_msg << "Cannot perform aggregation at hash aggregation node with id " << _id << '.' + << " The input data was partitioned the maximum number of " << MAX_PARTITION_DEPTH + << " times." + << " This could mean there is significant skew in the data or the memory limit is" + << " set too low."; return state_->set_mem_limit_exceeded(error_msg.str()); - } - ht_ctx_->set_level(level); - - DCHECK(hash_partitions_.empty()); - int num_partitions_created = 0; - for (int i = 0; i < PARTITION_FANOUT; ++i) { - hash_tbls_[i] = nullptr; - if (single_partition_idx == -1 || i == single_partition_idx) { - Partition* new_partition = partition_pool_->add(new Partition(this, level, i)); - ++num_partitions_created; - hash_partitions_.push_back(new_partition); - RETURN_IF_ERROR(new_partition->InitStreams()); - } else { - hash_partitions_.push_back(nullptr); } - } - - // Now that all the streams are reserved (meaning we have enough memory to execute - // the algorithm), allocate the hash tables. These can fail and we can still continue. - for (int i = 0; i < PARTITION_FANOUT; ++i) { - Partition* partition = hash_partitions_[i]; - if (partition == nullptr) continue; - if (partition->aggregated_row_stream == nullptr) { - // Failed to create the aggregated row stream - cannot create a hash table. - // Just continue with a NULL hash table so rows will be passed through. - DCHECK(is_streaming_preagg_); - } else { - bool got_memory; - RETURN_IF_ERROR(partition->InitHashTable(&got_memory)); - // Spill the partition if we cannot create a hash table for a merge aggregation. - if (UNLIKELY(!got_memory)) { - DCHECK(!is_streaming_preagg_) << "Preagg reserves enough memory for hash tables"; - // If we're repartitioning, we will be writing aggregated rows first. 
- RETURN_IF_ERROR(partition->Spill(level > 0)); - } + ht_ctx_->set_level(level); + + DCHECK(hash_partitions_.empty()); + int num_partitions_created = 0; + for (int i = 0; i < PARTITION_FANOUT; ++i) { + hash_tbls_[i] = nullptr; + if (single_partition_idx == -1 || i == single_partition_idx) { + Partition* new_partition = partition_pool_->add(new Partition(this, level, i)); + ++num_partitions_created; + hash_partitions_.push_back(new_partition); + RETURN_IF_ERROR(new_partition->InitStreams()); + } else { + hash_partitions_.push_back(nullptr); + } } - hash_tbls_[i] = partition->hash_tbl.get(); - } - // In this case we did not have to repartition, so ensure that while building the hash - // table all rows will be inserted into the partition at 'single_partition_idx' in case - // a non deterministic grouping expression causes a row to hash to a different - // partition index. - if (single_partition_idx != -1) { - Partition* partition = hash_partitions_[single_partition_idx]; + + // Now that all the streams are reserved (meaning we have enough memory to execute + // the algorithm), allocate the hash tables. These can fail and we can still continue. for (int i = 0; i < PARTITION_FANOUT; ++i) { - hash_partitions_[i] = partition; - hash_tbls_[i] = partition->hash_tbl.get(); + Partition* partition = hash_partitions_[i]; + if (partition == nullptr) continue; + if (partition->aggregated_row_stream == nullptr) { + // Failed to create the aggregated row stream - cannot create a hash table. + // Just continue with a NULL hash table so rows will be passed through. + DCHECK(is_streaming_preagg_); + } else { + bool got_memory; + RETURN_IF_ERROR(partition->InitHashTable(&got_memory)); + // Spill the partition if we cannot create a hash table for a merge aggregation. + if (UNLIKELY(!got_memory)) { + DCHECK(!is_streaming_preagg_) << "Preagg reserves enough memory for hash tables"; + // If we're repartitioning, we will be writing aggregated rows first. 
+ RETURN_IF_ERROR(partition->Spill(level > 0)); + } + } + hash_tbls_[i] = partition->hash_tbl.get(); + } + // In this case we did not have to repartition, so ensure that while building the hash + // table all rows will be inserted into the partition at 'single_partition_idx' in case + // a non deterministic grouping expression causes a row to hash to a different + // partition index. + if (single_partition_idx != -1) { + Partition* partition = hash_partitions_[single_partition_idx]; + for (int i = 0; i < PARTITION_FANOUT; ++i) { + hash_partitions_[i] = partition; + hash_tbls_[i] = partition->hash_tbl.get(); + } } - } - COUNTER_UPDATE(partitions_created_, num_partitions_created); - if (!is_streaming_preagg_) { - COUNTER_SET(max_partition_level_, level); - } - return Status::OK(); + COUNTER_UPDATE(partitions_created_, num_partitions_created); + if (!is_streaming_preagg_) { + COUNTER_SET(max_partition_level_, level); + } + return Status::OK(); } Status PartitionedAggregationNode::CheckAndResizeHashPartitions( - bool partitioning_aggregated_rows, int num_rows, const PartitionedHashTableCtx* ht_ctx) { - DCHECK(!is_streaming_preagg_); - for (int i = 0; i < PARTITION_FANOUT; ++i) { - Partition* partition = hash_partitions_[i]; - if (partition == nullptr) continue; - while (!partition->is_spilled()) { - { - SCOPED_TIMER(ht_resize_timer_); - bool resized; - RETURN_IF_ERROR(partition->hash_tbl->CheckAndResize(num_rows, ht_ctx, &resized)); - if (resized) break; - } - RETURN_IF_ERROR(SpillPartition(partitioning_aggregated_rows)); + bool partitioning_aggregated_rows, int num_rows, const PartitionedHashTableCtx* ht_ctx) { + DCHECK(!is_streaming_preagg_); + for (int i = 0; i < PARTITION_FANOUT; ++i) { + Partition* partition = hash_partitions_[i]; + if (partition == nullptr) continue; + while (!partition->is_spilled()) { + { + SCOPED_TIMER(ht_resize_timer_); + bool resized; + RETURN_IF_ERROR(partition->hash_tbl->CheckAndResize(num_rows, ht_ctx, &resized)); + if (resized) break; + 
} + RETURN_IF_ERROR(SpillPartition(partitioning_aggregated_rows)); + } } - } - return Status::OK(); + return Status::OK(); } Status PartitionedAggregationNode::NextPartition() { - DCHECK(output_partition_ == nullptr); - - if (!is_in_subplan() && spilled_partitions_.empty()) { - // All partitions are in memory. Release reservation that was used for previous - // partitions that is no longer needed. If we have spilled partitions, we want to - // hold onto all reservation in case it is needed to process the spilled partitions. - DCHECK(!_buffer_pool_client.has_unpinned_pages()); - Status status = release_unused_reservation(); - DCHECK(status.ok()) << "Should not fail - all partitions are in memory so there are " - << "no unpinned pages. " << status.get_error_msg(); - } - - // Keep looping until we get to a partition that fits in memory. - Partition* partition = nullptr; - while (true) { - // First return partitions that are fully aggregated (and in memory). - if (!aggregated_partitions_.empty()) { - partition = aggregated_partitions_.front(); - DCHECK(!partition->is_spilled()); - aggregated_partitions_.pop_front(); - break; + DCHECK(output_partition_ == nullptr); + + if (!is_in_subplan() && spilled_partitions_.empty()) { + // All partitions are in memory. Release reservation that was used for previous + // partitions that is no longer needed. If we have spilled partitions, we want to + // hold onto all reservation in case it is needed to process the spilled partitions. + DCHECK(!_buffer_pool_client.has_unpinned_pages()); + Status status = release_unused_reservation(); + DCHECK(status.ok()) << "Should not fail - all partitions are in memory so there are " + << "no unpinned pages. " << status.get_error_msg(); } - // No aggregated partitions in memory - we should not be using any reservation aside - // from 'serialize_stream_'. - DCHECK_EQ(serialize_stream_ != nullptr ? 
serialize_stream_->BytesPinned(false) : 0, - _buffer_pool_client.GetUsedReservation()) << _buffer_pool_client.DebugString(); - - // Try to fit a single spilled partition in memory. We can often do this because - // we only need to fit 1/PARTITION_FANOUT of the data in memory. - // TODO: in some cases when the partition probably won't fit in memory it could - // be better to skip directly to repartitioning. - RETURN_IF_ERROR(BuildSpilledPartition(&partition)); - if (partition != nullptr) break; - - // If we can't fit the partition in memory, repartition it. - RETURN_IF_ERROR(RepartitionSpilledPartition()); - } - DCHECK(!partition->is_spilled()); - DCHECK(partition->hash_tbl.get() != nullptr); - DCHECK(partition->aggregated_row_stream->is_pinned()); - - output_partition_ = partition; - output_iterator_ = output_partition_->hash_tbl->Begin(ht_ctx_.get()); - COUNTER_UPDATE(num_hash_buckets_, output_partition_->hash_tbl->num_buckets()); - COUNTER_UPDATE(ht_resize_counter_, output_partition_->hash_tbl->num_resize()); - COUNTER_UPDATE(num_hash_filled_buckets_, output_partition_->hash_tbl->num_filled_buckets()); - COUNTER_UPDATE(num_hash_probe_, output_partition_->hash_tbl->num_probe()); - COUNTER_UPDATE(num_hash_failed_probe_, output_partition_->hash_tbl->num_failed_probe()); - COUNTER_UPDATE(num_hash_travel_length_, output_partition_->hash_tbl->travel_length()); - COUNTER_UPDATE(num_hash_collisions_, output_partition_->hash_tbl->NumHashCollisions()); - - return Status::OK(); + // Keep looping until we get to a partition that fits in memory. + Partition* partition = nullptr; + while (true) { + // First return partitions that are fully aggregated (and in memory). + if (!aggregated_partitions_.empty()) { + partition = aggregated_partitions_.front(); + DCHECK(!partition->is_spilled()); + aggregated_partitions_.pop_front(); + break; + } + + // No aggregated partitions in memory - we should not be using any reservation aside + // from 'serialize_stream_'. 
+ DCHECK_EQ(serialize_stream_ != nullptr ? serialize_stream_->BytesPinned(false) : 0, + _buffer_pool_client.GetUsedReservation()) + << _buffer_pool_client.DebugString(); + + // Try to fit a single spilled partition in memory. We can often do this because + // we only need to fit 1/PARTITION_FANOUT of the data in memory. + // TODO: in some cases when the partition probably won't fit in memory it could + // be better to skip directly to repartitioning. + RETURN_IF_ERROR(BuildSpilledPartition(&partition)); + if (partition != nullptr) break; + + // If we can't fit the partition in memory, repartition it. + RETURN_IF_ERROR(RepartitionSpilledPartition()); + } + DCHECK(!partition->is_spilled()); + DCHECK(partition->hash_tbl.get() != nullptr); + DCHECK(partition->aggregated_row_stream->is_pinned()); + + output_partition_ = partition; + output_iterator_ = output_partition_->hash_tbl->Begin(ht_ctx_.get()); + COUNTER_UPDATE(num_hash_buckets_, output_partition_->hash_tbl->num_buckets()); + COUNTER_UPDATE(ht_resize_counter_, output_partition_->hash_tbl->num_resize()); + COUNTER_UPDATE(num_hash_filled_buckets_, output_partition_->hash_tbl->num_filled_buckets()); + COUNTER_UPDATE(num_hash_probe_, output_partition_->hash_tbl->num_probe()); + COUNTER_UPDATE(num_hash_failed_probe_, output_partition_->hash_tbl->num_failed_probe()); + COUNTER_UPDATE(num_hash_travel_length_, output_partition_->hash_tbl->travel_length()); + COUNTER_UPDATE(num_hash_collisions_, output_partition_->hash_tbl->NumHashCollisions()); + + return Status::OK(); } Status PartitionedAggregationNode::BuildSpilledPartition(Partition** built_partition) { - DCHECK(!spilled_partitions_.empty()); - DCHECK(!is_streaming_preagg_); - // Leave the partition in 'spilled_partitions_' to be closed if we hit an error. - Partition* src_partition = spilled_partitions_.front(); - DCHECK(src_partition->is_spilled()); - - // Create a new hash partition from the rows of the spilled partition. 
This is simpler - // than trying to finish building a partially-built partition in place. We only - // initialise one hash partition that all rows in 'src_partition' will hash to. - RETURN_IF_ERROR(CreateHashPartitions(src_partition->level, src_partition->idx)); - Partition* dst_partition = hash_partitions_[src_partition->idx]; - DCHECK(dst_partition != nullptr); - - // Rebuild the hash table over spilled aggregate rows then start adding unaggregated - // rows to the hash table. It's possible the partition will spill at either stage. - // In that case we need to finish processing 'src_partition' so that all rows are - // appended to 'dst_partition'. - // TODO: if the partition spills again but the aggregation reduces the input - // significantly, we could do better here by keeping the incomplete hash table in - // memory and only spilling unaggregated rows that didn't fit in the hash table - // (somewhat similar to the passthrough pre-aggregation). - RETURN_IF_ERROR(ProcessStream(src_partition->aggregated_row_stream.get())); - RETURN_IF_ERROR(ProcessStream(src_partition->unaggregated_row_stream.get())); - src_partition->Close(false); - spilled_partitions_.pop_front(); - hash_partitions_.clear(); - - if (dst_partition->is_spilled()) { - PushSpilledPartition(dst_partition); - *built_partition = nullptr; - // Spilled the partition - we should not be using any reservation except from - // 'serialize_stream_'. - DCHECK_EQ(serialize_stream_ != nullptr ? serialize_stream_->BytesPinned(false) : 0, - _buffer_pool_client.GetUsedReservation()) << _buffer_pool_client.DebugString(); - } else { - *built_partition = dst_partition; - } - return Status::OK(); + DCHECK(!spilled_partitions_.empty()); + DCHECK(!is_streaming_preagg_); + // Leave the partition in 'spilled_partitions_' to be closed if we hit an error. + Partition* src_partition = spilled_partitions_.front(); + DCHECK(src_partition->is_spilled()); + + // Create a new hash partition from the rows of the spilled partition. 
This is simpler + // than trying to finish building a partially-built partition in place. We only + // initialise one hash partition that all rows in 'src_partition' will hash to. + RETURN_IF_ERROR(CreateHashPartitions(src_partition->level, src_partition->idx)); + Partition* dst_partition = hash_partitions_[src_partition->idx]; + DCHECK(dst_partition != nullptr); + + // Rebuild the hash table over spilled aggregate rows then start adding unaggregated + // rows to the hash table. It's possible the partition will spill at either stage. + // In that case we need to finish processing 'src_partition' so that all rows are + // appended to 'dst_partition'. + // TODO: if the partition spills again but the aggregation reduces the input + // significantly, we could do better here by keeping the incomplete hash table in + // memory and only spilling unaggregated rows that didn't fit in the hash table + // (somewhat similar to the passthrough pre-aggregation). + RETURN_IF_ERROR(ProcessStream(src_partition->aggregated_row_stream.get())); + RETURN_IF_ERROR(ProcessStream(src_partition->unaggregated_row_stream.get())); + src_partition->Close(false); + spilled_partitions_.pop_front(); + hash_partitions_.clear(); + + if (dst_partition->is_spilled()) { + PushSpilledPartition(dst_partition); + *built_partition = nullptr; + // Spilled the partition - we should not be using any reservation except from + // 'serialize_stream_'. + DCHECK_EQ(serialize_stream_ != nullptr ? serialize_stream_->BytesPinned(false) : 0, + _buffer_pool_client.GetUsedReservation()) + << _buffer_pool_client.DebugString(); + } else { + *built_partition = dst_partition; + } + return Status::OK(); } Status PartitionedAggregationNode::RepartitionSpilledPartition() { - DCHECK(!spilled_partitions_.empty()); - DCHECK(!is_streaming_preagg_); - // Leave the partition in 'spilled_partitions_' to be closed if we hit an error. 
- Partition* partition = spilled_partitions_.front(); - DCHECK(partition->is_spilled()); - - // Create the new hash partitions to repartition into. This will allocate a - // write buffer for each partition's aggregated row stream. - RETURN_IF_ERROR(CreateHashPartitions(partition->level + 1)); - COUNTER_UPDATE(num_repartitions_, 1); - - // Rows in this partition could have been spilled into two streams, depending - // on if it is an aggregated intermediate, or an unaggregated row. Aggregated - // rows are processed first to save a hash table lookup in ProcessBatch(). - RETURN_IF_ERROR(ProcessStream(partition->aggregated_row_stream.get())); - - // Prepare write buffers so we can append spilled rows to unaggregated partitions. - for (Partition* hash_partition : hash_partitions_) { - if (!hash_partition->is_spilled()) continue; - // The aggregated rows have been repartitioned. Free up at least a buffer's worth of - // reservation and use it to pin the unaggregated write buffer. -// hash_partition->aggregated_row_stream->UnpinStream(BufferedTupleStream3::UNPIN_ALL); - bool got_buffer; - RETURN_IF_ERROR( - hash_partition->unaggregated_row_stream->PrepareForWrite(&got_buffer)); - DCHECK(got_buffer) - << "Accounted in min reservation" << _buffer_pool_client.DebugString(); - } - RETURN_IF_ERROR(ProcessStream(partition->unaggregated_row_stream.get())); - - COUNTER_UPDATE(num_row_repartitioned_, partition->aggregated_row_stream->num_rows()); - COUNTER_UPDATE(num_row_repartitioned_, partition->unaggregated_row_stream->num_rows()); - - partition->Close(false); - spilled_partitions_.pop_front(); - - // Done processing this partition. Move the new partitions into - // spilled_partitions_/aggregated_partitions_. 
- int64_t num_input_rows = partition->aggregated_row_stream->num_rows() - + partition->unaggregated_row_stream->num_rows(); - RETURN_IF_ERROR(MoveHashPartitions(num_input_rows)); - return Status::OK(); + DCHECK(!spilled_partitions_.empty()); + DCHECK(!is_streaming_preagg_); + // Leave the partition in 'spilled_partitions_' to be closed if we hit an error. + Partition* partition = spilled_partitions_.front(); + DCHECK(partition->is_spilled()); + + // Create the new hash partitions to repartition into. This will allocate a + // write buffer for each partition's aggregated row stream. + RETURN_IF_ERROR(CreateHashPartitions(partition->level + 1)); + COUNTER_UPDATE(num_repartitions_, 1); + + // Rows in this partition could have been spilled into two streams, depending + // on if it is an aggregated intermediate, or an unaggregated row. Aggregated + // rows are processed first to save a hash table lookup in ProcessBatch(). + RETURN_IF_ERROR(ProcessStream(partition->aggregated_row_stream.get())); + + // Prepare write buffers so we can append spilled rows to unaggregated partitions. + for (Partition* hash_partition : hash_partitions_) { + if (!hash_partition->is_spilled()) continue; + // The aggregated rows have been repartitioned. Free up at least a buffer's worth of + // reservation and use it to pin the unaggregated write buffer. 
+ // hash_partition->aggregated_row_stream->UnpinStream(BufferedTupleStream3::UNPIN_ALL); + bool got_buffer; + RETURN_IF_ERROR(hash_partition->unaggregated_row_stream->PrepareForWrite(&got_buffer)); + DCHECK(got_buffer) << "Accounted in min reservation" << _buffer_pool_client.DebugString(); + } + RETURN_IF_ERROR(ProcessStream(partition->unaggregated_row_stream.get())); + + COUNTER_UPDATE(num_row_repartitioned_, partition->aggregated_row_stream->num_rows()); + COUNTER_UPDATE(num_row_repartitioned_, partition->unaggregated_row_stream->num_rows()); + + partition->Close(false); + spilled_partitions_.pop_front(); + + // Done processing this partition. Move the new partitions into + // spilled_partitions_/aggregated_partitions_. + int64_t num_input_rows = partition->aggregated_row_stream->num_rows() + + partition->unaggregated_row_stream->num_rows(); + RETURN_IF_ERROR(MoveHashPartitions(num_input_rows)); + return Status::OK(); } -template +template Status PartitionedAggregationNode::ProcessStream(BufferedTupleStream3* input_stream) { - DCHECK(!is_streaming_preagg_); - if (input_stream->num_rows() > 0) { - while (true) { - bool got_buffer = false; - RETURN_IF_ERROR(input_stream->PrepareForRead(true, &got_buffer)); - if (got_buffer) break; - // Did not have a buffer to read the input stream. Spill and try again. - RETURN_IF_ERROR(SpillPartition(AGGREGATED_ROWS)); - } + DCHECK(!is_streaming_preagg_); + if (input_stream->num_rows() > 0) { + while (true) { + bool got_buffer = false; + RETURN_IF_ERROR(input_stream->PrepareForRead(true, &got_buffer)); + if (got_buffer) break; + // Did not have a buffer to read the input stream. Spill and try again. + RETURN_IF_ERROR(SpillPartition(AGGREGATED_ROWS)); + } - bool eos = false; - const RowDescriptor* desc = - AGGREGATED_ROWS ? 
&intermediate_row_desc_ : &(_children[0]->row_desc()); - RowBatch batch(*desc, state_->batch_size(), mem_tracker().get()); - do { - RETURN_IF_ERROR(input_stream->GetNext(&batch, &eos)); - RETURN_IF_ERROR( - ProcessBatch(&batch, ht_ctx_.get())); - RETURN_IF_ERROR(state_->check_query_state("New partitioned aggregation, while processing stream.")); - batch.reset(); - } while (!eos); - } - input_stream->Close(NULL, RowBatch::FlushMode::NO_FLUSH_RESOURCES); - return Status::OK(); + bool eos = false; + const RowDescriptor* desc = + AGGREGATED_ROWS ? &intermediate_row_desc_ : &(_children[0]->row_desc()); + RowBatch batch(*desc, state_->batch_size(), mem_tracker().get()); + do { + RETURN_IF_ERROR(input_stream->GetNext(&batch, &eos)); + RETURN_IF_ERROR(ProcessBatch(&batch, ht_ctx_.get())); + RETURN_IF_ERROR(state_->check_query_state( + "New partitioned aggregation, while processing stream.")); + batch.reset(); + } while (!eos); + } + input_stream->Close(NULL, RowBatch::FlushMode::NO_FLUSH_RESOURCES); + return Status::OK(); } Status PartitionedAggregationNode::SpillPartition(bool more_aggregate_rows) { - int64_t max_freed_mem = 0; - int partition_idx = -1; - - // Iterate over the partitions and pick the largest partition that is not spilled. - for (int i = 0; i < hash_partitions_.size(); ++i) { - if (hash_partitions_[i] == nullptr) continue; - if (hash_partitions_[i]->is_closed) continue; - if (hash_partitions_[i]->is_spilled()) continue; - // Pass 'true' because we need to keep the write block pinned. See Partition::Spill(). - int64_t mem = hash_partitions_[i]->aggregated_row_stream->BytesPinned(true); - mem += hash_partitions_[i]->hash_tbl->ByteSize(); - mem += hash_partitions_[i]->agg_fn_pool->total_reserved_bytes(); - DCHECK_GT(mem, 0); // At least the hash table buckets should occupy memory. 
- if (mem > max_freed_mem) { - max_freed_mem = mem; - partition_idx = i; + int64_t max_freed_mem = 0; + int partition_idx = -1; + + // Iterate over the partitions and pick the largest partition that is not spilled. + for (int i = 0; i < hash_partitions_.size(); ++i) { + if (hash_partitions_[i] == nullptr) continue; + if (hash_partitions_[i]->is_closed) continue; + if (hash_partitions_[i]->is_spilled()) continue; + // Pass 'true' because we need to keep the write block pinned. See Partition::Spill(). + int64_t mem = hash_partitions_[i]->aggregated_row_stream->BytesPinned(true); + mem += hash_partitions_[i]->hash_tbl->ByteSize(); + mem += hash_partitions_[i]->agg_fn_pool->total_reserved_bytes(); + DCHECK_GT(mem, 0); // At least the hash table buckets should occupy memory. + if (mem > max_freed_mem) { + max_freed_mem = mem; + partition_idx = i; + } } - } - DCHECK_NE(partition_idx, -1) << "Should have been able to spill a partition to " - << "reclaim memory: " << _buffer_pool_client.DebugString(); - // Remove references to the destroyed hash table from 'hash_tbls_'. - // Additionally, we might be dealing with a rebuilt spilled partition, where all - // partitions point to a single in-memory partition. This also ensures that 'hash_tbls_' - // remains consistent in that case. - for (int i = 0; i < PARTITION_FANOUT; ++i) { - if (hash_partitions_[i] == hash_partitions_[partition_idx]) hash_tbls_[i] = nullptr; - } - return hash_partitions_[partition_idx]->Spill(more_aggregate_rows); + DCHECK_NE(partition_idx, -1) << "Should have been able to spill a partition to " + << "reclaim memory: " << _buffer_pool_client.DebugString(); + // Remove references to the destroyed hash table from 'hash_tbls_'. + // Additionally, we might be dealing with a rebuilt spilled partition, where all + // partitions point to a single in-memory partition. This also ensures that 'hash_tbls_' + // remains consistent in that case. 
+ for (int i = 0; i < PARTITION_FANOUT; ++i) { + if (hash_partitions_[i] == hash_partitions_[partition_idx]) hash_tbls_[i] = nullptr; + } + return hash_partitions_[partition_idx]->Spill(more_aggregate_rows); } Status PartitionedAggregationNode::MoveHashPartitions(int64_t num_input_rows) { - DCHECK(!hash_partitions_.empty()); - std::stringstream ss; - ss << "PA(node_id=" << id() << ") partitioned(level=" << hash_partitions_[0]->level - << ") " << num_input_rows << " rows into:" << std::endl; - for (int i = 0; i < hash_partitions_.size(); ++i) { - Partition* partition = hash_partitions_[i]; - if (partition == nullptr) continue; - // We might be dealing with a rebuilt spilled partition, where all partitions are - // pointing to a single in-memory partition, so make sure we only proceed for the - // right partition. - if(i != partition->idx) continue; - int64_t aggregated_rows = 0; - if (partition->aggregated_row_stream != nullptr) { - aggregated_rows = partition->aggregated_row_stream->num_rows(); - } - int64_t unaggregated_rows = 0; - if (partition->unaggregated_row_stream != nullptr) { - unaggregated_rows = partition->unaggregated_row_stream->num_rows(); - } - double total_rows = aggregated_rows + unaggregated_rows; - double percent = total_rows * 100 / num_input_rows; - ss << " " << i << " " << (partition->is_spilled() ? "spilled" : "not spilled") - << " (fraction=" << std::fixed << std::setprecision(2) << percent << "%)" << std::endl - << " #aggregated rows:" << aggregated_rows << std::endl - << " #unaggregated rows: " << unaggregated_rows << std::endl; - - // TODO: update counters to support doubles. 
- COUNTER_SET(largest_partition_percent_, static_cast(percent)); - - if (total_rows == 0) { - partition->Close(false); - } else if (partition->is_spilled()) { - PushSpilledPartition(partition); - } else { - aggregated_partitions_.push_back(partition); + DCHECK(!hash_partitions_.empty()); + std::stringstream ss; + ss << "PA(node_id=" << id() << ") partitioned(level=" << hash_partitions_[0]->level << ") " + << num_input_rows << " rows into:" << std::endl; + for (int i = 0; i < hash_partitions_.size(); ++i) { + Partition* partition = hash_partitions_[i]; + if (partition == nullptr) continue; + // We might be dealing with a rebuilt spilled partition, where all partitions are + // pointing to a single in-memory partition, so make sure we only proceed for the + // right partition. + if (i != partition->idx) continue; + int64_t aggregated_rows = 0; + if (partition->aggregated_row_stream != nullptr) { + aggregated_rows = partition->aggregated_row_stream->num_rows(); + } + int64_t unaggregated_rows = 0; + if (partition->unaggregated_row_stream != nullptr) { + unaggregated_rows = partition->unaggregated_row_stream->num_rows(); + } + double total_rows = aggregated_rows + unaggregated_rows; + double percent = total_rows * 100 / num_input_rows; + ss << " " << i << " " << (partition->is_spilled() ? "spilled" : "not spilled") + << " (fraction=" << std::fixed << std::setprecision(2) << percent << "%)" << std::endl + << " #aggregated rows:" << aggregated_rows << std::endl + << " #unaggregated rows: " << unaggregated_rows << std::endl; + + // TODO: update counters to support doubles. 
+ COUNTER_SET(largest_partition_percent_, static_cast(percent)); + + if (total_rows == 0) { + partition->Close(false); + } else if (partition->is_spilled()) { + PushSpilledPartition(partition); + } else { + aggregated_partitions_.push_back(partition); + } } - - } - VLOG(2) << ss.str(); - hash_partitions_.clear(); - return Status::OK(); + VLOG(2) << ss.str(); + hash_partitions_.clear(); + return Status::OK(); } void PartitionedAggregationNode::PushSpilledPartition(Partition* partition) { - DCHECK(partition->is_spilled()); - DCHECK(partition->hash_tbl == nullptr); - // Ensure all pages in the spilled partition's streams are unpinned by invalidating - // the streams' read and write iterators. We may need all the memory to process the - // next spilled partitions. -// partition->aggregated_row_stream->UnpinStream(BufferedTupleStream3::UNPIN_ALL); -// partition->unaggregated_row_stream->UnpinStream(BufferedTupleStream3::UNPIN_ALL); - spilled_partitions_.push_front(partition); + DCHECK(partition->is_spilled()); + DCHECK(partition->hash_tbl == nullptr); + // Ensure all pages in the spilled partition's streams are unpinned by invalidating + // the streams' read and write iterators. We may need all the memory to process the + // next spilled partitions. 
+ // partition->aggregated_row_stream->UnpinStream(BufferedTupleStream3::UNPIN_ALL); + // partition->unaggregated_row_stream->UnpinStream(BufferedTupleStream3::UNPIN_ALL); + spilled_partitions_.push_front(partition); } void PartitionedAggregationNode::ClosePartitions() { - for (Partition* partition : hash_partitions_) { - if (partition != nullptr) partition->Close(true); - } - hash_partitions_.clear(); - for (Partition* partition : aggregated_partitions_) partition->Close(true); - aggregated_partitions_.clear(); - for (Partition* partition : spilled_partitions_) partition->Close(true); - spilled_partitions_.clear(); - memset(hash_tbls_, 0, sizeof(hash_tbls_)); - partition_pool_->clear(); + for (Partition* partition : hash_partitions_) { + if (partition != nullptr) partition->Close(true); + } + hash_partitions_.clear(); + for (Partition* partition : aggregated_partitions_) partition->Close(true); + aggregated_partitions_.clear(); + for (Partition* partition : spilled_partitions_) partition->Close(true); + spilled_partitions_.clear(); + memset(hash_tbls_, 0, sizeof(hash_tbls_)); + partition_pool_->clear(); } //Status PartitionedAggregationNode::QueryMaintenance(RuntimeState* state) { @@ -1487,9 +1466,7 @@ void PartitionedAggregationNode::ClosePartitions() { //} // Instantiate required templates. 
-template Status PartitionedAggregationNode::AppendSpilledRow( - Partition*, TupleRow*); +template Status PartitionedAggregationNode::AppendSpilledRow(Partition*, TupleRow*); template Status PartitionedAggregationNode::AppendSpilledRow(Partition*, TupleRow*); -} - +} // namespace doris diff --git a/be/src/exec/partitioned_aggregation_node.h b/be/src/exec/partitioned_aggregation_node.h index bedadb171a3f01..eb8358050b4ba4 100644 --- a/be/src/exec/partitioned_aggregation_node.h +++ b/be/src/exec/partitioned_aggregation_node.h @@ -18,15 +18,14 @@ #ifndef DORIS_BE_SRC_EXEC_NEW_PARTITIONED_AGGREGATION_NODE_H #define DORIS_BE_SRC_EXEC_NEW_PARTITIONED_AGGREGATION_NODE_H -#include - #include +#include #include "exec/exec_node.h" #include "exec/partitioned_hash_table.h" #include "runtime/buffered_tuple_stream3.h" #include "runtime/bufferpool/suballocator.h" -#include "runtime/descriptors.h" // for TupleId +#include "runtime/descriptors.h" // for TupleId #include "runtime/mem_pool.h" #include "runtime/string_value.h" @@ -118,581 +117,583 @@ class SlotDescriptor; /// Likewise, it's easy to mixup the agg fn ctxs, there should be a way to simplify this. /// TODO: support an Init() method with an initial value in the UDAF interface. 
class PartitionedAggregationNode : public ExecNode { - public: - - PartitionedAggregationNode(ObjectPool* pool, - const TPlanNode& tnode, const DescriptorTbl& descs); - - virtual Status init(const TPlanNode& tnode, RuntimeState* state); - virtual Status prepare(RuntimeState* state); -// virtual void Codegen(RuntimeState* state); - virtual Status open(RuntimeState* state); - virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); - virtual Status reset(RuntimeState* state); - virtual Status close(RuntimeState* state); - - protected: - /// Frees local allocations from aggregate_evals_ and agg_fn_evals -// virtual Status QueryMaintenance(RuntimeState* state); - virtual std::string DebugString(int indentation_level) const; - virtual void DebugString(int indentation_level, std::stringstream* out) const; - - private: - struct Partition; - - /// Number of initial partitions to create. Must be a power of 2. - static const int PARTITION_FANOUT = 16; - - /// Needs to be the log(PARTITION_FANOUT). - /// We use the upper bits to pick the partition and lower bits in the HT. - /// TODO: different hash functions here too? We don't need that many bits to pick - /// the partition so this might be okay. - static const int NUM_PARTITIONING_BITS = 4; - - /// Maximum number of times we will repartition. The maximum build table we can process - /// (if we have enough scratch disk space) in case there is no skew is: - /// MEM_LIMIT * (PARTITION_FANOUT ^ MAX_PARTITION_DEPTH). - /// In the case where there is skew, repartitioning is unlikely to help (assuming a - /// reasonable hash function). - /// Note that we need to have at least as many SEED_PRIMES in PartitionedHashTableCtx. - /// TODO: we can revisit and try harder to explicitly detect skew. - static const int MAX_PARTITION_DEPTH = 16; - - /// Default initial number of buckets in a hash table. - /// TODO: rethink this ? 
- static const int64_t PAGG_DEFAULT_HASH_TABLE_SZ = 1024; - - /// Codegen doesn't allow for automatic Status variables because then exception - /// handling code is needed to destruct the Status, and our function call substitution - /// doesn't know how to deal with the LLVM IR 'invoke' instruction. Workaround that by - /// placing the Status here so exceptions won't need to destruct it. - /// TODO: fix IMPALA-1948 and remove this. - Status process_batch_status_; - - /// Tuple into which Update()/Merge()/Serialize() results are stored. - TupleId intermediate_tuple_id_; - TupleDescriptor* intermediate_tuple_desc_; - - /// Row with the intermediate tuple as its only tuple. - /// Construct a new row desc for preparing the build exprs because neither the child's - /// nor this node's output row desc may contain the intermediate tuple, e.g., - /// in a single-node plan with an intermediate tuple different from the output tuple. - /// Lives in the query state's obj_pool. - RowDescriptor intermediate_row_desc_; - - /// Tuple into which Finalize() results are stored. Possibly the same as - /// the intermediate tuple. - TupleId output_tuple_id_; - TupleDescriptor* output_tuple_desc_; - - /// Certain aggregates require a finalize step, which is the final step of the - /// aggregate after consuming all input rows. The finalize step converts the aggregate - /// value into its final form. This is true if this node contains aggregate that - /// requires a finalize step. 
- const bool needs_finalize_; +public: + PartitionedAggregationNode(ObjectPool* pool, const TPlanNode& tnode, + const DescriptorTbl& descs); + + virtual Status init(const TPlanNode& tnode, RuntimeState* state); + virtual Status prepare(RuntimeState* state); + // virtual void Codegen(RuntimeState* state); + virtual Status open(RuntimeState* state); + virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); + virtual Status reset(RuntimeState* state); + virtual Status close(RuntimeState* state); + +protected: + /// Frees local allocations from aggregate_evals_ and agg_fn_evals + // virtual Status QueryMaintenance(RuntimeState* state); + virtual std::string DebugString(int indentation_level) const; + virtual void DebugString(int indentation_level, std::stringstream* out) const; + +private: + struct Partition; + + /// Number of initial partitions to create. Must be a power of 2. + static const int PARTITION_FANOUT = 16; + + /// Needs to be the log(PARTITION_FANOUT). + /// We use the upper bits to pick the partition and lower bits in the HT. + /// TODO: different hash functions here too? We don't need that many bits to pick + /// the partition so this might be okay. + static const int NUM_PARTITIONING_BITS = 4; + + /// Maximum number of times we will repartition. The maximum build table we can process + /// (if we have enough scratch disk space) in case there is no skew is: + /// MEM_LIMIT * (PARTITION_FANOUT ^ MAX_PARTITION_DEPTH). + /// In the case where there is skew, repartitioning is unlikely to help (assuming a + /// reasonable hash function). + /// Note that we need to have at least as many SEED_PRIMES in PartitionedHashTableCtx. + /// TODO: we can revisit and try harder to explicitly detect skew. + static const int MAX_PARTITION_DEPTH = 16; + + /// Default initial number of buckets in a hash table. + /// TODO: rethink this ? 
+ static const int64_t PAGG_DEFAULT_HASH_TABLE_SZ = 1024; + + /// Codegen doesn't allow for automatic Status variables because then exception + /// handling code is needed to destruct the Status, and our function call substitution + /// doesn't know how to deal with the LLVM IR 'invoke' instruction. Workaround that by + /// placing the Status here so exceptions won't need to destruct it. + /// TODO: fix IMPALA-1948 and remove this. + Status process_batch_status_; + + /// Tuple into which Update()/Merge()/Serialize() results are stored. + TupleId intermediate_tuple_id_; + TupleDescriptor* intermediate_tuple_desc_; + + /// Row with the intermediate tuple as its only tuple. + /// Construct a new row desc for preparing the build exprs because neither the child's + /// nor this node's output row desc may contain the intermediate tuple, e.g., + /// in a single-node plan with an intermediate tuple different from the output tuple. + /// Lives in the query state's obj_pool. + RowDescriptor intermediate_row_desc_; + + /// Tuple into which Finalize() results are stored. Possibly the same as + /// the intermediate tuple. + TupleId output_tuple_id_; + TupleDescriptor* output_tuple_desc_; + + /// Certain aggregates require a finalize step, which is the final step of the + /// aggregate after consuming all input rows. The finalize step converts the aggregate + /// value into its final form. This is true if this node contains aggregate that + /// requires a finalize step. + const bool needs_finalize_; /// True if this is first phase of a two-phase distributed aggregation for which we /// are doing a streaming preaggregation. bool is_streaming_preagg_; - /// True if any of the evaluators require the serialize step. - bool needs_serialize_; - - /// The list of all aggregate operations for this exec node. - std::vector agg_fns_; - - /// Evaluators for each aggregate function. 
If this is a grouping aggregation, these - /// evaluators are only used to create cloned per-partition evaluators. The cloned - /// evaluators are then used to evaluate the functions. If this is a non-grouping - /// aggregation these evaluators are used directly to evaluate the functions. - /// - /// Permanent and result allocations for these allocators are allocated from - /// 'expr_perm_pool_' and 'expr_results_pool_' respectively. - std::vector agg_fn_evals_; - boost::scoped_ptr agg_fn_pool_; - - /// Exprs used to evaluate input rows - std::vector grouping_exprs_; + /// True if any of the evaluators require the serialize step. + bool needs_serialize_; - /// Exprs used to insert constructed aggregation tuple into the hash table. - /// All the exprs are simply SlotRefs for the intermediate tuple. - std::vector build_exprs_; + /// The list of all aggregate operations for this exec node. + std::vector agg_fns_; - /// Exprs used to evaluate input rows - /// TODO (pengyubing) Is this variable useful? - std::vector grouping_expr_ctxs_; - - /// Indices of grouping exprs with var-len string types in grouping_expr_ctxs_. We need - /// to do more work for var-len expressions when allocating and spilling rows. All - /// var-len grouping exprs have type string. - std::vector string_grouping_exprs_; - - RuntimeState* state_; - /// Allocator for hash table memory. - boost::scoped_ptr ht_allocator_; - /// MemPool used to allocate memory for when we don't have grouping and don't initialize - /// the partitioning structures, or during Close() when creating new output tuples. - /// For non-grouping aggregations, the ownership of the pool's memory is transferred - /// to the output batch on eos. The pool should not be Reset() to allow amortizing - /// memory allocation over a series of Reset()/Open()/GetNext()* calls. 
- boost::scoped_ptr mem_pool_; - - // MemPool for allocations made by copying expr results - boost::scoped_ptr expr_results_pool_; + /// Evaluators for each aggregate function. If this is a grouping aggregation, these + /// evaluators are only used to create cloned per-partition evaluators. The cloned + /// evaluators are then used to evaluate the functions. If this is a non-grouping + /// aggregation these evaluators are used directly to evaluate the functions. + /// + /// Permanent and result allocations for these allocators are allocated from + /// 'expr_perm_pool_' and 'expr_results_pool_' respectively. + std::vector agg_fn_evals_; + boost::scoped_ptr agg_fn_pool_; - /// The current partition and iterator to the next row in its hash table that we need - /// to return in GetNext() - Partition* output_partition_; - PartitionedHashTable::Iterator output_iterator_; + /// Exprs used to evaluate input rows + std::vector grouping_exprs_; - typedef Status (*ProcessBatchNoGroupingFn)(PartitionedAggregationNode*, RowBatch*); - /// Jitted ProcessBatchNoGrouping function pointer. Null if codegen is disabled. - ProcessBatchNoGroupingFn process_batch_no_grouping_fn_; + /// Exprs used to insert constructed aggregation tuple into the hash table. + /// All the exprs are simply SlotRefs for the intermediate tuple. + std::vector build_exprs_; - typedef Status (*ProcessBatchFn)( - PartitionedAggregationNode*, RowBatch*, PartitionedHashTableCtx*); - /// Jitted ProcessBatch function pointer. Null if codegen is disabled. - ProcessBatchFn process_batch_fn_; + /// Exprs used to evaluate input rows + /// TODO (pengyubing) Is this variable useful? + std::vector grouping_expr_ctxs_; - typedef Status (*ProcessBatchStreamingFn)(PartitionedAggregationNode*, bool, - RowBatch*, RowBatch*, PartitionedHashTableCtx*, int[PARTITION_FANOUT]); - /// Jitted ProcessBatchStreaming function pointer. Null if codegen is disabled. 
- ProcessBatchStreamingFn process_batch_streaming_fn_; + /// Indices of grouping exprs with var-len string types in grouping_expr_ctxs_. We need + /// to do more work for var-len expressions when allocating and spilling rows. All + /// var-len grouping exprs have type string. + std::vector string_grouping_exprs_; - /// Time spent processing the child rows - RuntimeProfile::Counter* build_timer_; + RuntimeState* state_; + /// Allocator for hash table memory. + boost::scoped_ptr ht_allocator_; + /// MemPool used to allocate memory for when we don't have grouping and don't initialize + /// the partitioning structures, or during Close() when creating new output tuples. + /// For non-grouping aggregations, the ownership of the pool's memory is transferred + /// to the output batch on eos. The pool should not be Reset() to allow amortizing + /// memory allocation over a series of Reset()/Open()/GetNext()* calls. + boost::scoped_ptr mem_pool_; - /// Total time spent resizing hash tables. - RuntimeProfile::Counter* ht_resize_timer_; + // MemPool for allocations made by copying expr results + boost::scoped_ptr expr_results_pool_; - /// Total time of resizing hash tables. - RuntimeProfile::Counter* ht_resize_counter_; + /// The current partition and iterator to the next row in its hash table that we need + /// to return in GetNext() + Partition* output_partition_; + PartitionedHashTable::Iterator output_iterator_; - /// Time spent returning the aggregated rows - RuntimeProfile::Counter* get_results_timer_; + typedef Status (*ProcessBatchNoGroupingFn)(PartitionedAggregationNode*, RowBatch*); + /// Jitted ProcessBatchNoGrouping function pointer. Null if codegen is disabled. + ProcessBatchNoGroupingFn process_batch_no_grouping_fn_; - /// Total number of hash buckets across all partitions. 
- RuntimeProfile::Counter* num_hash_buckets_; + typedef Status (*ProcessBatchFn)(PartitionedAggregationNode*, RowBatch*, + PartitionedHashTableCtx*); + /// Jitted ProcessBatch function pointer. Null if codegen is disabled. + ProcessBatchFn process_batch_fn_; - /// Total number of hash filled buckets across all partitions. - RuntimeProfile::Counter* num_hash_filled_buckets_; + typedef Status (*ProcessBatchStreamingFn)(PartitionedAggregationNode*, bool, RowBatch*, + RowBatch*, PartitionedHashTableCtx*, + int[PARTITION_FANOUT]); + /// Jitted ProcessBatchStreaming function pointer. Null if codegen is disabled. + ProcessBatchStreamingFn process_batch_streaming_fn_; - /// Total time of probe operation across all partitions. - RuntimeProfile::Counter* num_hash_probe_; + /// Time spent processing the child rows + RuntimeProfile::Counter* build_timer_; - /// Total time of failed probe operation across all partitions. - RuntimeProfile::Counter* num_hash_failed_probe_; + /// Total time spent resizing hash tables. + RuntimeProfile::Counter* ht_resize_timer_; - /// Total time of travel_length of probe operation across all partitions. - RuntimeProfile::Counter* num_hash_travel_length_; + /// Total time of resizing hash tables. + RuntimeProfile::Counter* ht_resize_counter_; - /// Total time of hash_collisions across all partitions. - RuntimeProfile::Counter* num_hash_collisions_; + /// Time spent returning the aggregated rows + RuntimeProfile::Counter* get_results_timer_; - /// Total number of partitions created. - RuntimeProfile::Counter* partitions_created_; + /// Total number of hash buckets across all partitions. + RuntimeProfile::Counter* num_hash_buckets_; - /// Level of max partition (i.e. number of repartitioning steps). - RuntimeProfile::HighWaterMarkCounter* max_partition_level_; + /// Total number of hash filled buckets across all partitions. + RuntimeProfile::Counter* num_hash_filled_buckets_; - /// Number of rows that have been repartitioned. 
- RuntimeProfile::Counter* num_row_repartitioned_; + /// Total time of probe operation across all partitions. + RuntimeProfile::Counter* num_hash_probe_; - /// Number of partitions that have been repartitioned. - RuntimeProfile::Counter* num_repartitions_; + /// Total time of failed probe operation across all partitions. + RuntimeProfile::Counter* num_hash_failed_probe_; - /// Number of partitions that have been spilled. - RuntimeProfile::Counter* num_spilled_partitions_; + /// Total time of travel_length of probe operation across all partitions. + RuntimeProfile::Counter* num_hash_travel_length_; - /// The largest fraction after repartitioning. This is expected to be - /// 1 / PARTITION_FANOUT. A value much larger indicates skew. - RuntimeProfile::HighWaterMarkCounter* largest_partition_percent_; + /// Total time of hash_collisions across all partitions. + RuntimeProfile::Counter* num_hash_collisions_; - /// Time spent in streaming preagg algorithm. - RuntimeProfile::Counter* streaming_timer_; + /// Total number of partitions created. + RuntimeProfile::Counter* partitions_created_; - /// num_processed_rows == num_hash_probe_ add this counter - /// just make the runningprofie more clearly - /// The number of rows which proessed by aggregation. - RuntimeProfile::Counter* num_processed_rows_; + /// Level of max partition (i.e. number of repartitioning steps). + RuntimeProfile::HighWaterMarkCounter* max_partition_level_; - /// The number of rows passed through without aggregation. - RuntimeProfile::Counter* num_passthrough_rows_; + /// Number of rows that have been repartitioned. + RuntimeProfile::Counter* num_row_repartitioned_; - /// The estimated reduction of the preaggregation. - RuntimeProfile::Counter* preagg_estimated_reduction_; + /// Number of partitions that have been repartitioned. + RuntimeProfile::Counter* num_repartitions_; - /// Expose the minimum reduction factor to continue growing the hash tables. 
- RuntimeProfile::Counter* preagg_streaming_ht_min_reduction_; + /// Number of partitions that have been spilled. + RuntimeProfile::Counter* num_spilled_partitions_; - /// The estimated number of input rows from the planner. - int64_t estimated_input_cardinality_; + /// The largest fraction after repartitioning. This is expected to be + /// 1 / PARTITION_FANOUT. A value much larger indicates skew. + RuntimeProfile::HighWaterMarkCounter* largest_partition_percent_; - ///////////////////////////////////////// - /// BEGIN: Members that must be Reset() + /// Time spent in streaming preagg algorithm. + RuntimeProfile::Counter* streaming_timer_; - /// Result of aggregation w/o GROUP BY. - /// Note: can be NULL even if there is no grouping if the result tuple is 0 width - /// e.g. select 1 from table group by col. - Tuple* singleton_output_tuple_; - bool singleton_output_tuple_returned_; + /// num_processed_rows == num_hash_probe_ add this counter + /// just make the runningprofie more clearly + /// The number of rows which proessed by aggregation. + RuntimeProfile::Counter* num_processed_rows_; - /// Row batch used as argument to GetNext() for the child node preaggregations. Store - /// in node to avoid reallocating for every GetNext() call when streaming. - boost::scoped_ptr child_batch_; + /// The number of rows passed through without aggregation. + RuntimeProfile::Counter* num_passthrough_rows_; - /// If true, no more rows to output from partitions. - bool partition_eos_; + /// The estimated reduction of the preaggregation. + RuntimeProfile::Counter* preagg_estimated_reduction_; - /// True if no more rows to process from child. - bool child_eos_; + /// Expose the minimum reduction factor to continue growing the hash tables. + RuntimeProfile::Counter* preagg_streaming_ht_min_reduction_; - /// Used for hash-related functionality, such as evaluating rows and calculating hashes. 
- /// It also owns the evaluators for the grouping and build expressions used during hash - /// table insertion and probing. - boost::scoped_ptr ht_ctx_; + /// The estimated number of input rows from the planner. + int64_t estimated_input_cardinality_; - /// Object pool that holds the Partition objects in hash_partitions_. - boost::scoped_ptr partition_pool_; + ///////////////////////////////////////// + /// BEGIN: Members that must be Reset() - /// Current partitions we are partitioning into. IMPALA-5788: For the case where we - /// rebuild a spilled partition that fits in memory, all pointers in this vector will - /// point to a single in-memory partition. - std::vector hash_partitions_; + /// Result of aggregation w/o GROUP BY. + /// Note: can be NULL even if there is no grouping if the result tuple is 0 width + /// e.g. select 1 from table group by col. + Tuple* singleton_output_tuple_; + bool singleton_output_tuple_returned_; - /// Cache for hash tables in 'hash_partitions_'. IMPALA-5788: For the case where we - /// rebuild a spilled partition that fits in memory, all pointers in this array will - /// point to the hash table that is a part of a single in-memory partition. - PartitionedHashTable* hash_tbls_[PARTITION_FANOUT]; + /// Row batch used as argument to GetNext() for the child node preaggregations. Store + /// in node to avoid reallocating for every GetNext() call when streaming. + boost::scoped_ptr child_batch_; - /// All partitions that have been spilled and need further processing. - std::deque spilled_partitions_; + /// If true, no more rows to output from partitions. + bool partition_eos_; - /// All partitions that are aggregated and can just return the results in GetNext(). - /// After consuming all the input, hash_partitions_ is split into spilled_partitions_ - /// and aggregated_partitions_, depending on if it was spilled or not. - std::deque aggregated_partitions_; + /// True if no more rows to process from child. 
+ bool child_eos_; - /// END: Members that must be Reset() - ///////////////////////////////////////// + /// Used for hash-related functionality, such as evaluating rows and calculating hashes. + /// It also owns the evaluators for the grouping and build expressions used during hash + /// table insertion and probing. + boost::scoped_ptr ht_ctx_; - /// The hash table and streams (aggregated and unaggregated) for an individual - /// partition. The streams of each partition always (i.e. regardless of level) - /// initially use small buffers. Streaming pre-aggregations do not spill and do not - /// require an unaggregated stream. - struct Partition { - Partition(PartitionedAggregationNode* parent, int level, int idx) - : parent(parent), is_closed(false), level(level), idx(idx) {} + /// Object pool that holds the Partition objects in hash_partitions_. + boost::scoped_ptr partition_pool_; - ~Partition(); + /// Current partitions we are partitioning into. IMPALA-5788: For the case where we + /// rebuild a spilled partition that fits in memory, all pointers in this vector will + /// point to a single in-memory partition. + std::vector hash_partitions_; - /// Initializes aggregated_row_stream and unaggregated_row_stream (if a spilling - /// aggregation), allocating one buffer for each. Spilling merge aggregations must - /// have enough reservation for the initial buffer for the stream, so this should - /// not fail due to OOM. Preaggregations do not reserve any buffers: if does not - /// have enough reservation for the initial buffer, the aggregated row stream is not - /// created and an OK status is returned. - Status InitStreams(); + /// Cache for hash tables in 'hash_partitions_'. IMPALA-5788: For the case where we + /// rebuild a spilled partition that fits in memory, all pointers in this array will + /// point to the hash table that is a part of a single in-memory partition. + PartitionedHashTable* hash_tbls_[PARTITION_FANOUT]; - /// Initializes the hash table. 
'aggregated_row_stream' must be non-NULL. - /// Sets 'got_memory' to true if the hash table was initialised or false on OOM. - Status InitHashTable(bool* got_memory); + /// All partitions that have been spilled and need further processing. + std::deque spilled_partitions_; - /// Called in case we need to serialize aggregated rows. This step effectively does - /// a merge aggregation in this node. - Status SerializeStreamForSpilling(); - - /// Closes this partition. If finalize_rows is true, this iterates over all rows - /// in aggregated_row_stream and finalizes them (this is only used in the cancellation - /// path). - void Close(bool finalize_rows); - - /// Spill this partition. 'more_aggregate_rows' = true means that more aggregate rows - /// may be appended to the the partition before appending unaggregated rows. On - /// success, one of the streams is left with a write iterator: the aggregated stream - /// if 'more_aggregate_rows' is true or the unaggregated stream otherwise. - Status Spill(bool more_aggregate_rows); - - bool is_spilled() const { return hash_tbl.get() == NULL; } - - PartitionedAggregationNode* parent; - - /// If true, this partition is closed and there is nothing left to do. - bool is_closed; - - /// How many times rows in this partition have been repartitioned. Partitions created - /// from the node's children's input is level 0, 1 after the first repartitionining, - /// etc. - const int level; - - /// The index of this partition within 'hash_partitions_' at its level. - const int idx; - - /// Hash table for this partition. - /// Can be NULL if this partition is no longer maintaining a hash table (i.e. - /// is spilled or we are passing through all rows for this partition). - boost::scoped_ptr hash_tbl; - - /// Clone of parent's agg_fn_evals_. Permanent allocations come from - /// 'agg_fn_perm_pool' and result allocations come from the ExecNode's - /// 'expr_results_pool_'. 
- std::vector agg_fn_evals; - boost::scoped_ptr agg_fn_pool; - - /// Tuple stream used to store aggregated rows. When the partition is not spilled, - /// (meaning the hash table is maintained), this stream is pinned and contains the - /// memory referenced by the hash table. When it is spilled, this consumes reservation - /// for a write buffer only during repartitioning of aggregated rows. - /// - /// For streaming preaggs, this may be NULL if sufficient memory is not available. - /// In that case hash_tbl is also NULL and all rows for the partition will be passed - /// through. - boost::scoped_ptr aggregated_row_stream; - - /// Unaggregated rows that are spilled. Always NULL for streaming pre-aggregations. - /// Always unpinned. Has a write buffer allocated when the partition is spilled and - /// unaggregated rows are being processed. - boost::scoped_ptr unaggregated_row_stream; - }; - - /// Stream used to store serialized spilled rows. Only used if needs_serialize_ - /// is set. This stream is never pinned and only used in Partition::Spill as a - /// a temporary buffer. - boost::scoped_ptr serialize_stream_; - - /// Accessor for 'hash_tbls_' that verifies consistency with the partitions. - PartitionedHashTable* ALWAYS_INLINE GetHashTable(int partition_idx) { - PartitionedHashTable* ht = hash_tbls_[partition_idx]; - DCHECK_EQ(ht, hash_partitions_[partition_idx]->hash_tbl.get()); - return ht; - } - - /// Materializes 'row_batch' in either grouping or non-grouping case. - Status GetNextInternal(RuntimeState* state, RowBatch* row_batch, bool* eos); - - /// Helper function called by GetNextInternal() to ensure that string data referenced in - /// 'row_batch' will live as long as 'row_batch's tuples. 'first_row_idx' indexes the - /// first row that should be processed in 'row_batch'. 
- Status HandleOutputStrings(RowBatch* row_batch, int first_row_idx); - - /// Copies string data from the specified slot into 'pool', and sets the StringValues' - /// ptrs to the copied data. Copies data from all tuples in 'row_batch' from - /// 'first_row_idx' onwards. 'slot_desc' must have a var-len string type. - Status CopyStringData(const SlotDescriptor& slot_desc, RowBatch* row_batch, - int first_row_idx, MemPool* pool); - - /// Constructs singleton output tuple, allocating memory from pool. - Tuple* ConstructSingletonOutputTuple( - const std::vector& agg_fn_evals, MemPool* pool); - - /// Copies grouping values stored in 'ht_ctx_' that were computed over 'current_row_' - /// using 'grouping_expr_evals_'. Aggregation expr slots are set to their initial - /// values. Returns NULL if there was not enough memory to allocate the tuple or errors - /// occurred. In which case, 'status' is set. Allocates tuple and var-len data for - /// grouping exprs from stream. Var-len data for aggregate exprs is allocated from the - /// FunctionContexts, so is stored outside the stream. If stream's small buffers get - /// full, it will attempt to switch to IO-buffers. - Tuple* ConstructIntermediateTuple(const std::vector& agg_fn_evals, - BufferedTupleStream3* stream, Status* status); - - /// Constructs intermediate tuple, allocating memory from pool instead of the stream. - /// Returns NULL and sets status if there is not enough memory to allocate the tuple. - Tuple* ConstructIntermediateTuple(const std::vector& agg_fn_evals, - MemPool* pool, Status* status); - - /// Returns the number of bytes of variable-length data for the grouping values stored - /// in 'ht_ctx_'. - int GroupingExprsVarlenSize(); - - /// Initializes intermediate tuple by copying grouping values stored in 'ht_ctx_' that - /// that were computed over 'current_row_' using 'grouping_expr_evals_'. Writes the - /// var-len data into buffer. 
'buffer' points to the start of a buffer of at least the - /// size of the variable-length data: 'varlen_size'. - void CopyGroupingValues(Tuple* intermediate_tuple, uint8_t* buffer, int varlen_size); - - /// Initializes the aggregate function slots of an intermediate tuple. - /// Any var-len data is allocated from the FunctionContexts. - void InitAggSlots(const std::vector& agg_fn_evals, - Tuple* intermediate_tuple); - - /// Updates the given aggregation intermediate tuple with aggregation values computed - /// over 'row' using 'agg_fn_evals'. Whether the agg fn evaluator calls Update() or - /// Merge() is controlled by the evaluator itself, unless enforced explicitly by passing - /// in is_merge == true. The override is needed to merge spilled and non-spilled rows - /// belonging to the same partition independent of whether the agg fn evaluators have - /// is_merge() == true. - /// This function is replaced by codegen (which is why we don't use a vector argument - /// for agg_fn_evals).. Any var-len data is allocated from the FunctionContexts. - void UpdateTuple(NewAggFnEvaluator** agg_fn_evals, Tuple* tuple, TupleRow* row, - bool is_merge = false); - - /// Called on the intermediate tuple of each group after all input rows have been - /// consumed and aggregated. Computes the final aggregate values to be returned in - /// GetNext() using the agg fn evaluators' Serialize() or Finalize(). - /// For the Finalize() case if the output tuple is different from the intermediate - /// tuple, then a new tuple is allocated from 'pool' to hold the final result. - /// Grouping values are copied into the output tuple and the the output tuple holding - /// the finalized/serialized aggregate values is returned. - /// TODO: Coordinate the allocation of new tuples with the release of memory - /// so as not to make memory consumption blow up. 
- Tuple* GetOutputTuple(const std::vector& agg_fn_evals, - Tuple* tuple, MemPool* pool); - - /// Do the aggregation for all tuple rows in the batch when there is no grouping. - /// This function is replaced by codegen. - Status ProcessBatchNoGrouping(RowBatch* batch); - - /// Processes a batch of rows. This is the core function of the algorithm. We partition - /// the rows into hash_partitions_, spilling as necessary. - /// If AGGREGATED_ROWS is true, it means that the rows in the batch are already - /// pre-aggregated. - /// 'prefetch_mode' specifies the prefetching mode in use. If it's not PREFETCH_NONE, - /// hash table buckets will be prefetched based on the hash values computed. Note - /// that 'prefetch_mode' will be substituted with constants during codegen time. - // - /// This function is replaced by codegen. We pass in ht_ctx_.get() as an argument for - /// performance. - template - Status IR_ALWAYS_INLINE ProcessBatch(RowBatch* batch, PartitionedHashTableCtx* ht_ctx); - - /// Evaluates the rows in 'batch' starting at 'start_row_idx' and stores the results in - /// the expression values cache in 'ht_ctx'. The number of rows evaluated depends on - /// the capacity of the cache. 'prefetch_mode' specifies the prefetching mode in use. - /// If it's not PREFETCH_NONE, hash table buckets for the computed hashes will be - /// prefetched. Note that codegen replaces 'prefetch_mode' with a constant. - template - void EvalAndHashPrefetchGroup(RowBatch* batch, int start_row_idx, PartitionedHashTableCtx* ht_ctx); - - /// This function processes each individual row in ProcessBatch(). Must be inlined into - /// ProcessBatch for codegen to substitute function calls with codegen'd versions. - /// May spill partitions if not enough memory is available. - template - Status IR_ALWAYS_INLINE ProcessRow(TupleRow* row, PartitionedHashTableCtx* ht_ctx); - - /// Create a new intermediate tuple in partition, initialized with row. 
ht_ctx is - /// the context for the partition's hash table and hash is the precomputed hash of - /// the row. The row can be an unaggregated or aggregated row depending on - /// AGGREGATED_ROWS. Spills partitions if necessary to append the new intermediate - /// tuple to the partition's stream. Must be inlined into ProcessBatch for codegen - /// to substitute function calls with codegen'd versions. insert_it is an iterator - /// for insertion returned from PartitionedHashTable::FindBuildRowBucket(). - template - Status IR_ALWAYS_INLINE AddIntermediateTuple(Partition* partition, - TupleRow* row, uint32_t hash, PartitionedHashTable::Iterator insert_it); - - /// Append a row to a spilled partition. May spill partitions if needed to switch to - /// I/O buffers. Selects the correct stream according to the argument. Inlined into - /// ProcessBatch(). - template - Status IR_ALWAYS_INLINE AppendSpilledRow(Partition* partition, TupleRow* row); - - /// Reads all the rows from input_stream and process them by calling ProcessBatch(). - template - Status ProcessStream(BufferedTupleStream3* input_stream); - - /// Output 'singleton_output_tuple_' and transfer memory to 'row_batch'. - void GetSingletonOutput(RowBatch* row_batch); - - /// Get rows for the next rowbatch from the next partition. Sets 'partition_eos_' to - /// true if all rows from all partitions have been returned or the limit is reached. - Status GetRowsFromPartition(RuntimeState* state, RowBatch* row_batch); - - /// Get output rows from child for streaming pre-aggregation. Aggregates some rows with - /// hash table and passes through other rows converted into the intermediate - /// tuple format. Sets 'child_eos_' once all rows from child have been returned. - Status GetRowsStreaming(RuntimeState* state, RowBatch* row_batch); - - /// Return true if we should keep expanding hash tables in the preagg. If false, - /// the preagg should pass through any rows it can't fit in its tables. 
- bool ShouldExpandPreaggHashTables() const; - - /// Streaming processing of in_batch from child. Rows from child are either aggregated - /// into the hash table or added to 'out_batch' in the intermediate tuple format. - /// 'in_batch' is processed entirely, and 'out_batch' must have enough capacity to - /// store all of the rows in 'in_batch'. - /// 'needs_serialize' is an argument so that codegen can replace it with a constant, - /// rather than using the member variable 'needs_serialize_'. - /// 'prefetch_mode' specifies the prefetching mode in use. If it's not PREFETCH_NONE, - /// hash table buckets will be prefetched based on the hash values computed. Note - /// that 'prefetch_mode' will be substituted with constants during codegen time. - /// 'remaining_capacity' is an array with PARTITION_FANOUT entries with the number of - /// additional rows that can be added to the hash table per partition. It is updated - /// by ProcessBatchStreaming() when it inserts new rows. - /// 'ht_ctx' is passed in as a way to avoid aliasing of 'this' confusing the optimiser. - Status ProcessBatchStreaming(bool needs_serialize, - RowBatch* in_batch, RowBatch* out_batch, PartitionedHashTableCtx* ht_ctx, - int remaining_capacity[PARTITION_FANOUT]); - - /// Tries to add intermediate to the hash table 'hash_tbl' of 'partition' for streaming - /// aggregation. The input row must have been evaluated with 'ht_ctx', with 'hash' set - /// to the corresponding hash. If the tuple already exists in the hash table, update - /// the tuple and return true. Otherwise try to create a new entry in the hash table, - /// returning true if successful or false if the table is full. 'remaining_capacity' - /// keeps track of how many more entries can be added to the hash table so we can avoid - /// retrying inserts. It is decremented if an insert succeeds and set to zero if an - /// insert fails. If an error occurs, returns false and sets 'status'. 
- bool IR_ALWAYS_INLINE TryAddToHashTable(PartitionedHashTableCtx* ht_ctx, - Partition* partition, PartitionedHashTable* hash_tbl, TupleRow* in_row, uint32_t hash, - int* remaining_capacity, Status* status); - - /// Initializes hash_partitions_. 'level' is the level for the partitions to create. - /// If 'single_partition_idx' is provided, it must be a number in range - /// [0, PARTITION_FANOUT), and only that partition is created - all others point to it. - /// Also sets ht_ctx_'s level to 'level'. - Status CreateHashPartitions(int level, int single_partition_idx = -1); - - /// Ensure that hash tables for all in-memory partitions are large enough to fit - /// 'num_rows' additional hash table entries. If there is not enough memory to - /// resize the hash tables, may spill partitions. 'aggregated_rows' is true if - /// we're currently partitioning aggregated rows. - Status CheckAndResizeHashPartitions(bool aggregated_rows, int num_rows, const PartitionedHashTableCtx* ht_ctx); - - /// Prepares the next partition to return results from. On return, this function - /// initializes output_iterator_ and output_partition_. This either removes - /// a partition from aggregated_partitions_ (and is done) or removes the next - /// partition from aggregated_partitions_ and repartitions it. - Status NextPartition(); - - /// Tries to build the first partition in 'spilled_partitions_'. - /// If successful, set *built_partition to the partition. The caller owns the partition - /// and is responsible for closing it. If unsuccessful because the partition could not - /// fit in memory, set *built_partition to NULL and append the spilled partition to the - /// head of 'spilled_partitions_' so it can be processed by - /// RepartitionSpilledPartition(). - Status BuildSpilledPartition(Partition** built_partition); - - /// Repartitions the first partition in 'spilled_partitions_' into PARTITION_FANOUT - /// output partitions. 
On success, each output partition is either: - /// * closed, if no rows were added to the partition. - /// * in 'spilled_partitions_', if the partition spilled. - /// * in 'aggregated_partitions_', if the output partition was not spilled. - Status RepartitionSpilledPartition(); - - /// Picks a partition from 'hash_partitions_' to spill. 'more_aggregate_rows' is passed - /// to Partition::Spill() when spilling the partition. See the Partition::Spill() - /// comment for further explanation. - Status SpillPartition(bool more_aggregate_rows); - - /// Moves the partitions in hash_partitions_ to aggregated_partitions_ or - /// spilled_partitions_. Partitions moved to spilled_partitions_ are unpinned. - /// input_rows is the number of input rows that have been repartitioned. - /// Used for diagnostics. - Status MoveHashPartitions(int64_t input_rows); - - /// Adds a partition to the front of 'spilled_partitions_' for later processing. - /// 'spilled_partitions_' uses LIFO so more finely partitioned partitions are processed - /// first). This allows us to delete pages earlier and bottom out the recursion - /// earlier and also improves time locality of access to spilled data on disk. - void PushSpilledPartition(Partition* partition); - - /// Calls Close() on every Partition in 'aggregated_partitions_', - /// 'spilled_partitions_', and 'hash_partitions_' and then resets the lists, - /// the vector and the partition pool. - void ClosePartitions(); - - /// Calls finalizes on all tuples starting at 'it'. - void CleanupHashTbl(const std::vector& agg_fn_evals, - PartitionedHashTable::Iterator it); + /// All partitions that are aggregated and can just return the results in GetNext(). + /// After consuming all the input, hash_partitions_ is split into spilled_partitions_ + /// and aggregated_partitions_, depending on if it was spilled or not. 
+ std::deque aggregated_partitions_; + + /// END: Members that must be Reset() + ///////////////////////////////////////// + + /// The hash table and streams (aggregated and unaggregated) for an individual + /// partition. The streams of each partition always (i.e. regardless of level) + /// initially use small buffers. Streaming pre-aggregations do not spill and do not + /// require an unaggregated stream. + struct Partition { + Partition(PartitionedAggregationNode* parent, int level, int idx) + : parent(parent), is_closed(false), level(level), idx(idx) {} + + ~Partition(); + + /// Initializes aggregated_row_stream and unaggregated_row_stream (if a spilling + /// aggregation), allocating one buffer for each. Spilling merge aggregations must + /// have enough reservation for the initial buffer for the stream, so this should + /// not fail due to OOM. Preaggregations do not reserve any buffers: if there is not + /// enough reservation for the initial buffer, the aggregated row stream is not + /// created and an OK status is returned. + Status InitStreams(); + + /// Initializes the hash table. 'aggregated_row_stream' must be non-NULL. + /// Sets 'got_memory' to true if the hash table was initialised or false on OOM. + Status InitHashTable(bool* got_memory); + + /// Called in case we need to serialize aggregated rows. This step effectively does + /// a merge aggregation in this node. + Status SerializeStreamForSpilling(); + + /// Closes this partition. If finalize_rows is true, this iterates over all rows + /// in aggregated_row_stream and finalizes them (this is only used in the cancellation + /// path). + void Close(bool finalize_rows); + + /// Spill this partition. 'more_aggregate_rows' = true means that more aggregate rows + /// may be appended to the partition before appending unaggregated rows.
On + /// success, one of the streams is left with a write iterator: the aggregated stream + /// if 'more_aggregate_rows' is true or the unaggregated stream otherwise. + Status Spill(bool more_aggregate_rows); + + bool is_spilled() const { return hash_tbl.get() == NULL; } + + PartitionedAggregationNode* parent; + + /// If true, this partition is closed and there is nothing left to do. + bool is_closed; + + /// How many times rows in this partition have been repartitioned. Partitions created + /// from the node's children's input are level 0, 1 after the first repartitioning, + /// etc. + const int level; + + /// The index of this partition within 'hash_partitions_' at its level. + const int idx; + + /// Hash table for this partition. + /// Can be NULL if this partition is no longer maintaining a hash table (i.e. + /// is spilled or we are passing through all rows for this partition). + boost::scoped_ptr hash_tbl; + + /// Clone of parent's agg_fn_evals_. Permanent allocations come from + /// 'agg_fn_perm_pool' and result allocations come from the ExecNode's + /// 'expr_results_pool_'. + std::vector agg_fn_evals; + boost::scoped_ptr agg_fn_pool; + + /// Tuple stream used to store aggregated rows. When the partition is not spilled, + /// (meaning the hash table is maintained), this stream is pinned and contains the + /// memory referenced by the hash table. When it is spilled, this consumes reservation + /// for a write buffer only during repartitioning of aggregated rows. + /// + /// For streaming preaggs, this may be NULL if sufficient memory is not available. + /// In that case hash_tbl is also NULL and all rows for the partition will be passed + /// through. + boost::scoped_ptr aggregated_row_stream; + + /// Unaggregated rows that are spilled. Always NULL for streaming pre-aggregations. + /// Always unpinned. Has a write buffer allocated when the partition is spilled and + /// unaggregated rows are being processed.
+ boost::scoped_ptr unaggregated_row_stream; + }; + + /// Stream used to store serialized spilled rows. Only used if needs_serialize_ + /// is set. This stream is never pinned and only used in Partition::Spill as a + /// temporary buffer. + boost::scoped_ptr serialize_stream_; + + /// Accessor for 'hash_tbls_' that verifies consistency with the partitions. + PartitionedHashTable* ALWAYS_INLINE GetHashTable(int partition_idx) { + PartitionedHashTable* ht = hash_tbls_[partition_idx]; + DCHECK_EQ(ht, hash_partitions_[partition_idx]->hash_tbl.get()); + return ht; + } + + /// Materializes 'row_batch' in either grouping or non-grouping case. + Status GetNextInternal(RuntimeState* state, RowBatch* row_batch, bool* eos); + + /// Helper function called by GetNextInternal() to ensure that string data referenced in + /// 'row_batch' will live as long as 'row_batch's tuples. 'first_row_idx' indexes the + /// first row that should be processed in 'row_batch'. + Status HandleOutputStrings(RowBatch* row_batch, int first_row_idx); + + /// Copies string data from the specified slot into 'pool', and sets the StringValues' + /// ptrs to the copied data. Copies data from all tuples in 'row_batch' from + /// 'first_row_idx' onwards. 'slot_desc' must have a var-len string type. + Status CopyStringData(const SlotDescriptor& slot_desc, RowBatch* row_batch, int first_row_idx, + MemPool* pool); + + /// Constructs singleton output tuple, allocating memory from pool. + Tuple* ConstructSingletonOutputTuple(const std::vector& agg_fn_evals, + MemPool* pool); + + /// Copies grouping values stored in 'ht_ctx_' that were computed over 'current_row_' + /// using 'grouping_expr_evals_'. Aggregation expr slots are set to their initial + /// values. Returns NULL if there was not enough memory to allocate the tuple or errors + /// occurred, in which case 'status' is set. Allocates tuple and var-len data for + /// grouping exprs from stream.
Var-len data for aggregate exprs is allocated from the + /// FunctionContexts, so is stored outside the stream. If stream's small buffers get + /// full, it will attempt to switch to IO-buffers. + Tuple* ConstructIntermediateTuple(const std::vector& agg_fn_evals, + BufferedTupleStream3* stream, Status* status); + + /// Constructs intermediate tuple, allocating memory from pool instead of the stream. + /// Returns NULL and sets status if there is not enough memory to allocate the tuple. + Tuple* ConstructIntermediateTuple(const std::vector& agg_fn_evals, + MemPool* pool, Status* status); + + /// Returns the number of bytes of variable-length data for the grouping values stored + /// in 'ht_ctx_'. + int GroupingExprsVarlenSize(); + + /// Initializes intermediate tuple by copying grouping values stored in 'ht_ctx_' that + /// were computed over 'current_row_' using 'grouping_expr_evals_'. Writes the + /// var-len data into buffer. 'buffer' points to the start of a buffer of at least the + /// size of the variable-length data: 'varlen_size'. + void CopyGroupingValues(Tuple* intermediate_tuple, uint8_t* buffer, int varlen_size); + + /// Initializes the aggregate function slots of an intermediate tuple. + /// Any var-len data is allocated from the FunctionContexts. + void InitAggSlots(const std::vector& agg_fn_evals, + Tuple* intermediate_tuple); + + /// Updates the given aggregation intermediate tuple with aggregation values computed + /// over 'row' using 'agg_fn_evals'. Whether the agg fn evaluator calls Update() or + /// Merge() is controlled by the evaluator itself, unless enforced explicitly by passing + /// in is_merge == true. The override is needed to merge spilled and non-spilled rows + /// belonging to the same partition independent of whether the agg fn evaluators have + /// is_merge() == true. + /// This function is replaced by codegen (which is why we don't use a vector argument + /// for agg_fn_evals).
Any var-len data is allocated from the FunctionContexts. + void UpdateTuple(NewAggFnEvaluator** agg_fn_evals, Tuple* tuple, TupleRow* row, + bool is_merge = false); + + /// Called on the intermediate tuple of each group after all input rows have been + /// consumed and aggregated. Computes the final aggregate values to be returned in + /// GetNext() using the agg fn evaluators' Serialize() or Finalize(). + /// For the Finalize() case if the output tuple is different from the intermediate + /// tuple, then a new tuple is allocated from 'pool' to hold the final result. + /// Grouping values are copied into the output tuple and the output tuple holding + /// the finalized/serialized aggregate values is returned. + /// TODO: Coordinate the allocation of new tuples with the release of memory + /// so as not to make memory consumption blow up. + Tuple* GetOutputTuple(const std::vector& agg_fn_evals, Tuple* tuple, + MemPool* pool); + + /// Do the aggregation for all tuple rows in the batch when there is no grouping. + /// This function is replaced by codegen. + Status ProcessBatchNoGrouping(RowBatch* batch); + + /// Processes a batch of rows. This is the core function of the algorithm. We partition + /// the rows into hash_partitions_, spilling as necessary. + /// If AGGREGATED_ROWS is true, it means that the rows in the batch are already + /// pre-aggregated. + /// 'prefetch_mode' specifies the prefetching mode in use. If it's not PREFETCH_NONE, + /// hash table buckets will be prefetched based on the hash values computed. Note + /// that 'prefetch_mode' will be substituted with constants during codegen time. + // + /// This function is replaced by codegen. We pass in ht_ctx_.get() as an argument for + /// performance. + template + Status IR_ALWAYS_INLINE ProcessBatch(RowBatch* batch, PartitionedHashTableCtx* ht_ctx); + + /// Evaluates the rows in 'batch' starting at 'start_row_idx' and stores the results in + /// the expression values cache in 'ht_ctx'.
The number of rows evaluated depends on + /// the capacity of the cache. 'prefetch_mode' specifies the prefetching mode in use. + /// If it's not PREFETCH_NONE, hash table buckets for the computed hashes will be + /// prefetched. Note that codegen replaces 'prefetch_mode' with a constant. + template + void EvalAndHashPrefetchGroup(RowBatch* batch, int start_row_idx, + PartitionedHashTableCtx* ht_ctx); + + /// This function processes each individual row in ProcessBatch(). Must be inlined into + /// ProcessBatch for codegen to substitute function calls with codegen'd versions. + /// May spill partitions if not enough memory is available. + template + Status IR_ALWAYS_INLINE ProcessRow(TupleRow* row, PartitionedHashTableCtx* ht_ctx); + + /// Create a new intermediate tuple in partition, initialized with row. ht_ctx is + /// the context for the partition's hash table and hash is the precomputed hash of + /// the row. The row can be an unaggregated or aggregated row depending on + /// AGGREGATED_ROWS. Spills partitions if necessary to append the new intermediate + /// tuple to the partition's stream. Must be inlined into ProcessBatch for codegen + /// to substitute function calls with codegen'd versions. insert_it is an iterator + /// for insertion returned from PartitionedHashTable::FindBuildRowBucket(). + template + Status IR_ALWAYS_INLINE AddIntermediateTuple(Partition* partition, TupleRow* row, uint32_t hash, + PartitionedHashTable::Iterator insert_it); + + /// Append a row to a spilled partition. May spill partitions if needed to switch to + /// I/O buffers. Selects the correct stream according to the argument. Inlined into + /// ProcessBatch(). + template + Status IR_ALWAYS_INLINE AppendSpilledRow(Partition* partition, TupleRow* row); + + /// Reads all the rows from input_stream and process them by calling ProcessBatch(). 
+ template + Status ProcessStream(BufferedTupleStream3* input_stream); + + /// Output 'singleton_output_tuple_' and transfer memory to 'row_batch'. + void GetSingletonOutput(RowBatch* row_batch); + + /// Get rows for the next rowbatch from the next partition. Sets 'partition_eos_' to + /// true if all rows from all partitions have been returned or the limit is reached. + Status GetRowsFromPartition(RuntimeState* state, RowBatch* row_batch); + + /// Get output rows from child for streaming pre-aggregation. Aggregates some rows with + /// hash table and passes through other rows converted into the intermediate + /// tuple format. Sets 'child_eos_' once all rows from child have been returned. + Status GetRowsStreaming(RuntimeState* state, RowBatch* row_batch); + + /// Return true if we should keep expanding hash tables in the preagg. If false, + /// the preagg should pass through any rows it can't fit in its tables. + bool ShouldExpandPreaggHashTables() const; + + /// Streaming processing of in_batch from child. Rows from child are either aggregated + /// into the hash table or added to 'out_batch' in the intermediate tuple format. + /// 'in_batch' is processed entirely, and 'out_batch' must have enough capacity to + /// store all of the rows in 'in_batch'. + /// 'needs_serialize' is an argument so that codegen can replace it with a constant, + /// rather than using the member variable 'needs_serialize_'. + /// 'prefetch_mode' specifies the prefetching mode in use. If it's not PREFETCH_NONE, + /// hash table buckets will be prefetched based on the hash values computed. Note + /// that 'prefetch_mode' will be substituted with constants during codegen time. + /// 'remaining_capacity' is an array with PARTITION_FANOUT entries with the number of + /// additional rows that can be added to the hash table per partition. It is updated + /// by ProcessBatchStreaming() when it inserts new rows. 
+ /// 'ht_ctx' is passed in as a way to avoid aliasing of 'this' confusing the optimiser. + Status ProcessBatchStreaming(bool needs_serialize, RowBatch* in_batch, RowBatch* out_batch, + PartitionedHashTableCtx* ht_ctx, + int remaining_capacity[PARTITION_FANOUT]); + + /// Tries to add intermediate to the hash table 'hash_tbl' of 'partition' for streaming + /// aggregation. The input row must have been evaluated with 'ht_ctx', with 'hash' set + /// to the corresponding hash. If the tuple already exists in the hash table, update + /// the tuple and return true. Otherwise try to create a new entry in the hash table, + /// returning true if successful or false if the table is full. 'remaining_capacity' + /// keeps track of how many more entries can be added to the hash table so we can avoid + /// retrying inserts. It is decremented if an insert succeeds and set to zero if an + /// insert fails. If an error occurs, returns false and sets 'status'. + bool IR_ALWAYS_INLINE TryAddToHashTable(PartitionedHashTableCtx* ht_ctx, Partition* partition, + PartitionedHashTable* hash_tbl, TupleRow* in_row, + uint32_t hash, int* remaining_capacity, Status* status); + + /// Initializes hash_partitions_. 'level' is the level for the partitions to create. + /// If 'single_partition_idx' is provided, it must be a number in range + /// [0, PARTITION_FANOUT), and only that partition is created - all others point to it. + /// Also sets ht_ctx_'s level to 'level'. + Status CreateHashPartitions(int level, int single_partition_idx = -1); + + /// Ensure that hash tables for all in-memory partitions are large enough to fit + /// 'num_rows' additional hash table entries. If there is not enough memory to + /// resize the hash tables, may spill partitions. 'aggregated_rows' is true if + /// we're currently partitioning aggregated rows. 
+ Status CheckAndResizeHashPartitions(bool aggregated_rows, int num_rows, + const PartitionedHashTableCtx* ht_ctx); + + /// Prepares the next partition to return results from. On return, this function + /// initializes output_iterator_ and output_partition_. This either removes + /// a partition from aggregated_partitions_ (and is done) or removes the next + /// partition from aggregated_partitions_ and repartitions it. + Status NextPartition(); + + /// Tries to build the first partition in 'spilled_partitions_'. + /// If successful, set *built_partition to the partition. The caller owns the partition + /// and is responsible for closing it. If unsuccessful because the partition could not + /// fit in memory, set *built_partition to NULL and append the spilled partition to the + /// head of 'spilled_partitions_' so it can be processed by + /// RepartitionSpilledPartition(). + Status BuildSpilledPartition(Partition** built_partition); + + /// Repartitions the first partition in 'spilled_partitions_' into PARTITION_FANOUT + /// output partitions. On success, each output partition is either: + /// * closed, if no rows were added to the partition. + /// * in 'spilled_partitions_', if the partition spilled. + /// * in 'aggregated_partitions_', if the output partition was not spilled. + Status RepartitionSpilledPartition(); + + /// Picks a partition from 'hash_partitions_' to spill. 'more_aggregate_rows' is passed + /// to Partition::Spill() when spilling the partition. See the Partition::Spill() + /// comment for further explanation. + Status SpillPartition(bool more_aggregate_rows); + + /// Moves the partitions in hash_partitions_ to aggregated_partitions_ or + /// spilled_partitions_. Partitions moved to spilled_partitions_ are unpinned. + /// input_rows is the number of input rows that have been repartitioned. + /// Used for diagnostics. 
+ Status MoveHashPartitions(int64_t input_rows); + + /// Adds a partition to the front of 'spilled_partitions_' for later processing. + /// ('spilled_partitions_' uses LIFO so more finely partitioned partitions are processed + /// first). This allows us to delete pages earlier and bottom out the recursion + /// earlier and also improves time locality of access to spilled data on disk. + void PushSpilledPartition(Partition* partition); + + /// Calls Close() on every Partition in 'aggregated_partitions_', + /// 'spilled_partitions_', and 'hash_partitions_' and then resets the lists, + /// the vector and the partition pool. + void ClosePartitions(); + + /// Finalizes all tuples starting at 'it'. + void CleanupHashTbl(const std::vector& agg_fn_evals, + PartitionedHashTable::Iterator it); /// Compute minimum buffer reservation for grouping aggregations. /// We need one buffer per partition, which is used either as the write buffer for the @@ -704,22 +705,21 @@ class PartitionedAggregationNode : public ExecNode { /// If we need to serialize, we need an additional buffer while spilling a partition /// as the partitions aggregate stream needs to be serialized and rewritten. /// We do not spill streaming preaggregations, so we do not need to reserve any buffers. - int64_t MinReservation() const { - //DCHECK(!grouping_exprs_.empty()); - // Must be kept in sync with AggregationNode.computeNodeResourceProfile() in fe. - //if (is_streaming_preagg_) { - // Reserve at least one buffer and a 64kb hash table per partition. - // return (_resource_profile.spillable_buffer_size + 64 * 1024) * PARTITION_FANOUT; - //} - //int num_buffers = PARTITION_FANOUT + 1 + (needs_serialize_ ? 1 : 0); - // Two of the buffers must fit the maximum row.
- //return _resource_profile.spillable_buffer_size * (num_buffers - 2) + - //_resource_profile.max_row_buffer_size * 2; - return 0; - } + int64_t MinReservation() const { + //DCHECK(!grouping_exprs_.empty()); + // Must be kept in sync with AggregationNode.computeNodeResourceProfile() in fe. + //if (is_streaming_preagg_) { + // Reserve at least one buffer and a 64kb hash table per partition. + // return (_resource_profile.spillable_buffer_size + 64 * 1024) * PARTITION_FANOUT; + //} + //int num_buffers = PARTITION_FANOUT + 1 + (needs_serialize_ ? 1 : 0); + // Two of the buffers must fit the maximum row. + //return _resource_profile.spillable_buffer_size * (num_buffers - 2) + + //_resource_profile.max_row_buffer_size * 2; + return 0; + } }; -} +} // namespace doris #endif - diff --git a/be/src/exec/partitioned_aggregation_node_ir.cc b/be/src/exec/partitioned_aggregation_node_ir.cc index 7fa7bf8aca1ce2..6d45a2cf7906bb 100644 --- a/be/src/exec/partitioned_aggregation_node_ir.cc +++ b/be/src/exec/partitioned_aggregation_node_ir.cc @@ -26,226 +26,225 @@ using namespace doris; Status PartitionedAggregationNode::ProcessBatchNoGrouping(RowBatch* batch) { - Tuple* output_tuple = singleton_output_tuple_; - FOREACH_ROW(batch, 0, batch_iter) { - UpdateTuple(agg_fn_evals_.data(), output_tuple, batch_iter.get()); - } - return Status::OK(); + Tuple* output_tuple = singleton_output_tuple_; + FOREACH_ROW(batch, 0, batch_iter) { + UpdateTuple(agg_fn_evals_.data(), output_tuple, batch_iter.get()); + } + return Status::OK(); } -template -Status PartitionedAggregationNode::ProcessBatch(RowBatch* batch, - PartitionedHashTableCtx* ht_ctx) { - DCHECK(!hash_partitions_.empty()); - DCHECK(!is_streaming_preagg_); - - // Make sure that no resizes will happen when inserting individual rows to the hash - // table of each partition by pessimistically assuming that all the rows in each batch - // will end up to the same partition. 
- // TODO: Once we have a histogram with the number of rows per partition, we will have - // accurate resize calls. - RETURN_IF_ERROR(CheckAndResizeHashPartitions(AGGREGATED_ROWS, batch->num_rows(), ht_ctx)); - - PartitionedHashTableCtx::ExprValuesCache* expr_vals_cache = ht_ctx->expr_values_cache(); - const int cache_size = expr_vals_cache->capacity(); - const int num_rows = batch->num_rows(); - for (int group_start = 0; group_start < num_rows; group_start += cache_size) { - EvalAndHashPrefetchGroup(batch, group_start, ht_ctx); - - FOREACH_ROW_LIMIT(batch, group_start, cache_size, batch_iter) { - RETURN_IF_ERROR(ProcessRow(batch_iter.get(), ht_ctx)); - expr_vals_cache->NextRow(); +template +Status PartitionedAggregationNode::ProcessBatch(RowBatch* batch, PartitionedHashTableCtx* ht_ctx) { + DCHECK(!hash_partitions_.empty()); + DCHECK(!is_streaming_preagg_); + + // Make sure that no resizes will happen when inserting individual rows to the hash + // table of each partition by pessimistically assuming that all the rows in each batch + // will end up to the same partition. + // TODO: Once we have a histogram with the number of rows per partition, we will have + // accurate resize calls. 
+ RETURN_IF_ERROR(CheckAndResizeHashPartitions(AGGREGATED_ROWS, batch->num_rows(), ht_ctx)); + + PartitionedHashTableCtx::ExprValuesCache* expr_vals_cache = ht_ctx->expr_values_cache(); + const int cache_size = expr_vals_cache->capacity(); + const int num_rows = batch->num_rows(); + for (int group_start = 0; group_start < num_rows; group_start += cache_size) { + EvalAndHashPrefetchGroup(batch, group_start, ht_ctx); + + FOREACH_ROW_LIMIT(batch, group_start, cache_size, batch_iter) { + RETURN_IF_ERROR(ProcessRow(batch_iter.get(), ht_ctx)); + expr_vals_cache->NextRow(); + } + ht_ctx->expr_results_pool_->clear(); + DCHECK(expr_vals_cache->AtEnd()); } - ht_ctx->expr_results_pool_->clear(); - DCHECK(expr_vals_cache->AtEnd()); - } - return Status::OK(); + return Status::OK(); } -template +template void IR_ALWAYS_INLINE PartitionedAggregationNode::EvalAndHashPrefetchGroup( - RowBatch* batch, int start_row_idx, - PartitionedHashTableCtx* ht_ctx) { - PartitionedHashTableCtx::ExprValuesCache* expr_vals_cache = ht_ctx->expr_values_cache(); - const int cache_size = expr_vals_cache->capacity(); - - expr_vals_cache->Reset(); - FOREACH_ROW_LIMIT(batch, start_row_idx, cache_size, batch_iter) { - TupleRow* row = batch_iter.get(); - bool is_null; - if (AGGREGATED_ROWS) { - is_null = !ht_ctx->EvalAndHashBuild(row); - } else { - is_null = !ht_ctx->EvalAndHashProbe(row); + RowBatch* batch, int start_row_idx, PartitionedHashTableCtx* ht_ctx) { + PartitionedHashTableCtx::ExprValuesCache* expr_vals_cache = ht_ctx->expr_values_cache(); + const int cache_size = expr_vals_cache->capacity(); + + expr_vals_cache->Reset(); + FOREACH_ROW_LIMIT(batch, start_row_idx, cache_size, batch_iter) { + TupleRow* row = batch_iter.get(); + bool is_null; + if (AGGREGATED_ROWS) { + is_null = !ht_ctx->EvalAndHashBuild(row); + } else { + is_null = !ht_ctx->EvalAndHashProbe(row); + } + // Hoist lookups out of non-null branch to speed up non-null case. 
+ const uint32_t hash = expr_vals_cache->CurExprValuesHash(); + const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS); + PartitionedHashTable* hash_tbl = GetHashTable(partition_idx); + if (is_null) { + expr_vals_cache->SetRowNull(); + } else if (config::enable_prefetch) { + if (LIKELY(hash_tbl != NULL)) hash_tbl->PrefetchBucket(hash); + } + expr_vals_cache->NextRow(); } + + expr_vals_cache->ResetForRead(); +} + +template +Status PartitionedAggregationNode::ProcessRow(TupleRow* row, PartitionedHashTableCtx* ht_ctx) { + PartitionedHashTableCtx::ExprValuesCache* expr_vals_cache = ht_ctx->expr_values_cache(); // Hoist lookups out of non-null branch to speed up non-null case. const uint32_t hash = expr_vals_cache->CurExprValuesHash(); const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS); + if (expr_vals_cache->IsRowNull()) return Status::OK(); + // To process this row, we first see if it can be aggregated or inserted into this + // partition's hash table. If we need to insert it and that fails, due to OOM, we + // spill the partition. The partition to spill is not necessarily dst_partition, + // so we can try again to insert the row. PartitionedHashTable* hash_tbl = GetHashTable(partition_idx); - if (is_null) { - expr_vals_cache->SetRowNull(); - } else if (config::enable_prefetch) { - if (LIKELY(hash_tbl != NULL)) hash_tbl->PrefetchBucket(hash); + Partition* dst_partition = hash_partitions_[partition_idx]; + DCHECK(dst_partition != nullptr); + DCHECK_EQ(dst_partition->is_spilled(), hash_tbl == NULL); + if (hash_tbl == NULL) { + // This partition is already spilled, just append the row. 
+ return AppendSpilledRow(dst_partition, row); } - expr_vals_cache->NextRow(); - } - expr_vals_cache->ResetForRead(); -} - -template -Status PartitionedAggregationNode::ProcessRow(TupleRow* row, - PartitionedHashTableCtx* ht_ctx) { - PartitionedHashTableCtx::ExprValuesCache* expr_vals_cache = ht_ctx->expr_values_cache(); - // Hoist lookups out of non-null branch to speed up non-null case. - const uint32_t hash = expr_vals_cache->CurExprValuesHash(); - const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS); - if (expr_vals_cache->IsRowNull()) return Status::OK(); - // To process this row, we first see if it can be aggregated or inserted into this - // partition's hash table. If we need to insert it and that fails, due to OOM, we - // spill the partition. The partition to spill is not necessarily dst_partition, - // so we can try again to insert the row. - PartitionedHashTable* hash_tbl = GetHashTable(partition_idx); - Partition* dst_partition = hash_partitions_[partition_idx]; - DCHECK(dst_partition != nullptr); - DCHECK_EQ(dst_partition->is_spilled(), hash_tbl == NULL); - if (hash_tbl == NULL) { - // This partition is already spilled, just append the row. - return AppendSpilledRow(dst_partition, row); - } - - DCHECK(dst_partition->aggregated_row_stream->is_pinned()); - bool found; - // Find the appropriate bucket in the hash table. There will always be a free - // bucket because we checked the size above. - PartitionedHashTable::Iterator it = hash_tbl->FindBuildRowBucket(ht_ctx, &found); - DCHECK(!it.AtEnd()) << "Hash table had no free buckets"; - if (AGGREGATED_ROWS) { - // If the row is already an aggregate row, it cannot match anything in the - // hash table since we process the aggregate rows first. These rows should - // have been aggregated in the initial pass. - DCHECK(!found); - } else if (found) { - // Row is already in hash table. Do the aggregation and we're done. 
- UpdateTuple(dst_partition->agg_fn_evals.data(), it.GetTuple(), row); - return Status::OK(); - } + DCHECK(dst_partition->aggregated_row_stream->is_pinned()); + bool found; + // Find the appropriate bucket in the hash table. There will always be a free + // bucket because we checked the size above. + PartitionedHashTable::Iterator it = hash_tbl->FindBuildRowBucket(ht_ctx, &found); + DCHECK(!it.AtEnd()) << "Hash table had no free buckets"; + if (AGGREGATED_ROWS) { + // If the row is already an aggregate row, it cannot match anything in the + // hash table since we process the aggregate rows first. These rows should + // have been aggregated in the initial pass. + DCHECK(!found); + } else if (found) { + // Row is already in hash table. Do the aggregation and we're done. + UpdateTuple(dst_partition->agg_fn_evals.data(), it.GetTuple(), row); + return Status::OK(); + } - // If we are seeing this result row for the first time, we need to construct the - // result row and initialize it. - return AddIntermediateTuple(dst_partition, row, hash, it); + // If we are seeing this result row for the first time, we need to construct the + // result row and initialize it. + return AddIntermediateTuple(dst_partition, row, hash, it); } -template -Status PartitionedAggregationNode::AddIntermediateTuple(Partition* partition, - TupleRow* row, uint32_t hash, PartitionedHashTable::Iterator insert_it) { - while (true) { - DCHECK(partition->aggregated_row_stream->is_pinned()); - Tuple* intermediate_tuple = ConstructIntermediateTuple(partition->agg_fn_evals, - partition->aggregated_row_stream.get(), &process_batch_status_); - - if (LIKELY(intermediate_tuple != NULL)) { - UpdateTuple(partition->agg_fn_evals.data(), intermediate_tuple, row, AGGREGATED_ROWS); - // After copying and initializing the tuple, insert it into the hash table. 
- insert_it.SetTuple(intermediate_tuple, hash); - return Status::OK(); - } else if (!process_batch_status_.ok()) { - return std::move(process_batch_status_); - } +template +Status PartitionedAggregationNode::AddIntermediateTuple(Partition* partition, TupleRow* row, + uint32_t hash, + PartitionedHashTable::Iterator insert_it) { + while (true) { + DCHECK(partition->aggregated_row_stream->is_pinned()); + Tuple* intermediate_tuple = ConstructIntermediateTuple( + partition->agg_fn_evals, partition->aggregated_row_stream.get(), + &process_batch_status_); + + if (LIKELY(intermediate_tuple != NULL)) { + UpdateTuple(partition->agg_fn_evals.data(), intermediate_tuple, row, AGGREGATED_ROWS); + // After copying and initializing the tuple, insert it into the hash table. + insert_it.SetTuple(intermediate_tuple, hash); + return Status::OK(); + } else if (!process_batch_status_.ok()) { + return std::move(process_batch_status_); + } - // We did not have enough memory to add intermediate_tuple to the stream. - RETURN_IF_ERROR(SpillPartition(AGGREGATED_ROWS)); - if (partition->is_spilled()) { - return AppendSpilledRow(partition, row); + // We did not have enough memory to add intermediate_tuple to the stream. 
+ RETURN_IF_ERROR(SpillPartition(AGGREGATED_ROWS)); + if (partition->is_spilled()) { + return AppendSpilledRow(partition, row); + } } - } } -Status PartitionedAggregationNode::ProcessBatchStreaming(bool needs_serialize, - RowBatch* in_batch, RowBatch* out_batch, - PartitionedHashTableCtx* ht_ctx, int remaining_capacity[PARTITION_FANOUT]) { - DCHECK(is_streaming_preagg_); - DCHECK_EQ(out_batch->num_rows(), 0); - DCHECK_LE(in_batch->num_rows(), out_batch->capacity()); - - RowBatch::Iterator out_batch_iterator(out_batch, out_batch->num_rows()); - PartitionedHashTableCtx::ExprValuesCache* expr_vals_cache = ht_ctx->expr_values_cache(); - const int num_rows = in_batch->num_rows(); - const int cache_size = expr_vals_cache->capacity(); - for (int group_start = 0; group_start < num_rows; group_start += cache_size) { - EvalAndHashPrefetchGroup(in_batch, group_start, ht_ctx); - - FOREACH_ROW_LIMIT(in_batch, group_start, cache_size, in_batch_iter) { - // Hoist lookups out of non-null branch to speed up non-null case. - TupleRow* in_row = in_batch_iter.get(); - const uint32_t hash = expr_vals_cache->CurExprValuesHash(); - const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS); - if (!expr_vals_cache->IsRowNull() && - !TryAddToHashTable(ht_ctx, hash_partitions_[partition_idx], - GetHashTable(partition_idx), in_row, hash, &remaining_capacity[partition_idx], - &process_batch_status_)) { - RETURN_IF_ERROR(std::move(process_batch_status_)); - // Tuple is not going into hash table, add it to the output batch. 
- Tuple* intermediate_tuple = ConstructIntermediateTuple(agg_fn_evals_, - out_batch->tuple_data_pool(), &process_batch_status_); - if (UNLIKELY(intermediate_tuple == NULL)) { - DCHECK(!process_batch_status_.ok()); - return std::move(process_batch_status_); +Status PartitionedAggregationNode::ProcessBatchStreaming(bool needs_serialize, RowBatch* in_batch, + RowBatch* out_batch, + PartitionedHashTableCtx* ht_ctx, + int remaining_capacity[PARTITION_FANOUT]) { + DCHECK(is_streaming_preagg_); + DCHECK_EQ(out_batch->num_rows(), 0); + DCHECK_LE(in_batch->num_rows(), out_batch->capacity()); + + RowBatch::Iterator out_batch_iterator(out_batch, out_batch->num_rows()); + PartitionedHashTableCtx::ExprValuesCache* expr_vals_cache = ht_ctx->expr_values_cache(); + const int num_rows = in_batch->num_rows(); + const int cache_size = expr_vals_cache->capacity(); + for (int group_start = 0; group_start < num_rows; group_start += cache_size) { + EvalAndHashPrefetchGroup(in_batch, group_start, ht_ctx); + + FOREACH_ROW_LIMIT(in_batch, group_start, cache_size, in_batch_iter) { + // Hoist lookups out of non-null branch to speed up non-null case. + TupleRow* in_row = in_batch_iter.get(); + const uint32_t hash = expr_vals_cache->CurExprValuesHash(); + const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS); + if (!expr_vals_cache->IsRowNull() && + !TryAddToHashTable(ht_ctx, hash_partitions_[partition_idx], + GetHashTable(partition_idx), in_row, hash, + &remaining_capacity[partition_idx], &process_batch_status_)) { + RETURN_IF_ERROR(std::move(process_batch_status_)); + // Tuple is not going into hash table, add it to the output batch. 
+ Tuple* intermediate_tuple = ConstructIntermediateTuple( + agg_fn_evals_, out_batch->tuple_data_pool(), &process_batch_status_); + if (UNLIKELY(intermediate_tuple == NULL)) { + DCHECK(!process_batch_status_.ok()); + return std::move(process_batch_status_); + } + UpdateTuple(agg_fn_evals_.data(), intermediate_tuple, in_row); + out_batch_iterator.get()->set_tuple(0, intermediate_tuple); + out_batch_iterator.next(); + out_batch->commit_last_row(); + } + DCHECK(process_batch_status_.ok()); + expr_vals_cache->NextRow(); } - UpdateTuple(agg_fn_evals_.data(), intermediate_tuple, in_row); - out_batch_iterator.get()->set_tuple(0, intermediate_tuple); - out_batch_iterator.next(); - out_batch->commit_last_row(); - } - DCHECK(process_batch_status_.ok()); - expr_vals_cache->NextRow(); + ht_ctx->expr_results_pool_->clear(); + DCHECK(expr_vals_cache->AtEnd()); } - ht_ctx->expr_results_pool_->clear(); - DCHECK(expr_vals_cache->AtEnd()); - } - if (needs_serialize) { - FOREACH_ROW(out_batch, 0, out_batch_iter) { - NewAggFnEvaluator::Serialize(agg_fn_evals_, out_batch_iter.get()->get_tuple(0)); + if (needs_serialize) { + FOREACH_ROW(out_batch, 0, out_batch_iter) { + NewAggFnEvaluator::Serialize(agg_fn_evals_, out_batch_iter.get()->get_tuple(0)); + } } - } - return Status::OK(); + return Status::OK(); } -bool PartitionedAggregationNode::TryAddToHashTable( - PartitionedHashTableCtx* ht_ctx, Partition* partition, - PartitionedHashTable* hash_tbl, TupleRow* in_row, - uint32_t hash, int* remaining_capacity, Status* status) { - DCHECK(remaining_capacity != NULL); - DCHECK_EQ(hash_tbl, partition->hash_tbl.get()); - DCHECK_GE(*remaining_capacity, 0); - bool found; - // This is called from ProcessBatchStreaming() so the rows are not aggregated. 
- PartitionedHashTable::Iterator it = hash_tbl->FindBuildRowBucket(ht_ctx, &found); - Tuple* intermediate_tuple; - if (found) { - intermediate_tuple = it.GetTuple(); - } else if (*remaining_capacity == 0) { - return false; - } else { - intermediate_tuple = ConstructIntermediateTuple(partition->agg_fn_evals, - partition->aggregated_row_stream.get(), status); - if (LIKELY(intermediate_tuple != NULL)) { - it.SetTuple(intermediate_tuple, hash); - --(*remaining_capacity); +bool PartitionedAggregationNode::TryAddToHashTable(PartitionedHashTableCtx* ht_ctx, + Partition* partition, + PartitionedHashTable* hash_tbl, TupleRow* in_row, + uint32_t hash, int* remaining_capacity, + Status* status) { + DCHECK(remaining_capacity != NULL); + DCHECK_EQ(hash_tbl, partition->hash_tbl.get()); + DCHECK_GE(*remaining_capacity, 0); + bool found; + // This is called from ProcessBatchStreaming() so the rows are not aggregated. + PartitionedHashTable::Iterator it = hash_tbl->FindBuildRowBucket(ht_ctx, &found); + Tuple* intermediate_tuple; + if (found) { + intermediate_tuple = it.GetTuple(); + } else if (*remaining_capacity == 0) { + return false; } else { - // Avoid repeatedly trying to add tuples when under memory pressure. - *remaining_capacity = 0; - return false; + intermediate_tuple = ConstructIntermediateTuple( + partition->agg_fn_evals, partition->aggregated_row_stream.get(), status); + if (LIKELY(intermediate_tuple != NULL)) { + it.SetTuple(intermediate_tuple, hash); + --(*remaining_capacity); + } else { + // Avoid repeatedly trying to add tuples when under memory pressure. + *remaining_capacity = 0; + return false; + } } - } - UpdateTuple(partition->agg_fn_evals.data(), intermediate_tuple, in_row); - return true; + UpdateTuple(partition->agg_fn_evals.data(), intermediate_tuple, in_row); + return true; } // Instantiate required templates. 
template Status PartitionedAggregationNode::ProcessBatch(RowBatch*, - PartitionedHashTableCtx*); -template Status PartitionedAggregationNode::ProcessBatch(RowBatch*, - PartitionedHashTableCtx*); - + PartitionedHashTableCtx*); +template Status PartitionedAggregationNode::ProcessBatch(RowBatch*, PartitionedHashTableCtx*); diff --git a/be/src/exec/partitioned_hash_table.cc b/be/src/exec/partitioned_hash_table.cc index de79e1da30bdb3..a50bf8f36820da 100644 --- a/be/src/exec/partitioned_hash_table.cc +++ b/be/src/exec/partitioned_hash_table.cc @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. -#include "exec/partitioned_hash_table.inline.h" +#include #include #include -#include #include "exec/exec_node.h" +#include "exec/partitioned_hash_table.inline.h" #include "exprs/expr.h" #include "exprs/expr_context.h" #include "exprs/slot_ref.h" @@ -32,8 +32,6 @@ #include "runtime/string_value.h" #include "util/doris_metrics.h" - - using namespace doris; using namespace strings; @@ -41,24 +39,9 @@ using namespace strings; // Random primes to multiply the seed with. static uint32_t SEED_PRIMES[] = { - 1, // First seed must be 1, level 0 is used by other operators in the fragment. - 1431655781, - 1183186591, - 622729787, - 472882027, - 338294347, - 275604541, - 41161739, - 29999999, - 27475109, - 611603, - 16313357, - 11380003, - 21261403, - 33393119, - 101, - 71043403 -}; + 1, // First seed must be 1, level 0 is used by other operators in the fragment. + 1431655781, 1183186591, 622729787, 472882027, 338294347, 275604541, 41161739, 29999999, + 27475109, 611603, 16313357, 11380003, 21261403, 33393119, 101, 71043403}; // Put a non-zero constant in the result location for NULL. // We don't want(NULL, 1) to hash to the same as (0, 1). 
@@ -67,15 +50,14 @@ static uint32_t SEED_PRIMES[] = { // TODO find a better approach, since primitives like CHAR(N) can be up // to 255 bytes static int64_t NULL_VALUE[] = { - HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, - HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, - HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, - HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, - HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, - HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, - HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, - HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED -}; + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED}; PartitionedHashTableCtx::PartitionedHashTableCtx(const std::vector& build_exprs, const std::vector& probe_exprs, @@ -96,21 +78,21 @@ PartitionedHashTableCtx::PartitionedHashTableCtx(const std::vector& build mem_pool_(mem_pool), expr_results_pool_(expr_results_pool) { DCHECK(tracker_ != nullptr); - DCHECK(!finds_some_nulls_ || stores_nulls_); - // Compute the layout and buffer size to store the evaluated expr results - DCHECK_EQ(build_exprs_.size(), probe_exprs_.size()); - DCHECK_EQ(build_exprs_.size(), 
finds_nulls_.size()); - DCHECK(!build_exprs_.empty()); - - // Populate the seeds to use for all the levels. TODO: revisit how we generate these. - DCHECK_GE(max_levels, 0); - DCHECK_LT(max_levels, sizeof(SEED_PRIMES) / sizeof(SEED_PRIMES[0])); - DCHECK_NE(initial_seed, 0); - seeds_.resize(max_levels + 1); - seeds_[0] = initial_seed; - for (int i = 1; i <= max_levels; ++i) { - seeds_[i] = seeds_[i - 1] * SEED_PRIMES[i]; - } + DCHECK(!finds_some_nulls_ || stores_nulls_); + // Compute the layout and buffer size to store the evaluated expr results + DCHECK_EQ(build_exprs_.size(), probe_exprs_.size()); + DCHECK_EQ(build_exprs_.size(), finds_nulls_.size()); + DCHECK(!build_exprs_.empty()); + + // Populate the seeds to use for all the levels. TODO: revisit how we generate these. + DCHECK_GE(max_levels, 0); + DCHECK_LT(max_levels, sizeof(SEED_PRIMES) / sizeof(SEED_PRIMES[0])); + DCHECK_NE(initial_seed, 0); + seeds_.resize(max_levels + 1); + seeds_[0] = initial_seed; + for (int i = 1; i <= max_levels; ++i) { + seeds_[i] = seeds_[i - 1] * SEED_PRIMES[i]; + } } Status PartitionedHashTableCtx::Init(ObjectPool* pool, RuntimeState* state, int num_build_tuples, @@ -148,17 +130,17 @@ Status PartitionedHashTableCtx::Init(ObjectPool* pool, RuntimeState* state, int return expr_values_cache_.Init(state, tracker_, build_exprs_); } -Status PartitionedHashTableCtx::Create(ObjectPool* pool, RuntimeState* state, - const std::vector& build_exprs, - const std::vector& probe_exprs, bool stores_nulls, - const std::vector& finds_nulls, int32_t initial_seed, int max_levels, - int num_build_tuples, MemPool* mem_pool, MemPool* expr_results_pool, - const std::shared_ptr& tracker, const RowDescriptor& row_desc, - const RowDescriptor& row_desc_probe, - boost::scoped_ptr* ht_ctx) { - ht_ctx->reset(new PartitionedHashTableCtx(build_exprs, probe_exprs, stores_nulls, - finds_nulls, initial_seed, max_levels, mem_pool, expr_results_pool, tracker)); - return (*ht_ctx)->Init(pool, state, num_build_tuples, 
row_desc, row_desc_probe); +Status PartitionedHashTableCtx::Create( + ObjectPool* pool, RuntimeState* state, const std::vector& build_exprs, + const std::vector& probe_exprs, bool stores_nulls, + const std::vector& finds_nulls, int32_t initial_seed, int max_levels, + int num_build_tuples, MemPool* mem_pool, MemPool* expr_results_pool, + const std::shared_ptr& tracker, const RowDescriptor& row_desc, + const RowDescriptor& row_desc_probe, boost::scoped_ptr* ht_ctx) { + ht_ctx->reset(new PartitionedHashTableCtx(build_exprs, probe_exprs, stores_nulls, finds_nulls, + initial_seed, max_levels, mem_pool, expr_results_pool, + tracker)); + return (*ht_ctx)->Init(pool, state, num_build_tuples, row_desc, row_desc_probe); } Status PartitionedHashTableCtx::Open(RuntimeState* state) { @@ -173,441 +155,449 @@ Status PartitionedHashTableCtx::Open(RuntimeState* state) { } void PartitionedHashTableCtx::Close(RuntimeState* state) { - free(scratch_row_); - scratch_row_ = NULL; - expr_values_cache_.Close(tracker_); - for (int i = 0; i < build_expr_evals_.size(); i++) { - build_expr_evals_[i]->close(state); - } - - for (int i = 0; i < probe_expr_evals_.size(); i++) { - probe_expr_evals_[i]->close(state); - } - - // TODO chenhao release new expr in Init, remove this after merging - // ScalarFnEvaluator. - build_expr_evals_.clear(); - probe_expr_evals_.clear(); + free(scratch_row_); + scratch_row_ = NULL; + expr_values_cache_.Close(tracker_); + for (int i = 0; i < build_expr_evals_.size(); i++) { + build_expr_evals_[i]->close(state); + } + + for (int i = 0; i < probe_expr_evals_.size(); i++) { + probe_expr_evals_[i]->close(state); + } + + // TODO chenhao release new expr in Init, remove this after merging + // ScalarFnEvaluator. 
+ build_expr_evals_.clear(); + probe_expr_evals_.clear(); } void PartitionedHashTableCtx::FreeBuildLocalAllocations() { - //ExprContext::FreeLocalAllocations(build_expr_evals_); + //ExprContext::FreeLocalAllocations(build_expr_evals_); } void PartitionedHashTableCtx::FreeProbeLocalAllocations() { - //ExprContext::FreeLocalAllocations(probe_expr_evals_); + //ExprContext::FreeLocalAllocations(probe_expr_evals_); } void PartitionedHashTableCtx::FreeLocalAllocations() { - FreeBuildLocalAllocations(); - FreeProbeLocalAllocations(); + FreeBuildLocalAllocations(); + FreeProbeLocalAllocations(); } uint32_t PartitionedHashTableCtx::Hash(const void* input, int len, uint32_t hash) const { - /// Use CRC hash at first level for better performance. Switch to murmur hash at - /// subsequent levels since CRC doesn't randomize well with different seed inputs. - if (level_ == 0) return HashUtil::hash(input, len, hash); - return HashUtil::murmur_hash2_64(input, len, hash); + /// Use CRC hash at first level for better performance. Switch to murmur hash at + /// subsequent levels since CRC doesn't randomize well with different seed inputs. + if (level_ == 0) return HashUtil::hash(input, len, hash); + return HashUtil::murmur_hash2_64(input, len, hash); } -uint32_t PartitionedHashTableCtx::HashRow( - const uint8_t* expr_values, const uint8_t* expr_values_null) const noexcept { - DCHECK_LT(level_, seeds_.size()); - if (expr_values_cache_.var_result_offset() == -1) { - /// This handles NULLs implicitly since a constant seed value was put - /// into results buffer for nulls. 
- return Hash( - expr_values, expr_values_cache_.expr_values_bytes_per_row(), seeds_[level_]); - } else { - return PartitionedHashTableCtx::HashVariableLenRow(expr_values, expr_values_null); - } +uint32_t PartitionedHashTableCtx::HashRow(const uint8_t* expr_values, + const uint8_t* expr_values_null) const noexcept { + DCHECK_LT(level_, seeds_.size()); + if (expr_values_cache_.var_result_offset() == -1) { + /// This handles NULLs implicitly since a constant seed value was put + /// into results buffer for nulls. + return Hash(expr_values, expr_values_cache_.expr_values_bytes_per_row(), seeds_[level_]); + } else { + return PartitionedHashTableCtx::HashVariableLenRow(expr_values, expr_values_null); + } } bool PartitionedHashTableCtx::EvalRow(TupleRow* row, const vector& ctxs, - uint8_t* expr_values, uint8_t* expr_values_null) noexcept { - bool has_null = false; - for (int i = 0; i < ctxs.size(); ++i) { - void* loc = expr_values_cache_.ExprValuePtr(expr_values, i); - void* val = ctxs[i]->get_value(row); - if (val == NULL) { - // If the table doesn't store nulls, no reason to keep evaluating - if (!stores_nulls_) return true; - expr_values_null[i] = true; - val = reinterpret_cast(&NULL_VALUE); - has_null = true; - DCHECK_LE(build_exprs_[i]->type().get_slot_size(), - sizeof(NULL_VALUE)); - RawValue::write(val, loc, build_exprs_[i]->type(), NULL); - } else { - expr_values_null[i] = false; - DCHECK_LE(build_exprs_[i]->type().get_slot_size(), - sizeof(NULL_VALUE)); - RawValue::write(val, loc, build_exprs_[i]->type(), expr_results_pool_); + uint8_t* expr_values, uint8_t* expr_values_null) noexcept { + bool has_null = false; + for (int i = 0; i < ctxs.size(); ++i) { + void* loc = expr_values_cache_.ExprValuePtr(expr_values, i); + void* val = ctxs[i]->get_value(row); + if (val == NULL) { + // If the table doesn't store nulls, no reason to keep evaluating + if (!stores_nulls_) return true; + expr_values_null[i] = true; + val = reinterpret_cast(&NULL_VALUE); + has_null = true; + 
DCHECK_LE(build_exprs_[i]->type().get_slot_size(), sizeof(NULL_VALUE)); + RawValue::write(val, loc, build_exprs_[i]->type(), NULL); + } else { + expr_values_null[i] = false; + DCHECK_LE(build_exprs_[i]->type().get_slot_size(), sizeof(NULL_VALUE)); + RawValue::write(val, loc, build_exprs_[i]->type(), expr_results_pool_); + } } - } - return has_null; + return has_null; } uint32_t PartitionedHashTableCtx::HashVariableLenRow(const uint8_t* expr_values, - const uint8_t* expr_values_null) const { - uint32_t hash = seeds_[level_]; - int var_result_offset = expr_values_cache_.var_result_offset(); - // Hash the non-var length portions (if there are any) - if (var_result_offset != 0) { - hash = Hash(expr_values, var_result_offset, hash); - } - - for (int i = 0; i < build_exprs_.size(); ++i) { - // non-string and null slots are already part of 'expr_values'. - // if (build_expr_ctxs_[i]->root()->type().type != TYPE_STRING - PrimitiveType type = build_exprs_[i]->type().type; - if (type != TYPE_CHAR && type != TYPE_VARCHAR) { - continue; + const uint8_t* expr_values_null) const { + uint32_t hash = seeds_[level_]; + int var_result_offset = expr_values_cache_.var_result_offset(); + // Hash the non-var length portions (if there are any) + if (var_result_offset != 0) { + hash = Hash(expr_values, var_result_offset, hash); } - const void* loc = expr_values_cache_.ExprValuePtr(expr_values, i); - if (expr_values_null[i]) { - // Hash the null random seed values at 'loc' - hash = Hash(loc, sizeof(StringValue), hash); - } else { - // Hash the string - // TODO: when using CRC hash on empty string, this only swaps bytes. - const StringValue* str = reinterpret_cast(loc); - hash = Hash(str->ptr, str->len, hash); + for (int i = 0; i < build_exprs_.size(); ++i) { + // non-string and null slots are already part of 'expr_values'. 
+ // if (build_expr_ctxs_[i]->root()->type().type != TYPE_STRING + PrimitiveType type = build_exprs_[i]->type().type; + if (type != TYPE_CHAR && type != TYPE_VARCHAR) { + continue; + } + + const void* loc = expr_values_cache_.ExprValuePtr(expr_values, i); + if (expr_values_null[i]) { + // Hash the null random seed values at 'loc' + hash = Hash(loc, sizeof(StringValue), hash); + } else { + // Hash the string + // TODO: when using CRC hash on empty string, this only swaps bytes. + const StringValue* str = reinterpret_cast(loc); + hash = Hash(str->ptr, str->len, hash); + } } - } - return hash; + return hash; } template bool PartitionedHashTableCtx::Equals(TupleRow* build_row, const uint8_t* expr_values, - const uint8_t* expr_values_null) const noexcept { - for (int i = 0; i < build_expr_evals_.size(); ++i) { - void* val = build_expr_evals_[i]->get_value(build_row); - if (val == NULL) { - if (!(FORCE_NULL_EQUALITY || finds_nulls_[i])) return false; - if (!expr_values_null[i]) return false; - continue; - } else { - if (expr_values_null[i]) return false; - } + const uint8_t* expr_values_null) const noexcept { + for (int i = 0; i < build_expr_evals_.size(); ++i) { + void* val = build_expr_evals_[i]->get_value(build_row); + if (val == NULL) { + if (!(FORCE_NULL_EQUALITY || finds_nulls_[i])) return false; + if (!expr_values_null[i]) return false; + continue; + } else { + if (expr_values_null[i]) return false; + } - const void* loc = expr_values_cache_.ExprValuePtr(expr_values, i); - if (!RawValue::eq(loc, val, build_exprs_[i]->type())) { - return false; + const void* loc = expr_values_cache_.ExprValuePtr(expr_values, i); + if (!RawValue::eq(loc, val, build_exprs_[i]->type())) { + return false; + } } - } - return true; + return true; } -template bool PartitionedHashTableCtx::Equals(TupleRow* build_row, - const uint8_t* expr_values, const uint8_t* expr_values_null) const; +template bool PartitionedHashTableCtx::Equals(TupleRow* build_row, const uint8_t* expr_values, + const 
uint8_t* expr_values_null) const; template bool PartitionedHashTableCtx::Equals(TupleRow* build_row, - const uint8_t* expr_values, const uint8_t* expr_values_null) const; + const uint8_t* expr_values, + const uint8_t* expr_values_null) const; PartitionedHashTableCtx::ExprValuesCache::ExprValuesCache() - : capacity_(0), - cur_expr_values_(NULL), - cur_expr_values_null_(NULL), - cur_expr_values_hash_(NULL), - cur_expr_values_hash_end_(NULL), - expr_values_array_(NULL), - expr_values_null_array_(NULL), - expr_values_hash_array_(NULL), - null_bitmap_(0) {} + : capacity_(0), + cur_expr_values_(NULL), + cur_expr_values_null_(NULL), + cur_expr_values_hash_(NULL), + cur_expr_values_hash_end_(NULL), + expr_values_array_(NULL), + expr_values_null_array_(NULL), + expr_values_hash_array_(NULL), + null_bitmap_(0) {} Status PartitionedHashTableCtx::ExprValuesCache::Init(RuntimeState* state, - const std::shared_ptr& tracker, const std::vector& build_exprs) { - // Initialize the number of expressions. - num_exprs_ = build_exprs.size(); - // Compute the layout of evaluated values of a row. - expr_values_bytes_per_row_ = Expr::compute_results_layout(build_exprs, - &expr_values_offsets_, &var_result_offset_); - if (expr_values_bytes_per_row_ == 0) { - DCHECK_EQ(num_exprs_, 0); + const std::shared_ptr& tracker, + const std::vector& build_exprs) { + // Initialize the number of expressions. + num_exprs_ = build_exprs.size(); + // Compute the layout of evaluated values of a row. + expr_values_bytes_per_row_ = + Expr::compute_results_layout(build_exprs, &expr_values_offsets_, &var_result_offset_); + if (expr_values_bytes_per_row_ == 0) { + DCHECK_EQ(num_exprs_, 0); + return Status::OK(); + } + DCHECK_GT(expr_values_bytes_per_row_, 0); + // Compute the maximum number of cached rows which can fit in the memory budget. + // TODO: Find the optimal prefetch batch size. This may be something + // processor dependent so we may need calibration at Impala startup time. 
+ capacity_ = std::max(1, std::min(state->batch_size(), + MAX_EXPR_VALUES_ARRAY_SIZE / expr_values_bytes_per_row_)); + + int mem_usage = MemUsage(capacity_, expr_values_bytes_per_row_, num_exprs_); + if (UNLIKELY(!tracker->TryConsume(mem_usage))) { + capacity_ = 0; + string details = Substitute( + "PartitionedHashTableCtx::ExprValuesCache failed to allocate $0 bytes.", mem_usage); + return tracker->MemLimitExceeded(state, details, mem_usage); + } + + int expr_values_size = expr_values_bytes_per_row_ * capacity_; + expr_values_array_.reset(new uint8_t[expr_values_size]); + cur_expr_values_ = expr_values_array_.get(); + memset(cur_expr_values_, 0, expr_values_size); + + int expr_values_null_size = num_exprs_ * capacity_; + expr_values_null_array_.reset(new uint8_t[expr_values_null_size]); + cur_expr_values_null_ = expr_values_null_array_.get(); + memset(cur_expr_values_null_, 0, expr_values_null_size); + + expr_values_hash_array_.reset(new uint32_t[capacity_]); + cur_expr_values_hash_ = expr_values_hash_array_.get(); + cur_expr_values_hash_end_ = cur_expr_values_hash_; + memset(cur_expr_values_hash_, 0, sizeof(uint32) * capacity_); + + null_bitmap_.Reset(capacity_); return Status::OK(); - } - DCHECK_GT(expr_values_bytes_per_row_, 0); - // Compute the maximum number of cached rows which can fit in the memory budget. - // TODO: Find the optimal prefetch batch size. This may be something - // processor dependent so we may need calibration at Impala startup time. 
- capacity_ = std::max(1, std::min(state->batch_size(), - MAX_EXPR_VALUES_ARRAY_SIZE / expr_values_bytes_per_row_)); - - int mem_usage = MemUsage(capacity_, expr_values_bytes_per_row_, num_exprs_); - if (UNLIKELY(!tracker->TryConsume(mem_usage))) { - capacity_ = 0; - string details = Substitute("PartitionedHashTableCtx::ExprValuesCache failed to allocate $0 bytes.", - mem_usage); - return tracker->MemLimitExceeded(state, details, mem_usage); - } - - int expr_values_size = expr_values_bytes_per_row_ * capacity_; - expr_values_array_.reset(new uint8_t[expr_values_size]); - cur_expr_values_ = expr_values_array_.get(); - memset(cur_expr_values_, 0, expr_values_size); - - int expr_values_null_size = num_exprs_ * capacity_; - expr_values_null_array_.reset(new uint8_t[expr_values_null_size]); - cur_expr_values_null_ = expr_values_null_array_.get(); - memset(cur_expr_values_null_, 0, expr_values_null_size); - - expr_values_hash_array_.reset(new uint32_t[capacity_]); - cur_expr_values_hash_ = expr_values_hash_array_.get(); - cur_expr_values_hash_end_ = cur_expr_values_hash_; - memset(cur_expr_values_hash_, 0, sizeof(uint32) * capacity_); - - null_bitmap_.Reset(capacity_); - return Status::OK(); } void PartitionedHashTableCtx::ExprValuesCache::Close(const std::shared_ptr& tracker) { - if (capacity_ == 0) return; - cur_expr_values_ = NULL; - cur_expr_values_null_ = NULL; - cur_expr_values_hash_ = NULL; - cur_expr_values_hash_end_ = NULL; - expr_values_array_.reset(); - expr_values_null_array_.reset(); - expr_values_hash_array_.reset(); - null_bitmap_.Reset(0); - int mem_usage = MemUsage(capacity_, expr_values_bytes_per_row_, num_exprs_); - tracker->Release(mem_usage); + if (capacity_ == 0) return; + cur_expr_values_ = NULL; + cur_expr_values_null_ = NULL; + cur_expr_values_hash_ = NULL; + cur_expr_values_hash_end_ = NULL; + expr_values_array_.reset(); + expr_values_null_array_.reset(); + expr_values_hash_array_.reset(); + null_bitmap_.Reset(0); + int mem_usage = 
MemUsage(capacity_, expr_values_bytes_per_row_, num_exprs_); + tracker->Release(mem_usage); } -int PartitionedHashTableCtx::ExprValuesCache::MemUsage(int capacity, - int expr_values_bytes_per_row, int num_exprs) { - return expr_values_bytes_per_row * capacity + // expr_values_array_ - num_exprs * capacity + // expr_values_null_array_ - sizeof(uint32) * capacity + // expr_values_hash_array_ - Bitmap::MemUsage(capacity); // null_bitmap_ +int PartitionedHashTableCtx::ExprValuesCache::MemUsage(int capacity, int expr_values_bytes_per_row, + int num_exprs) { + return expr_values_bytes_per_row * capacity + // expr_values_array_ + num_exprs * capacity + // expr_values_null_array_ + sizeof(uint32) * capacity + // expr_values_hash_array_ + Bitmap::MemUsage(capacity); // null_bitmap_ } void PartitionedHashTableCtx::ExprValuesCache::ResetIterators() { - cur_expr_values_ = expr_values_array_.get(); - cur_expr_values_null_ = expr_values_null_array_.get(); - cur_expr_values_hash_ = expr_values_hash_array_.get(); + cur_expr_values_ = expr_values_array_.get(); + cur_expr_values_null_ = expr_values_null_array_.get(); + cur_expr_values_hash_ = expr_values_hash_array_.get(); } void PartitionedHashTableCtx::ExprValuesCache::Reset() noexcept { - ResetIterators(); - // Set the end pointer after resetting the other pointers so they point to - // the same location. - cur_expr_values_hash_end_ = cur_expr_values_hash_; - null_bitmap_.SetAllBits(false); + ResetIterators(); + // Set the end pointer after resetting the other pointers so they point to + // the same location. + cur_expr_values_hash_end_ = cur_expr_values_hash_; + null_bitmap_.SetAllBits(false); } void PartitionedHashTableCtx::ExprValuesCache::ResetForRead() { - // Record the end of hash values iterator to be used in AtEnd(). - // Do it before resetting the pointers. - cur_expr_values_hash_end_ = cur_expr_values_hash_; - ResetIterators(); + // Record the end of hash values iterator to be used in AtEnd(). 
+ // Do it before resetting the pointers. + cur_expr_values_hash_end_ = cur_expr_values_hash_; + ResetIterators(); } constexpr double PartitionedHashTable::MAX_FILL_FACTOR; constexpr int64_t PartitionedHashTable::DATA_PAGE_SIZE; PartitionedHashTable* PartitionedHashTable::Create(Suballocator* allocator, bool stores_duplicates, - int num_build_tuples, BufferedTupleStream3* tuple_stream, int64_t max_num_buckets, - int64_t initial_num_buckets) { - return new PartitionedHashTable(config::enable_quadratic_probing, allocator, stores_duplicates, - num_build_tuples, tuple_stream, max_num_buckets, initial_num_buckets); + int num_build_tuples, + BufferedTupleStream3* tuple_stream, + int64_t max_num_buckets, + int64_t initial_num_buckets) { + return new PartitionedHashTable(config::enable_quadratic_probing, allocator, stores_duplicates, + num_build_tuples, tuple_stream, max_num_buckets, + initial_num_buckets); } PartitionedHashTable::PartitionedHashTable(bool quadratic_probing, Suballocator* allocator, - bool stores_duplicates, int num_build_tuples, BufferedTupleStream3* stream, - int64_t max_num_buckets, int64_t num_buckets) - : allocator_(allocator), - tuple_stream_(stream), - stores_tuples_(num_build_tuples == 1), - stores_duplicates_(stores_duplicates), - quadratic_probing_(quadratic_probing), - total_data_page_size_(0), - next_node_(NULL), - node_remaining_current_page_(0), - num_duplicate_nodes_(0), - max_num_buckets_(max_num_buckets), - buckets_(NULL), - num_buckets_(num_buckets), - num_filled_buckets_(0), - num_buckets_with_duplicates_(0), - num_build_tuples_(num_build_tuples), - has_matches_(false), - num_probes_(0), num_failed_probes_(0), travel_length_(0), num_hash_collisions_(0), - num_resizes_(0) { - DCHECK_EQ((num_buckets & (num_buckets - 1)), 0) << "num_buckets must be a power of 2"; - DCHECK_GT(num_buckets, 0) << "num_buckets must be larger than 0"; - DCHECK(stores_tuples_ || stream != NULL); + bool stores_duplicates, int num_build_tuples, + 
BufferedTupleStream3* stream, int64_t max_num_buckets, + int64_t num_buckets) + : allocator_(allocator), + tuple_stream_(stream), + stores_tuples_(num_build_tuples == 1), + stores_duplicates_(stores_duplicates), + quadratic_probing_(quadratic_probing), + total_data_page_size_(0), + next_node_(NULL), + node_remaining_current_page_(0), + num_duplicate_nodes_(0), + max_num_buckets_(max_num_buckets), + buckets_(NULL), + num_buckets_(num_buckets), + num_filled_buckets_(0), + num_buckets_with_duplicates_(0), + num_build_tuples_(num_build_tuples), + has_matches_(false), + num_probes_(0), + num_failed_probes_(0), + travel_length_(0), + num_hash_collisions_(0), + num_resizes_(0) { + DCHECK_EQ((num_buckets & (num_buckets - 1)), 0) << "num_buckets must be a power of 2"; + DCHECK_GT(num_buckets, 0) << "num_buckets must be larger than 0"; + DCHECK(stores_tuples_ || stream != NULL); } Status PartitionedHashTable::Init(bool* got_memory) { - int64_t buckets_byte_size = num_buckets_ * sizeof(Bucket); - RETURN_IF_ERROR(allocator_->Allocate(buckets_byte_size, &bucket_allocation_)); - if (bucket_allocation_ == nullptr) { - num_buckets_ = 0; - *got_memory = false; + int64_t buckets_byte_size = num_buckets_ * sizeof(Bucket); + RETURN_IF_ERROR(allocator_->Allocate(buckets_byte_size, &bucket_allocation_)); + if (bucket_allocation_ == nullptr) { + num_buckets_ = 0; + *got_memory = false; + return Status::OK(); + } + buckets_ = reinterpret_cast(bucket_allocation_->data()); + memset(buckets_, 0, buckets_byte_size); + *got_memory = true; return Status::OK(); - } - buckets_ = reinterpret_cast(bucket_allocation_->data()); - memset(buckets_, 0, buckets_byte_size); - *got_memory = true; - return Status::OK(); } void PartitionedHashTable::Close() { - // Print statistics only for the large or heavily used hash tables. - // TODO: Tweak these numbers/conditions, or print them always? 
- const int64_t LARGE_HT = 128 * 1024; - const int64_t HEAVILY_USED = 1024 * 1024; - // TODO: These statistics should go to the runtime profile as well. - if ((num_buckets_ > LARGE_HT) || (num_probes_ > HEAVILY_USED)) VLOG(2) << PrintStats(); - for (auto& data_page : data_pages_) allocator_->Free(move(data_page)); - data_pages_.clear(); - if (bucket_allocation_ != nullptr) allocator_->Free(move(bucket_allocation_)); + // Print statistics only for the large or heavily used hash tables. + // TODO: Tweak these numbers/conditions, or print them always? + const int64_t LARGE_HT = 128 * 1024; + const int64_t HEAVILY_USED = 1024 * 1024; + // TODO: These statistics should go to the runtime profile as well. + if ((num_buckets_ > LARGE_HT) || (num_probes_ > HEAVILY_USED)) VLOG(2) << PrintStats(); + for (auto& data_page : data_pages_) allocator_->Free(move(data_page)); + data_pages_.clear(); + if (bucket_allocation_ != nullptr) allocator_->Free(move(bucket_allocation_)); } -Status PartitionedHashTable::CheckAndResize( - uint64_t buckets_to_fill, const PartitionedHashTableCtx* ht_ctx, bool* got_memory) { - uint64_t shift = 0; - while (num_filled_buckets_ + buckets_to_fill > - (num_buckets_ << shift) * MAX_FILL_FACTOR) { - ++shift; - } - if (shift > 0) return ResizeBuckets(num_buckets_ << shift, ht_ctx, got_memory); - *got_memory = true; - return Status::OK(); +Status PartitionedHashTable::CheckAndResize(uint64_t buckets_to_fill, + const PartitionedHashTableCtx* ht_ctx, + bool* got_memory) { + uint64_t shift = 0; + while (num_filled_buckets_ + buckets_to_fill > (num_buckets_ << shift) * MAX_FILL_FACTOR) { + ++shift; + } + if (shift > 0) return ResizeBuckets(num_buckets_ << shift, ht_ctx, got_memory); + *got_memory = true; + return Status::OK(); } -Status PartitionedHashTable::ResizeBuckets( - int64_t num_buckets, const PartitionedHashTableCtx* ht_ctx, bool* got_memory) { - DCHECK_EQ((num_buckets & (num_buckets - 1)), 0) - << "num_buckets=" << num_buckets << " must be a power of 
2"; - DCHECK_GT(num_buckets, num_filled_buckets_) - << "Cannot shrink the hash table to smaller number of buckets than the number of " - << "filled buckets."; - VLOG(2) << "Resizing hash table from " << num_buckets_ << " to " << num_buckets - << " buckets."; - if (max_num_buckets_ != -1 && num_buckets > max_num_buckets_) { - *got_memory = false; - return Status::OK(); - } - ++num_resizes_; - - // All memory that can grow proportional to the input should come from the block mgrs - // mem tracker. - // Note that while we copying over the contents of the old hash table, we need to have - // allocated both the old and the new hash table. Once we finish, we return the memory - // of the old hash table. - // int64_t old_size = num_buckets_ * sizeof(Bucket); - int64_t new_size = num_buckets * sizeof(Bucket); - - std::unique_ptr new_allocation; - RETURN_IF_ERROR(allocator_->Allocate(new_size, &new_allocation)); - if (new_allocation == NULL) { - *got_memory = false; +Status PartitionedHashTable::ResizeBuckets(int64_t num_buckets, + const PartitionedHashTableCtx* ht_ctx, + bool* got_memory) { + DCHECK_EQ((num_buckets & (num_buckets - 1)), 0) + << "num_buckets=" << num_buckets << " must be a power of 2"; + DCHECK_GT(num_buckets, num_filled_buckets_) + << "Cannot shrink the hash table to smaller number of buckets than the number of " + << "filled buckets."; + VLOG(2) << "Resizing hash table from " << num_buckets_ << " to " << num_buckets << " buckets."; + if (max_num_buckets_ != -1 && num_buckets > max_num_buckets_) { + *got_memory = false; + return Status::OK(); + } + ++num_resizes_; + + // All memory that can grow proportional to the input should come from the block mgrs + // mem tracker. + // Note that while we copying over the contents of the old hash table, we need to have + // allocated both the old and the new hash table. Once we finish, we return the memory + // of the old hash table. 
+ // int64_t old_size = num_buckets_ * sizeof(Bucket); + int64_t new_size = num_buckets * sizeof(Bucket); + + std::unique_ptr new_allocation; + RETURN_IF_ERROR(allocator_->Allocate(new_size, &new_allocation)); + if (new_allocation == NULL) { + *got_memory = false; + return Status::OK(); + } + Bucket* new_buckets = reinterpret_cast(new_allocation->data()); + memset(new_buckets, 0, new_size); + + // Walk the old table and copy all the filled buckets to the new (resized) table. + // We do not have to do anything with the duplicate nodes. This operation is expected + // to succeed. + for (PartitionedHashTable::Iterator iter = Begin(ht_ctx); !iter.AtEnd(); + NextFilledBucket(&iter.bucket_idx_, &iter.node_)) { + Bucket* bucket_to_copy = &buckets_[iter.bucket_idx_]; + bool found = false; + int64_t bucket_idx = + Probe(new_buckets, num_buckets, NULL, bucket_to_copy->hash, &found); + DCHECK(!found); + DCHECK_NE(bucket_idx, Iterator::BUCKET_NOT_FOUND) + << " Probe failed even though " + " there are free buckets. " + << num_buckets << " " << num_filled_buckets_; + Bucket* dst_bucket = &new_buckets[bucket_idx]; + *dst_bucket = *bucket_to_copy; + } + + num_buckets_ = num_buckets; + allocator_->Free(move(bucket_allocation_)); + bucket_allocation_ = std::move(new_allocation); + buckets_ = reinterpret_cast(bucket_allocation_->data()); + *got_memory = true; return Status::OK(); - } - Bucket* new_buckets = reinterpret_cast(new_allocation->data()); - memset(new_buckets, 0, new_size); - - // Walk the old table and copy all the filled buckets to the new (resized) table. - // We do not have to do anything with the duplicate nodes. This operation is expected - // to succeed. 
- for (PartitionedHashTable::Iterator iter = Begin(ht_ctx); !iter.AtEnd(); - NextFilledBucket(&iter.bucket_idx_, &iter.node_)) { - Bucket* bucket_to_copy = &buckets_[iter.bucket_idx_]; - bool found = false; - int64_t bucket_idx = - Probe(new_buckets, num_buckets, NULL, bucket_to_copy->hash, &found); - DCHECK(!found); - DCHECK_NE(bucket_idx, Iterator::BUCKET_NOT_FOUND) << " Probe failed even though " - " there are free buckets. " << num_buckets << " " << num_filled_buckets_; - Bucket* dst_bucket = &new_buckets[bucket_idx]; - *dst_bucket = *bucket_to_copy; - } - - num_buckets_ = num_buckets; - allocator_->Free(move(bucket_allocation_)); - bucket_allocation_ = std::move(new_allocation); - buckets_ = reinterpret_cast(bucket_allocation_->data()); - *got_memory = true; - return Status::OK(); } bool PartitionedHashTable::GrowNodeArray(Status* status) { - std::unique_ptr allocation; - *status = allocator_->Allocate(DATA_PAGE_SIZE, &allocation); - if (!status->ok() || allocation == nullptr) return false; - next_node_ = reinterpret_cast(allocation->data()); - data_pages_.push_back(std::move(allocation)); - node_remaining_current_page_ = DATA_PAGE_SIZE / sizeof(DuplicateNode); - total_data_page_size_ += DATA_PAGE_SIZE; - return true; + std::unique_ptr allocation; + *status = allocator_->Allocate(DATA_PAGE_SIZE, &allocation); + if (!status->ok() || allocation == nullptr) return false; + next_node_ = reinterpret_cast(allocation->data()); + data_pages_.push_back(std::move(allocation)); + node_remaining_current_page_ = DATA_PAGE_SIZE / sizeof(DuplicateNode); + total_data_page_size_ += DATA_PAGE_SIZE; + return true; } void PartitionedHashTable::DebugStringTuple(std::stringstream& ss, HtData& htdata, - const RowDescriptor* desc) { - if (stores_tuples_) { - ss << "(" << htdata.tuple << ")"; - } else { - ss << "(" << htdata.flat_row << ")"; - } - if (desc != NULL) { - Tuple* row[num_build_tuples_]; - ss << " " << GetRow(htdata, reinterpret_cast(row))->to_string(*desc); - } + const 
RowDescriptor* desc) { + if (stores_tuples_) { + ss << "(" << htdata.tuple << ")"; + } else { + ss << "(" << htdata.flat_row << ")"; + } + if (desc != NULL) { + Tuple* row[num_build_tuples_]; + ss << " " << GetRow(htdata, reinterpret_cast(row))->to_string(*desc); + } } string PartitionedHashTable::DebugString(bool skip_empty, bool show_match, - const RowDescriptor* desc) { - std::stringstream ss; - ss << std::endl; - for (int i = 0; i < num_buckets_; ++i) { - if (skip_empty && !buckets_[i].filled) continue; - ss << i << ": "; - if (show_match) { - if (buckets_[i].matched) { - ss << " [M]"; - } else { - ss << " [U]"; - } - } - if (buckets_[i].hasDuplicates) { - DuplicateNode* node = buckets_[i].bucketData.duplicates; - bool first = true; - ss << " [D] "; - while (node != NULL) { - if (!first) ss << ","; - DebugStringTuple(ss, node->htdata, desc); - node = node->next; - first = false; - } - } else { - ss << " [B] "; - if (buckets_[i].filled) { - DebugStringTuple(ss, buckets_[i].bucketData.htdata, desc); - } else { - ss << " - "; - } - } + const RowDescriptor* desc) { + std::stringstream ss; ss << std::endl; - } - return ss.str(); + for (int i = 0; i < num_buckets_; ++i) { + if (skip_empty && !buckets_[i].filled) continue; + ss << i << ": "; + if (show_match) { + if (buckets_[i].matched) { + ss << " [M]"; + } else { + ss << " [U]"; + } + } + if (buckets_[i].hasDuplicates) { + DuplicateNode* node = buckets_[i].bucketData.duplicates; + bool first = true; + ss << " [D] "; + while (node != NULL) { + if (!first) ss << ","; + DebugStringTuple(ss, node->htdata, desc); + node = node->next; + first = false; + } + } else { + ss << " [B] "; + if (buckets_[i].filled) { + DebugStringTuple(ss, buckets_[i].bucketData.htdata, desc); + } else { + ss << " - "; + } + } + ss << std::endl; + } + return ss.str(); } string PartitionedHashTable::PrintStats() const { - double curr_fill_factor = (double)num_filled_buckets_/(double)num_buckets_; - double avg_travel = 
(double)travel_length_/(double)num_probes_; - double avg_collisions = (double)num_hash_collisions_/(double)num_filled_buckets_; - std::stringstream ss; - ss << "Buckets: " << num_buckets_ << " " << num_filled_buckets_ << " " - << curr_fill_factor << std::endl; - ss << "Duplicates: " << num_buckets_with_duplicates_ << " buckets " - << num_duplicate_nodes_ << " nodes" << std::endl; - ss << "Probes: " << num_probes_ << std::endl; - ss << "FailedProbes: " << num_failed_probes_ << std::endl; - ss << "Travel: " << travel_length_ << " " << avg_travel << std::endl; - ss << "HashCollisions: " << num_hash_collisions_ << " " << avg_collisions << std::endl; - ss << "Resizes: " << num_resizes_ << std::endl; - return ss.str(); + double curr_fill_factor = (double)num_filled_buckets_ / (double)num_buckets_; + double avg_travel = (double)travel_length_ / (double)num_probes_; + double avg_collisions = (double)num_hash_collisions_ / (double)num_filled_buckets_; + std::stringstream ss; + ss << "Buckets: " << num_buckets_ << " " << num_filled_buckets_ << " " << curr_fill_factor + << std::endl; + ss << "Duplicates: " << num_buckets_with_duplicates_ << " buckets " << num_duplicate_nodes_ + << " nodes" << std::endl; + ss << "Probes: " << num_probes_ << std::endl; + ss << "FailedProbes: " << num_failed_probes_ << std::endl; + ss << "Travel: " << travel_length_ << " " << avg_travel << std::endl; + ss << "HashCollisions: " << num_hash_collisions_ << " " << avg_collisions << std::endl; + ss << "Resizes: " << num_resizes_ << std::endl; + return ss.str(); } diff --git a/be/src/exec/partitioned_hash_table.h b/be/src/exec/partitioned_hash_table.h index aec9a31cb86308..8cfa4a560d2c56 100644 --- a/be/src/exec/partitioned_hash_table.h +++ b/be/src/exec/partitioned_hash_table.h @@ -18,13 +18,14 @@ #ifndef DORIS_BE_SRC_EXEC_NEW_PARTITIONED_HASH_TABLE_H #define DORIS_BE_SRC_EXEC_NEW_PARTITIONED_HASH_TABLE_H -#include -#include #include #include +#include +#include + #include "codegen/doris_ir.h" 
-#include "common/logging.h" #include "common/compiler_util.h" +#include "common/logging.h" #include "runtime/buffered_tuple_stream3.h" #include "runtime/buffered_tuple_stream3.inline.h" #include "runtime/bufferpool/buffer_pool.h" @@ -103,394 +104,387 @@ class TupleRow; /// Control block for a hash table. This class contains the logic as well as the variables /// needed by a thread to operate on a hash table. class PartitionedHashTableCtx { - public: - - /// Create a hash table context with the specified parameters, invoke Init() to - /// initialize the new hash table context and return it in 'ht_ctx'. Expression - /// evaluators for the build and probe expressions will also be allocated. - /// Please see the comments of HashTableCtx constructor and Init() for details - /// of other parameters. - static Status Create(ObjectPool* pool, RuntimeState* state, - const std::vector& build_exprs, - const std::vector& probe_exprs, bool stores_nulls, - const std::vector& finds_nulls, int32_t initial_seed, - int max_levels, int num_build_tuples, MemPool* mem_pool, - MemPool* expr_results_pool, const std::shared_ptr& tracker, - const RowDescriptor& row_desc, const RowDescriptor& row_desc_probe, - boost::scoped_ptr* ht_ctx); - - /// Initialize the build and probe expression evaluators. - Status Open(RuntimeState* state); - - /// Call to cleanup any resources allocated by the expression evaluators. - void Close(RuntimeState* state); - - /// Free local allocations made by build and probe expression evaluators respectively. - void FreeBuildLocalAllocations(); - void FreeProbeLocalAllocations(); - - /// Free local allocations of both build and probe expression evaluators. 
- void FreeLocalAllocations(); - - void set_level(int level); - - int ALWAYS_INLINE level() const { return level_; } - - uint32_t ALWAYS_INLINE seed(int level) { return seeds_.at(level); } - - TupleRow* ALWAYS_INLINE scratch_row() const { return scratch_row_; } - - /// Returns the results of the expression at 'expr_idx' evaluated at the current row. - /// This value is invalid if the expr evaluated to NULL. - /// TODO: this is an awkward abstraction but aggregation node can take advantage of - /// it and save some expr evaluation calls. - void* ALWAYS_INLINE ExprValue(int expr_idx) const { - return expr_values_cache_.ExprValuePtr( - expr_values_cache_.cur_expr_values(), expr_idx); - } - - /// Returns if the expression at 'expr_idx' is evaluated to NULL for the current row. - bool ALWAYS_INLINE ExprValueNull(int expr_idx) const { - return static_cast(*(expr_values_cache_.cur_expr_values_null() + expr_idx)); - } - - /// Evaluate and hash the build/probe row, saving the evaluation to the current row of - /// the ExprValuesCache in this hash table context: the results are saved in - /// 'cur_expr_values_', the nullness of expressions values in 'cur_expr_values_null_', - /// and the hashed expression values in 'cur_expr_values_hash_'. Returns false if this - /// row should be rejected (doesn't need to be processed further) because it contains - /// NULL. These need to be inlined in the IR module so we can find and replace the - /// calls to EvalBuildRow()/EvalProbeRow(). - bool IR_ALWAYS_INLINE EvalAndHashBuild(TupleRow* row); - bool IR_ALWAYS_INLINE EvalAndHashProbe(TupleRow* row); - - /// Struct that returns the number of constants replaced by ReplaceConstants(). - struct HashTableReplacedConstants { - int stores_nulls; - int finds_some_nulls; - int stores_tuples; - int stores_duplicates; - int quadratic_probing; - }; - - /// To enable prefetching, the hash table building and probing are pipelined by the - /// exec nodes. 
A set of rows in a row batch will be evaluated and hashed first and - /// the corresponding hash table buckets are prefetched before they are probed against - /// the hash table. ExprValuesCache is a container for caching the results of - /// expressions evaluations for the rows in a prefetch set to avoid re-evaluating the - /// rows again during probing. Expressions evaluation can be very expensive. - /// - /// The expression evaluation results are cached in the following data structures: - /// - /// - 'expr_values_array_' is an array caching the results of the rows - /// evaluated against either the build or probe expressions. 'cur_expr_values_' - /// is a pointer into this array. - /// - 'expr_values_null_array_' is an array caching the nullness of each evaluated - /// expression in each row. 'cur_expr_values_null_' is a pointer into this array. - /// - 'expr_values_hash_array_' is an array of cached hash values of the rows. - /// 'cur_expr_values_hash_' is a pointer into this array. - /// - 'null_bitmap_' is a bitmap which indicates rows evaluated to NULL. - /// - /// ExprValuesCache provides an iterator like interface for performing a write pass - /// followed by a read pass. We refrain from providing an interface for random accesses - /// as there isn't a use case for it now and we want to avoid expensive multiplication - /// as the buffer size of each row is not necessarily power of two: - /// - Reset(), ResetForRead(): reset the iterators before writing / reading cached values. - /// - NextRow(): moves the iterators to point to the next row of cached values. - /// - AtEnd(): returns true if all cached rows have been read. Valid in read mode only. - /// - /// Various metadata information such as layout of results buffer is also stored in - /// this class. Note that the result buffer doesn't store variable length data. It only - /// contains pointers to the variable length data (e.g. if an expression value is a - /// StringValue). 
- /// - class ExprValuesCache { - public: - ExprValuesCache(); - - /// Allocates memory and initializes various data structures. Return error status - /// if memory allocation leads to the memory limits of the exec node to be exceeded. - /// 'tracker' is the memory tracker of the exec node which owns this PartitionedHashTableCtx. - Status Init(RuntimeState* state, const std::shared_ptr& tracker, - const std::vector& build_exprs); - - /// Frees up various resources and updates memory tracker with proper accounting. - /// 'tracker' should be the same memory tracker which was passed in for Init(). - void Close(const std::shared_ptr& tracker); - - /// Resets the cache states (iterators, end pointers etc) before writing. - void Reset() noexcept; - - /// Resets the iterators to the start before reading. Will record the current position - /// of the iterators in end pointer before resetting so AtEnd() can determine if all - /// cached values have been read. - void ResetForRead(); - - /// Advances the iterators to the next row by moving to the next entries in the - /// arrays of cached values. - void ALWAYS_INLINE NextRow(); - - /// Compute the total memory usage of this ExprValuesCache. - static int MemUsage(int capacity, int results_buffer_size, int num_build_exprs); - - /// Returns the maximum number rows of expression values states which can be cached. - int ALWAYS_INLINE capacity() const { return capacity_; } - - /// Returns the total size in bytes of a row of evaluated expressions' values. - int ALWAYS_INLINE expr_values_bytes_per_row() const { - return expr_values_bytes_per_row_; +public: + /// Create a hash table context with the specified parameters, invoke Init() to + /// initialize the new hash table context and return it in 'ht_ctx'. Expression + /// evaluators for the build and probe expressions will also be allocated. + /// Please see the comments of HashTableCtx constructor and Init() for details + /// of other parameters. 
+ static Status Create(ObjectPool* pool, RuntimeState* state, + const std::vector& build_exprs, + const std::vector& probe_exprs, bool stores_nulls, + const std::vector& finds_nulls, int32_t initial_seed, int max_levels, + int num_build_tuples, MemPool* mem_pool, MemPool* expr_results_pool, + const std::shared_ptr& tracker, const RowDescriptor& row_desc, + const RowDescriptor& row_desc_probe, + boost::scoped_ptr* ht_ctx); + + /// Initialize the build and probe expression evaluators. + Status Open(RuntimeState* state); + + /// Call to cleanup any resources allocated by the expression evaluators. + void Close(RuntimeState* state); + + /// Free local allocations made by build and probe expression evaluators respectively. + void FreeBuildLocalAllocations(); + void FreeProbeLocalAllocations(); + + /// Free local allocations of both build and probe expression evaluators. + void FreeLocalAllocations(); + + void set_level(int level); + + int ALWAYS_INLINE level() const { return level_; } + + uint32_t ALWAYS_INLINE seed(int level) { return seeds_.at(level); } + + TupleRow* ALWAYS_INLINE scratch_row() const { return scratch_row_; } + + /// Returns the results of the expression at 'expr_idx' evaluated at the current row. + /// This value is invalid if the expr evaluated to NULL. + /// TODO: this is an awkward abstraction but aggregation node can take advantage of + /// it and save some expr evaluation calls. + void* ALWAYS_INLINE ExprValue(int expr_idx) const { + return expr_values_cache_.ExprValuePtr(expr_values_cache_.cur_expr_values(), expr_idx); } - /// Returns the offset into the result buffer of the first variable length - /// data results. - int ALWAYS_INLINE var_result_offset() const { return var_result_offset_; } - - /// Returns true if the current read pass is complete, meaning all cached values - /// have been read. 
- bool ALWAYS_INLINE AtEnd() const { - return cur_expr_values_hash_ == cur_expr_values_hash_end_; + /// Returns if the expression at 'expr_idx' is evaluated to NULL for the current row. + bool ALWAYS_INLINE ExprValueNull(int expr_idx) const { + return static_cast(*(expr_values_cache_.cur_expr_values_null() + expr_idx)); } - /// Returns true if the current row is null but nulls are not considered in the current - /// phase (build or probe). - bool ALWAYS_INLINE IsRowNull() const { return null_bitmap_.Get(CurIdx()); } + /// Evaluate and hash the build/probe row, saving the evaluation to the current row of + /// the ExprValuesCache in this hash table context: the results are saved in + /// 'cur_expr_values_', the nullness of expressions values in 'cur_expr_values_null_', + /// and the hashed expression values in 'cur_expr_values_hash_'. Returns false if this + /// row should be rejected (doesn't need to be processed further) because it contains + /// NULL. These need to be inlined in the IR module so we can find and replace the + /// calls to EvalBuildRow()/EvalProbeRow(). + bool IR_ALWAYS_INLINE EvalAndHashBuild(TupleRow* row); + bool IR_ALWAYS_INLINE EvalAndHashProbe(TupleRow* row); + + /// Struct that returns the number of constants replaced by ReplaceConstants(). + struct HashTableReplacedConstants { + int stores_nulls; + int finds_some_nulls; + int stores_tuples; + int stores_duplicates; + int quadratic_probing; + }; - /// Record in a bitmap that the current row is null but nulls are not considered in - /// the current phase (build or probe). - void ALWAYS_INLINE SetRowNull() { null_bitmap_.Set(CurIdx(), true); } + /// To enable prefetching, the hash table building and probing are pipelined by the + /// exec nodes. A set of rows in a row batch will be evaluated and hashed first and + /// the corresponding hash table buckets are prefetched before they are probed against + /// the hash table. 
ExprValuesCache is a container for caching the results of + /// expressions evaluations for the rows in a prefetch set to avoid re-evaluating the + /// rows again during probing. Expressions evaluation can be very expensive. + /// + /// The expression evaluation results are cached in the following data structures: + /// + /// - 'expr_values_array_' is an array caching the results of the rows + /// evaluated against either the build or probe expressions. 'cur_expr_values_' + /// is a pointer into this array. + /// - 'expr_values_null_array_' is an array caching the nullness of each evaluated + /// expression in each row. 'cur_expr_values_null_' is a pointer into this array. + /// - 'expr_values_hash_array_' is an array of cached hash values of the rows. + /// 'cur_expr_values_hash_' is a pointer into this array. + /// - 'null_bitmap_' is a bitmap which indicates rows evaluated to NULL. + /// + /// ExprValuesCache provides an iterator like interface for performing a write pass + /// followed by a read pass. We refrain from providing an interface for random accesses + /// as there isn't a use case for it now and we want to avoid expensive multiplication + /// as the buffer size of each row is not necessarily power of two: + /// - Reset(), ResetForRead(): reset the iterators before writing / reading cached values. + /// - NextRow(): moves the iterators to point to the next row of cached values. + /// - AtEnd(): returns true if all cached rows have been read. Valid in read mode only. + /// + /// Various metadata information such as layout of results buffer is also stored in + /// this class. Note that the result buffer doesn't store variable length data. It only + /// contains pointers to the variable length data (e.g. if an expression value is a + /// StringValue). + /// + class ExprValuesCache { + public: + ExprValuesCache(); + + /// Allocates memory and initializes various data structures. 
Return error status + /// if memory allocation leads to the memory limits of the exec node to be exceeded. + /// 'tracker' is the memory tracker of the exec node which owns this PartitionedHashTableCtx. + Status Init(RuntimeState* state, const std::shared_ptr& tracker, + const std::vector& build_exprs); + + /// Frees up various resources and updates memory tracker with proper accounting. + /// 'tracker' should be the same memory tracker which was passed in for Init(). + void Close(const std::shared_ptr& tracker); + + /// Resets the cache states (iterators, end pointers etc) before writing. + void Reset() noexcept; + + /// Resets the iterators to the start before reading. Will record the current position + /// of the iterators in end pointer before resetting so AtEnd() can determine if all + /// cached values have been read. + void ResetForRead(); + + /// Advances the iterators to the next row by moving to the next entries in the + /// arrays of cached values. + void ALWAYS_INLINE NextRow(); + + /// Compute the total memory usage of this ExprValuesCache. + static int MemUsage(int capacity, int results_buffer_size, int num_build_exprs); + + /// Returns the maximum number rows of expression values states which can be cached. + int ALWAYS_INLINE capacity() const { return capacity_; } + + /// Returns the total size in bytes of a row of evaluated expressions' values. + int ALWAYS_INLINE expr_values_bytes_per_row() const { return expr_values_bytes_per_row_; } + + /// Returns the offset into the result buffer of the first variable length + /// data results. + int ALWAYS_INLINE var_result_offset() const { return var_result_offset_; } + + /// Returns true if the current read pass is complete, meaning all cached values + /// have been read. + bool ALWAYS_INLINE AtEnd() const { + return cur_expr_values_hash_ == cur_expr_values_hash_end_; + } + + /// Returns true if the current row is null but nulls are not considered in the current + /// phase (build or probe). 
+ bool ALWAYS_INLINE IsRowNull() const { return null_bitmap_.Get(CurIdx()); } + + /// Record in a bitmap that the current row is null but nulls are not considered in + /// the current phase (build or probe). + void ALWAYS_INLINE SetRowNull() { null_bitmap_.Set(CurIdx(), true); } + + /// Returns the hash values of the current row. + uint32_t ALWAYS_INLINE CurExprValuesHash() const { return *cur_expr_values_hash_; } + + /// Sets the hash values for the current row. + void ALWAYS_INLINE SetCurExprValuesHash(uint32_t hash) { *cur_expr_values_hash_ = hash; } + + /// Returns a pointer to the expression value at 'expr_idx' in 'expr_values'. + template + T ExprValuePtr(T expr_values, int expr_idx) const { + return expr_values + expr_values_offsets_[expr_idx]; + }; + + /// Returns the current row's expression buffer. The expression values in the buffer + /// are accessed using ExprValuePtr(). + uint8_t* ALWAYS_INLINE cur_expr_values() const { return cur_expr_values_; } + + /// Returns null indicator bytes for the current row, one per expression. Non-zero + /// bytes mean NULL, zero bytes mean non-NULL. Indexed by the expression index. + /// These are uint8_t instead of bool to simplify codegen with IRBuilder. + /// TODO: is there actually a valid reason why this is necessary for codegen? + uint8_t* ALWAYS_INLINE cur_expr_values_null() const { return cur_expr_values_null_; } + + /// Returns the offset into the results buffer of the expression value at 'expr_idx'. + int ALWAYS_INLINE expr_values_offsets(int expr_idx) const { + return expr_values_offsets_[expr_idx]; + } + + private: + friend class PartitionedHashTableCtx; + + /// Resets the iterators to the beginning of the cache values' arrays. + void ResetIterators(); + + /// Returns the offset in number of rows into the cached values' buffer. + int ALWAYS_INLINE CurIdx() const { + return cur_expr_values_hash_ - expr_values_hash_array_.get(); + } + + /// Max amount of memory in bytes for caching evaluated expression values. 
+ static const int MAX_EXPR_VALUES_ARRAY_SIZE = 256 << 10; + + /// Maximum number of rows of expressions evaluation states which this + /// ExprValuesCache can cache. + int capacity_; + + /// Byte size of a row of evaluated expression values. Never changes once set, + /// can be used for constant substitution during codegen. + int expr_values_bytes_per_row_; + + /// Number of build/probe expressions. + int num_exprs_; + + /// Pointer into 'expr_values_array_' for the current row's expression values. + uint8_t* cur_expr_values_; + + /// Pointer into 'expr_values_null_array_' for the current row's nullness of each + /// expression value. + uint8_t* cur_expr_values_null_; + + /// Pointer into 'expr_hash_value_array_' for the hash value of current row's + /// expression values. + uint32_t* cur_expr_values_hash_; + + /// Pointer to the buffer one beyond the end of the last entry of cached expressions' + /// hash values. + uint32_t* cur_expr_values_hash_end_; + + /// Array for caching up to 'capacity_' number of rows worth of evaluated expression + /// values. Each row consumes 'expr_values_bytes_per_row_' number of bytes. + boost::scoped_array expr_values_array_; + + /// Array for caching up to 'capacity_' number of rows worth of null booleans. + /// Each row contains 'num_exprs_' booleans to indicate nullness of expression values. + /// Used when the hash table supports NULL. Use 'uint8_t' to guarantee each entry is 1 + /// byte as sizeof(bool) is implementation dependent. The IR depends on this + /// assumption. + boost::scoped_array expr_values_null_array_; + + /// Array for caching up to 'capacity_' number of rows worth of hashed values. + boost::scoped_array expr_values_hash_array_; + + /// One bit for each row. A bit is set if that row is not hashed as it's evaluated + /// to NULL but the hash table doesn't support NULL. Such rows may still be included + /// in outputs for certain join types (e.g. left anti joins). 
+ Bitmap null_bitmap_; + + /// Maps from expression index to the byte offset into a row of expression values. + /// One entry per build/probe expression. + std::vector expr_values_offsets_; + + /// Byte offset into 'cur_expr_values_' that begins the variable length results for + /// a row. If -1, there are no variable length slots. Never changes once set, can be + /// constant substituted with codegen. + int var_result_offset_; + }; - /// Returns the hash values of the current row. - uint32_t ALWAYS_INLINE CurExprValuesHash() const { return *cur_expr_values_hash_; } + ExprValuesCache* ALWAYS_INLINE expr_values_cache() { return &expr_values_cache_; } - /// Sets the hash values for the current row. - void ALWAYS_INLINE SetCurExprValuesHash(uint32_t hash) { - *cur_expr_values_hash_ = hash; +private: + friend class PartitionedAggregationNode; + friend class PartitionedHashTable; + friend class HashTableTest_HashEmpty_Test; + + /// Construct a hash table context. + /// - build_exprs are the exprs that should be used to evaluate rows during Insert(). + /// - probe_exprs are used during FindProbeRow() + /// - stores_nulls: if false, TupleRows with nulls are ignored during Insert + /// - finds_nulls: if finds_nulls[i] is false, FindProbeRow() returns End() for + /// TupleRows with nulls in position i even if stores_nulls is true. + /// - initial_seed: initial seed value to use when computing hashes for rows with + /// level 0. Other levels have their seeds derived from this seed. + /// - max_levels: the max lhashevels we will hash with. + /// - mem_pool: the MemPool which the expression evaluators allocate from. Owned by the + /// exec node which owns this hash table context. Memory usage of the expression + /// value cache is charged against its MemTracker. 
+ /// + /// TODO: stores_nulls is too coarse: for a hash table in which some columns are joined + /// with '<=>' and others with '=', stores_nulls could distinguish between columns + /// in which nulls are stored and columns in which they are not, which could save + /// space by not storing some rows we know will never match. + PartitionedHashTableCtx(const std::vector& build_exprs, + const std::vector& probe_exprs, bool stores_nulls, + const std::vector& finds_nulls, int32_t initial_seed, + int max_levels, MemPool* mem_pool, MemPool* expr_results_pool, + const std::shared_ptr& tracker); + + /// Allocate various buffers for storing expression evaluation results, hash values, + /// null bits etc. Also allocate evaluators for the build and probe expressions and + /// store them in 'pool'. Returns error if allocation causes query memory limit to + /// be exceeded or the evaluators fail to initialize. 'num_build_tuples' is the number + /// of tuples of a row in the build side, used for computing the size of a scratch row. + Status Init(ObjectPool* pool, RuntimeState* state, int num_build_tuples, + const RowDescriptor& row_desc, const RowDescriptor& row_desc_probe); + + /// Compute the hash of the values in 'expr_values' with nullness 'expr_values_null'. + /// This will be replaced by codegen. We don't want this inlined for replacing + /// with codegen'd functions so the function name does not change. + uint32_t IR_NO_INLINE HashRow(const uint8_t* expr_values, + const uint8_t* expr_values_null) const noexcept; + + /// Wrapper function for calling correct HashUtil function in non-codegen'd case. + uint32_t Hash(const void* input, int len, uint32_t hash) const; + + /// Evaluate 'row' over build exprs, storing values into 'expr_values' and nullness into + /// 'expr_values_null'. This will be replaced by codegen. 
We do not want this function + /// inlined when cross compiled because we need to be able to differentiate between + /// EvalBuildRow and EvalProbeRow by name and the build/probe exprs are baked into the + /// codegen'd function. + bool IR_NO_INLINE EvalBuildRow(TupleRow* row, uint8_t* expr_values, + uint8_t* expr_values_null) noexcept { + return EvalRow(row, build_expr_evals_, expr_values, expr_values_null); } - /// Returns a pointer to the expression value at 'expr_idx' in 'expr_values'. - template - T ExprValuePtr(T expr_values, int expr_idx) const { - return expr_values + expr_values_offsets_[expr_idx]; - }; + /// Evaluate 'row' over probe exprs, storing the values into 'expr_values' and nullness + /// into 'expr_values_null'. This will be replaced by codegen. + bool IR_NO_INLINE EvalProbeRow(TupleRow* row, uint8_t* expr_values, + uint8_t* expr_values_null) noexcept { + return EvalRow(row, probe_expr_evals_, expr_values, expr_values_null); + } - /// Returns the current row's expression buffer. The expression values in the buffer - /// are accessed using ExprValuePtr(). - uint8_t* ALWAYS_INLINE cur_expr_values() const { return cur_expr_values_; } + /// Compute the hash of the values in 'expr_values' with nullness 'expr_values_null' + /// for a row with variable length fields (e.g. strings). + uint32_t HashVariableLenRow(const uint8_t* expr_values, const uint8_t* expr_values_null) const; + + /// Evaluate the exprs over row, storing the values into 'expr_values' and nullness into + /// 'expr_values_null'. Returns whether any expr evaluated to NULL. This will be + /// replaced by codegen. + bool EvalRow(TupleRow* row, const std::vector& ctxs, uint8_t* expr_values, + uint8_t* expr_values_null) noexcept; + + /// Returns true if the values of build_exprs evaluated over 'build_row' equal the + /// values in 'expr_values' with nullness 'expr_values_null'. 
FORCE_NULL_EQUALITY is + /// true if all nulls should be treated as equal, regardless of the values of + /// 'finds_nulls_'. This will be replaced by codegen. + template + bool IR_NO_INLINE Equals(TupleRow* build_row, const uint8_t* expr_values, + const uint8_t* expr_values_null) const noexcept; + + /// Helper function that calls Equals() with the current row. Always inlined so that + /// it does not appear in cross-compiled IR. + template + bool ALWAYS_INLINE Equals(TupleRow* build_row) const { + return Equals(build_row, expr_values_cache_.cur_expr_values(), + expr_values_cache_.cur_expr_values_null()); + } - /// Returns null indicator bytes for the current row, one per expression. Non-zero - /// bytes mean NULL, zero bytes mean non-NULL. Indexed by the expression index. - /// These are uint8_t instead of bool to simplify codegen with IRBuilder. - /// TODO: is there actually a valid reason why this is necessary for codegen? - uint8_t* ALWAYS_INLINE cur_expr_values_null() const { return cur_expr_values_null_; } + /// Cross-compiled function to access member variables used in CodegenHashRow(). + uint32_t IR_ALWAYS_INLINE GetHashSeed() const; - /// Returns the offset into the results buffer of the expression value at 'expr_idx'. - int ALWAYS_INLINE expr_values_offsets(int expr_idx) const { - return expr_values_offsets_[expr_idx]; - } + /// Functions to be replaced by codegen to specialize the hash table. + bool IR_NO_INLINE stores_nulls() const { return stores_nulls_; } + bool IR_NO_INLINE finds_some_nulls() const { return finds_some_nulls_; } - private: - friend class PartitionedHashTableCtx; + std::shared_ptr tracker_; - /// Resets the iterators to the beginning of the cache values' arrays. - void ResetIterators(); + const std::vector& build_exprs_; + std::vector build_expr_evals_; - /// Returns the offset in number of rows into the cached values' buffer. 
- int ALWAYS_INLINE CurIdx() const { - return cur_expr_values_hash_ - expr_values_hash_array_.get(); - } + const std::vector& probe_exprs_; + std::vector probe_expr_evals_; + + /// Constants on how the hash table should behave. Joins and aggs have slightly + /// different behavior. + const bool stores_nulls_; + const std::vector finds_nulls_; + + /// finds_some_nulls_ is just the logical OR of finds_nulls_. + const bool finds_some_nulls_; + + /// The current level this context is working on. Each level needs to use a + /// different seed. + int level_; - /// Max amount of memory in bytes for caching evaluated expression values. - static const int MAX_EXPR_VALUES_ARRAY_SIZE = 256 << 10; - - /// Maximum number of rows of expressions evaluation states which this - /// ExprValuesCache can cache. - int capacity_; - - /// Byte size of a row of evaluated expression values. Never changes once set, - /// can be used for constant substitution during codegen. - int expr_values_bytes_per_row_; - - /// Number of build/probe expressions. - int num_exprs_; - - /// Pointer into 'expr_values_array_' for the current row's expression values. - uint8_t* cur_expr_values_; - - /// Pointer into 'expr_values_null_array_' for the current row's nullness of each - /// expression value. - uint8_t* cur_expr_values_null_; - - /// Pointer into 'expr_hash_value_array_' for the hash value of current row's - /// expression values. - uint32_t* cur_expr_values_hash_; - - /// Pointer to the buffer one beyond the end of the last entry of cached expressions' - /// hash values. - uint32_t* cur_expr_values_hash_end_; - - /// Array for caching up to 'capacity_' number of rows worth of evaluated expression - /// values. Each row consumes 'expr_values_bytes_per_row_' number of bytes. - boost::scoped_array expr_values_array_; - - /// Array for caching up to 'capacity_' number of rows worth of null booleans. - /// Each row contains 'num_exprs_' booleans to indicate nullness of expression values. 
- /// Used when the hash table supports NULL. Use 'uint8_t' to guarantee each entry is 1 - /// byte as sizeof(bool) is implementation dependent. The IR depends on this - /// assumption. - boost::scoped_array expr_values_null_array_; - - /// Array for caching up to 'capacity_' number of rows worth of hashed values. - boost::scoped_array expr_values_hash_array_; - - /// One bit for each row. A bit is set if that row is not hashed as it's evaluated - /// to NULL but the hash table doesn't support NULL. Such rows may still be included - /// in outputs for certain join types (e.g. left anti joins). - Bitmap null_bitmap_; - - /// Maps from expression index to the byte offset into a row of expression values. - /// One entry per build/probe expression. - std::vector expr_values_offsets_; - - /// Byte offset into 'cur_expr_values_' that begins the variable length results for - /// a row. If -1, there are no variable length slots. Never changes once set, can be - /// constant substituted with codegen. - int var_result_offset_; - }; - - ExprValuesCache* ALWAYS_INLINE expr_values_cache() { return &expr_values_cache_; } - - private: - friend class PartitionedAggregationNode; - friend class PartitionedHashTable; - friend class HashTableTest_HashEmpty_Test; - - /// Construct a hash table context. - /// - build_exprs are the exprs that should be used to evaluate rows during Insert(). - /// - probe_exprs are used during FindProbeRow() - /// - stores_nulls: if false, TupleRows with nulls are ignored during Insert - /// - finds_nulls: if finds_nulls[i] is false, FindProbeRow() returns End() for - /// TupleRows with nulls in position i even if stores_nulls is true. - /// - initial_seed: initial seed value to use when computing hashes for rows with - /// level 0. Other levels have their seeds derived from this seed. - /// - max_levels: the max lhashevels we will hash with. - /// - mem_pool: the MemPool which the expression evaluators allocate from. 
Owned by the - /// exec node which owns this hash table context. Memory usage of the expression - /// value cache is charged against its MemTracker. - /// - /// TODO: stores_nulls is too coarse: for a hash table in which some columns are joined - /// with '<=>' and others with '=', stores_nulls could distinguish between columns - /// in which nulls are stored and columns in which they are not, which could save - /// space by not storing some rows we know will never match. - PartitionedHashTableCtx(const std::vector& build_exprs, - const std::vector& probe_exprs, bool stores_nulls, - const std::vector& finds_nulls, int32_t initial_seed, - int max_levels, MemPool* mem_pool, MemPool* expr_results_pool, - const std::shared_ptr& tracker); - - /// Allocate various buffers for storing expression evaluation results, hash values, - /// null bits etc. Also allocate evaluators for the build and probe expressions and - /// store them in 'pool'. Returns error if allocation causes query memory limit to - /// be exceeded or the evaluators fail to initialize. 'num_build_tuples' is the number - /// of tuples of a row in the build side, used for computing the size of a scratch row. - Status Init(ObjectPool* pool, RuntimeState* state, int num_build_tuples, - const RowDescriptor& row_desc, const RowDescriptor& row_desc_probe); - - /// Compute the hash of the values in 'expr_values' with nullness 'expr_values_null'. - /// This will be replaced by codegen. We don't want this inlined for replacing - /// with codegen'd functions so the function name does not change. - uint32_t IR_NO_INLINE HashRow( - const uint8_t* expr_values, const uint8_t* expr_values_null) const noexcept; - - /// Wrapper function for calling correct HashUtil function in non-codegen'd case. - uint32_t Hash(const void* input, int len, uint32_t hash) const; - - /// Evaluate 'row' over build exprs, storing values into 'expr_values' and nullness into - /// 'expr_values_null'. This will be replaced by codegen. 
We do not want this function - /// inlined when cross compiled because we need to be able to differentiate between - /// EvalBuildRow and EvalProbeRow by name and the build/probe exprs are baked into the - /// codegen'd function. - bool IR_NO_INLINE EvalBuildRow( - TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) noexcept { - return EvalRow(row, build_expr_evals_, expr_values, expr_values_null); - } - - /// Evaluate 'row' over probe exprs, storing the values into 'expr_values' and nullness - /// into 'expr_values_null'. This will be replaced by codegen. - bool IR_NO_INLINE EvalProbeRow( - TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) noexcept { - return EvalRow(row, probe_expr_evals_, expr_values, expr_values_null); - } - - /// Compute the hash of the values in 'expr_values' with nullness 'expr_values_null' - /// for a row with variable length fields (e.g. strings). - uint32_t HashVariableLenRow( - const uint8_t* expr_values, const uint8_t* expr_values_null) const; - - /// Evaluate the exprs over row, storing the values into 'expr_values' and nullness into - /// 'expr_values_null'. Returns whether any expr evaluated to NULL. This will be - /// replaced by codegen. - bool EvalRow(TupleRow* row, const std::vector& ctxs, - uint8_t* expr_values, uint8_t* expr_values_null) noexcept; - - /// Returns true if the values of build_exprs evaluated over 'build_row' equal the - /// values in 'expr_values' with nullness 'expr_values_null'. FORCE_NULL_EQUALITY is - /// true if all nulls should be treated as equal, regardless of the values of - /// 'finds_nulls_'. This will be replaced by codegen. - template - bool IR_NO_INLINE Equals(TupleRow* build_row, const uint8_t* expr_values, - const uint8_t* expr_values_null) const noexcept; - - /// Helper function that calls Equals() with the current row. Always inlined so that - /// it does not appear in cross-compiled IR. 
- template - bool ALWAYS_INLINE Equals(TupleRow* build_row) const { - return Equals(build_row, expr_values_cache_.cur_expr_values(), - expr_values_cache_.cur_expr_values_null()); - } - - /// Cross-compiled function to access member variables used in CodegenHashRow(). - uint32_t IR_ALWAYS_INLINE GetHashSeed() const; - - /// Functions to be replaced by codegen to specialize the hash table. - bool IR_NO_INLINE stores_nulls() const { return stores_nulls_; } - bool IR_NO_INLINE finds_some_nulls() const { return finds_some_nulls_; } - - std::shared_ptr tracker_; - - const std::vector& build_exprs_; - std::vector build_expr_evals_; - - const std::vector& probe_exprs_; - std::vector probe_expr_evals_; - - /// Constants on how the hash table should behave. Joins and aggs have slightly - /// different behavior. - const bool stores_nulls_; - const std::vector finds_nulls_; - - /// finds_some_nulls_ is just the logical OR of finds_nulls_. - const bool finds_some_nulls_; - - /// The current level this context is working on. Each level needs to use a - /// different seed. - int level_; - - /// The seeds to use for hashing. Indexed by the level. - std::vector seeds_; - - /// The ExprValuesCache for caching expression evaluation results, null bytes and hash - /// values for rows. Used to store results of batch evaluations of rows. - ExprValuesCache expr_values_cache_; - - /// Scratch buffer to generate rows on the fly. - TupleRow* scratch_row_; - - /// MemPool for 'build_expr_evals_' and 'probe_expr_evals_' to allocate expr-managed - /// memory from. Not owned. - MemPool* mem_pool_; - - // MemPool for allocations by made EvalRow to copy expr's StringVal result. Not owned - MemPool* expr_results_pool_; + /// The seeds to use for hashing. Indexed by the level. + std::vector seeds_; + + /// The ExprValuesCache for caching expression evaluation results, null bytes and hash + /// values for rows. Used to store results of batch evaluations of rows. 
+ ExprValuesCache expr_values_cache_; + + /// Scratch buffer to generate rows on the fly. + TupleRow* scratch_row_; + + /// MemPool for 'build_expr_evals_' and 'probe_expr_evals_' to allocate expr-managed + /// memory from. Not owned. + MemPool* mem_pool_; + + // MemPool for allocations by made EvalRow to copy expr's StringVal result. Not owned + MemPool* expr_results_pool_; }; /// The hash table consists of a contiguous array of buckets that contain a pointer to the @@ -503,489 +497,476 @@ class PartitionedHashTableCtx { /// This array of buckets is sparse, we are shooting for up to 3/4 fill factor (75%). The /// data allocated by the hash table comes from the BufferPool. class PartitionedHashTable { - private: - - /// Rows are represented as pointers into the BufferedTupleStream data with one - /// of two formats, depending on the number of tuples in the row. - union HtData { - // For rows with multiple tuples per row, a pointer to the flattened TupleRow. - BufferedTupleStream3::FlatRowPtr flat_row; - Tuple* tuple; - }; - - /// Linked list of entries used for duplicates. - struct DuplicateNode { - /// Used for full outer and right {outer, anti, semi} joins. Indicates whether the - /// row in the DuplicateNode has been matched. - /// From an abstraction point of view, this is an awkward place to store this - /// information. - /// TODO: Fold this flag in the next pointer below. - bool matched; - - /// Chain to next duplicate node, NULL when end of list. - DuplicateNode* next; - HtData htdata; - }; - - struct Bucket { - /// Whether this bucket contains a valid entry, or it is empty. - bool filled; - - /// Used for full outer and right {outer, anti, semi} joins. Indicates whether the - /// row in the bucket has been matched. - /// From an abstraction point of view, this is an awkward place to store this - /// information but it is efficient. This space is otherwise unused. - bool matched; - - /// Used in case of duplicates. 
If true, then the bucketData union should be used as - /// 'duplicates'. - bool hasDuplicates; - - /// Cache of the hash for data. - /// TODO: Do we even have to cache the hash value? - uint32_t hash; - - /// Either the data for this bucket or the linked list of duplicates. - union { - HtData htdata; - DuplicateNode* duplicates; - } bucketData; - }; - - public: - class Iterator; - - /// Returns a newly allocated HashTable. The probing algorithm is set by the - /// FLAG_enable_quadratic_probing. - /// - allocator: allocator to allocate bucket directory and data pages from. - /// - stores_duplicates: true if rows with duplicate keys may be inserted into the - /// hash table. - /// - num_build_tuples: number of Tuples in the build tuple row. - /// - tuple_stream: the tuple stream which contains the tuple rows index by the - /// hash table. Can be NULL if the rows contain only a single tuple, in which - /// case the 'tuple_stream' is unused. - /// - max_num_buckets: the maximum number of buckets that can be stored. If we - /// try to grow the number of buckets to a larger number, the inserts will fail. - /// -1, if it unlimited. - /// - initial_num_buckets: number of buckets that the hash table should be initialized - /// with. - static PartitionedHashTable* Create(Suballocator* allocator, bool stores_duplicates, - int num_build_tuples, BufferedTupleStream3* tuple_stream, int64_t max_num_buckets, - int64_t initial_num_buckets); - - /// Allocates the initial bucket structure. Returns a non-OK status if an error is - /// encountered. If an OK status is returned , 'got_memory' is set to indicate whether - /// enough memory for the initial buckets was allocated from the Suballocator. - Status Init(bool* got_memory); - - /// Call to cleanup any resources. Must be called once. - void Close(); - - /// Inserts the row to the hash table. The caller is responsible for ensuring that the - /// table has free buckets. Returns true if the insertion was successful. 
Always - /// returns true if the table has free buckets and the key is not a duplicate. If the - /// key was a duplicate and memory could not be allocated for the new duplicate node, - /// returns false. If an error is encountered while creating a duplicate node, returns - /// false and sets 'status' to the error. - /// - /// 'flat_row' is a pointer to the flattened row in 'tuple_stream_' If the row contains - /// only one tuple, a pointer to that tuple is stored. Otherwise the 'flat_row' pointer - /// is stored. The 'row' is not copied by the hash table and the caller must guarantee - /// it stays in memory. This will not grow the hash table. - bool IR_ALWAYS_INLINE Insert(PartitionedHashTableCtx* ht_ctx, - BufferedTupleStream3::FlatRowPtr flat_row, TupleRow* row, - Status* status); - - /// Prefetch the hash table bucket which the given hash value 'hash' maps to. - template - void IR_ALWAYS_INLINE PrefetchBucket(uint32_t hash); - - /// Returns an iterator to the bucket that matches the probe expression results that - /// are cached at the current position of the ExprValuesCache in 'ht_ctx'. Assumes that - /// the ExprValuesCache was filled using EvalAndHashProbe(). Returns HashTable::End() - /// if no match is found. The iterator can be iterated until HashTable::End() to find - /// all the matching rows. Advancing the returned iterator will go to the next matching - /// row. The matching rows do not need to be evaluated since all the nodes of a bucket - /// are duplicates. One scan can be in progress for each 'ht_ctx'. Used in the probe - /// phase of hash joins. - Iterator IR_ALWAYS_INLINE FindProbeRow(PartitionedHashTableCtx* ht_ctx); - - /// If a match is found in the table, return an iterator as in FindProbeRow(). If a - /// match was not present, return an iterator pointing to the empty bucket where the key - /// should be inserted. Returns End() if the table is full. The caller can set the data - /// in the bucket using a Set*() method on the iterator. 
- Iterator IR_ALWAYS_INLINE FindBuildRowBucket(PartitionedHashTableCtx* ht_ctx, bool* found); - - /// Returns number of elements inserted in the hash table - int64_t size() const { - return num_filled_buckets_ - num_buckets_with_duplicates_ + num_duplicate_nodes_; - } - - /// Returns the number of empty buckets. - int64_t EmptyBuckets() const { return num_buckets_ - num_filled_buckets_; } - - /// Returns the number of buckets - int64_t num_buckets() const { return num_buckets_; } - - /// Returns the number of filled buckets - int64_t num_filled_buckets() const { return num_filled_buckets_; } - - /// Returns the time of hash table resize - int64_t num_resize() const { return num_resizes_; } - - /// Returns the number of bucket_with_duplicates - int64_t num_buckets_with_duplicates() const { return num_buckets_with_duplicates_; } - - /// Returns the number of bucket_with_duplicates - int64_t num_duplicates_nodes() const { return num_duplicate_nodes_; } - - /// Returns the number of probe operations - int64_t num_probe() const { return num_probes_; } - - /// Returns the number of failed probe operations - int64_t num_failed_probe() const { return num_failed_probes_; } - - /// Returns the number of travel_length of probe operations - int64_t travel_length() const { return travel_length_; } - - /// Returns the load factor (the number of non-empty buckets) - double load_factor() const { - return static_cast(num_filled_buckets_) / num_buckets_; - } - - /// Return an estimate of the number of bytes needed to build the hash table - /// structure for 'num_rows'. To do that, it estimates the number of buckets, - /// rounded up to a power of two, and also assumes that there are no duplicates. - static int64_t EstimateNumBuckets(int64_t num_rows) { - /// Assume max 66% fill factor and no duplicates. 
- return BitUtil::next_power_of_two(3 * num_rows / 2); - } - static int64_t EstimateSize(int64_t num_rows) { - int64_t num_buckets = EstimateNumBuckets(num_rows); - return num_buckets * sizeof(Bucket); - } - - /// Return the size of a hash table bucket in bytes. - static int64_t BucketSize() { return sizeof(Bucket); } - - /// Returns the memory occupied by the hash table, takes into account the number of - /// duplicates. - int64_t CurrentMemSize() const; - - /// Returns the number of inserts that can be performed before resizing the table. - int64_t NumInsertsBeforeResize() const; - - /// Calculates the fill factor if 'buckets_to_fill' additional buckets were to be - /// filled and resizes the hash table so that the projected fill factor is below the - /// max fill factor. - /// If 'got_memory' is true, then it is guaranteed at least 'rows_to_add' rows can be - /// inserted without need to resize. If there is not enough memory available to - /// resize the hash table, Status::OK()() is returned and 'got_memory' is false. If a - /// another error occurs, an error status may be returned. - Status CheckAndResize(uint64_t buckets_to_fill, const PartitionedHashTableCtx* ht_ctx, - bool* got_memory); - - /// Returns the number of bytes allocated to the hash table from the block manager. - int64_t ByteSize() const { - return num_buckets_ * sizeof(Bucket) + total_data_page_size_; - } - - /// Returns an iterator at the beginning of the hash table. Advancing this iterator - /// will traverse all elements. - Iterator Begin(const PartitionedHashTableCtx* ht_ctx); - - /// Return an iterator pointing to the first element (Bucket or DuplicateNode, if the - /// bucket has duplicates) in the hash table that does not have its matched flag set. - /// Used in right joins and full-outer joins. - Iterator FirstUnmatched(PartitionedHashTableCtx* ctx); - - /// Return true if there was a least one match. - bool HasMatches() const { return has_matches_; } - - /// Return end marker. 
- Iterator End() { return Iterator(); } - - /// Dump out the entire hash table to string. If 'skip_empty', empty buckets are - /// skipped. If 'show_match', it also prints the matched flag of each node. If - /// 'build_desc' is non-null, the build rows will be printed. Otherwise, only the - /// the addresses of the build rows will be printed. - std::string DebugString(bool skip_empty, bool show_match, - const RowDescriptor* build_desc); - - /// Print the content of a bucket or node. - void DebugStringTuple(std::stringstream& ss, HtData& htdata, const RowDescriptor* desc); - - /// Update and print some statistics that can be used for performance debugging. - std::string PrintStats() const; - - /// Number of hash collisions so far in the lifetime of this object - int64_t NumHashCollisions() const { return num_hash_collisions_; } - - /// stl-like iterator interface. - class Iterator { - private: - /// Bucket index value when probe is not successful. - static const int64_t BUCKET_NOT_FOUND = -1; - - public: - IR_ALWAYS_INLINE Iterator() : - table_(NULL), - scratch_row_(NULL), - bucket_idx_(BUCKET_NOT_FOUND), - node_(NULL) { } - - /// Iterates to the next element. It should be called only if !AtEnd(). - void IR_ALWAYS_INLINE Next(); - - /// Iterates to the next duplicate node. If the bucket does not have duplicates or - /// when it reaches the last duplicate node, then it moves the Iterator to AtEnd(). - /// Used when we want to iterate over all the duplicate nodes bypassing the Next() - /// interface (e.g. in semi/outer joins without other_join_conjuncts, in order to - /// iterate over all nodes of an unmatched bucket). - void IR_ALWAYS_INLINE NextDuplicate(); - - /// Iterates to the next element that does not have its matched flag set. Used in - /// right-outer and full-outer joins. - void IR_ALWAYS_INLINE NextUnmatched(); - - /// Return the current row or tuple. Callers must check the iterator is not AtEnd() - /// before calling them. 
The returned row is owned by the iterator and valid until - /// the next call to GetRow(). It is safe to advance the iterator. - TupleRow* IR_ALWAYS_INLINE GetRow() const; - Tuple* IR_ALWAYS_INLINE GetTuple() const; - - /// Set the current tuple for an empty bucket. Designed to be used with the iterator - /// returned from FindBuildRowBucket() in the case when the value is not found. It is - /// not valid to call this function if the bucket already has an entry. - void SetTuple(Tuple* tuple, uint32_t hash); - - /// Sets as matched the Bucket or DuplicateNode currently pointed by the iterator, - /// depending on whether the bucket has duplicates or not. The iterator cannot be - /// AtEnd(). - void SetMatched(); - - /// Returns the 'matched' flag of the current Bucket or DuplicateNode, depending on - /// whether the bucket has duplicates or not. It should be called only if !AtEnd(). - bool IsMatched() const; - - /// Resets everything but the pointer to the hash table. - void SetAtEnd(); - - /// Returns true if this iterator is at the end, i.e. GetRow() cannot be called. - bool ALWAYS_INLINE AtEnd() const { return bucket_idx_ == BUCKET_NOT_FOUND; } - - /// Prefetch the hash table bucket which the iterator is pointing to now. - template - void IR_ALWAYS_INLINE PrefetchBucket(); - - private: - friend class PartitionedHashTable; +private: + /// Rows are represented as pointers into the BufferedTupleStream data with one + /// of two formats, depending on the number of tuples in the row. + union HtData { + // For rows with multiple tuples per row, a pointer to the flattened TupleRow. + BufferedTupleStream3::FlatRowPtr flat_row; + Tuple* tuple; + }; + + /// Linked list of entries used for duplicates. + struct DuplicateNode { + /// Used for full outer and right {outer, anti, semi} joins. Indicates whether the + /// row in the DuplicateNode has been matched. + /// From an abstraction point of view, this is an awkward place to store this + /// information. 
+ /// TODO: Fold this flag in the next pointer below. + bool matched; + + /// Chain to next duplicate node, NULL when end of list. + DuplicateNode* next; + HtData htdata; + }; - ALWAYS_INLINE - Iterator(PartitionedHashTable* table, TupleRow* row, int bucket_idx, DuplicateNode* node) - : table_(table), - scratch_row_(row), - bucket_idx_(bucket_idx), - node_(node) { + struct Bucket { + /// Whether this bucket contains a valid entry, or it is empty. + bool filled; + + /// Used for full outer and right {outer, anti, semi} joins. Indicates whether the + /// row in the bucket has been matched. + /// From an abstraction point of view, this is an awkward place to store this + /// information but it is efficient. This space is otherwise unused. + bool matched; + + /// Used in case of duplicates. If true, then the bucketData union should be used as + /// 'duplicates'. + bool hasDuplicates; + + /// Cache of the hash for data. + /// TODO: Do we even have to cache the hash value? + uint32_t hash; + + /// Either the data for this bucket or the linked list of duplicates. + union { + HtData htdata; + DuplicateNode* duplicates; + } bucketData; + }; + +public: + class Iterator; + + /// Returns a newly allocated HashTable. The probing algorithm is set by the + /// FLAG_enable_quadratic_probing. + /// - allocator: allocator to allocate bucket directory and data pages from. + /// - stores_duplicates: true if rows with duplicate keys may be inserted into the + /// hash table. + /// - num_build_tuples: number of Tuples in the build tuple row. + /// - tuple_stream: the tuple stream which contains the tuple rows index by the + /// hash table. Can be NULL if the rows contain only a single tuple, in which + /// case the 'tuple_stream' is unused. + /// - max_num_buckets: the maximum number of buckets that can be stored. If we + /// try to grow the number of buckets to a larger number, the inserts will fail. + /// -1, if it unlimited. 
+ /// - initial_num_buckets: number of buckets that the hash table should be initialized + /// with. + static PartitionedHashTable* Create(Suballocator* allocator, bool stores_duplicates, + int num_build_tuples, BufferedTupleStream3* tuple_stream, + int64_t max_num_buckets, int64_t initial_num_buckets); + + /// Allocates the initial bucket structure. Returns a non-OK status if an error is + /// encountered. If an OK status is returned , 'got_memory' is set to indicate whether + /// enough memory for the initial buckets was allocated from the Suballocator. + Status Init(bool* got_memory); + + /// Call to cleanup any resources. Must be called once. + void Close(); + + /// Inserts the row to the hash table. The caller is responsible for ensuring that the + /// table has free buckets. Returns true if the insertion was successful. Always + /// returns true if the table has free buckets and the key is not a duplicate. If the + /// key was a duplicate and memory could not be allocated for the new duplicate node, + /// returns false. If an error is encountered while creating a duplicate node, returns + /// false and sets 'status' to the error. + /// + /// 'flat_row' is a pointer to the flattened row in 'tuple_stream_' If the row contains + /// only one tuple, a pointer to that tuple is stored. Otherwise the 'flat_row' pointer + /// is stored. The 'row' is not copied by the hash table and the caller must guarantee + /// it stays in memory. This will not grow the hash table. + bool IR_ALWAYS_INLINE Insert(PartitionedHashTableCtx* ht_ctx, + BufferedTupleStream3::FlatRowPtr flat_row, TupleRow* row, + Status* status); + + /// Prefetch the hash table bucket which the given hash value 'hash' maps to. + template + void IR_ALWAYS_INLINE PrefetchBucket(uint32_t hash); + + /// Returns an iterator to the bucket that matches the probe expression results that + /// are cached at the current position of the ExprValuesCache in 'ht_ctx'. 
Assumes that + /// the ExprValuesCache was filled using EvalAndHashProbe(). Returns HashTable::End() + /// if no match is found. The iterator can be iterated until HashTable::End() to find + /// all the matching rows. Advancing the returned iterator will go to the next matching + /// row. The matching rows do not need to be evaluated since all the nodes of a bucket + /// are duplicates. One scan can be in progress for each 'ht_ctx'. Used in the probe + /// phase of hash joins. + Iterator IR_ALWAYS_INLINE FindProbeRow(PartitionedHashTableCtx* ht_ctx); + + /// If a match is found in the table, return an iterator as in FindProbeRow(). If a + /// match was not present, return an iterator pointing to the empty bucket where the key + /// should be inserted. Returns End() if the table is full. The caller can set the data + /// in the bucket using a Set*() method on the iterator. + Iterator IR_ALWAYS_INLINE FindBuildRowBucket(PartitionedHashTableCtx* ht_ctx, bool* found); + + /// Returns number of elements inserted in the hash table + int64_t size() const { + return num_filled_buckets_ - num_buckets_with_duplicates_ + num_duplicate_nodes_; } - PartitionedHashTable* table_; + /// Returns the number of empty buckets. + int64_t EmptyBuckets() const { return num_buckets_ - num_filled_buckets_; } - /// Scratch buffer to hold generated rows. Not owned. - TupleRow* scratch_row_; + /// Returns the number of buckets + int64_t num_buckets() const { return num_buckets_; } + + /// Returns the number of filled buckets + int64_t num_filled_buckets() const { return num_filled_buckets_; } + + /// Returns the time of hash table resize + int64_t num_resize() const { return num_resizes_; } - /// Current bucket idx. - int64_t bucket_idx_; - - /// Pointer to the current duplicate node. - DuplicateNode* node_; - }; - - private: - friend class Iterator; - friend class HashTableTest; - - /// Hash table constructor. 
Private because Create() should be used, instead - /// of calling this constructor directly. - /// - quadratic_probing: set to true when the probing algorithm is quadratic, as - /// opposed to linear. - PartitionedHashTable(bool quadratic_probing, Suballocator* allocator, bool stores_duplicates, - int num_build_tuples, BufferedTupleStream3* tuple_stream, int64_t max_num_buckets, - int64_t initial_num_buckets); - - /// Performs the probing operation according to the probing algorithm (linear or - /// quadratic. Returns one of the following: - /// (a) the index of the bucket that contains the entry that matches with the last row - /// evaluated in 'ht_ctx'. If 'ht_ctx' is NULL then it does not check for row - /// equality and returns the index of the first empty bucket. - /// (b) the index of the first empty bucket according to the probing algorithm (linear - /// or quadratic), if the entry is not in the hash table or 'ht_ctx' is NULL. - /// (c) Iterator::BUCKET_NOT_FOUND if the probe was not successful, i.e. the maximum - /// distance was traveled without finding either an empty or a matching bucket. - /// Using the returned index value, the caller can create an iterator that can be - /// iterated until End() to find all the matching rows. - /// - /// EvalAndHashBuild() or EvalAndHashProbe() must have been called before calling - /// this function. The values of the expression values cache in 'ht_ctx' will be - /// used to probe the hash table. - /// - /// 'FORCE_NULL_EQUALITY' is true if NULLs should always be considered equal when - /// comparing two rows. - /// - /// 'hash' is the hash computed by EvalAndHashBuild() or EvalAndHashProbe(). - /// 'found' indicates that a bucket that contains an equal row is found. - /// - /// There are wrappers of this function that perform the Find and Insert logic. 
- template - int64_t IR_ALWAYS_INLINE Probe(Bucket* buckets, int64_t num_buckets, - PartitionedHashTableCtx* ht_ctx, uint32_t hash, bool* found); - - /// Performs the insert logic. Returns the HtData* of the bucket or duplicate node - /// where the data should be inserted. Returns NULL if the insert was not successful - /// and either sets 'status' to OK if it failed because not enough reservation was - /// available or the error if an error was encountered. - HtData* IR_ALWAYS_INLINE InsertInternal(PartitionedHashTableCtx* ht_ctx, Status* status); - - /// Updates 'bucket_idx' to the index of the next non-empty bucket. If the bucket has - /// duplicates, 'node' will be pointing to the head of the linked list of duplicates. - /// Otherwise, 'node' should not be used. If there are no more buckets, sets - /// 'bucket_idx' to BUCKET_NOT_FOUND. - void NextFilledBucket(int64_t* bucket_idx, DuplicateNode** node); - - /// Resize the hash table to 'num_buckets'. 'got_memory' is false on OOM. - Status ResizeBuckets(int64_t num_buckets, const PartitionedHashTableCtx* ht_ctx, bool* got_memory); - - /// Appends the DuplicateNode pointed by next_node_ to 'bucket' and moves the next_node_ - /// pointer to the next DuplicateNode in the page, updating the remaining node counter. - DuplicateNode* IR_ALWAYS_INLINE AppendNextNode(Bucket* bucket); - - /// Creates a new DuplicateNode for a entry and chains it to the bucket with index - /// 'bucket_idx'. The duplicate nodes of a bucket are chained as a linked list. - /// This places the new duplicate node at the beginning of the list. If this is the - /// first duplicate entry inserted in this bucket, then the entry already contained by - /// the bucket is converted to a DuplicateNode. That is, the contents of 'data' of the - /// bucket are copied to a DuplicateNode and 'data' is updated to pointing to a - /// DuplicateNode. - /// Returns NULL and sets 'status' to OK if the node array could not grow, i.e. 
there - /// was not enough memory to allocate a new DuplicateNode. Returns NULL and sets - /// 'status' to an error if another error was encountered. - DuplicateNode* IR_ALWAYS_INLINE InsertDuplicateNode(int64_t bucket_idx, Status* status); - - /// Resets the contents of the empty bucket with index 'bucket_idx', in preparation for - /// an insert. Sets all the fields of the bucket other than 'data'. - void IR_ALWAYS_INLINE PrepareBucketForInsert(int64_t bucket_idx, uint32_t hash); - - /// Return the TupleRow pointed by 'htdata'. - TupleRow* GetRow(HtData& htdata, TupleRow* row) const; - - /// Returns the TupleRow of the pointed 'bucket'. In case of duplicates, it - /// returns the content of the first chained duplicate node of the bucket. - TupleRow* GetRow(Bucket* bucket, TupleRow* row) const; - - /// Grow the node array. Returns true and sets 'status' to OK on success. Returns false - /// and set 'status' to OK if we can't get sufficient reservation to allocate the next - /// data page. Returns false and sets 'status' if another error is encountered. - bool GrowNodeArray(Status* status); - - /// Functions to be replaced by codegen to specialize the hash table. - bool IR_NO_INLINE stores_tuples() const { return stores_tuples_; } - bool IR_NO_INLINE stores_duplicates() const { return stores_duplicates_; } - bool IR_NO_INLINE quadratic_probing() const { return quadratic_probing_; } - - /// Load factor that will trigger growing the hash table on insert. This is - /// defined as the number of non-empty buckets / total_buckets - static constexpr double MAX_FILL_FACTOR = 0.75; - - /// The size in bytes of each page of duplicate nodes. Should be large enough to fit - /// enough DuplicateNodes to amortise the overhead of allocating each page and low - /// enough to not waste excessive memory to internal fragmentation. - static constexpr int64_t DATA_PAGE_SIZE = 64L * 1024; - - RuntimeState* state_; - - /// Suballocator to allocate data pages and hash table buckets with. 
- Suballocator* allocator_; - - /// Stream contains the rows referenced by the hash table. Can be NULL if the - /// row only contains a single tuple, in which case the TupleRow indirection - /// is removed by the hash table. - BufferedTupleStream3* tuple_stream_; - - /// Constants on how the hash table should behave. - - /// True if the HtData uses the Tuple* representation, or false if it uses FlatRowPtr. - const bool stores_tuples_; - - /// True if duplicates may be inserted into hash table. - const bool stores_duplicates_; - - /// Quadratic probing enabled (as opposed to linear). - const bool quadratic_probing_; - - /// Data pages for all nodes. Allocated from suballocator to reduce memory - /// consumption of small tables. - std::vector> data_pages_; - - /// Byte size of all buffers in data_pages_. - int64_t total_data_page_size_; - - /// Next duplicate node to insert. Valid when node_remaining_current_page_ > 0. - DuplicateNode* next_node_; - - /// Number of nodes left in the current page. - int node_remaining_current_page_; + /// Returns the number of bucket_with_duplicates + int64_t num_buckets_with_duplicates() const { return num_buckets_with_duplicates_; } - /// Number of duplicate nodes. - int64_t num_duplicate_nodes_; + /// Returns the number of bucket_with_duplicates + int64_t num_duplicates_nodes() const { return num_duplicate_nodes_; } - const int64_t max_num_buckets_; + /// Returns the number of probe operations + int64_t num_probe() const { return num_probes_; } - /// Allocation containing all buckets. - std::unique_ptr bucket_allocation_; + /// Returns the number of failed probe operations + int64_t num_failed_probe() const { return num_failed_probes_; } - /// Pointer to the 'buckets_' array from 'bucket_allocation_'. 
- Bucket* buckets_; + /// Returns the number of travel_length of probe operations + int64_t travel_length() const { return travel_length_; } + + /// Returns the load factor (the number of non-empty buckets) + double load_factor() const { return static_cast(num_filled_buckets_) / num_buckets_; } + + /// Return an estimate of the number of bytes needed to build the hash table + /// structure for 'num_rows'. To do that, it estimates the number of buckets, + /// rounded up to a power of two, and also assumes that there are no duplicates. + static int64_t EstimateNumBuckets(int64_t num_rows) { + /// Assume max 66% fill factor and no duplicates. + return BitUtil::next_power_of_two(3 * num_rows / 2); + } + static int64_t EstimateSize(int64_t num_rows) { + int64_t num_buckets = EstimateNumBuckets(num_rows); + return num_buckets * sizeof(Bucket); + } - /// Total number of buckets (filled and empty). - int64_t num_buckets_; + /// Return the size of a hash table bucket in bytes. + static int64_t BucketSize() { return sizeof(Bucket); } + + /// Returns the memory occupied by the hash table, takes into account the number of + /// duplicates. + int64_t CurrentMemSize() const; + + /// Returns the number of inserts that can be performed before resizing the table. + int64_t NumInsertsBeforeResize() const; - /// Number of non-empty buckets. Used to determine when to resize. - int64_t num_filled_buckets_; + /// Calculates the fill factor if 'buckets_to_fill' additional buckets were to be + /// filled and resizes the hash table so that the projected fill factor is below the + /// max fill factor. + /// If 'got_memory' is true, then it is guaranteed at least 'rows_to_add' rows can be + /// inserted without need to resize. If there is not enough memory available to + /// resize the hash table, Status::OK()() is returned and 'got_memory' is false. If a + /// another error occurs, an error status may be returned. 
+ Status CheckAndResize(uint64_t buckets_to_fill, const PartitionedHashTableCtx* ht_ctx, + bool* got_memory); - /// Number of (non-empty) buckets with duplicates. These buckets do not point to slots - /// in the tuple stream, rather than to a linked list of Nodes. - int64_t num_buckets_with_duplicates_; + /// Returns the number of bytes allocated to the hash table from the block manager. + int64_t ByteSize() const { return num_buckets_ * sizeof(Bucket) + total_data_page_size_; } - /// Number of build tuples, used for constructing temp row* for probes. - const int num_build_tuples_; + /// Returns an iterator at the beginning of the hash table. Advancing this iterator + /// will traverse all elements. + Iterator Begin(const PartitionedHashTableCtx* ht_ctx); - /// Flag used to check that we don't lose stored matches when spilling hash tables - /// (IMPALA-1488). - bool has_matches_; + /// Return an iterator pointing to the first element (Bucket or DuplicateNode, if the + /// bucket has duplicates) in the hash table that does not have its matched flag set. + /// Used in right joins and full-outer joins. + Iterator FirstUnmatched(PartitionedHashTableCtx* ctx); - /// The stats below can be used for debugging perf. - /// TODO: Should we make these statistics atomic? - /// Number of FindProbeRow(), Insert(), or FindBuildRowBucket() calls that probe the - /// hash table. - int64_t num_probes_; + /// Return true if there was a least one match. + bool HasMatches() const { return has_matches_; } - /// Number of probes that failed and had to fall back to linear probing without cap. - int64_t num_failed_probes_; + /// Return end marker. + Iterator End() { return Iterator(); } - /// Total distance traveled for each probe. That is the sum of the diff between the end - /// position of a probe (find/insert) and its start position - /// (hash & (num_buckets_ - 1)). - int64_t travel_length_; + /// Dump out the entire hash table to string. 
If 'skip_empty', empty buckets are + /// skipped. If 'show_match', it also prints the matched flag of each node. If + /// 'build_desc' is non-null, the build rows will be printed. Otherwise, only the + /// the addresses of the build rows will be printed. + std::string DebugString(bool skip_empty, bool show_match, const RowDescriptor* build_desc); - /// The number of cases where we had to compare buckets with the same hash value, but - /// the row equality failed. - int64_t num_hash_collisions_; + /// Print the content of a bucket or node. + void DebugStringTuple(std::stringstream& ss, HtData& htdata, const RowDescriptor* desc); - /// How many times this table has resized so far. - int64_t num_resizes_; + /// Update and print some statistics that can be used for performance debugging. + std::string PrintStats() const; + + /// Number of hash collisions so far in the lifetime of this object + int64_t NumHashCollisions() const { return num_hash_collisions_; } + + /// stl-like iterator interface. + class Iterator { + private: + /// Bucket index value when probe is not successful. + static const int64_t BUCKET_NOT_FOUND = -1; + + public: + IR_ALWAYS_INLINE Iterator() + : table_(NULL), scratch_row_(NULL), bucket_idx_(BUCKET_NOT_FOUND), node_(NULL) {} + + /// Iterates to the next element. It should be called only if !AtEnd(). + void IR_ALWAYS_INLINE Next(); + + /// Iterates to the next duplicate node. If the bucket does not have duplicates or + /// when it reaches the last duplicate node, then it moves the Iterator to AtEnd(). + /// Used when we want to iterate over all the duplicate nodes bypassing the Next() + /// interface (e.g. in semi/outer joins without other_join_conjuncts, in order to + /// iterate over all nodes of an unmatched bucket). + void IR_ALWAYS_INLINE NextDuplicate(); + + /// Iterates to the next element that does not have its matched flag set. Used in + /// right-outer and full-outer joins. 
+ void IR_ALWAYS_INLINE NextUnmatched(); + + /// Return the current row or tuple. Callers must check the iterator is not AtEnd() + /// before calling them. The returned row is owned by the iterator and valid until + /// the next call to GetRow(). It is safe to advance the iterator. + TupleRow* IR_ALWAYS_INLINE GetRow() const; + Tuple* IR_ALWAYS_INLINE GetTuple() const; + + /// Set the current tuple for an empty bucket. Designed to be used with the iterator + /// returned from FindBuildRowBucket() in the case when the value is not found. It is + /// not valid to call this function if the bucket already has an entry. + void SetTuple(Tuple* tuple, uint32_t hash); + + /// Sets as matched the Bucket or DuplicateNode currently pointed by the iterator, + /// depending on whether the bucket has duplicates or not. The iterator cannot be + /// AtEnd(). + void SetMatched(); + + /// Returns the 'matched' flag of the current Bucket or DuplicateNode, depending on + /// whether the bucket has duplicates or not. It should be called only if !AtEnd(). + bool IsMatched() const; + + /// Resets everything but the pointer to the hash table. + void SetAtEnd(); + + /// Returns true if this iterator is at the end, i.e. GetRow() cannot be called. + bool ALWAYS_INLINE AtEnd() const { return bucket_idx_ == BUCKET_NOT_FOUND; } + + /// Prefetch the hash table bucket which the iterator is pointing to now. + template + void IR_ALWAYS_INLINE PrefetchBucket(); + + private: + friend class PartitionedHashTable; + + ALWAYS_INLINE + Iterator(PartitionedHashTable* table, TupleRow* row, int bucket_idx, DuplicateNode* node) + : table_(table), scratch_row_(row), bucket_idx_(bucket_idx), node_(node) {} + + PartitionedHashTable* table_; + + /// Scratch buffer to hold generated rows. Not owned. + TupleRow* scratch_row_; + + /// Current bucket idx. + int64_t bucket_idx_; + + /// Pointer to the current duplicate node. 
+ DuplicateNode* node_; + }; + +private: + friend class Iterator; + friend class HashTableTest; + + /// Hash table constructor. Private because Create() should be used, instead + /// of calling this constructor directly. + /// - quadratic_probing: set to true when the probing algorithm is quadratic, as + /// opposed to linear. + PartitionedHashTable(bool quadratic_probing, Suballocator* allocator, bool stores_duplicates, + int num_build_tuples, BufferedTupleStream3* tuple_stream, + int64_t max_num_buckets, int64_t initial_num_buckets); + + /// Performs the probing operation according to the probing algorithm (linear or + /// quadratic. Returns one of the following: + /// (a) the index of the bucket that contains the entry that matches with the last row + /// evaluated in 'ht_ctx'. If 'ht_ctx' is NULL then it does not check for row + /// equality and returns the index of the first empty bucket. + /// (b) the index of the first empty bucket according to the probing algorithm (linear + /// or quadratic), if the entry is not in the hash table or 'ht_ctx' is NULL. + /// (c) Iterator::BUCKET_NOT_FOUND if the probe was not successful, i.e. the maximum + /// distance was traveled without finding either an empty or a matching bucket. + /// Using the returned index value, the caller can create an iterator that can be + /// iterated until End() to find all the matching rows. + /// + /// EvalAndHashBuild() or EvalAndHashProbe() must have been called before calling + /// this function. The values of the expression values cache in 'ht_ctx' will be + /// used to probe the hash table. + /// + /// 'FORCE_NULL_EQUALITY' is true if NULLs should always be considered equal when + /// comparing two rows. + /// + /// 'hash' is the hash computed by EvalAndHashBuild() or EvalAndHashProbe(). + /// 'found' indicates that a bucket that contains an equal row is found. + /// + /// There are wrappers of this function that perform the Find and Insert logic. 
+ template + int64_t IR_ALWAYS_INLINE Probe(Bucket* buckets, int64_t num_buckets, + PartitionedHashTableCtx* ht_ctx, uint32_t hash, bool* found); + + /// Performs the insert logic. Returns the HtData* of the bucket or duplicate node + /// where the data should be inserted. Returns NULL if the insert was not successful + /// and either sets 'status' to OK if it failed because not enough reservation was + /// available or the error if an error was encountered. + HtData* IR_ALWAYS_INLINE InsertInternal(PartitionedHashTableCtx* ht_ctx, Status* status); + + /// Updates 'bucket_idx' to the index of the next non-empty bucket. If the bucket has + /// duplicates, 'node' will be pointing to the head of the linked list of duplicates. + /// Otherwise, 'node' should not be used. If there are no more buckets, sets + /// 'bucket_idx' to BUCKET_NOT_FOUND. + void NextFilledBucket(int64_t* bucket_idx, DuplicateNode** node); + + /// Resize the hash table to 'num_buckets'. 'got_memory' is false on OOM. + Status ResizeBuckets(int64_t num_buckets, const PartitionedHashTableCtx* ht_ctx, + bool* got_memory); + + /// Appends the DuplicateNode pointed by next_node_ to 'bucket' and moves the next_node_ + /// pointer to the next DuplicateNode in the page, updating the remaining node counter. + DuplicateNode* IR_ALWAYS_INLINE AppendNextNode(Bucket* bucket); + + /// Creates a new DuplicateNode for a entry and chains it to the bucket with index + /// 'bucket_idx'. The duplicate nodes of a bucket are chained as a linked list. + /// This places the new duplicate node at the beginning of the list. If this is the + /// first duplicate entry inserted in this bucket, then the entry already contained by + /// the bucket is converted to a DuplicateNode. That is, the contents of 'data' of the + /// bucket are copied to a DuplicateNode and 'data' is updated to pointing to a + /// DuplicateNode. + /// Returns NULL and sets 'status' to OK if the node array could not grow, i.e. 
there + /// was not enough memory to allocate a new DuplicateNode. Returns NULL and sets + /// 'status' to an error if another error was encountered. + DuplicateNode* IR_ALWAYS_INLINE InsertDuplicateNode(int64_t bucket_idx, Status* status); + + /// Resets the contents of the empty bucket with index 'bucket_idx', in preparation for + /// an insert. Sets all the fields of the bucket other than 'data'. + void IR_ALWAYS_INLINE PrepareBucketForInsert(int64_t bucket_idx, uint32_t hash); + + /// Return the TupleRow pointed by 'htdata'. + TupleRow* GetRow(HtData& htdata, TupleRow* row) const; + + /// Returns the TupleRow of the pointed 'bucket'. In case of duplicates, it + /// returns the content of the first chained duplicate node of the bucket. + TupleRow* GetRow(Bucket* bucket, TupleRow* row) const; + + /// Grow the node array. Returns true and sets 'status' to OK on success. Returns false + /// and set 'status' to OK if we can't get sufficient reservation to allocate the next + /// data page. Returns false and sets 'status' if another error is encountered. + bool GrowNodeArray(Status* status); + + /// Functions to be replaced by codegen to specialize the hash table. + bool IR_NO_INLINE stores_tuples() const { return stores_tuples_; } + bool IR_NO_INLINE stores_duplicates() const { return stores_duplicates_; } + bool IR_NO_INLINE quadratic_probing() const { return quadratic_probing_; } + + /// Load factor that will trigger growing the hash table on insert. This is + /// defined as the number of non-empty buckets / total_buckets + static constexpr double MAX_FILL_FACTOR = 0.75; + + /// The size in bytes of each page of duplicate nodes. Should be large enough to fit + /// enough DuplicateNodes to amortise the overhead of allocating each page and low + /// enough to not waste excessive memory to internal fragmentation. + static constexpr int64_t DATA_PAGE_SIZE = 64L * 1024; + + RuntimeState* state_; + + /// Suballocator to allocate data pages and hash table buckets with. 
+ Suballocator* allocator_; + + /// Stream contains the rows referenced by the hash table. Can be NULL if the + /// row only contains a single tuple, in which case the TupleRow indirection + /// is removed by the hash table. + BufferedTupleStream3* tuple_stream_; + + /// Constants on how the hash table should behave. + + /// True if the HtData uses the Tuple* representation, or false if it uses FlatRowPtr. + const bool stores_tuples_; + + /// True if duplicates may be inserted into hash table. + const bool stores_duplicates_; + + /// Quadratic probing enabled (as opposed to linear). + const bool quadratic_probing_; + + /// Data pages for all nodes. Allocated from suballocator to reduce memory + /// consumption of small tables. + std::vector> data_pages_; + + /// Byte size of all buffers in data_pages_. + int64_t total_data_page_size_; + + /// Next duplicate node to insert. Valid when node_remaining_current_page_ > 0. + DuplicateNode* next_node_; + + /// Number of nodes left in the current page. + int node_remaining_current_page_; + + /// Number of duplicate nodes. + int64_t num_duplicate_nodes_; + + const int64_t max_num_buckets_; + + /// Allocation containing all buckets. + std::unique_ptr bucket_allocation_; + + /// Pointer to the 'buckets_' array from 'bucket_allocation_'. + Bucket* buckets_; + + /// Total number of buckets (filled and empty). + int64_t num_buckets_; + + /// Number of non-empty buckets. Used to determine when to resize. + int64_t num_filled_buckets_; + + /// Number of (non-empty) buckets with duplicates. These buckets do not point to slots + /// in the tuple stream, rather than to a linked list of Nodes. + int64_t num_buckets_with_duplicates_; + + /// Number of build tuples, used for constructing temp row* for probes. + const int num_build_tuples_; + + /// Flag used to check that we don't lose stored matches when spilling hash tables + /// (IMPALA-1488). + bool has_matches_; + + /// The stats below can be used for debugging perf. 
+ /// TODO: Should we make these statistics atomic? + /// Number of FindProbeRow(), Insert(), or FindBuildRowBucket() calls that probe the + /// hash table. + int64_t num_probes_; + + /// Number of probes that failed and had to fall back to linear probing without cap. + int64_t num_failed_probes_; + + /// Total distance traveled for each probe. That is the sum of the diff between the end + /// position of a probe (find/insert) and its start position + /// (hash & (num_buckets_ - 1)). + int64_t travel_length_; + + /// The number of cases where we had to compare buckets with the same hash value, but + /// the row equality failed. + int64_t num_hash_collisions_; + + /// How many times this table has resized so far. + int64_t num_resizes_; }; -} +} // namespace doris #endif - diff --git a/be/src/exec/partitioned_hash_table.inline.h b/be/src/exec/partitioned_hash_table.inline.h index 2cdbe01a36d283..0432ccbcb70d4c 100644 --- a/be/src/exec/partitioned_hash_table.inline.h +++ b/be/src/exec/partitioned_hash_table.inline.h @@ -19,390 +19,392 @@ #define DORIS_BE_SRC_EXEC_NEW_PARTITIONED_HASH_TABLE_INLINE_H #include "exec/partitioned_hash_table.h" - #include "exprs/expr.h" #include "exprs/expr_context.h" namespace doris { inline bool PartitionedHashTableCtx::EvalAndHashBuild(TupleRow* row) { - uint8_t* expr_values = expr_values_cache_.cur_expr_values(); - uint8_t* expr_values_null = expr_values_cache_.cur_expr_values_null(); - bool has_null = EvalBuildRow(row, expr_values, expr_values_null); - if (!stores_nulls() && has_null) return false; - expr_values_cache_.SetCurExprValuesHash(HashRow(expr_values, expr_values_null)); - return true; + uint8_t* expr_values = expr_values_cache_.cur_expr_values(); + uint8_t* expr_values_null = expr_values_cache_.cur_expr_values_null(); + bool has_null = EvalBuildRow(row, expr_values, expr_values_null); + if (!stores_nulls() && has_null) return false; + expr_values_cache_.SetCurExprValuesHash(HashRow(expr_values, expr_values_null)); + return 
true; } inline bool PartitionedHashTableCtx::EvalAndHashProbe(TupleRow* row) { - uint8_t* expr_values = expr_values_cache_.cur_expr_values(); - uint8_t* expr_values_null = expr_values_cache_.cur_expr_values_null(); - bool has_null = EvalProbeRow(row, expr_values, expr_values_null); - if (has_null && !(stores_nulls() && finds_some_nulls())) return false; - expr_values_cache_.SetCurExprValuesHash(HashRow(expr_values, expr_values_null)); - return true; + uint8_t* expr_values = expr_values_cache_.cur_expr_values(); + uint8_t* expr_values_null = expr_values_cache_.cur_expr_values_null(); + bool has_null = EvalProbeRow(row, expr_values, expr_values_null); + if (has_null && !(stores_nulls() && finds_some_nulls())) return false; + expr_values_cache_.SetCurExprValuesHash(HashRow(expr_values, expr_values_null)); + return true; } inline void PartitionedHashTableCtx::ExprValuesCache::NextRow() { - cur_expr_values_ += expr_values_bytes_per_row_; - cur_expr_values_null_ += num_exprs_; - ++cur_expr_values_hash_; - DCHECK_LE(cur_expr_values_hash_ - expr_values_hash_array_.get(), capacity_); + cur_expr_values_ += expr_values_bytes_per_row_; + cur_expr_values_null_ += num_exprs_; + ++cur_expr_values_hash_; + DCHECK_LE(cur_expr_values_hash_ - expr_values_hash_array_.get(), capacity_); } template inline int64_t PartitionedHashTable::Probe(Bucket* buckets, int64_t num_buckets, - PartitionedHashTableCtx* ht_ctx, uint32_t hash, bool* found) { - DCHECK(buckets != NULL); - DCHECK_GT(num_buckets, 0); - *found = false; - int64_t bucket_idx = hash & (num_buckets - 1); - - // In case of linear probing it counts the total number of steps for statistics and - // for knowing when to exit the loop (e.g. by capping the total travel length). In case - // of quadratic probing it is also used for calculating the length of the next jump. 
- int64_t step = 0; - do { - Bucket* bucket = &buckets[bucket_idx]; - if (LIKELY(!bucket->filled)) return bucket_idx; - if (hash == bucket->hash) { - if (ht_ctx != NULL && - ht_ctx->Equals(GetRow(bucket, ht_ctx->scratch_row_))) { - *found = true; - return bucket_idx; - } - // Row equality failed, or not performed. This is a hash collision. Continue - // searching. - ++num_hash_collisions_; - } - // Move to the next bucket. - ++step; - ++travel_length_; - if (quadratic_probing()) { - // The i-th probe location is idx = (hash + (step * (step + 1)) / 2) mod num_buckets. - // This gives num_buckets unique idxs (between 0 and N-1) when num_buckets is a power - // of 2. - bucket_idx = (bucket_idx + step) & (num_buckets - 1); - } else { - bucket_idx = (bucket_idx + 1) & (num_buckets - 1); - } - } while (LIKELY(step < num_buckets)); - DCHECK_EQ(num_filled_buckets_, num_buckets) << "Probing of a non-full table " - << "failed: " << quadratic_probing() << " " << hash; - return Iterator::BUCKET_NOT_FOUND; + PartitionedHashTableCtx* ht_ctx, uint32_t hash, + bool* found) { + DCHECK(buckets != NULL); + DCHECK_GT(num_buckets, 0); + *found = false; + int64_t bucket_idx = hash & (num_buckets - 1); + + // In case of linear probing it counts the total number of steps for statistics and + // for knowing when to exit the loop (e.g. by capping the total travel length). In case + // of quadratic probing it is also used for calculating the length of the next jump. + int64_t step = 0; + do { + Bucket* bucket = &buckets[bucket_idx]; + if (LIKELY(!bucket->filled)) return bucket_idx; + if (hash == bucket->hash) { + if (ht_ctx != NULL && + ht_ctx->Equals(GetRow(bucket, ht_ctx->scratch_row_))) { + *found = true; + return bucket_idx; + } + // Row equality failed, or not performed. This is a hash collision. Continue + // searching. + ++num_hash_collisions_; + } + // Move to the next bucket. 
+ ++step; + ++travel_length_; + if (quadratic_probing()) { + // The i-th probe location is idx = (hash + (step * (step + 1)) / 2) mod num_buckets. + // This gives num_buckets unique idxs (between 0 and N-1) when num_buckets is a power + // of 2. + bucket_idx = (bucket_idx + step) & (num_buckets - 1); + } else { + bucket_idx = (bucket_idx + 1) & (num_buckets - 1); + } + } while (LIKELY(step < num_buckets)); + DCHECK_EQ(num_filled_buckets_, num_buckets) << "Probing of a non-full table " + << "failed: " << quadratic_probing() << " " << hash; + return Iterator::BUCKET_NOT_FOUND; } inline PartitionedHashTable::HtData* PartitionedHashTable::InsertInternal( - PartitionedHashTableCtx* ht_ctx, Status* status) { - ++num_probes_; - bool found = false; - uint32_t hash = ht_ctx->expr_values_cache()->CurExprValuesHash(); - int64_t bucket_idx = Probe(buckets_, num_buckets_, ht_ctx, hash, &found); - DCHECK_NE(bucket_idx, Iterator::BUCKET_NOT_FOUND); - if (found) { - // We need to insert a duplicate node, note that this may fail to allocate memory. - DuplicateNode* new_node = InsertDuplicateNode(bucket_idx, status); - if (UNLIKELY(new_node == NULL)) return NULL; - return &new_node->htdata; - } else { - PrepareBucketForInsert(bucket_idx, hash); - return &buckets_[bucket_idx].bucketData.htdata; - } + PartitionedHashTableCtx* ht_ctx, Status* status) { + ++num_probes_; + bool found = false; + uint32_t hash = ht_ctx->expr_values_cache()->CurExprValuesHash(); + int64_t bucket_idx = Probe(buckets_, num_buckets_, ht_ctx, hash, &found); + DCHECK_NE(bucket_idx, Iterator::BUCKET_NOT_FOUND); + if (found) { + // We need to insert a duplicate node, note that this may fail to allocate memory. 
+ DuplicateNode* new_node = InsertDuplicateNode(bucket_idx, status); + if (UNLIKELY(new_node == NULL)) return NULL; + return &new_node->htdata; + } else { + PrepareBucketForInsert(bucket_idx, hash); + return &buckets_[bucket_idx].bucketData.htdata; + } } inline bool PartitionedHashTable::Insert(PartitionedHashTableCtx* ht_ctx, - BufferedTupleStream3::FlatRowPtr flat_row, TupleRow* row, Status* status) { - HtData* htdata = InsertInternal(ht_ctx, status); - // If successful insert, update the contents of the newly inserted entry with 'idx'. - if (LIKELY(htdata != NULL)) { - if (stores_tuples()) { - htdata->tuple = row->get_tuple(0); - } else { - htdata->flat_row = flat_row; + BufferedTupleStream3::FlatRowPtr flat_row, TupleRow* row, + Status* status) { + HtData* htdata = InsertInternal(ht_ctx, status); + // If successful insert, update the contents of the newly inserted entry with 'idx'. + if (LIKELY(htdata != NULL)) { + if (stores_tuples()) { + htdata->tuple = row->get_tuple(0); + } else { + htdata->flat_row = flat_row; + } + return true; } - return true; - } - return false; + return false; } -template +template inline void PartitionedHashTable::PrefetchBucket(uint32_t hash) { - int64_t bucket_idx = hash & (num_buckets_ - 1); - // Two optional arguments: - // 'rw': 1 means the memory access is write - // 'locality': 0-3. 0 means no temporal locality. 3 means high temporal locality. - // On x86, they map to instructions prefetchnta and prefetch{2-0} respectively. - // TODO: Reconsider the locality level with smaller prefetch batch size. - __builtin_prefetch(&buckets_[bucket_idx], READ ? 0 : 1, 1); + int64_t bucket_idx = hash & (num_buckets_ - 1); + // Two optional arguments: + // 'rw': 1 means the memory access is write + // 'locality': 0-3. 0 means no temporal locality. 3 means high temporal locality. + // On x86, they map to instructions prefetchnta and prefetch{2-0} respectively. + // TODO: Reconsider the locality level with smaller prefetch batch size. 
+ __builtin_prefetch(&buckets_[bucket_idx], READ ? 0 : 1, 1); } inline PartitionedHashTable::Iterator PartitionedHashTable::FindProbeRow( - PartitionedHashTableCtx* ht_ctx) { - ++num_probes_; - bool found = false; - uint32_t hash = ht_ctx->expr_values_cache()->CurExprValuesHash(); - int64_t bucket_idx = Probe(buckets_, num_buckets_, ht_ctx, hash, &found); - if (found) { - return Iterator(this, ht_ctx->scratch_row(), bucket_idx, - stores_duplicates() ? buckets_[bucket_idx].bucketData.duplicates : NULL); - } - return End(); + PartitionedHashTableCtx* ht_ctx) { + ++num_probes_; + bool found = false; + uint32_t hash = ht_ctx->expr_values_cache()->CurExprValuesHash(); + int64_t bucket_idx = Probe(buckets_, num_buckets_, ht_ctx, hash, &found); + if (found) { + return Iterator(this, ht_ctx->scratch_row(), bucket_idx, + stores_duplicates() ? buckets_[bucket_idx].bucketData.duplicates : NULL); + } + return End(); } // TODO: support lazy evaluation like HashTable::Insert(). inline PartitionedHashTable::Iterator PartitionedHashTable::FindBuildRowBucket( - PartitionedHashTableCtx* ht_ctx, bool* found) { - ++num_probes_; - uint32_t hash = ht_ctx->expr_values_cache()->CurExprValuesHash(); - int64_t bucket_idx = Probe(buckets_, num_buckets_, ht_ctx, hash, found); - DuplicateNode* duplicates = NULL; - if (stores_duplicates() && LIKELY(bucket_idx != Iterator::BUCKET_NOT_FOUND)) { - duplicates = buckets_[bucket_idx].bucketData.duplicates; - } - return Iterator(this, ht_ctx->scratch_row(), bucket_idx, duplicates); + PartitionedHashTableCtx* ht_ctx, bool* found) { + ++num_probes_; + uint32_t hash = ht_ctx->expr_values_cache()->CurExprValuesHash(); + int64_t bucket_idx = Probe(buckets_, num_buckets_, ht_ctx, hash, found); + DuplicateNode* duplicates = NULL; + if (stores_duplicates() && LIKELY(bucket_idx != Iterator::BUCKET_NOT_FOUND)) { + duplicates = buckets_[bucket_idx].bucketData.duplicates; + } + return Iterator(this, ht_ctx->scratch_row(), bucket_idx, duplicates); } inline 
PartitionedHashTable::Iterator PartitionedHashTable::Begin( - const PartitionedHashTableCtx* ctx) { - int64_t bucket_idx = Iterator::BUCKET_NOT_FOUND; - DuplicateNode* node = NULL; - NextFilledBucket(&bucket_idx, &node); - return Iterator(this, ctx->scratch_row(), bucket_idx, node); + const PartitionedHashTableCtx* ctx) { + int64_t bucket_idx = Iterator::BUCKET_NOT_FOUND; + DuplicateNode* node = NULL; + NextFilledBucket(&bucket_idx, &node); + return Iterator(this, ctx->scratch_row(), bucket_idx, node); } inline PartitionedHashTable::Iterator PartitionedHashTable::FirstUnmatched( - PartitionedHashTableCtx* ctx) { - int64_t bucket_idx = Iterator::BUCKET_NOT_FOUND; - DuplicateNode* node = NULL; - NextFilledBucket(&bucket_idx, &node); - Iterator it(this, ctx->scratch_row(), bucket_idx, node); - // Check whether the bucket, or its first duplicate node, is matched. If it is not - // matched, then return. Otherwise, move to the first unmatched entry (node or bucket). - Bucket* bucket = &buckets_[bucket_idx]; - bool has_duplicates = stores_duplicates() && bucket->hasDuplicates; - if ((!has_duplicates && bucket->matched) || (has_duplicates && node->matched)) { - it.NextUnmatched(); - } - return it; + PartitionedHashTableCtx* ctx) { + int64_t bucket_idx = Iterator::BUCKET_NOT_FOUND; + DuplicateNode* node = NULL; + NextFilledBucket(&bucket_idx, &node); + Iterator it(this, ctx->scratch_row(), bucket_idx, node); + // Check whether the bucket, or its first duplicate node, is matched. If it is not + // matched, then return. Otherwise, move to the first unmatched entry (node or bucket). 
+ Bucket* bucket = &buckets_[bucket_idx]; + bool has_duplicates = stores_duplicates() && bucket->hasDuplicates; + if ((!has_duplicates && bucket->matched) || (has_duplicates && node->matched)) { + it.NextUnmatched(); + } + return it; } inline void PartitionedHashTable::NextFilledBucket(int64_t* bucket_idx, DuplicateNode** node) { - ++*bucket_idx; - for (; *bucket_idx < num_buckets_; ++*bucket_idx) { - if (buckets_[*bucket_idx].filled) { - *node = stores_duplicates() ? buckets_[*bucket_idx].bucketData.duplicates : NULL; - return; + ++*bucket_idx; + for (; *bucket_idx < num_buckets_; ++*bucket_idx) { + if (buckets_[*bucket_idx].filled) { + *node = stores_duplicates() ? buckets_[*bucket_idx].bucketData.duplicates : NULL; + return; + } } - } - // Reached the end of the hash table. - *bucket_idx = Iterator::BUCKET_NOT_FOUND; - *node = NULL; + // Reached the end of the hash table. + *bucket_idx = Iterator::BUCKET_NOT_FOUND; + *node = NULL; } inline void PartitionedHashTable::PrepareBucketForInsert(int64_t bucket_idx, uint32_t hash) { - DCHECK_GE(bucket_idx, 0); - DCHECK_LT(bucket_idx, num_buckets_); - Bucket* bucket = &buckets_[bucket_idx]; - DCHECK(!bucket->filled); - ++num_filled_buckets_; - bucket->filled = true; - bucket->matched = false; - bucket->hasDuplicates = false; - bucket->hash = hash; + DCHECK_GE(bucket_idx, 0); + DCHECK_LT(bucket_idx, num_buckets_); + Bucket* bucket = &buckets_[bucket_idx]; + DCHECK(!bucket->filled); + ++num_filled_buckets_; + bucket->filled = true; + bucket->matched = false; + bucket->hasDuplicates = false; + bucket->hash = hash; } inline PartitionedHashTable::DuplicateNode* PartitionedHashTable::AppendNextNode(Bucket* bucket) { - DCHECK_GT(node_remaining_current_page_, 0); - bucket->bucketData.duplicates = next_node_; - ++num_duplicate_nodes_; - --node_remaining_current_page_; - return next_node_++; + DCHECK_GT(node_remaining_current_page_, 0); + bucket->bucketData.duplicates = next_node_; + ++num_duplicate_nodes_; + 
--node_remaining_current_page_; + return next_node_++; } inline PartitionedHashTable::DuplicateNode* PartitionedHashTable::InsertDuplicateNode( - int64_t bucket_idx, Status* status) { - DCHECK_GE(bucket_idx, 0); - DCHECK_LT(bucket_idx, num_buckets_); - Bucket* bucket = &buckets_[bucket_idx]; - DCHECK(bucket->filled); - DCHECK(stores_duplicates()); - // Allocate one duplicate node for the new data and one for the preexisting data, - // if needed. - while (node_remaining_current_page_ < 1 + !bucket->hasDuplicates) { - if (UNLIKELY(!GrowNodeArray(status))) return NULL; - } - if (!bucket->hasDuplicates) { - // This is the first duplicate in this bucket. It means that we need to convert - // the current entry in the bucket to a node and link it from the bucket. - next_node_->htdata.flat_row = bucket->bucketData.htdata.flat_row; - DCHECK(!bucket->matched); + int64_t bucket_idx, Status* status) { + DCHECK_GE(bucket_idx, 0); + DCHECK_LT(bucket_idx, num_buckets_); + Bucket* bucket = &buckets_[bucket_idx]; + DCHECK(bucket->filled); + DCHECK(stores_duplicates()); + // Allocate one duplicate node for the new data and one for the preexisting data, + // if needed. + while (node_remaining_current_page_ < 1 + !bucket->hasDuplicates) { + if (UNLIKELY(!GrowNodeArray(status))) return NULL; + } + if (!bucket->hasDuplicates) { + // This is the first duplicate in this bucket. It means that we need to convert + // the current entry in the bucket to a node and link it from the bucket. + next_node_->htdata.flat_row = bucket->bucketData.htdata.flat_row; + DCHECK(!bucket->matched); + next_node_->matched = false; + next_node_->next = NULL; + AppendNextNode(bucket); + bucket->hasDuplicates = true; + ++num_buckets_with_duplicates_; + } + // Link a new node. + next_node_->next = bucket->bucketData.duplicates; next_node_->matched = false; - next_node_->next = NULL; - AppendNextNode(bucket); - bucket->hasDuplicates = true; - ++num_buckets_with_duplicates_; - } - // Link a new node. 
- next_node_->next = bucket->bucketData.duplicates; - next_node_->matched = false; - return AppendNextNode(bucket); + return AppendNextNode(bucket); } -inline TupleRow* IR_ALWAYS_INLINE PartitionedHashTable::GetRow(HtData& htdata, TupleRow* row) const { - if (stores_tuples()) { - return reinterpret_cast(&htdata.tuple); - } else { - // TODO: GetTupleRow() has interpreted code that iterates over the row's descriptor. - tuple_stream_->GetTupleRow(htdata.flat_row, row); - return row; - } +inline TupleRow* IR_ALWAYS_INLINE PartitionedHashTable::GetRow(HtData& htdata, + TupleRow* row) const { + if (stores_tuples()) { + return reinterpret_cast(&htdata.tuple); + } else { + // TODO: GetTupleRow() has interpreted code that iterates over the row's descriptor. + tuple_stream_->GetTupleRow(htdata.flat_row, row); + return row; + } } -inline TupleRow* IR_ALWAYS_INLINE PartitionedHashTable::GetRow(Bucket* bucket, TupleRow* row) const { - DCHECK(bucket != NULL); - if (UNLIKELY(stores_duplicates() && bucket->hasDuplicates)) { - DuplicateNode* duplicate = bucket->bucketData.duplicates; - DCHECK(duplicate != NULL); - return GetRow(duplicate->htdata, row); - } else { - return GetRow(bucket->bucketData.htdata, row); - } +inline TupleRow* IR_ALWAYS_INLINE PartitionedHashTable::GetRow(Bucket* bucket, + TupleRow* row) const { + DCHECK(bucket != NULL); + if (UNLIKELY(stores_duplicates() && bucket->hasDuplicates)) { + DuplicateNode* duplicate = bucket->bucketData.duplicates; + DCHECK(duplicate != NULL); + return GetRow(duplicate->htdata, row); + } else { + return GetRow(bucket->bucketData.htdata, row); + } } inline TupleRow* IR_ALWAYS_INLINE PartitionedHashTable::Iterator::GetRow() const { - DCHECK(!AtEnd()); - DCHECK(table_ != NULL); - DCHECK(scratch_row_ != NULL); - Bucket* bucket = &table_->buckets_[bucket_idx_]; - if (UNLIKELY(table_->stores_duplicates() && bucket->hasDuplicates)) { - DCHECK(node_ != NULL); - return table_->GetRow(node_->htdata, scratch_row_); - } else { - return 
table_->GetRow(bucket->bucketData.htdata, scratch_row_); - } + DCHECK(!AtEnd()); + DCHECK(table_ != NULL); + DCHECK(scratch_row_ != NULL); + Bucket* bucket = &table_->buckets_[bucket_idx_]; + if (UNLIKELY(table_->stores_duplicates() && bucket->hasDuplicates)) { + DCHECK(node_ != NULL); + return table_->GetRow(node_->htdata, scratch_row_); + } else { + return table_->GetRow(bucket->bucketData.htdata, scratch_row_); + } } inline Tuple* IR_ALWAYS_INLINE PartitionedHashTable::Iterator::GetTuple() const { - DCHECK(!AtEnd()); - DCHECK(table_->stores_tuples()); - Bucket* bucket = &table_->buckets_[bucket_idx_]; - // TODO: To avoid the hasDuplicates check, store the HtData* in the Iterator. - if (UNLIKELY(table_->stores_duplicates() && bucket->hasDuplicates)) { - DCHECK(node_ != NULL); - return node_->htdata.tuple; - } else { - return bucket->bucketData.htdata.tuple; - } + DCHECK(!AtEnd()); + DCHECK(table_->stores_tuples()); + Bucket* bucket = &table_->buckets_[bucket_idx_]; + // TODO: To avoid the hasDuplicates check, store the HtData* in the Iterator. 
+ if (UNLIKELY(table_->stores_duplicates() && bucket->hasDuplicates)) { + DCHECK(node_ != NULL); + return node_->htdata.tuple; + } else { + return bucket->bucketData.htdata.tuple; + } } inline void PartitionedHashTable::Iterator::SetTuple(Tuple* tuple, uint32_t hash) { - DCHECK(!AtEnd()); - DCHECK(table_->stores_tuples()); - table_->PrepareBucketForInsert(bucket_idx_, hash); - table_->buckets_[bucket_idx_].bucketData.htdata.tuple = tuple; + DCHECK(!AtEnd()); + DCHECK(table_->stores_tuples()); + table_->PrepareBucketForInsert(bucket_idx_, hash); + table_->buckets_[bucket_idx_].bucketData.htdata.tuple = tuple; } inline void PartitionedHashTable::Iterator::SetMatched() { - DCHECK(!AtEnd()); - Bucket* bucket = &table_->buckets_[bucket_idx_]; - if (table_->stores_duplicates() && bucket->hasDuplicates) { - node_->matched = true; - } else { - bucket->matched = true; - } - // Used for disabling spilling of hash tables in right and full-outer joins with - // matches. See IMPALA-1488. - table_->has_matches_ = true; + DCHECK(!AtEnd()); + Bucket* bucket = &table_->buckets_[bucket_idx_]; + if (table_->stores_duplicates() && bucket->hasDuplicates) { + node_->matched = true; + } else { + bucket->matched = true; + } + // Used for disabling spilling of hash tables in right and full-outer joins with + // matches. See IMPALA-1488. 
+ table_->has_matches_ = true; } inline bool PartitionedHashTable::Iterator::IsMatched() const { - DCHECK(!AtEnd()); - Bucket* bucket = &table_->buckets_[bucket_idx_]; - if (table_->stores_duplicates() && bucket->hasDuplicates) { - return node_->matched; - } - return bucket->matched; + DCHECK(!AtEnd()); + Bucket* bucket = &table_->buckets_[bucket_idx_]; + if (table_->stores_duplicates() && bucket->hasDuplicates) { + return node_->matched; + } + return bucket->matched; } inline void PartitionedHashTable::Iterator::SetAtEnd() { - bucket_idx_ = BUCKET_NOT_FOUND; - node_ = NULL; + bucket_idx_ = BUCKET_NOT_FOUND; + node_ = NULL; } -template +template inline void PartitionedHashTable::Iterator::PrefetchBucket() { - if (LIKELY(!AtEnd())) { - // HashTable::PrefetchBucket() takes a hash value to index into the hash bucket - // array. Passing 'bucket_idx_' here is sufficient. - DCHECK_EQ((bucket_idx_ & ~(table_->num_buckets_ - 1)), 0); - table_->PrefetchBucket(bucket_idx_); - } + if (LIKELY(!AtEnd())) { + // HashTable::PrefetchBucket() takes a hash value to index into the hash bucket + // array. Passing 'bucket_idx_' here is sufficient. 
+ DCHECK_EQ((bucket_idx_ & ~(table_->num_buckets_ - 1)), 0); + table_->PrefetchBucket(bucket_idx_); + } } inline void PartitionedHashTable::Iterator::Next() { - DCHECK(!AtEnd()); - if (table_->stores_duplicates() && table_->buckets_[bucket_idx_].hasDuplicates && - node_->next != NULL) { - node_ = node_->next; - } else { - table_->NextFilledBucket(&bucket_idx_, &node_); - } + DCHECK(!AtEnd()); + if (table_->stores_duplicates() && table_->buckets_[bucket_idx_].hasDuplicates && + node_->next != NULL) { + node_ = node_->next; + } else { + table_->NextFilledBucket(&bucket_idx_, &node_); + } } inline void PartitionedHashTable::Iterator::NextDuplicate() { - DCHECK(!AtEnd()); - if (table_->stores_duplicates() && table_->buckets_[bucket_idx_].hasDuplicates && - node_->next != NULL) { - node_ = node_->next; - } else { - bucket_idx_ = BUCKET_NOT_FOUND; - node_ = NULL; - } + DCHECK(!AtEnd()); + if (table_->stores_duplicates() && table_->buckets_[bucket_idx_].hasDuplicates && + node_->next != NULL) { + node_ = node_->next; + } else { + bucket_idx_ = BUCKET_NOT_FOUND; + node_ = NULL; + } } inline void PartitionedHashTable::Iterator::NextUnmatched() { - DCHECK(!AtEnd()); - Bucket* bucket = &table_->buckets_[bucket_idx_]; - // Check if there is any remaining unmatched duplicate node in the current bucket. - if (table_->stores_duplicates() && bucket->hasDuplicates) { - while (node_->next != NULL) { - node_ = node_->next; - if (!node_->matched) return; - } - } - // Move to the next filled bucket and return if this bucket is not matched or - // iterate to the first not matched duplicate node. 
- table_->NextFilledBucket(&bucket_idx_, &node_); - while (bucket_idx_ != Iterator::BUCKET_NOT_FOUND) { - bucket = &table_->buckets_[bucket_idx_]; - if (!table_->stores_duplicates() || !bucket->hasDuplicates) { - if (!bucket->matched) return; - } else { - while (node_->matched && node_->next != NULL) { - node_ = node_->next; - } - if (!node_->matched) return; + DCHECK(!AtEnd()); + Bucket* bucket = &table_->buckets_[bucket_idx_]; + // Check if there is any remaining unmatched duplicate node in the current bucket. + if (table_->stores_duplicates() && bucket->hasDuplicates) { + while (node_->next != NULL) { + node_ = node_->next; + if (!node_->matched) return; + } } + // Move to the next filled bucket and return if this bucket is not matched or + // iterate to the first not matched duplicate node. table_->NextFilledBucket(&bucket_idx_, &node_); - } + while (bucket_idx_ != Iterator::BUCKET_NOT_FOUND) { + bucket = &table_->buckets_[bucket_idx_]; + if (!table_->stores_duplicates() || !bucket->hasDuplicates) { + if (!bucket->matched) return; + } else { + while (node_->matched && node_->next != NULL) { + node_ = node_->next; + } + if (!node_->matched) return; + } + table_->NextFilledBucket(&bucket_idx_, &node_); + } } inline void PartitionedHashTableCtx::set_level(int level) { - DCHECK_GE(level, 0); - DCHECK_LT(level, seeds_.size()); - level_ = level; + DCHECK_GE(level, 0); + DCHECK_LT(level, seeds_.size()); + level_ = level; } inline int64_t PartitionedHashTable::CurrentMemSize() const { - return num_buckets_ * sizeof(Bucket) + num_duplicate_nodes_ * sizeof(DuplicateNode); + return num_buckets_ * sizeof(Bucket) + num_duplicate_nodes_ * sizeof(DuplicateNode); } inline int64_t PartitionedHashTable::NumInsertsBeforeResize() const { - return std::max( - 0, static_cast(num_buckets_ * MAX_FILL_FACTOR) - num_filled_buckets_); + return std::max( + 0, static_cast(num_buckets_ * MAX_FILL_FACTOR) - num_filled_buckets_); } -} +} // namespace doris #endif - diff --git 
a/be/src/exec/partitioned_hash_table_ir.cc b/be/src/exec/partitioned_hash_table_ir.cc index eabe610028a46a..50dc30787cc6c3 100644 --- a/be/src/exec/partitioned_hash_table_ir.cc +++ b/be/src/exec/partitioned_hash_table_ir.cc @@ -20,15 +20,16 @@ using namespace doris; -uint32_t PartitionedHashTableCtx::GetHashSeed() const { return seeds_[level_]; } +uint32_t PartitionedHashTableCtx::GetHashSeed() const { + return seeds_[level_]; +} ExprContext* const* PartitionedHashTableCtx::build_expr_evals() const { - return build_expr_evals_.data(); + return build_expr_evals_.data(); } ExprContext* const* PartitionedHashTableCtx::probe_expr_evals() const { - return probe_expr_evals_.data(); + return probe_expr_evals_.data(); } #endif - diff --git a/be/src/exec/pl_task_root.cpp b/be/src/exec/pl_task_root.cpp index 6867c8fc50bd38..fef20d5a91f329 100644 --- a/be/src/exec/pl_task_root.cpp +++ b/be/src/exec/pl_task_root.cpp @@ -19,18 +19,10 @@ namespace doris { -ExchangeNode::ExchangeNode( - ObjectPool* pool, - const TPlanNode& tnode, - const DescriptorTbl& descs) : - ExecNode(pool, tnode, descs), - _num_senders(0), - _stream_recvr(NULL), - _next_row_idx(0) { -} +ExchangeNode::ExchangeNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), _num_senders(0), _stream_recvr(NULL), _next_row_idx(0) {} -ExchangeNode::~ExchangeNode() { -} +ExchangeNode::~ExchangeNode() {} Status ExchangeNode::init(const TPlanNode& tnode, RuntimeState* state) { return ExecNode::init(tnode, state); @@ -44,7 +36,7 @@ Status ExchangeNode::prepare(RuntimeState* state) { // TODO: figure out appropriate buffer size DCHECK_GT(_num_senders, 0); _stream_recvr = state->create_recvr(_row_descriptor, _id, _num_senders, - config::exchg_node_buffer_size_bytes, runtime_profile()); + config::exchg_node_buffer_size_bytes, runtime_profile()); return Status::OK(); } @@ -78,8 +70,8 @@ Status ExchangeNode::get_next(RuntimeState* state, RowBatch* output_batch, bool* 
SCOPED_TIMER(_convert_row_batch_timer); // copy rows until we hit the limit/capacity or until we exhaust _input_batch - while (!reached_limit() && !output_batch->is_full() - && _input_batch.get() != NULL && _next_row_idx < _input_batch->capacity()) { + while (!reached_limit() && !output_batch->is_full() && _input_batch.get() != NULL && + _next_row_idx < _input_batch->capacity()) { TupleRow* src = _input_batch->get_row(_next_row_idx); if (ExecNode::eval_conjuncts(ctxs, num_ctxs, src)) { @@ -145,5 +137,4 @@ void ExchangeNode::debug_string(int indentation_level, std::stringstream* out) c *out << ")"; } -} - +} // namespace doris diff --git a/be/src/exec/pl_task_root.h b/be/src/exec/pl_task_root.h index d7361f023f5d09..29a88e0f39a44b 100644 --- a/be/src/exec/pl_task_root.h +++ b/be/src/exec/pl_task_root.h @@ -35,16 +35,13 @@ class PlTaskRoot : public ExecNode { // the number of senders needs to be set after the c'tor, because it's not // recorded in TPlanNode, and before calling prepare() - void set_num_senders(int num_senders) { - _num_senders = num_senders; - } + void set_num_senders(int num_senders) { _num_senders = num_senders; } protected: virtual void debug_string(int indentation_level, std::stringstream* out) const; private: - int _num_senders; // needed for _stream_recvr construction + int _num_senders; // needed for _stream_recvr construction }; -} - +} // namespace doris diff --git a/be/src/exec/plain_text_line_reader.cpp b/be/src/exec/plain_text_line_reader.cpp index 634640cc3659dc..2f0dcc30b50941 100644 --- a/be/src/exec/plain_text_line_reader.cpp +++ b/be/src/exec/plain_text_line_reader.cpp @@ -18,13 +18,13 @@ #include "exec/plain_text_line_reader.h" #include "common/status.h" -#include "exec/file_reader.h" #include "exec/decompressor.h" +#include "exec/file_reader.h" // INPUT_CHUNK must // larger than 15B for correct lz4 file decompressing -// larger than 300B for correct lzo header decompressing -#define INPUT_CHUNK (2 * 1024 * 1024) +// larger than 300B 
for correct lzo header decompressing +#define INPUT_CHUNK (2 * 1024 * 1024) // #define INPUT_CHUNK (34) #define OUTPUT_CHUNK (8 * 1024 * 1024) // #define OUTPUT_CHUNK (32) @@ -32,34 +32,32 @@ namespace doris { -PlainTextLineReader::PlainTextLineReader( - RuntimeProfile* profile, - FileReader* file_reader, - Decompressor* decompressor, - size_t length, uint8_t line_delimiter) : - _profile(profile), - _file_reader(file_reader), - _decompressor(decompressor), - _min_length(length), - _total_read_bytes(0), - _line_delimiter(line_delimiter), - _input_buf(new uint8_t[INPUT_CHUNK]), - _input_buf_size(INPUT_CHUNK), - _input_buf_pos(0), - _input_buf_limit(0), - _output_buf(new uint8_t[OUTPUT_CHUNK]), - _output_buf_size(OUTPUT_CHUNK), - _output_buf_pos(0), - _output_buf_limit(0), - _file_eof(false), - _eof(false), - _stream_end(true), - _more_input_bytes(0), - _more_output_bytes(0), - _bytes_read_counter(nullptr), - _read_timer(nullptr), - _bytes_decompress_counter(nullptr), - _decompress_timer(nullptr) { +PlainTextLineReader::PlainTextLineReader(RuntimeProfile* profile, FileReader* file_reader, + Decompressor* decompressor, size_t length, + uint8_t line_delimiter) + : _profile(profile), + _file_reader(file_reader), + _decompressor(decompressor), + _min_length(length), + _total_read_bytes(0), + _line_delimiter(line_delimiter), + _input_buf(new uint8_t[INPUT_CHUNK]), + _input_buf_size(INPUT_CHUNK), + _input_buf_pos(0), + _input_buf_limit(0), + _output_buf(new uint8_t[OUTPUT_CHUNK]), + _output_buf_size(OUTPUT_CHUNK), + _output_buf_pos(0), + _output_buf_limit(0), + _file_eof(false), + _eof(false), + _stream_end(true), + _more_input_bytes(0), + _more_output_bytes(0), + _bytes_read_counter(nullptr), + _read_timer(nullptr), + _bytes_decompress_counter(nullptr), + _decompress_timer(nullptr) { _bytes_read_counter = ADD_COUNTER(_profile, "BytesRead", TUnit::BYTES); _read_timer = ADD_TIMER(_profile, "FileReadTime"); _bytes_decompress_counter = ADD_COUNTER(_profile, 
"BytesDecompressed", TUnit::BYTES); @@ -85,17 +83,16 @@ void PlainTextLineReader::close() { inline bool PlainTextLineReader::update_eof() { if (done()) { _eof = true; - } else if (_decompressor == nullptr - && (_min_length >= 0 && _total_read_bytes >= _min_length)) { + } else if (_decompressor == nullptr && (_min_length >= 0 && _total_read_bytes >= _min_length)) { _eof = true; } return _eof; } -uint8_t* PlainTextLineReader::update_field_pos_and_find_line_delimiter( - const uint8_t* start, size_t len) { +uint8_t* PlainTextLineReader::update_field_pos_and_find_line_delimiter(const uint8_t* start, + size_t len) { // TODO: meanwhile find and save field pos - return (uint8_t*) memmem(start, len, &_line_delimiter, 1); + return (uint8_t*)memmem(start, len, &_line_delimiter, 1); } // extend input buf if necessary only when _more_input_bytes > 0 @@ -137,7 +134,7 @@ void PlainTextLineReader::extend_input_buf() { // LOG(INFO) << "extend input buf." // << " input_buf_size: " << _input_buf_size // << " input_buf_pos: " << _input_buf_pos - // << " input_buf_limit: " << _input_buf_limit; + // << " input_buf_limit: " << _input_buf_limit; } void PlainTextLineReader::extend_output_buf() { @@ -166,7 +163,7 @@ void PlainTextLineReader::extend_output_buf() { while (_output_buf_size - output_buf_read_remaining() < target) { _output_buf_size = _output_buf_size * 2; } - + uint8_t* new_output_buf = new uint8_t[_output_buf_size]; memmove(new_output_buf, _output_buf + _output_buf_pos, output_buf_read_remaining()); delete[] _output_buf; @@ -194,8 +191,7 @@ Status PlainTextLineReader::read_line(const uint8_t** ptr, size_t* size, bool* e // find line delimiter in current decompressed data uint8_t* cur_ptr = _output_buf + _output_buf_pos; uint8_t* pos = update_field_pos_and_find_line_delimiter( - cur_ptr + offset, - output_buf_read_remaining() - offset); + cur_ptr + offset, output_buf_read_remaining() - offset); if (pos == nullptr) { // didn't find line delimiter, read more data from 
decompressor @@ -266,7 +262,6 @@ Status PlainTextLineReader::read_line(const uint8_t** ptr, size_t* size, bool* e _more_input_bytes = _more_input_bytes - read_len; continue; } - } if (_decompressor != nullptr) { @@ -277,15 +272,11 @@ Status PlainTextLineReader::read_line(const uint8_t** ptr, size_t* size, bool* e _more_input_bytes = 0; _more_output_bytes = 0; RETURN_IF_ERROR(_decompressor->decompress( - _input_buf + _input_buf_pos, /* input */ - _input_buf_limit - _input_buf_pos, /* input_len */ - &input_read_bytes, - _output_buf + _output_buf_limit, /* output */ - _output_buf_size - _output_buf_limit, /* output_max_len */ - &decompressed_len, - &_stream_end, - &_more_input_bytes, - &_more_output_bytes)); + _input_buf + _input_buf_pos, /* input */ + _input_buf_limit - _input_buf_pos, /* input_len */ + &input_read_bytes, _output_buf + _output_buf_limit, /* output */ + _output_buf_size - _output_buf_limit, /* output_max_len */ + &decompressed_len, &_stream_end, &_more_input_bytes, &_more_output_bytes)); // LOG(INFO) << "after decompress:" // << " stream_end: " << _stream_end @@ -300,8 +291,8 @@ Status PlainTextLineReader::read_line(const uint8_t** ptr, size_t* size, bool* e COUNTER_UPDATE(_bytes_decompress_counter, decompressed_len); // TODO(cmy): watch this case - if ((input_read_bytes == 0 /*decompressed_len == 0*/) - && _more_input_bytes == 0 && _more_output_bytes == 0) { + if ((input_read_bytes == 0 /*decompressed_len == 0*/) && _more_input_bytes == 0 && + _more_output_bytes == 0) { // decompress made no progress, may be // A. input data is not enough to decompress data to output // B. 
output buf is too small to save decompressed output @@ -347,4 +338,4 @@ Status PlainTextLineReader::read_line(const uint8_t** ptr, size_t* size, bool* e return Status::OK(); } -} // end of namespace +} // namespace doris diff --git a/be/src/exec/plain_text_line_reader.h b/be/src/exec/plain_text_line_reader.h index a3c1be2ba42e1e..01ed6926c0ff24 100644 --- a/be/src/exec/plain_text_line_reader.h +++ b/be/src/exec/plain_text_line_reader.h @@ -28,9 +28,8 @@ class Status; class PlainTextLineReader : public LineReader { public: - PlainTextLineReader(RuntimeProfile* profile, FileReader* file_reader, - Decompressor* decompressor, - size_t length, uint8_t line_delimiter); + PlainTextLineReader(RuntimeProfile* profile, FileReader* file_reader, + Decompressor* decompressor, size_t length, uint8_t line_delimiter); virtual ~PlainTextLineReader(); @@ -41,17 +40,11 @@ class PlainTextLineReader : public LineReader { private: bool update_eof(); - inline size_t output_buf_read_remaining() { - return _output_buf_limit - _output_buf_pos; - } + inline size_t output_buf_read_remaining() { return _output_buf_limit - _output_buf_pos; } - inline size_t input_buf_read_remaining() { - return _input_buf_limit - _input_buf_pos; - } + inline size_t input_buf_read_remaining() { return _input_buf_limit - _input_buf_pos; } - inline bool done() { - return _file_eof && output_buf_read_remaining() == 0; - } + inline bool done() { return _file_eof && output_buf_read_remaining() == 0; } // find line delimiter from 'start' to 'start' + len, // return line delimiter pos if found, otherwise return nullptr. 
@@ -95,4 +88,4 @@ class PlainTextLineReader : public LineReader { RuntimeProfile::Counter* _decompress_timer; }; -} +} // namespace doris diff --git a/be/src/exec/read_write_util.cpp b/be/src/exec/read_write_util.cpp index 0bc10f037cc4a3..0c8fa063c69aa7 100644 --- a/be/src/exec/read_write_util.cpp +++ b/be/src/exec/read_write_util.cpp @@ -69,4 +69,4 @@ std::string ReadWriteUtil::hex_dump(const uint8_t* buf, int64_t length) { std::string ReadWriteUtil::hex_dump(const char* buf, int64_t length) { return hex_dump(reinterpret_cast(buf), length); } -} +} // namespace doris diff --git a/be/src/exec/read_write_util.h b/be/src/exec/read_write_util.h index 09d0f0e2779520..c13092f99547ab 100644 --- a/be/src/exec/read_write_util.h +++ b/be/src/exec/read_write_util.h @@ -20,12 +20,14 @@ #include #include + #include "common/logging.h" #include "common/status.h" namespace doris { -#define RETURN_IF_FALSE(x) if (UNLIKELY(!(x))) return false +#define RETURN_IF_FALSE(x) \ + if (UNLIKELY(!(x))) return false // Class for reading and writing various data types. 
class ReadWriteUtil { @@ -102,10 +104,8 @@ inline int32_t ReadWriteUtil::get_int(const uint8_t* buf) { } inline int64_t ReadWriteUtil::get_long_int(const uint8_t* buf) { - return (static_cast(buf[0]) << 56) | - (static_cast(buf[1]) << 48) | - (static_cast(buf[2]) << 40) | - (static_cast(buf[3]) << 32) | + return (static_cast(buf[0]) << 56) | (static_cast(buf[1]) << 48) | + (static_cast(buf[2]) << 40) | (static_cast(buf[3]) << 32) | (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7]; } @@ -128,7 +128,7 @@ inline int ReadWriteUtil::get_vlong(uint8_t* buf, int64_t* vlong) { } inline int ReadWriteUtil::get_vlong(uint8_t* buf, int64_t offset, int64_t* vlong) { - int8_t firstbyte = (int8_t) buf[0 + offset]; + int8_t firstbyte = (int8_t)buf[0 + offset]; int len = decode_vint_size(firstbyte); @@ -148,22 +148,20 @@ inline int ReadWriteUtil::get_vlong(uint8_t* buf, int64_t offset, int64_t* vlong } if (is_negative_vint(firstbyte)) { - *vlong = *vlong ^ ((int64_t) - 1); + *vlong = *vlong ^ ((int64_t)-1); } return len; } -inline bool ReadWriteUtil::read_zint(uint8_t** buf, int* buf_len, int32_t* val, - Status* status) { +inline bool ReadWriteUtil::read_zint(uint8_t** buf, int* buf_len, int32_t* val, Status* status) { int64_t zlong; RETURN_IF_FALSE(read_zlong(buf, buf_len, &zlong, status)); *val = static_cast(zlong); return true; } -inline bool ReadWriteUtil::read_zlong(uint8_t** buf, int* buf_len, int64_t* val, - Status* status) { +inline bool ReadWriteUtil::read_zlong(uint8_t** buf, int* buf_len, int64_t* val, Status* status) { uint64_t zlong = 0; int shift = 0; bool more; @@ -204,8 +202,7 @@ inline bool ReadWriteUtil::read(uint8_t** buf, int* buf_len, T* val, Status* sta return true; } -inline bool ReadWriteUtil::skip_bytes(uint8_t** buf, int* buf_len, int num_bytes, - Status* status) { +inline bool ReadWriteUtil::skip_bytes(uint8_t** buf, int* buf_len, int num_bytes, Status* status) { DCHECK_GE(*buf_len, 0); if (UNLIKELY(num_bytes > *buf_len)) { @@ -234,5 +231,5 @@ 
inline int ReadWriteUtil::decode_vint_size(int8_t byte) { return -111 - byte; } -} +} // namespace doris #endif diff --git a/be/src/exec/repeat_node.cpp b/be/src/exec/repeat_node.cpp index 7997770d720a9a..a622e293313022 100644 --- a/be/src/exec/repeat_node.cpp +++ b/be/src/exec/repeat_node.cpp @@ -18,31 +18,28 @@ #include "exec/repeat_node.h" #include "exprs/expr.h" +#include "gutil/strings/join.h" #include "runtime/raw_value.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" #include "util/runtime_profile.h" -#include "gutil/strings/join.h" namespace doris { -RepeatNode::RepeatNode(ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), - _slot_id_set_list(tnode.repeat_node.slot_id_set_list), - _all_slot_ids(tnode.repeat_node.all_slot_ids), - _repeat_id_list(tnode.repeat_node.repeat_id_list), - _grouping_list(tnode.repeat_node.grouping_list), - _output_tuple_id(tnode.repeat_node.output_tuple_id), - _tuple_desc(nullptr), - _child_row_batch(nullptr), - _child_eos(false), - _repeat_id_idx(0), - _runtime_state(nullptr) { -} - -RepeatNode::~RepeatNode() { -} +RepeatNode::RepeatNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), + _slot_id_set_list(tnode.repeat_node.slot_id_set_list), + _all_slot_ids(tnode.repeat_node.all_slot_ids), + _repeat_id_list(tnode.repeat_node.repeat_id_list), + _grouping_list(tnode.repeat_node.grouping_list), + _output_tuple_id(tnode.repeat_node.output_tuple_id), + _tuple_desc(nullptr), + _child_row_batch(nullptr), + _child_eos(false), + _repeat_id_idx(0), + _runtime_state(nullptr) {} + +RepeatNode::~RepeatNode() {} Status RepeatNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); @@ -70,16 +67,17 @@ Status RepeatNode::open(RuntimeState* state) { * e.g. 
_repeat_id_list = [0, 3, 1, 2], _repeat_id_idx = 2, _grouping_list [[0, 3, 1, 2], [0, 1, 1, 0]], * row_batch tuple 0 ['a', 'b', 1] -> [['a', null, 1] tuple 1 [1, 1]] */ -Status RepeatNode::get_repeated_batch( - RowBatch* child_row_batch, int repeat_id_idx, RowBatch* row_batch) { - +Status RepeatNode::get_repeated_batch(RowBatch* child_row_batch, int repeat_id_idx, + RowBatch* row_batch) { DCHECK(child_row_batch != nullptr); DCHECK_EQ(row_batch->num_rows(), 0); // Fill all slots according to child MemPool* tuple_pool = row_batch->tuple_data_pool(); - const std::vector& src_tuple_descs = child_row_batch->row_desc().tuple_descriptors(); - const std::vector& dst_tuple_descs = row_batch->row_desc().tuple_descriptors(); + const std::vector& src_tuple_descs = + child_row_batch->row_desc().tuple_descriptors(); + const std::vector& dst_tuple_descs = + row_batch->row_desc().tuple_descriptors(); std::vector dst_tuples(src_tuple_descs.size(), nullptr); for (int i = 0; i < child_row_batch->num_rows(); ++i) { int row_idx = row_batch->add_row(); @@ -88,7 +86,8 @@ Status RepeatNode::get_repeated_batch( auto src_it = src_tuple_descs.begin(); auto dst_it = dst_tuple_descs.begin(); - for (int j = 0; src_it != src_tuple_descs.end() && dst_it != dst_tuple_descs.end(); ++src_it, ++dst_it, ++j) { + for (int j = 0; src_it != src_tuple_descs.end() && dst_it != dst_tuple_descs.end(); + ++src_it, ++dst_it, ++j) { Tuple* src_tuple = src_row->get_tuple(j); if (src_tuple == NULL) { continue; @@ -126,32 +125,33 @@ Status RepeatNode::get_repeated_batch( } row_batch->commit_last_row(); } - Tuple *tuple = nullptr; + Tuple* tuple = nullptr; // Fill grouping ID to tuple for (int i = 0; i < child_row_batch->num_rows(); ++i) { int row_idx = i; - TupleRow *row = row_batch->get_row(row_idx); + TupleRow* row = row_batch->get_row(row_idx); if (tuple == nullptr) { int size = row_batch->capacity() * _tuple_desc->byte_size(); - void *tuple_buffer = tuple_pool->allocate(size); + void* tuple_buffer = 
tuple_pool->allocate(size); if (tuple_buffer == nullptr) { return Status::InternalError("Allocate memory for row batch failed."); } - tuple = reinterpret_cast(tuple_buffer); + tuple = reinterpret_cast(tuple_buffer); } else { - char *new_tuple = reinterpret_cast(tuple); + char* new_tuple = reinterpret_cast(tuple); new_tuple += _tuple_desc->byte_size(); - tuple = reinterpret_cast(new_tuple); + tuple = reinterpret_cast(new_tuple); } row->set_tuple(src_tuple_descs.size(), tuple); memset(tuple, 0, _tuple_desc->num_null_bytes()); - for(size_t slot_idx = 0; slot_idx < _grouping_list.size(); ++slot_idx) { + for (size_t slot_idx = 0; slot_idx < _grouping_list.size(); ++slot_idx) { int64_t val = _grouping_list[slot_idx][repeat_id_idx]; - DCHECK_LT(slot_idx, _tuple_desc->slots().size()) << "TupleDescriptor: " << _tuple_desc->debug_string(); - const SlotDescriptor *slot_desc = _tuple_desc->slots()[slot_idx]; + DCHECK_LT(slot_idx, _tuple_desc->slots().size()) + << "TupleDescriptor: " << _tuple_desc->debug_string(); + const SlotDescriptor* slot_desc = _tuple_desc->slots()[slot_idx]; tuple->set_not_null(slot_desc->null_indicator_offset()); RawValue::write(&val, tuple, slot_desc, tuple_pool); } @@ -175,7 +175,7 @@ Status RepeatNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) } _child_row_batch.reset( - new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); + new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); RETURN_IF_ERROR(child(0)->get_next(state, _child_row_batch.get(), &_child_eos)); if (_child_row_batch->num_rows() <= 0) { @@ -213,7 +213,7 @@ void RepeatNode::debug_string(int indentation_level, std::stringstream* out) con *out << "repeat pattern: [" << JoinElements(_repeat_id_list, ",") << "]\n"; *out << "add " << _grouping_list.size() << " columns. 
\n"; *out << "added column values: "; - for (const std::vector &v : _grouping_list ){ + for (const std::vector& v : _grouping_list) { *out << "[" << JoinElements(v, ",") << "] "; } *out << "\n"; @@ -221,4 +221,4 @@ void RepeatNode::debug_string(int indentation_level, std::stringstream* out) con *out << ")"; } -} \ No newline at end of file +} // namespace doris \ No newline at end of file diff --git a/be/src/exec/repeat_node.h b/be/src/exec/repeat_node.h index 7e70acb3e84d51..01335d2233e159 100644 --- a/be/src/exec/repeat_node.h +++ b/be/src/exec/repeat_node.h @@ -60,4 +60,4 @@ class RepeatNode : public ExecNode { RuntimeState* _runtime_state; }; -} +} // namespace doris diff --git a/be/src/exec/row_batch_list.h b/be/src/exec/row_batch_list.h index 1fb2597e2c1c5b..706d813897c4d6 100644 --- a/be/src/exec/row_batch_list.h +++ b/be/src/exec/row_batch_list.h @@ -18,8 +18,8 @@ #ifndef DORIS_BE_SRC_QUERY_EXEC_ROW_BATCH_LIST_H #define DORIS_BE_SRC_QUERY_EXEC_ROW_BATCH_LIST_H -#include #include +#include #include "common/logging.h" #include "runtime/row_batch.h" @@ -35,20 +35,18 @@ class MemPool; // iterating over the TupleRows. class RowBatchList { public: - RowBatchList() : _total_num_rows(0) { } - virtual ~RowBatchList() { } + RowBatchList() : _total_num_rows(0) {} + virtual ~RowBatchList() {} // A simple iterator used to scan over all the rows stored in the list. class TupleRowIterator { public: // Dummy constructor - TupleRowIterator() : _list(NULL), _row_idx(0) { } - virtual ~TupleRowIterator() { } + TupleRowIterator() : _list(NULL), _row_idx(0) {} + virtual ~TupleRowIterator() {} // Returns true if this iterator is at the end, i.e. get_row() cannot be called. - bool at_end() { - return _batch_it == _list->_row_batches.end(); - } + bool at_end() { return _batch_it == _list->_row_batches.end(); } // Returns the current row. Callers must check the iterator is not at_end() before // calling get_row(). 
@@ -73,10 +71,7 @@ class RowBatchList { friend class RowBatchList; TupleRowIterator(RowBatchList* list) - : _list(list), - _batch_it(list->_row_batches.begin()), - _row_idx(0) { - } + : _list(list), _batch_it(list->_row_batches.begin()), _row_idx(0) {} RowBatchList* _list; std::vector::iterator _batch_it; @@ -117,14 +112,10 @@ class RowBatchList { } // Returns the total number of rows in all row batches. - int64_t total_num_rows() { - return _total_num_rows; - } + int64_t total_num_rows() { return _total_num_rows; } // Returns a new iterator over all the tuple rows. - TupleRowIterator iterator() { - return TupleRowIterator(this); - } + TupleRowIterator iterator() { return TupleRowIterator(this); } private: friend class TupleRowIterator; @@ -135,7 +126,6 @@ class RowBatchList { int64_t _total_num_rows; }; -} +} // namespace doris #endif - diff --git a/be/src/exec/scan_node.cpp b/be/src/exec/scan_node.cpp index f89b2066117b78..f355fe58de5739 100644 --- a/be/src/exec/scan_node.cpp +++ b/be/src/exec/scan_node.cpp @@ -29,19 +29,17 @@ const std::string ScanNode::_s_num_disks_accessed_counter = "NumDiskAccess"; Status ScanNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); - _bytes_read_counter = - ADD_COUNTER(runtime_profile(), _s_bytes_read_counter, TUnit::BYTES); + _bytes_read_counter = ADD_COUNTER(runtime_profile(), _s_bytes_read_counter, TUnit::BYTES); //TODO: The _rows_read_counter == RowsReturned counter in exec node, there is no need to keep both of them - _rows_read_counter = - ADD_COUNTER(runtime_profile(), _s_rows_read_counter, TUnit::UNIT); + _rows_read_counter = ADD_COUNTER(runtime_profile(), _s_rows_read_counter, TUnit::UNIT); #ifndef BE_TEST - _total_throughput_counter = runtime_profile()->add_rate_counter( - _s_total_throughput_counter, _bytes_read_counter); + _total_throughput_counter = + runtime_profile()->add_rate_counter(_s_total_throughput_counter, _bytes_read_counter); #endif _num_disks_accessed_counter = - 
ADD_COUNTER(runtime_profile(), _s_num_disks_accessed_counter, TUnit::UNIT); + ADD_COUNTER(runtime_profile(), _s_num_disks_accessed_counter, TUnit::UNIT); return Status::OK(); } -} +} // namespace doris diff --git a/be/src/exec/scan_node.h b/be/src/exec/scan_node.h index cf42357c0a4492..2ce04226bad7c3 100644 --- a/be/src/exec/scan_node.h +++ b/be/src/exec/scan_node.h @@ -19,9 +19,10 @@ #define DORIS_BE_SRC_QUERY_EXEC_SCAN_NODE_H #include + #include "exec/exec_node.h" -#include "util/runtime_profile.h" #include "gen_cpp/PaloInternalService_types.h" +#include "util/runtime_profile.h" namespace doris { @@ -67,8 +68,8 @@ class TScanRange; class ScanNode : public ExecNode { public: ScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs) {} - virtual ~ScanNode() { } + : ExecNode(pool, tnode, descs) {} + virtual ~ScanNode() {} // Set up counters virtual Status prepare(RuntimeState* state); @@ -77,19 +78,11 @@ class ScanNode : public ExecNode { // called after prepare() virtual Status set_scan_ranges(const std::vector& scan_ranges) = 0; - virtual bool is_scan_node() const { - return true; - } + virtual bool is_scan_node() const { return true; } - RuntimeProfile::Counter* bytes_read_counter() const { - return _bytes_read_counter; - } - RuntimeProfile::Counter* rows_read_counter() const { - return _rows_read_counter; - } - RuntimeProfile::Counter* total_throughput_counter() const { - return _total_throughput_counter; - } + RuntimeProfile::Counter* bytes_read_counter() const { return _bytes_read_counter; } + RuntimeProfile::Counter* rows_read_counter() const { return _rows_read_counter; } + RuntimeProfile::Counter* total_throughput_counter() const { return _total_throughput_counter; } // names of ScanNode common counters static const std::string _s_bytes_read_counter; @@ -106,6 +99,6 @@ class ScanNode : public ExecNode { RuntimeProfile::Counter* _num_disks_accessed_counter; }; -} +} // namespace doris #endif diff --git 
a/be/src/exec/scanner_ir.cpp b/be/src/exec/scanner_ir.cpp index e88ffd73eb0ff8..6aea5f6bfe7762 100644 --- a/be/src/exec/scanner_ir.cpp +++ b/be/src/exec/scanner_ir.cpp @@ -16,48 +16,39 @@ // under the License. #ifdef IR_COMPILE -extern "C" -bool ir_string_to_bool(const char* s, int len, StringParser::ParseResult* result) { +extern "C" bool ir_string_to_bool(const char* s, int len, StringParser::ParseResult* result) { return StringParser::string_to_bool(s, len, result); } -extern "C" -int8_t ir_string_to_int8(const char* s, int len, StringParser::ParseResult* result) { +extern "C" int8_t ir_string_to_int8(const char* s, int len, StringParser::ParseResult* result) { return StringParser::string_to_int(s, len, result); } -extern "C" -int16_t ir_string_to_int16(const char* s, int len, StringParser::ParseResult* result) { +extern "C" int16_t ir_string_to_int16(const char* s, int len, StringParser::ParseResult* result) { return StringParser::string_to_int(s, len, result); } -extern "C" -int32_t ir_string_to_int32(const char* s, int len, StringParser::ParseResult* result) { +extern "C" int32_t ir_string_to_int32(const char* s, int len, StringParser::ParseResult* result) { return StringParser::string_to_int(s, len, result); } -extern "C" -int64_t ir_string_to_int64(const char* s, int len, StringParser::ParseResult* result) { +extern "C" int64_t ir_string_to_int64(const char* s, int len, StringParser::ParseResult* result) { return StringParser::StringToInt(s, len, result); } -extern "C" -float ir_string_to_float(const char* s, int len, StringParser::ParseResult* result) { +extern "C" float ir_string_to_float(const char* s, int len, StringParser::ParseResult* result) { return StringParser::StringToFloat(s, len, result); } -extern "C" -double ir_string_to_double(const char* s, int len, StringParser::ParseResult* result) { +extern "C" double ir_string_to_double(const char* s, int len, StringParser::ParseResult* result) { return StringParser::StringToFloat(s, len, result); } 
-extern "C" -bool ir_is_null_string(const char* data, int len) { +extern "C" bool ir_is_null_string(const char* data, int len) { return data == NULL || (len == 2 && data[0] == '\\' && data[1] == 'N'); } -extern "C" -bool ir_generic_is_null_string(const char* s, int slen, const char* n, int nlen) { +extern "C" bool ir_generic_is_null_string(const char* s, int slen, const char* n, int nlen) { return s == NULL || (slen == nlen && StringCompare(s, slen, n, nlen, slen) == 0); } #endif diff --git a/be/src/exec/schema_scan_node.cpp b/be/src/exec/schema_scan_node.cpp index 25908338abcfbe..e28f50c6cbd995 100644 --- a/be/src/exec/schema_scan_node.cpp +++ b/be/src/exec/schema_scan_node.cpp @@ -20,33 +20,31 @@ #include #include -#include "exec/text_converter.hpp" #include "exec/schema_scanner/schema_helper.h" +#include "exec/text_converter.hpp" #include "gen_cpp/PlanNodes_types.h" #include "gen_cpp/Types_types.h" -#include "runtime/runtime_state.h" #include "runtime/row_batch.h" +#include "runtime/runtime_state.h" #include "runtime/string_value.h" #include "runtime/tuple_row.h" #include "util/runtime_profile.h" namespace doris { -SchemaScanNode::SchemaScanNode(ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs) - : ScanNode(pool, tnode, descs), - _is_init(false), - _table_name(tnode.schema_scan_node.table_name), - _tuple_id(tnode.schema_scan_node.tuple_id), - _src_tuple_desc(NULL), - _dest_tuple_desc(NULL), - _tuple_idx(0), - _slot_num(0), - _tuple_pool(NULL), - _schema_scanner(NULL), - _src_tuple(NULL), - _dest_tuple(NULL) { -} +SchemaScanNode::SchemaScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ScanNode(pool, tnode, descs), + _is_init(false), + _table_name(tnode.schema_scan_node.table_name), + _tuple_id(tnode.schema_scan_node.tuple_id), + _src_tuple_desc(NULL), + _dest_tuple_desc(NULL), + _tuple_idx(0), + _slot_num(0), + _tuple_pool(NULL), + _schema_scanner(NULL), + _src_tuple(NULL), + _dest_tuple(NULL) {} 
SchemaScanNode::~SchemaScanNode() { delete[] reinterpret_cast(_src_tuple); @@ -68,7 +66,8 @@ Status SchemaScanNode::init(const TPlanNode& tnode, RuntimeState* state) { } if (tnode.schema_scan_node.__isset.current_user_ident) { - _scanner_param.current_user_ident = _pool->add(new TUserIdentity(tnode.schema_scan_node.current_user_ident)); + _scanner_param.current_user_ident = + _pool->add(new TUserIdentity(tnode.schema_scan_node.current_user_ident)); } else { if (tnode.schema_scan_node.__isset.user) { _scanner_param.user = _pool->add(new std::string(tnode.schema_scan_node.user)); @@ -103,7 +102,7 @@ Status SchemaScanNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ScanNode::prepare(state)); // new one mem pool - _tuple_pool.reset(new(std::nothrow) MemPool(mem_tracker().get())); + _tuple_pool.reset(new (std::nothrow) MemPool(mem_tracker().get())); if (NULL == _tuple_pool.get()) { return Status::InternalError("Allocate MemPool failed."); @@ -119,7 +118,7 @@ Status SchemaScanNode::prepare(RuntimeState* state) { _slot_num = _dest_tuple_desc->slots().size(); // get src tuple desc const SchemaTableDescriptor* schema_table = - static_cast(_dest_tuple_desc->table_desc()); + static_cast(_dest_tuple_desc->table_desc()); if (NULL == schema_table) { return Status::InternalError("Failed to get schema table descriptor."); @@ -140,7 +139,7 @@ Status SchemaScanNode::prepare(RuntimeState* state) { return Status::InternalError("failed to get src schema tuple desc."); } - _src_tuple = reinterpret_cast(new(std::nothrow) char[_src_tuple_desc->byte_size()]); + _src_tuple = reinterpret_cast(new (std::nothrow) char[_src_tuple_desc->byte_size()]); if (NULL == _src_tuple) { return Status::InternalError("new src tuple failed."); @@ -160,14 +159,14 @@ Status SchemaScanNode::prepare(RuntimeState* state) { int j = 0; for (; j < _src_tuple_desc->slots().size(); ++j) { if (boost::iequals(_dest_tuple_desc->slots()[i]->col_name(), - _src_tuple_desc->slots()[j]->col_name())) { + 
_src_tuple_desc->slots()[j]->col_name())) { break; } } if (j >= _src_tuple_desc->slots().size()) { LOG(WARNING) << "no match column for this column(" - << _dest_tuple_desc->slots()[i]->col_name() << ")"; + << _dest_tuple_desc->slots()[i]->col_name() << ")"; return Status::InternalError("no match column for this column."); } @@ -231,8 +230,7 @@ void SchemaScanNode::copy_one_row() { } } -Status SchemaScanNode::get_next(RuntimeState* state, RowBatch* row_batch, - bool* eos) { +Status SchemaScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { if (!_is_init) { return Status::InternalError("GetNext before Init."); } @@ -273,8 +271,7 @@ Status SchemaScanNode::get_next(RuntimeState* state, RowBatch* row_batch, return Status::OK(); } - RETURN_IF_ERROR(_schema_scanner->get_next_row(_src_tuple, - _tuple_pool.get(), &scanner_eos)); + RETURN_IF_ERROR(_schema_scanner->get_next_row(_src_tuple, _tuple_pool.get(), &scanner_eos)); if (scanner_eos) { row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), false); @@ -327,6 +324,6 @@ Status SchemaScanNode::set_scan_ranges(const std::vector& scan return Status::OK(); } -} +} // namespace doris /* vim: set ts=4 sw=4 sts=4 tw=100 : */ diff --git a/be/src/exec/schema_scan_node.h b/be/src/exec/schema_scan_node.h index add50492568aa3..3bbc8043ce8d8f 100644 --- a/be/src/exec/schema_scan_node.h +++ b/be/src/exec/schema_scan_node.h @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef DORIS_BE_SRC_QUERY_EXEC_SCHEMA_SCAN_NODE_H -#define DORIS_BE_SRC_QUERY_EXEC_SCHEMA_SCAN_NODE_H +#ifndef DORIS_BE_SRC_QUERY_EXEC_SCHEMA_SCAN_NODE_H +#define DORIS_BE_SRC_QUERY_EXEC_SCHEMA_SCAN_NODE_H #include @@ -90,8 +90,8 @@ class SchemaScanNode : public ScanNode { std::vector _index_map; }; -} +} // namespace doris -#endif //__DORIS_MYSQLSCANNODE_H +#endif //__DORIS_MYSQLSCANNODE_H /* vim: set ts=4 sw=4 sts=4 tw=100 noet: */ diff --git a/be/src/exec/schema_scanner.cpp b/be/src/exec/schema_scanner.cpp index f9d4e833738b43..3fc048cc469162 100644 --- a/be/src/exec/schema_scanner.cpp +++ b/be/src/exec/schema_scanner.cpp @@ -16,34 +16,31 @@ // under the License. #include "exec/schema_scanner.h" -#include "exec/schema_scanner/schema_tables_scanner.h" -#include "exec/schema_scanner/schema_schemata_scanner.h" -#include "exec/schema_scanner/schema_dummy_scanner.h" -#include "exec/schema_scanner/schema_columns_scanner.h" -#include "exec/schema_scanner/schema_variables_scanner.h" + #include "exec/schema_scanner/schema_charsets_scanner.h" #include "exec/schema_scanner/schema_collations_scanner.h" -#include "exec/schema_scanner/schema_views_scanner.h" -#include "exec/schema_scanner/schema_table_privileges_scanner.h" +#include "exec/schema_scanner/schema_columns_scanner.h" +#include "exec/schema_scanner/schema_dummy_scanner.h" #include "exec/schema_scanner/schema_schema_privileges_scanner.h" +#include "exec/schema_scanner/schema_schemata_scanner.h" +#include "exec/schema_scanner/schema_table_privileges_scanner.h" +#include "exec/schema_scanner/schema_tables_scanner.h" #include "exec/schema_scanner/schema_user_privileges_scanner.h" - - +#include "exec/schema_scanner/schema_variables_scanner.h" +#include "exec/schema_scanner/schema_views_scanner.h" namespace doris { DorisServer* SchemaScanner::_s_doris_server; SchemaScanner::SchemaScanner(ColumnDesc* columns, int column_num) - : _is_init(false), - _param(NULL), - _columns(columns), - _column_num(column_num), - 
_tuple_desc(NULL) { -} + : _is_init(false), + _param(NULL), + _columns(columns), + _column_num(column_num), + _tuple_desc(NULL) {} -SchemaScanner::~SchemaScanner() { -} +SchemaScanner::~SchemaScanner() {} Status SchemaScanner::start(RuntimeState* state) { if (!_is_init) { @@ -85,30 +82,30 @@ Status SchemaScanner::init(SchemaScannerParam* param, ObjectPool* pool) { SchemaScanner* SchemaScanner::create(TSchemaTableType::type type) { switch (type) { case TSchemaTableType::SCH_TABLES: - return new(std::nothrow) SchemaTablesScanner(); + return new (std::nothrow) SchemaTablesScanner(); case TSchemaTableType::SCH_SCHEMATA: - return new(std::nothrow) SchemaSchemataScanner(); + return new (std::nothrow) SchemaSchemataScanner(); case TSchemaTableType::SCH_COLUMNS: - return new(std::nothrow) SchemaColumnsScanner(); + return new (std::nothrow) SchemaColumnsScanner(); case TSchemaTableType::SCH_CHARSETS: - return new(std::nothrow) SchemaCharsetsScanner(); + return new (std::nothrow) SchemaCharsetsScanner(); case TSchemaTableType::SCH_COLLATIONS: - return new(std::nothrow) SchemaCollationsScanner(); + return new (std::nothrow) SchemaCollationsScanner(); case TSchemaTableType::SCH_GLOBAL_VARIABLES: - return new(std::nothrow) SchemaVariablesScanner(TVarType::GLOBAL); + return new (std::nothrow) SchemaVariablesScanner(TVarType::GLOBAL); case TSchemaTableType::SCH_SESSION_VARIABLES: case TSchemaTableType::SCH_VARIABLES: - return new(std::nothrow) SchemaVariablesScanner(TVarType::SESSION); + return new (std::nothrow) SchemaVariablesScanner(TVarType::SESSION); case TSchemaTableType::SCH_VIEWS: - return new(std::nothrow) SchemaViewsScanner(); + return new (std::nothrow) SchemaViewsScanner(); case TSchemaTableType::SCH_TABLE_PRIVILEGES: - return new(std::nothrow) SchemaTablePrivilegesScanner(); + return new (std::nothrow) SchemaTablePrivilegesScanner(); case TSchemaTableType::SCH_SCHEMA_PRIVILEGES: - return new(std::nothrow) SchemaSchemaPrivilegesScanner(); + return new (std::nothrow) 
SchemaSchemaPrivilegesScanner(); case TSchemaTableType::SCH_USER_PRIVILEGES: - return new(std::nothrow) SchemaUserPrivilegesScanner(); + return new (std::nothrow) SchemaUserPrivilegesScanner(); default: - return new(std::nothrow) SchemaDummyScanner(); + return new (std::nothrow) SchemaDummyScanner(); break; } } @@ -150,7 +147,7 @@ Status SchemaScanner::create_tuple_desc(ObjectPool* pool) { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); - SlotDescriptor* slot = pool->add(new(std::nothrow) SlotDescriptor(t_slot_desc)); + SlotDescriptor* slot = pool->add(new (std::nothrow) SlotDescriptor(t_slot_desc)); if (NULL == slot) { return Status::InternalError("no memory for _tuple_desc."); @@ -163,7 +160,7 @@ Status SchemaScanner::create_tuple_desc(ObjectPool* pool) { TTupleDescriptor t_tuple_desc; t_tuple_desc.__set_byteSize(offset); t_tuple_desc.__set_numNullBytes((null_byte * 8 + null_bit + 7) / 8); - _tuple_desc = pool->add(new(std::nothrow) TupleDescriptor(t_tuple_desc)); + _tuple_desc = pool->add(new (std::nothrow) TupleDescriptor(t_tuple_desc)); if (NULL == _tuple_desc) { return Status::InternalError("no memory for _tuple_desc."); @@ -176,4 +173,4 @@ Status SchemaScanner::create_tuple_desc(ObjectPool* pool) { return Status::OK(); } -} +} // namespace doris diff --git a/be/src/exec/schema_scanner.h b/be/src/exec/schema_scanner.h index 8ee2004e9b04a1..74d0a92b69cf7d 100644 --- a/be/src/exec/schema_scanner.h +++ b/be/src/exec/schema_scanner.h @@ -20,12 +20,12 @@ #include -#include "common/status.h" #include "common/object_pool.h" +#include "common/status.h" #include "gen_cpp/Descriptors_types.h" #include "gen_cpp/Types_types.h" -#include "runtime/tuple.h" #include "runtime/mem_pool.h" +#include "runtime/tuple.h" namespace doris { @@ -38,15 +38,22 @@ struct SchemaScannerParam { const std::string* db; const std::string* table; const std::string* wild; - const std::string* user; // deprecated - const std::string* user_ip; // deprecated + const 
std::string* user; // deprecated + const std::string* user_ip; // deprecated const TUserIdentity* current_user_ident; // to replace the user and user ip - const std::string* ip; // frontend ip - int32_t port; // frontend thrift port + const std::string* ip; // frontend ip + int32_t port; // frontend thrift port int64_t thread_id; SchemaScannerParam() - : db(NULL), table(NULL), wild(NULL), user(NULL), user_ip(NULL), current_user_ident(NULL), ip(NULL), port(0) { } + : db(NULL), + table(NULL), + wild(NULL), + user(NULL), + user_ip(NULL), + current_user_ident(NULL), + ip(NULL), + port(0) {} }; // virtual scanner for all schema table @@ -69,13 +76,9 @@ class SchemaScanner { // factory function static SchemaScanner* create(TSchemaTableType::type type); - const TupleDescriptor* tuple_desc() const { - return _tuple_desc; - } + const TupleDescriptor* tuple_desc() const { return _tuple_desc; } - static void set_doris_server(DorisServer* doris_server) { - _s_doris_server = doris_server; - } + static void set_doris_server(DorisServer* doris_server) { _s_doris_server = doris_server; } protected: Status create_tuple_desc(ObjectPool* pool); @@ -90,9 +93,8 @@ class SchemaScanner { TupleDescriptor* _tuple_desc; static DorisServer* _s_doris_server; - }; -} +} // namespace doris #endif diff --git a/be/src/exec/schema_scanner/schema_charsets_scanner.cpp b/be/src/exec/schema_scanner/schema_charsets_scanner.cpp index ed015273c36779..757a0504d8d98a 100644 --- a/be/src/exec/schema_scanner/schema_charsets_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_charsets_scanner.cpp @@ -16,39 +16,38 @@ // under the License. 
#include "exec/schema_scanner/schema_charsets_scanner.h" + #include "runtime/primitive_type.h" #include "runtime/string_value.h" namespace doris { SchemaScanner::ColumnDesc SchemaCharsetsScanner::_s_css_columns[] = { - // name, type, size - { "CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - { "DEFAULT_COLLATE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - { "DESCRIPTION", TYPE_VARCHAR, sizeof(StringValue), false}, - { "MAXLEN", TYPE_BIGINT, sizeof(int64_t), false}, + // name, type, size + {"CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"DEFAULT_COLLATE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"DESCRIPTION", TYPE_VARCHAR, sizeof(StringValue), false}, + {"MAXLEN", TYPE_BIGINT, sizeof(int64_t), false}, }; SchemaCharsetsScanner::CharsetStruct SchemaCharsetsScanner::_s_charsets[] = { - { "utf8", "utf8_general_ci", "UTF-8 Unicode", 3 }, - { NULL, NULL, 0 }, + {"utf8", "utf8_general_ci", "UTF-8 Unicode", 3}, + {NULL, NULL, 0}, }; -SchemaCharsetsScanner::SchemaCharsetsScanner() : - SchemaScanner(_s_css_columns, sizeof(_s_css_columns) / sizeof(SchemaScanner::ColumnDesc)), - _index(0) { -} +SchemaCharsetsScanner::SchemaCharsetsScanner() + : SchemaScanner(_s_css_columns, sizeof(_s_css_columns) / sizeof(SchemaScanner::ColumnDesc)), + _index(0) {} -SchemaCharsetsScanner::~SchemaCharsetsScanner() { -} +SchemaCharsetsScanner::~SchemaCharsetsScanner() {} -Status SchemaCharsetsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { +Status SchemaCharsetsScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // variables names { - void *slot = tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset()); - StringValue *str_slot = reinterpret_cast(slot); + void* slot = tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset()); + StringValue* str_slot = reinterpret_cast(slot); int len = strlen(_s_charsets[_index].charset); - str_slot->ptr = (char *)pool->allocate(len + 1); + str_slot->ptr = (char*)pool->allocate(len + 1); if (NULL == 
str_slot->ptr) { return Status::InternalError("No Memory."); } @@ -57,10 +56,10 @@ Status SchemaCharsetsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // DEFAULT_COLLATE_NAME { - void *slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); - StringValue *str_slot = reinterpret_cast(slot); + void* slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); + StringValue* str_slot = reinterpret_cast(slot); int len = strlen(_s_charsets[_index].default_collation); - str_slot->ptr = (char *)pool->allocate(len + 1); + str_slot->ptr = (char*)pool->allocate(len + 1); if (NULL == str_slot->ptr) { return Status::InternalError("No Memory."); } @@ -69,10 +68,10 @@ Status SchemaCharsetsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // DESCRIPTION { - void *slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); - StringValue *str_slot = reinterpret_cast(slot); + void* slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); + StringValue* str_slot = reinterpret_cast(slot); int len = strlen(_s_charsets[_index].description); - str_slot->ptr = (char *)pool->allocate(len + 1); + str_slot->ptr = (char*)pool->allocate(len + 1); if (NULL == str_slot->ptr) { return Status::InternalError("No Memory."); } @@ -81,14 +80,14 @@ Status SchemaCharsetsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // maxlen { - void *slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); *(int64_t*)slot = _s_charsets[_index].maxlen; } _index++; return Status::OK(); } -Status SchemaCharsetsScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eos) { +Status SchemaCharsetsScanner::get_next_row(Tuple* tuple, MemPool* pool, bool* eos) { if (!_is_init) { return Status::InternalError("call this before initial."); } @@ -103,5 +102,4 @@ Status SchemaCharsetsScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eo return fill_one_row(tuple, pool); } -} - +} // namespace doris 
diff --git a/be/src/exec/schema_scanner/schema_charsets_scanner.h b/be/src/exec/schema_scanner/schema_charsets_scanner.h index c8cf07a35d4c3c..e78cfe37081128 100644 --- a/be/src/exec/schema_scanner/schema_charsets_scanner.h +++ b/be/src/exec/schema_scanner/schema_charsets_scanner.h @@ -19,6 +19,7 @@ #define DORIS_BE_SRC_QUERY_EXEC_SCHEMA_SCANNER_SCHEMA_CHARSETS_SCANNER_H #include + #include "exec/schema_scanner.h" #include "gen_cpp/FrontendService_types.h" @@ -29,23 +30,23 @@ class SchemaCharsetsScanner : public SchemaScanner { SchemaCharsetsScanner(); virtual ~SchemaCharsetsScanner(); - virtual Status get_next_row(Tuple *tuple, MemPool *pool, bool *eos); + virtual Status get_next_row(Tuple* tuple, MemPool* pool, bool* eos); private: struct CharsetStruct { - const char *charset; - const char *default_collation; - const char *description; + const char* charset; + const char* default_collation; + const char* description; int64_t maxlen; }; - Status fill_one_row(Tuple *tuple, MemPool *pool); + Status fill_one_row(Tuple* tuple, MemPool* pool); int _index; static SchemaScanner::ColumnDesc _s_css_columns[]; static CharsetStruct _s_charsets[]; }; -} +} // namespace doris #endif diff --git a/be/src/exec/schema_scanner/schema_collations_scanner.cpp b/be/src/exec/schema_scanner/schema_collations_scanner.cpp index 5418edfc842645..cce723b8db52cb 100644 --- a/be/src/exec/schema_scanner/schema_collations_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_collations_scanner.cpp @@ -16,42 +16,41 @@ // under the License. 
#include "exec/schema_scanner/schema_collations_scanner.h" + #include "runtime/primitive_type.h" #include "runtime/string_value.h" namespace doris { SchemaScanner::ColumnDesc SchemaCollationsScanner::_s_cols_columns[] = { - // name, type, size - { "COLLATION_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - { "CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - { "ID", TYPE_BIGINT, sizeof(int64_t), false}, - { "IS_DEFAULT", TYPE_VARCHAR, sizeof(StringValue), false}, - { "IS_COMPILED", TYPE_VARCHAR, sizeof(StringValue), false}, - { "SORTLEN", TYPE_BIGINT, sizeof(int64_t), false}, + // name, type, size + {"COLLATION_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"ID", TYPE_BIGINT, sizeof(int64_t), false}, + {"IS_DEFAULT", TYPE_VARCHAR, sizeof(StringValue), false}, + {"IS_COMPILED", TYPE_VARCHAR, sizeof(StringValue), false}, + {"SORTLEN", TYPE_BIGINT, sizeof(int64_t), false}, }; SchemaCollationsScanner::CollationStruct SchemaCollationsScanner::_s_collations[] = { - { "utf8_general_ci", "utf8", 33, "Yes", "Yes", 1}, - { NULL, NULL, 0, NULL, NULL, 0 }, + {"utf8_general_ci", "utf8", 33, "Yes", "Yes", 1}, + {NULL, NULL, 0, NULL, NULL, 0}, }; -SchemaCollationsScanner::SchemaCollationsScanner() : - SchemaScanner(_s_cols_columns, - sizeof(_s_cols_columns) / sizeof(SchemaScanner::ColumnDesc)), - _index(0) { -} +SchemaCollationsScanner::SchemaCollationsScanner() + : SchemaScanner(_s_cols_columns, + sizeof(_s_cols_columns) / sizeof(SchemaScanner::ColumnDesc)), + _index(0) {} -SchemaCollationsScanner::~SchemaCollationsScanner() { -} +SchemaCollationsScanner::~SchemaCollationsScanner() {} -Status SchemaCollationsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { +Status SchemaCollationsScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // COLLATION_NAME { - void *slot = tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset()); - StringValue *str_slot = reinterpret_cast(slot); + void* 
slot = tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset()); + StringValue* str_slot = reinterpret_cast(slot); int len = strlen(_s_collations[_index].name); - str_slot->ptr = (char *)pool->allocate(len + 1); + str_slot->ptr = (char*)pool->allocate(len + 1); if (NULL == str_slot->ptr) { return Status::InternalError("No Memory."); } @@ -60,10 +59,10 @@ Status SchemaCollationsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // charset { - void *slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); - StringValue *str_slot = reinterpret_cast(slot); + void* slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); + StringValue* str_slot = reinterpret_cast(slot); int len = strlen(_s_collations[_index].charset); - str_slot->ptr = (char *)pool->allocate(len + 1); + str_slot->ptr = (char*)pool->allocate(len + 1); if (NULL == str_slot->ptr) { return Status::InternalError("No Memory."); } @@ -72,15 +71,15 @@ Status SchemaCollationsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // id { - void *slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); *(int64_t*)slot = _s_collations[_index].id; } // is_default { - void *slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); - StringValue *str_slot = reinterpret_cast(slot); + void* slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); + StringValue* str_slot = reinterpret_cast(slot); int len = strlen(_s_collations[_index].is_default); - str_slot->ptr = (char *)pool->allocate(len + 1); + str_slot->ptr = (char*)pool->allocate(len + 1); if (NULL == str_slot->ptr) { return Status::InternalError("No Memory."); } @@ -89,10 +88,10 @@ Status SchemaCollationsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // IS_COMPILED { - void *slot = tuple->get_slot(_tuple_desc->slots()[4]->tuple_offset()); - StringValue *str_slot = reinterpret_cast(slot); + void* slot = 
tuple->get_slot(_tuple_desc->slots()[4]->tuple_offset()); + StringValue* str_slot = reinterpret_cast(slot); int len = strlen(_s_collations[_index].is_compile); - str_slot->ptr = (char *)pool->allocate(len + 1); + str_slot->ptr = (char*)pool->allocate(len + 1); if (NULL == str_slot->ptr) { return Status::InternalError("No Memory."); } @@ -101,14 +100,14 @@ Status SchemaCollationsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // sortlen { - void *slot = tuple->get_slot(_tuple_desc->slots()[5]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[5]->tuple_offset()); *(int64_t*)slot = _s_collations[_index].sortlen; } _index++; return Status::OK(); } -Status SchemaCollationsScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eos) { +Status SchemaCollationsScanner::get_next_row(Tuple* tuple, MemPool* pool, bool* eos) { if (!_is_init) { return Status::InternalError("call this before initial."); } @@ -123,4 +122,4 @@ Status SchemaCollationsScanner::get_next_row(Tuple *tuple, MemPool *pool, bool * return fill_one_row(tuple, pool); } -} +} // namespace doris diff --git a/be/src/exec/schema_scanner/schema_collations_scanner.h b/be/src/exec/schema_scanner/schema_collations_scanner.h index a4a2f99672c999..a76d8b66e077d4 100644 --- a/be/src/exec/schema_scanner/schema_collations_scanner.h +++ b/be/src/exec/schema_scanner/schema_collations_scanner.h @@ -19,6 +19,7 @@ #define DORIS_BE_SRC_QUERY_EXEC_SCHEMA_SCANNER_SCHEMA_COLLATIONS_SCANNER_H #include + #include "exec/schema_scanner.h" #include "gen_cpp/FrontendService_types.h" @@ -29,25 +30,25 @@ class SchemaCollationsScanner : public SchemaScanner { SchemaCollationsScanner(); virtual ~SchemaCollationsScanner(); - virtual Status get_next_row(Tuple *tuple, MemPool *pool, bool *eos); + virtual Status get_next_row(Tuple* tuple, MemPool* pool, bool* eos); private: struct CollationStruct { - const char *name; - const char *charset; + const char* name; + const char* charset; int64_t id; - const char 
*is_default; - const char *is_compile; + const char* is_default; + const char* is_compile; int64_t sortlen; }; - Status fill_one_row(Tuple *tuple, MemPool *pool); + Status fill_one_row(Tuple* tuple, MemPool* pool); int _index; static SchemaScanner::ColumnDesc _s_cols_columns[]; static CollationStruct _s_collations[]; }; -} +} // namespace doris #endif diff --git a/be/src/exec/schema_scanner/schema_columns_scanner.cpp b/be/src/exec/schema_scanner/schema_columns_scanner.cpp index 416df14dc75eae..3bef284b865aa7 100644 --- a/be/src/exec/schema_scanner/schema_columns_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_columns_scanner.cpp @@ -18,52 +18,51 @@ #include "exec/schema_scanner/schema_columns_scanner.h" #include + +#include "exec/schema_scanner/schema_helper.h" +#include "runtime/datetime_value.h" #include "runtime/primitive_type.h" #include "runtime/string_value.h" -#include "runtime/datetime_value.h" -#include "exec/schema_scanner/schema_helper.h" namespace doris { SchemaScanner::ColumnDesc SchemaColumnsScanner::_s_col_columns[] = { - // name, type, size, is_null - { "TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true }, - { "TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false }, - { "TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false }, - { "COLUMN_NAME", TYPE_VARCHAR, sizeof(StringValue), false }, - { "ORDINAL_POSITION", TYPE_BIGINT, sizeof(int64_t), false }, - { "COLUMN_DEFAULT", TYPE_VARCHAR, sizeof(StringValue), true }, - { "IS_NULLABLE", TYPE_VARCHAR, sizeof(StringValue), false }, - { "DATA_TYPE", TYPE_VARCHAR, sizeof(StringValue), false }, - { "CHARACTER_MAXIMUM_LENGTH", TYPE_BIGINT, sizeof(int64_t), true }, - { "CHARACTER_OCTET_LENGTH", TYPE_BIGINT, sizeof(int64_t), true }, - { "NUMERIC_PRECISION", TYPE_BIGINT, sizeof(int64_t), true }, - { "NUMERIC_SCALE", TYPE_BIGINT, sizeof(int64_t), true }, - { "DATETIME_PRECISION", TYPE_BIGINT, sizeof(int64_t), true }, - { "CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringValue), true }, - { 
"COLLATION_NAME", TYPE_VARCHAR, sizeof(StringValue), true }, - { "COLUMN_TYPE", TYPE_VARCHAR, sizeof(StringValue), false }, - { "COLUMN_KEY", TYPE_VARCHAR, sizeof(StringValue), false }, - { "EXTRA", TYPE_VARCHAR, sizeof(StringValue), false }, - { "PRIVILEGES", TYPE_VARCHAR, sizeof(StringValue), false }, - { "COLUMN_COMMENT", TYPE_VARCHAR, sizeof(StringValue), false }, - { "COLUMN_SIZE", TYPE_BIGINT, sizeof(int64_t), true }, - { "DECIMAL_DIGITS", TYPE_BIGINT, sizeof(int64_t), true }, - { "GENERATION_EXPRESSION", TYPE_VARCHAR, sizeof(StringValue), true }, - { "SRS_ID", TYPE_BIGINT, sizeof(int64_t), true }, + // name, type, size, is_null + {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, + {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, + {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"COLUMN_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"ORDINAL_POSITION", TYPE_BIGINT, sizeof(int64_t), false}, + {"COLUMN_DEFAULT", TYPE_VARCHAR, sizeof(StringValue), true}, + {"IS_NULLABLE", TYPE_VARCHAR, sizeof(StringValue), false}, + {"DATA_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, + {"CHARACTER_MAXIMUM_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, + {"CHARACTER_OCTET_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, + {"NUMERIC_PRECISION", TYPE_BIGINT, sizeof(int64_t), true}, + {"NUMERIC_SCALE", TYPE_BIGINT, sizeof(int64_t), true}, + {"DATETIME_PRECISION", TYPE_BIGINT, sizeof(int64_t), true}, + {"CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringValue), true}, + {"COLLATION_NAME", TYPE_VARCHAR, sizeof(StringValue), true}, + {"COLUMN_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, + {"COLUMN_KEY", TYPE_VARCHAR, sizeof(StringValue), false}, + {"EXTRA", TYPE_VARCHAR, sizeof(StringValue), false}, + {"PRIVILEGES", TYPE_VARCHAR, sizeof(StringValue), false}, + {"COLUMN_COMMENT", TYPE_VARCHAR, sizeof(StringValue), false}, + {"COLUMN_SIZE", TYPE_BIGINT, sizeof(int64_t), true}, + {"DECIMAL_DIGITS", TYPE_BIGINT, sizeof(int64_t), true}, + 
{"GENERATION_EXPRESSION", TYPE_VARCHAR, sizeof(StringValue), true}, + {"SRS_ID", TYPE_BIGINT, sizeof(int64_t), true}, }; -SchemaColumnsScanner::SchemaColumnsScanner() : - SchemaScanner(_s_col_columns, sizeof(_s_col_columns) / sizeof(SchemaScanner::ColumnDesc)), - _db_index(0), - _table_index(0), - _column_index(0) { -} +SchemaColumnsScanner::SchemaColumnsScanner() + : SchemaScanner(_s_col_columns, sizeof(_s_col_columns) / sizeof(SchemaScanner::ColumnDesc)), + _db_index(0), + _table_index(0), + _column_index(0) {} -SchemaColumnsScanner::~SchemaColumnsScanner() { -} +SchemaColumnsScanner::~SchemaColumnsScanner() {} -Status SchemaColumnsScanner::start(RuntimeState *state) { +Status SchemaColumnsScanner::start(RuntimeState* state) { if (!_is_init) { return Status::InternalError("schema columns scanner not inited."); } @@ -82,10 +81,10 @@ Status SchemaColumnsScanner::start(RuntimeState *state) { db_params.__set_user_ip(*(_param->user_ip)); } } - + if (NULL != _param->ip && 0 != _param->port) { - RETURN_IF_ERROR(SchemaHelper::get_db_names(*(_param->ip), - _param->port, db_params, &_db_result)); + RETURN_IF_ERROR( + SchemaHelper::get_db_names(*(_param->ip), _param->port, db_params, &_db_result)); } else { return Status::InternalError("IP or port doesn't exists"); } @@ -94,174 +93,170 @@ Status SchemaColumnsScanner::start(RuntimeState *state) { } //For compatibility with mysql the result of DATA_TYPE in information_schema.columns -std::string SchemaColumnsScanner::to_mysql_data_type_string(TColumnDesc &desc) { +std::string SchemaColumnsScanner::to_mysql_data_type_string(TColumnDesc& desc) { switch (desc.columnType) { - case TPrimitiveType::BOOLEAN: - return "tinyint"; - case TPrimitiveType::TINYINT: - return "tinyint"; - case TPrimitiveType::SMALLINT: - return "smallint"; - case TPrimitiveType::INT: - return "int"; - case TPrimitiveType::BIGINT: - return "bigint"; - case TPrimitiveType::LARGEINT: - return "bigint unsigned"; - case TPrimitiveType::FLOAT: - return "float"; - 
case TPrimitiveType::DOUBLE: - return "double"; - case TPrimitiveType::VARCHAR: - return "varchar"; - case TPrimitiveType::CHAR: - return "char"; - case TPrimitiveType::DATE: - return "date"; - case TPrimitiveType::DATETIME: - return "datetime"; - case TPrimitiveType::DECIMALV2: - case TPrimitiveType::DECIMAL: { - return "decimal"; - } - default: - return "unknown"; + case TPrimitiveType::BOOLEAN: + return "tinyint"; + case TPrimitiveType::TINYINT: + return "tinyint"; + case TPrimitiveType::SMALLINT: + return "smallint"; + case TPrimitiveType::INT: + return "int"; + case TPrimitiveType::BIGINT: + return "bigint"; + case TPrimitiveType::LARGEINT: + return "bigint unsigned"; + case TPrimitiveType::FLOAT: + return "float"; + case TPrimitiveType::DOUBLE: + return "double"; + case TPrimitiveType::VARCHAR: + return "varchar"; + case TPrimitiveType::CHAR: + return "char"; + case TPrimitiveType::DATE: + return "date"; + case TPrimitiveType::DATETIME: + return "datetime"; + case TPrimitiveType::DECIMALV2: + case TPrimitiveType::DECIMAL: { + return "decimal"; + } + default: + return "unknown"; } } -std::string SchemaColumnsScanner::type_to_string(TColumnDesc &desc) { +std::string SchemaColumnsScanner::type_to_string(TColumnDesc& desc) { switch (desc.columnType) { - case TPrimitiveType::BOOLEAN: - return "tinyint(4)"; - case TPrimitiveType::TINYINT: - return "tinyint(4)"; - case TPrimitiveType::SMALLINT: - return "smallint(6)"; - case TPrimitiveType::INT: - return "int(11)"; - case TPrimitiveType::BIGINT: - return "bigint(20)"; - case TPrimitiveType::LARGEINT: - return "bigint(20) unsigned"; - case TPrimitiveType::FLOAT: - return "float"; - case TPrimitiveType::DOUBLE: - return "double"; - case TPrimitiveType::VARCHAR: - if (desc.__isset.columnLength) { - return "varchar(" + std::to_string(desc.columnLength) +")"; - } else { - return "varchar(20)"; - } - case TPrimitiveType::CHAR: - if (desc.__isset.columnLength) { - return "char(" + std::to_string(desc.columnLength) +")"; - 
} else { - return "char(20)"; - } - case TPrimitiveType::DATE: - return "date"; - case TPrimitiveType::DATETIME: - return "datetime"; - case TPrimitiveType::DECIMALV2: - case TPrimitiveType::DECIMAL: { - std::stringstream stream; - stream << "decimal("; - if (desc.__isset.columnPrecision) { - stream << desc.columnPrecision; - } else { - stream << 27; - } - stream << ","; - if (desc.__isset.columnScale) { - stream << desc.columnScale; - } else { - stream << 9; - } - stream << ")"; - return stream.str(); + case TPrimitiveType::BOOLEAN: + return "tinyint(4)"; + case TPrimitiveType::TINYINT: + return "tinyint(4)"; + case TPrimitiveType::SMALLINT: + return "smallint(6)"; + case TPrimitiveType::INT: + return "int(11)"; + case TPrimitiveType::BIGINT: + return "bigint(20)"; + case TPrimitiveType::LARGEINT: + return "bigint(20) unsigned"; + case TPrimitiveType::FLOAT: + return "float"; + case TPrimitiveType::DOUBLE: + return "double"; + case TPrimitiveType::VARCHAR: + if (desc.__isset.columnLength) { + return "varchar(" + std::to_string(desc.columnLength) + ")"; + } else { + return "varchar(20)"; + } + case TPrimitiveType::CHAR: + if (desc.__isset.columnLength) { + return "char(" + std::to_string(desc.columnLength) + ")"; + } else { + return "char(20)"; } - default: - return "unknown"; + case TPrimitiveType::DATE: + return "date"; + case TPrimitiveType::DATETIME: + return "datetime"; + case TPrimitiveType::DECIMALV2: + case TPrimitiveType::DECIMAL: { + std::stringstream stream; + stream << "decimal("; + if (desc.__isset.columnPrecision) { + stream << desc.columnPrecision; + } else { + stream << 27; + } + stream << ","; + if (desc.__isset.columnScale) { + stream << desc.columnScale; + } else { + stream << 9; + } + stream << ")"; + return stream.str(); + } + default: + return "unknown"; } } //fill row in the "INFORMATION_SCHEMA COLUMNS" //Reference from https://dev.mysql.com/doc/refman/8.0/en/information-schema-columns-table.html -Status 
SchemaColumnsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { +Status SchemaColumnsScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // set all bit to not null - memset((void *)tuple, 0, _tuple_desc->num_null_bytes()); + memset((void*)tuple, 0, _tuple_desc->num_null_bytes()); // TABLE_CATALOG - { - tuple->set_null(_tuple_desc->slots()[0]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[0]->null_indicator_offset()); } // TABLE_SCHEMA { - void *slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); std::string db_name = SchemaHelper::extract_db_name(_db_result.dbs[_db_index - 1]); - str_slot->ptr = (char *)pool->allocate(db_name.size()); + str_slot->ptr = (char*)pool->allocate(db_name.size()); str_slot->len = db_name.size(); memcpy(str_slot->ptr, db_name.c_str(), str_slot->len); } // TABLE_NAME { - void *slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); - str_slot->ptr = (char *)pool->allocate(_table_result.tables[_table_index - 1].length()); + str_slot->ptr = (char*)pool->allocate(_table_result.tables[_table_index - 1].length()); str_slot->len = _table_result.tables[_table_index - 1].length(); memcpy(str_slot->ptr, _table_result.tables[_table_index - 1].c_str(), str_slot->len); } // COLUMN_NAME { - void *slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); - str_slot->ptr = (char *)pool->allocate( + str_slot->ptr = (char*)pool->allocate( _desc_result.columns[_column_index].columnDesc.columnName.length()); str_slot->len = _desc_result.columns[_column_index].columnDesc.columnName.length(); - memcpy(str_slot->ptr, - 
_desc_result.columns[_column_index].columnDesc.columnName.c_str(), + memcpy(str_slot->ptr, _desc_result.columns[_column_index].columnDesc.columnName.c_str(), str_slot->len); } // ORDINAL_POSITION { - void *slot = tuple->get_slot(_tuple_desc->slots()[4]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[4]->tuple_offset()); int64_t* bigint_slot = reinterpret_cast(slot); *bigint_slot = _column_index + 1; } // COLUMN_DEFAULT - { - tuple->set_null(_tuple_desc->slots()[5]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[5]->null_indicator_offset()); } // IS_NULLABLE { - void *slot = tuple->get_slot(_tuple_desc->slots()[6]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[6]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); if (_desc_result.columns[_column_index].columnDesc.__isset.isAllowNull) { if (_desc_result.columns[_column_index].columnDesc.isAllowNull) { str_slot->len = strlen("YES"); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); memcpy(str_slot->ptr, "YES", str_slot->len); } else { str_slot->len = strlen("NO"); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); memcpy(str_slot->ptr, "NO", str_slot->len); } } else { str_slot->len = strlen("NO"); - str_slot->ptr = (char *) pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); memcpy(str_slot->ptr, "NO", str_slot->len); } } // DATA_TYPE { - void *slot = tuple->get_slot(_tuple_desc->slots()[7]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[7]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); - std::string buffer = to_mysql_data_type_string(_desc_result.columns[_column_index].columnDesc); + std::string buffer = + to_mysql_data_type_string(_desc_result.columns[_column_index].columnDesc); str_slot->len = buffer.length(); - str_slot->ptr = (char 
*)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); memcpy(str_slot->ptr, buffer.c_str(), str_slot->len); } // CHARACTER_MAXIMUM_LENGTH @@ -269,7 +264,7 @@ Status SchemaColumnsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { { int data_type = _desc_result.columns[_column_index].columnDesc.columnType; if (data_type == TPrimitiveType::VARCHAR || data_type == TPrimitiveType::CHAR) { - void *slot = tuple->get_slot(_tuple_desc->slots()[8]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[8]->tuple_offset()); int64_t* str_slot = reinterpret_cast(slot); if (_desc_result.columns[_column_index].columnDesc.__isset.columnLength) { *str_slot = _desc_result.columns[_column_index].columnDesc.columnLength; @@ -285,7 +280,7 @@ Status SchemaColumnsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { { int data_type = _desc_result.columns[_column_index].columnDesc.columnType; if (data_type == TPrimitiveType::VARCHAR || data_type == TPrimitiveType::CHAR) { - void *slot = tuple->get_slot(_tuple_desc->slots()[9]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[9]->tuple_offset()); int64_t* str_slot = reinterpret_cast(slot); if (_desc_result.columns[_column_index].columnDesc.__isset.columnLength) { *str_slot = _desc_result.columns[_column_index].columnDesc.columnLength * 4; @@ -298,7 +293,7 @@ Status SchemaColumnsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // NUMERIC_PRECISION { - void *slot = tuple->get_slot(_tuple_desc->slots()[10]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[10]->tuple_offset()); int64_t* str_slot = reinterpret_cast(slot); if (_desc_result.columns[_column_index].columnDesc.__isset.columnPrecision) { *str_slot = _desc_result.columns[_column_index].columnDesc.columnPrecision; @@ -308,7 +303,7 @@ Status SchemaColumnsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // NUMERIC_SCALE { - void *slot = 
tuple->get_slot(_tuple_desc->slots()[11]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[11]->tuple_offset()); int64_t* str_slot = reinterpret_cast(slot); if (_desc_result.columns[_column_index].columnDesc.__isset.columnScale) { *str_slot = _desc_result.columns[_column_index].columnDesc.columnScale; @@ -317,62 +312,55 @@ Status SchemaColumnsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } } // DATETIME_PRECISION - { - tuple->set_null(_tuple_desc->slots()[12]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[12]->null_indicator_offset()); } // CHARACTER_SET_NAME - { - tuple->set_null(_tuple_desc->slots()[13]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[13]->null_indicator_offset()); } // COLLATION_NAME - { - tuple->set_null(_tuple_desc->slots()[14]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[14]->null_indicator_offset()); } // COLUMN_TYPE { - void *slot = tuple->get_slot(_tuple_desc->slots()[15]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[15]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); std::string buffer = type_to_string(_desc_result.columns[_column_index].columnDesc); str_slot->len = buffer.length(); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); memcpy(str_slot->ptr, buffer.c_str(), str_slot->len); } // COLUMN_KEY { - void *slot = tuple->get_slot(_tuple_desc->slots()[16]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[16]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); str_slot->len = strlen("") + 1; - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); memcpy(str_slot->ptr, "", str_slot->len); } // EXTRA { - void *slot = tuple->get_slot(_tuple_desc->slots()[17]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[17]->tuple_offset()); 
StringValue* str_slot = reinterpret_cast(slot); str_slot->len = strlen("") + 1; - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); memcpy(str_slot->ptr, "", str_slot->len); } // PRIVILEGES { - void *slot = tuple->get_slot(_tuple_desc->slots()[18]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[18]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); str_slot->len = strlen("") + 1; - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); memcpy(str_slot->ptr, "", str_slot->len); } // COLUMN_COMMENT { - void *slot = tuple->get_slot(_tuple_desc->slots()[19]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[19]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); - str_slot->ptr = - (char *)pool->allocate(_desc_result.columns[_column_index].comment.length()); + str_slot->ptr = (char*)pool->allocate(_desc_result.columns[_column_index].comment.length()); str_slot->len = _desc_result.columns[_column_index].comment.length(); memcpy(str_slot->ptr, _desc_result.columns[_column_index].comment.c_str(), str_slot->len); } // COLUMN_SIZE { - void *slot = tuple->get_slot(_tuple_desc->slots()[20]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[20]->tuple_offset()); int64_t* str_slot = reinterpret_cast(slot); if (_desc_result.columns[_column_index].columnDesc.__isset.columnLength) { *str_slot = _desc_result.columns[_column_index].columnDesc.columnLength; @@ -382,7 +370,7 @@ Status SchemaColumnsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // DECIMAL_DIGITS { - void *slot = tuple->get_slot(_tuple_desc->slots()[21]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[21]->tuple_offset()); int64_t* str_slot = reinterpret_cast(slot); if (_desc_result.columns[_column_index].columnDesc.__isset.columnScale) { *str_slot = 
_desc_result.columns[_column_index].columnDesc.columnScale; @@ -391,13 +379,9 @@ Status SchemaColumnsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } } // GENERATION_EXPRESSION - { - tuple->set_null(_tuple_desc->slots()[22]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[22]->null_indicator_offset()); } // SRS_ID - { - tuple->set_null(_tuple_desc->slots()[23]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[23]->null_indicator_offset()); } _column_index++; return Status::OK(); } @@ -418,8 +402,8 @@ Status SchemaColumnsScanner::get_new_desc() { } if (NULL != _param->ip && 0 != _param->port) { - RETURN_IF_ERROR(SchemaHelper::describe_table(*(_param->ip), - _param->port, desc_params, &_desc_result)); + RETURN_IF_ERROR(SchemaHelper::describe_table(*(_param->ip), _param->port, desc_params, + &_desc_result)); } else { return Status::InternalError("IP or port doesn't exists"); } @@ -446,8 +430,8 @@ Status SchemaColumnsScanner::get_new_table() { } if (NULL != _param->ip && 0 != _param->port) { - RETURN_IF_ERROR(SchemaHelper::get_table_names(*(_param->ip), - _param->port, table_params, &_table_result)); + RETURN_IF_ERROR(SchemaHelper::get_table_names(*(_param->ip), _param->port, table_params, + &_table_result)); } else { return Status::InternalError("IP or port doesn't exists"); } @@ -455,7 +439,7 @@ Status SchemaColumnsScanner::get_new_table() { return Status::OK(); } -Status SchemaColumnsScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eos) { +Status SchemaColumnsScanner::get_next_row(Tuple* tuple, MemPool* pool, bool* eos) { if (!_is_init) { return Status::InternalError("use this class before inited."); } @@ -479,4 +463,4 @@ Status SchemaColumnsScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eos return fill_one_row(tuple, pool); } -} +} // namespace doris diff --git a/be/src/exec/schema_scanner/schema_columns_scanner.h b/be/src/exec/schema_scanner/schema_columns_scanner.h index 
3ff0c14cb6a843..edc99d40ba52ac 100644 --- a/be/src/exec/schema_scanner/schema_columns_scanner.h +++ b/be/src/exec/schema_scanner/schema_columns_scanner.h @@ -19,6 +19,7 @@ #define DORIS_BE_SRC_QUERY_EXEC_SCHEMA_SCANNER_SCHEMA_COLUMNS_SCANNER_H #include + #include "exec/schema_scanner.h" #include "gen_cpp/FrontendService_types.h" @@ -28,26 +29,26 @@ class SchemaColumnsScanner : public SchemaScanner { public: SchemaColumnsScanner(); virtual ~SchemaColumnsScanner(); - virtual Status start(RuntimeState *state); - virtual Status get_next_row(Tuple *tuple, MemPool *pool, bool *eos); + virtual Status start(RuntimeState* state); + virtual Status get_next_row(Tuple* tuple, MemPool* pool, bool* eos); private: Status get_new_table(); - Status fill_one_row(Tuple *tuple, MemPool *pool); + Status fill_one_row(Tuple* tuple, MemPool* pool); Status get_new_desc(); - Status get_create_table(std::string *result); - std::string to_mysql_data_type_string(TColumnDesc &desc); - std::string type_to_string(TColumnDesc &desc); + Status get_create_table(std::string* result); + std::string to_mysql_data_type_string(TColumnDesc& desc); + std::string type_to_string(TColumnDesc& desc); int _db_index; int _table_index; int _column_index; - TGetDbsResult _db_result; + TGetDbsResult _db_result; TGetTablesResult _table_result; TDescribeTableResult _desc_result; static SchemaScanner::ColumnDesc _s_col_columns[]; }; -} +} // namespace doris #endif diff --git a/be/src/exec/schema_scanner/schema_dummy_scanner.cpp b/be/src/exec/schema_scanner/schema_dummy_scanner.cpp index 957b581a3eaf39..bbc409e3755857 100644 --- a/be/src/exec/schema_scanner/schema_dummy_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_dummy_scanner.cpp @@ -20,27 +20,23 @@ #include "runtime/primitive_type.h" #include "runtime/string_value.h" -namespace doris -{ +namespace doris { -SchemaScanner::ColumnDesc SchemaDummyScanner::_s_dummy_columns[] = { -}; +SchemaScanner::ColumnDesc SchemaDummyScanner::_s_dummy_columns[] = {}; 
-SchemaDummyScanner::SchemaDummyScanner() - : SchemaScanner(_s_dummy_columns, - sizeof(_s_dummy_columns) / sizeof(SchemaScanner::ColumnDesc)) { -} +SchemaDummyScanner::SchemaDummyScanner() + : SchemaScanner(_s_dummy_columns, + sizeof(_s_dummy_columns) / sizeof(SchemaScanner::ColumnDesc)) {} -SchemaDummyScanner::~SchemaDummyScanner() { -} +SchemaDummyScanner::~SchemaDummyScanner() {} Status SchemaDummyScanner::start() { return Status::OK(); } -Status SchemaDummyScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eos) { +Status SchemaDummyScanner::get_next_row(Tuple* tuple, MemPool* pool, bool* eos) { *eos = true; return Status::OK(); } -} +} // namespace doris diff --git a/be/src/exec/schema_scanner/schema_dummy_scanner.h b/be/src/exec/schema_scanner/schema_dummy_scanner.h index de6de5665193da..b5ba51405145c5 100644 --- a/be/src/exec/schema_scanner/schema_dummy_scanner.h +++ b/be/src/exec/schema_scanner/schema_dummy_scanner.h @@ -21,19 +21,19 @@ #include "exec/schema_scanner.h" #include "gen_cpp/FrontendService_types.h" -namespace doris -{ +namespace doris { class SchemaDummyScanner : public SchemaScanner { public: SchemaDummyScanner(); virtual ~SchemaDummyScanner(); virtual Status start(); - virtual Status get_next_row(Tuple *tuple, MemPool *pool, bool *eos); + virtual Status get_next_row(Tuple* tuple, MemPool* pool, bool* eos); + private: static SchemaScanner::ColumnDesc _s_dummy_columns[]; }; -} +} // namespace doris #endif diff --git a/be/src/exec/schema_scanner/schema_helper.cpp b/be/src/exec/schema_scanner/schema_helper.cpp index f35063fcb20d2a..335b250aa61b0c 100644 --- a/be/src/exec/schema_scanner/schema_helper.cpp +++ b/be/src/exec/schema_scanner/schema_helper.cpp @@ -17,118 +17,98 @@ #include "exec/schema_scanner/schema_helper.h" -#include - #include #include #include #include +#include #include "exec/text_converter.hpp" -#include "gen_cpp/PlanNodes_types.h" -#include "gen_cpp/FrontendService_types.h" #include "gen_cpp/FrontendService.h" -#include 
"runtime/runtime_state.h" +#include "gen_cpp/FrontendService_types.h" +#include "gen_cpp/PlanNodes_types.h" +#include "runtime/client_cache.h" #include "runtime/exec_env.h" #include "runtime/row_batch.h" +#include "runtime/runtime_state.h" #include "runtime/string_value.h" #include "runtime/tuple_row.h" -#include "runtime/client_cache.h" #include "util/debug_util.h" -#include "util/thrift_rpc_helper.h" #include "util/network_util.h" -#include "util/thrift_util.h" #include "util/runtime_profile.h" -#include "runtime/client_cache.h" +#include "util/thrift_rpc_helper.h" +#include "util/thrift_util.h" namespace doris { -Status SchemaHelper::get_db_names( - const std::string& ip, - const int32_t port, - const TGetDbsParams &request, - TGetDbsResult *result) { - return ThriftRpcHelper::rpc(ip, port, - [&request, &result] (FrontendServiceConnection& client) { +Status SchemaHelper::get_db_names(const std::string& ip, const int32_t port, + const TGetDbsParams& request, TGetDbsResult* result) { + return ThriftRpcHelper::rpc( + ip, port, [&request, &result](FrontendServiceConnection& client) { client->getDbNames(*result, request); - }); + }); } -Status SchemaHelper::get_table_names( - const std::string& ip, - const int32_t port, - const TGetTablesParams &request, - TGetTablesResult *result) { - return ThriftRpcHelper::rpc(ip, port, - [&request, &result] (FrontendServiceConnection& client) { +Status SchemaHelper::get_table_names(const std::string& ip, const int32_t port, + const TGetTablesParams& request, TGetTablesResult* result) { + return ThriftRpcHelper::rpc( + ip, port, [&request, &result](FrontendServiceConnection& client) { client->getTableNames(*result, request); - }); + }); } -Status SchemaHelper::list_table_status( - const std::string& ip, - const int32_t port, - const TGetTablesParams &request, - TListTableStatusResult *result) { - return ThriftRpcHelper::rpc(ip, port, - [&request, &result] (FrontendServiceConnection& client) { +Status 
SchemaHelper::list_table_status(const std::string& ip, const int32_t port, + const TGetTablesParams& request, + TListTableStatusResult* result) { + return ThriftRpcHelper::rpc( + ip, port, [&request, &result](FrontendServiceConnection& client) { client->listTableStatus(*result, request); - }); + }); } -Status SchemaHelper::describe_table( - const std::string& ip, - const int32_t port, - const TDescribeTableParams &request, - TDescribeTableResult *result) { - return ThriftRpcHelper::rpc(ip, port, - [&request, &result] (FrontendServiceConnection& client) { +Status SchemaHelper::describe_table(const std::string& ip, const int32_t port, + const TDescribeTableParams& request, + TDescribeTableResult* result) { + return ThriftRpcHelper::rpc( + ip, port, [&request, &result](FrontendServiceConnection& client) { client->describeTable(*result, request); - }); + }); } -Status SchemaHelper::show_variables( - const std::string& ip, - const int32_t port, - const TShowVariableRequest &request, - TShowVariableResult *result) { - return ThriftRpcHelper::rpc(ip, port, - [&request, &result] (FrontendServiceConnection& client) { +Status SchemaHelper::show_variables(const std::string& ip, const int32_t port, + const TShowVariableRequest& request, + TShowVariableResult* result) { + return ThriftRpcHelper::rpc( + ip, port, [&request, &result](FrontendServiceConnection& client) { client->showVariables(*result, request); - }); + }); } -Status SchemaHelper::list_table_privilege_status( - const std::string& ip, - const int32_t port, - const TGetTablesParams &request, - TListPrivilegesResult *result) { - return ThriftRpcHelper::rpc(ip, port, - [&request, &result] (FrontendServiceConnection& client) { +Status SchemaHelper::list_table_privilege_status(const std::string& ip, const int32_t port, + const TGetTablesParams& request, + TListPrivilegesResult* result) { + return ThriftRpcHelper::rpc( + ip, port, [&request, &result](FrontendServiceConnection& client) { 
client->listTablePrivilegeStatus(*result, request); - }); + }); } -Status SchemaHelper::list_schema_privilege_status( - const std::string& ip, - const int32_t port, - const TGetTablesParams &request, - TListPrivilegesResult *result) { - return ThriftRpcHelper::rpc(ip, port, - [&request, &result] (FrontendServiceConnection& client) { +Status SchemaHelper::list_schema_privilege_status(const std::string& ip, const int32_t port, + const TGetTablesParams& request, + TListPrivilegesResult* result) { + return ThriftRpcHelper::rpc( + ip, port, [&request, &result](FrontendServiceConnection& client) { client->listSchemaPrivilegeStatus(*result, request); - }); + }); } -Status SchemaHelper::list_user_privilege_status( - const std::string& ip, - const int32_t port, - const TGetTablesParams &request, - TListPrivilegesResult *result) { - return ThriftRpcHelper::rpc(ip, port, - [&request, &result] (FrontendServiceConnection& client) { +Status SchemaHelper::list_user_privilege_status(const std::string& ip, const int32_t port, + const TGetTablesParams& request, + TListPrivilegesResult* result) { + return ThriftRpcHelper::rpc( + ip, port, [&request, &result](FrontendServiceConnection& client) { client->listUserPrivilegeStatus(*result, request); - }); + }); } std::string SchemaHelper::extract_db_name(const std::string& full_name) { @@ -140,5 +120,4 @@ std::string SchemaHelper::extract_db_name(const std::string& full_name) { return std::string(full_name.c_str() + found, full_name.size() - found); } -} - +} // namespace doris diff --git a/be/src/exec/schema_scanner/schema_helper.h b/be/src/exec/schema_scanner/schema_helper.h index b9e7ed9a0c8b67..12c17ee62513c3 100644 --- a/be/src/exec/schema_scanner/schema_helper.h +++ b/be/src/exec/schema_scanner/schema_helper.h @@ -25,56 +25,38 @@ namespace doris { // this class is a helper for getting schema info from FE class SchemaHelper { public: - static Status get_db_names( - const std::string& ip, - const int32_t port, - const TGetDbsParams 
&db_params, - TGetDbsResult *db_result); + static Status get_db_names(const std::string& ip, const int32_t port, + const TGetDbsParams& db_params, TGetDbsResult* db_result); - static Status get_table_names( - const std::string& ip, - const int32_t port, - const TGetTablesParams &table_params, - TGetTablesResult *table_result); + static Status get_table_names(const std::string& ip, const int32_t port, + const TGetTablesParams& table_params, + TGetTablesResult* table_result); - static Status list_table_status( - const std::string& ip, - const int32_t port, - const TGetTablesParams &table_params, - TListTableStatusResult *table_result); + static Status list_table_status(const std::string& ip, const int32_t port, + const TGetTablesParams& table_params, + TListTableStatusResult* table_result); - static Status describe_table( - const std::string& ip, - const int32_t port, - const TDescribeTableParams &desc_params, - TDescribeTableResult *desc_result); + static Status describe_table(const std::string& ip, const int32_t port, + const TDescribeTableParams& desc_params, + TDescribeTableResult* desc_result); - static Status show_variables( - const std::string& ip, - const int32_t port, - const TShowVariableRequest &var_params, - TShowVariableResult *var_result); + static Status show_variables(const std::string& ip, const int32_t port, + const TShowVariableRequest& var_params, + TShowVariableResult* var_result); - static Status list_table_privilege_status( - const std::string& ip, - const int32_t port, - const TGetTablesParams &table_params, - TListPrivilegesResult *privileges_result); + static Status list_table_privilege_status(const std::string& ip, const int32_t port, + const TGetTablesParams& table_params, + TListPrivilegesResult* privileges_result); - static Status list_schema_privilege_status( - const std::string& ip, - const int32_t port, - const TGetTablesParams &table_params, - TListPrivilegesResult *privileges_result); + static Status 
list_schema_privilege_status(const std::string& ip, const int32_t port, + const TGetTablesParams& table_params, + TListPrivilegesResult* privileges_result); - static Status list_user_privilege_status( - const std::string& ip, - const int32_t port, - const TGetTablesParams &table_params, - TListPrivilegesResult *privileges_result); + static Status list_user_privilege_status(const std::string& ip, const int32_t port, + const TGetTablesParams& table_params, + TListPrivilegesResult* privileges_result); static std::string extract_db_name(const std::string& full_name); }; -} - +} // namespace doris diff --git a/be/src/exec/schema_scanner/schema_schema_privileges_scanner.cpp b/be/src/exec/schema_scanner/schema_schema_privileges_scanner.cpp index b5589ee4de56a1..d0e665eefd2b79 100644 --- a/be/src/exec/schema_scanner/schema_schema_privileges_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_schema_privileges_scanner.cpp @@ -15,34 +15,32 @@ // specific language governing permissions and limitations // under the License. 
-#include "exec/schema_scanner/schema_helper.h" #include "exec/schema_scanner/schema_schema_privileges_scanner.h" + +#include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" #include "runtime/string_value.h" //#include "runtime/datetime_value.h" -namespace doris -{ +namespace doris { SchemaScanner::ColumnDesc SchemaSchemaPrivilegesScanner::_s_tbls_columns[] = { - // name, type, size, is_null - { "GRANTEE", TYPE_VARCHAR, sizeof(StringValue), true}, - { "TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, - { "TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, - { "PRIVILEGE_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, - { "IS_GRANTABLE", TYPE_VARCHAR, sizeof(StringValue), true}, + // name, type, size, is_null + {"GRANTEE", TYPE_VARCHAR, sizeof(StringValue), true}, + {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, + {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, + {"PRIVILEGE_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, + {"IS_GRANTABLE", TYPE_VARCHAR, sizeof(StringValue), true}, }; SchemaSchemaPrivilegesScanner::SchemaSchemaPrivilegesScanner() : SchemaScanner(_s_tbls_columns, sizeof(_s_tbls_columns) / sizeof(SchemaScanner::ColumnDesc)), - _priv_index(0) { -} + _priv_index(0) {} -SchemaSchemaPrivilegesScanner::~SchemaSchemaPrivilegesScanner() { -} +SchemaSchemaPrivilegesScanner::~SchemaSchemaPrivilegesScanner() {} -Status SchemaSchemaPrivilegesScanner::start(RuntimeState *state) { +Status SchemaSchemaPrivilegesScanner::start(RuntimeState* state) { if (!_is_init) { return Status::InternalError("used before initialized."); } @@ -50,14 +48,14 @@ Status SchemaSchemaPrivilegesScanner::start(RuntimeState *state) { return Status::OK(); } -Status SchemaSchemaPrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { +Status SchemaSchemaPrivilegesScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // set all bit to not null - memset((void *)tuple, 0, _tuple_desc->num_null_bytes()); + 
memset((void*)tuple, 0, _tuple_desc->num_null_bytes()); const TPrivilegeStatus& priv_status = _priv_result.privileges[_priv_index]; // grantee { Status status = fill_one_col(&priv_status.grantee, pool, - tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset())); + tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset())); if (!status.ok()) { return status; } @@ -67,7 +65,7 @@ Status SchemaSchemaPrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { std::string definer = "def"; Status status = fill_one_col(&definer, pool, - tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset())); + tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset())); if (!status.ok()) { return status; } @@ -75,7 +73,7 @@ Status SchemaSchemaPrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) // schema { Status status = fill_one_col(&priv_status.schema, pool, - tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset())); + tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset())); if (!status.ok()) { return status; } @@ -83,7 +81,7 @@ Status SchemaSchemaPrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) // privilege type { Status status = fill_one_col(&priv_status.privilege_type, pool, - tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset())); + tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset())); if (!status.ok()) { return status; } @@ -91,7 +89,7 @@ Status SchemaSchemaPrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) // is grantable { Status status = fill_one_col(&priv_status.is_grantable, pool, - tuple->get_slot(_tuple_desc->slots()[4]->tuple_offset())); + tuple->get_slot(_tuple_desc->slots()[4]->tuple_offset())); if (!status.ok()) { return status; } @@ -100,14 +98,14 @@ Status SchemaSchemaPrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) return Status::OK(); } -Status SchemaSchemaPrivilegesScanner::fill_one_col(const std::string* src, - MemPool *pool, void *slot) { +Status SchemaSchemaPrivilegesScanner::fill_one_col(const std::string* 
src, MemPool* pool, + void* slot) { if (NULL == slot || NULL == pool || NULL == src) { return Status::InternalError("input pointer is NULL."); } StringValue* str_slot = reinterpret_cast(slot); str_slot->len = src->length(); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memcpy failed."); } @@ -115,7 +113,6 @@ Status SchemaSchemaPrivilegesScanner::fill_one_col(const std::string* src, return Status::OK(); } - Status SchemaSchemaPrivilegesScanner::get_new_table() { TGetTablesParams table_params; if (NULL != _param->wild) { @@ -133,8 +130,8 @@ Status SchemaSchemaPrivilegesScanner::get_new_table() { } if (NULL != _param->ip && 0 != _param->port) { - RETURN_IF_ERROR(SchemaHelper::list_schema_privilege_status(*(_param->ip), - _param->port, table_params, &_priv_result)); + RETURN_IF_ERROR(SchemaHelper::list_schema_privilege_status(*(_param->ip), _param->port, + table_params, &_priv_result)); } else { return Status::InternalError("IP or port doesn't exists"); } @@ -142,7 +139,7 @@ Status SchemaSchemaPrivilegesScanner::get_new_table() { return Status::OK(); } -Status SchemaSchemaPrivilegesScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eos) { +Status SchemaSchemaPrivilegesScanner::get_next_row(Tuple* tuple, MemPool* pool, bool* eos) { if (!_is_init) { return Status::InternalError("Used before initialized."); } @@ -157,4 +154,4 @@ Status SchemaSchemaPrivilegesScanner::get_next_row(Tuple *tuple, MemPool *pool, return fill_one_row(tuple, pool); } -} \ No newline at end of file +} // namespace doris \ No newline at end of file diff --git a/be/src/exec/schema_scanner/schema_schema_privileges_scanner.h b/be/src/exec/schema_scanner/schema_schema_privileges_scanner.h index 6d9fc03867b443..6d4333804cf8f1 100644 --- a/be/src/exec/schema_scanner/schema_schema_privileges_scanner.h +++ 
b/be/src/exec/schema_scanner/schema_schema_privileges_scanner.h @@ -28,19 +28,19 @@ class SchemaSchemaPrivilegesScanner : public SchemaScanner { SchemaSchemaPrivilegesScanner(); virtual ~SchemaSchemaPrivilegesScanner(); - virtual Status start(RuntimeState *state); - virtual Status get_next_row(Tuple *tuple, MemPool *pool, bool *eos); + virtual Status start(RuntimeState* state); + virtual Status get_next_row(Tuple* tuple, MemPool* pool, bool* eos); private: Status get_new_table(); - Status fill_one_row(Tuple *tuple, MemPool *pool); - Status fill_one_col(const std::string* src, MemPool *pool, void* slot); + Status fill_one_row(Tuple* tuple, MemPool* pool); + Status fill_one_col(const std::string* src, MemPool* pool, void* slot); int _priv_index; TListPrivilegesResult _priv_result; static SchemaScanner::ColumnDesc _s_tbls_columns[]; }; -} +} // namespace doris #endif diff --git a/be/src/exec/schema_scanner/schema_schemata_scanner.cpp b/be/src/exec/schema_scanner/schema_schemata_scanner.cpp index 820f06e03722c0..5f9ae4ebc33198 100644 --- a/be/src/exec/schema_scanner/schema_schemata_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_schemata_scanner.cpp @@ -16,30 +16,29 @@ // under the License. 
#include "exec/schema_scanner/schema_schemata_scanner.h" + +#include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" #include "runtime/string_value.h" -#include "exec/schema_scanner/schema_helper.h" namespace doris { SchemaScanner::ColumnDesc SchemaSchemataScanner::_s_columns[] = { - // name, type, size - { "CATALOG_NAME", TYPE_VARCHAR, sizeof(StringValue), true}, - { "SCHEMA_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - { "DEFAULT_CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - { "DEFAULT_COLLATION_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - { "SQL_PATH", TYPE_VARCHAR, sizeof(StringValue), true}, + // name, type, size + {"CATALOG_NAME", TYPE_VARCHAR, sizeof(StringValue), true}, + {"SCHEMA_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"DEFAULT_CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"DEFAULT_COLLATION_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"SQL_PATH", TYPE_VARCHAR, sizeof(StringValue), true}, }; -SchemaSchemataScanner::SchemaSchemataScanner() : - SchemaScanner(_s_columns, sizeof(_s_columns) / sizeof(SchemaScanner::ColumnDesc)), - _db_index(0) { -} +SchemaSchemataScanner::SchemaSchemataScanner() + : SchemaScanner(_s_columns, sizeof(_s_columns) / sizeof(SchemaScanner::ColumnDesc)), + _db_index(0) {} -SchemaSchemataScanner::~SchemaSchemataScanner() { -} +SchemaSchemataScanner::~SchemaSchemataScanner() {} -Status SchemaSchemataScanner::start(RuntimeState *state) { +Status SchemaSchemataScanner::start(RuntimeState* state) { if (!_is_init) { return Status::InternalError("used before initial."); } @@ -59,8 +58,8 @@ Status SchemaSchemataScanner::start(RuntimeState *state) { } if (NULL != _param->ip && 0 != _param->port) { - RETURN_IF_ERROR(SchemaHelper::get_db_names(*(_param->ip), - _param->port, db_params, &_db_result)); + RETURN_IF_ERROR( + SchemaHelper::get_db_names(*(_param->ip), _param->port, db_params, &_db_result)); } else { return Status::InternalError("IP or 
port doesn't exists"); } @@ -68,29 +67,27 @@ Status SchemaSchemataScanner::start(RuntimeState *state) { return Status::OK(); } -Status SchemaSchemataScanner::fill_one_row(Tuple *tuple, MemPool *pool) { +Status SchemaSchemataScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // set all bit to not null - memset((void *)tuple, 0, _tuple_desc->num_null_bytes()); + memset((void*)tuple, 0, _tuple_desc->num_null_bytes()); // catalog - { - tuple->set_null(_tuple_desc->slots()[0]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[0]->null_indicator_offset()); } // schema { - void *slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); std::string db_name = SchemaHelper::extract_db_name(_db_result.dbs[_db_index]); - str_slot->ptr = (char *)pool->allocate(db_name.size()); + str_slot->ptr = (char*)pool->allocate(db_name.size()); str_slot->len = db_name.size(); memcpy(str_slot->ptr, db_name.c_str(), str_slot->len); } // DEFAULT_CHARACTER_SET_NAME { - void *slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); str_slot->len = strlen("utf8") + 1; - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memory failed."); } @@ -98,24 +95,22 @@ Status SchemaSchemataScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // DEFAULT_COLLATION_NAME { - void *slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); str_slot->len = strlen("utf8_general_ci") + 1; - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = 
(char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memory failed."); } memcpy(str_slot->ptr, "utf8_general_ci", str_slot->len); } // SQL_PATH - { - tuple->set_null(_tuple_desc->slots()[4]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[4]->null_indicator_offset()); } _db_index++; return Status::OK(); } -Status SchemaSchemataScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eos) { +Status SchemaSchemataScanner::get_next_row(Tuple* tuple, MemPool* pool, bool* eos) { if (!_is_init) { return Status::InternalError("Used before Initialized."); } @@ -130,4 +125,4 @@ Status SchemaSchemataScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eo return fill_one_row(tuple, pool); } -} +} // namespace doris diff --git a/be/src/exec/schema_scanner/schema_schemata_scanner.h b/be/src/exec/schema_scanner/schema_schemata_scanner.h index 90625eddf690e0..8a1c66b092f152 100644 --- a/be/src/exec/schema_scanner/schema_schemata_scanner.h +++ b/be/src/exec/schema_scanner/schema_schemata_scanner.h @@ -28,17 +28,17 @@ class SchemaSchemataScanner : public SchemaScanner { SchemaSchemataScanner(); virtual ~SchemaSchemataScanner(); - virtual Status start(RuntimeState *state); - virtual Status get_next_row(Tuple *tuple, MemPool *pool, bool *eos); + virtual Status start(RuntimeState* state); + virtual Status get_next_row(Tuple* tuple, MemPool* pool, bool* eos); private: - Status fill_one_row(Tuple *tuple, MemPool *pool); + Status fill_one_row(Tuple* tuple, MemPool* pool); int _db_index; TGetDbsResult _db_result; static SchemaScanner::ColumnDesc _s_columns[]; }; -} +} // namespace doris #endif diff --git a/be/src/exec/schema_scanner/schema_table_privileges_scanner.cpp b/be/src/exec/schema_scanner/schema_table_privileges_scanner.cpp index 6d3d39000450a8..993c1bb31928ae 100644 --- a/be/src/exec/schema_scanner/schema_table_privileges_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_table_privileges_scanner.cpp 
@@ -15,35 +15,33 @@ // specific language governing permissions and limitations // under the License. -#include "exec/schema_scanner/schema_helper.h" #include "exec/schema_scanner/schema_table_privileges_scanner.h" + +#include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" #include "runtime/string_value.h" //#include "runtime/datetime_value.h" -namespace doris -{ +namespace doris { SchemaScanner::ColumnDesc SchemaTablePrivilegesScanner::_s_tbls_columns[] = { - // name, type, size, is_null - { "GRANTEE", TYPE_VARCHAR, sizeof(StringValue), true}, - { "TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, - { "TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, - { "TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - { "PRIVILEGE_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, - { "IS_GRANTABLE", TYPE_VARCHAR, sizeof(StringValue), true}, + // name, type, size, is_null + {"GRANTEE", TYPE_VARCHAR, sizeof(StringValue), true}, + {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, + {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, + {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"PRIVILEGE_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, + {"IS_GRANTABLE", TYPE_VARCHAR, sizeof(StringValue), true}, }; SchemaTablePrivilegesScanner::SchemaTablePrivilegesScanner() : SchemaScanner(_s_tbls_columns, sizeof(_s_tbls_columns) / sizeof(SchemaScanner::ColumnDesc)), - _priv_index(0) { -} + _priv_index(0) {} -SchemaTablePrivilegesScanner::~SchemaTablePrivilegesScanner() { -} +SchemaTablePrivilegesScanner::~SchemaTablePrivilegesScanner() {} -Status SchemaTablePrivilegesScanner::start(RuntimeState *state) { +Status SchemaTablePrivilegesScanner::start(RuntimeState* state) { if (!_is_init) { return Status::InternalError("used before initialized."); } @@ -51,14 +49,14 @@ Status SchemaTablePrivilegesScanner::start(RuntimeState *state) { return Status::OK(); } -Status SchemaTablePrivilegesScanner::fill_one_row(Tuple 
*tuple, MemPool *pool) { +Status SchemaTablePrivilegesScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // set all bit to not null - memset((void *)tuple, 0, _tuple_desc->num_null_bytes()); + memset((void*)tuple, 0, _tuple_desc->num_null_bytes()); const TPrivilegeStatus& priv_status = _priv_result.privileges[_priv_index]; // grantee { Status status = fill_one_col(&priv_status.grantee, pool, - tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset())); + tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset())); if (!status.ok()) { return status; } @@ -68,7 +66,7 @@ Status SchemaTablePrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { { std::string definer = "def"; Status status = fill_one_col(&definer, pool, - tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset())); + tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset())); if (!status.ok()) { return status; } @@ -76,7 +74,7 @@ Status SchemaTablePrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { // schema { Status status = fill_one_col(&priv_status.schema, pool, - tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset())); + tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset())); if (!status.ok()) { return status; } @@ -84,7 +82,7 @@ Status SchemaTablePrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { // table name { Status status = fill_one_col(&priv_status.table_name, pool, - tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset())); + tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset())); if (!status.ok()) { return status; } @@ -92,7 +90,7 @@ Status SchemaTablePrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { // privilege type { Status status = fill_one_col(&priv_status.privilege_type, pool, - tuple->get_slot(_tuple_desc->slots()[4]->tuple_offset())); + tuple->get_slot(_tuple_desc->slots()[4]->tuple_offset())); if (!status.ok()) { return status; } @@ -100,7 +98,7 @@ Status SchemaTablePrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { // is 
grantable { Status status = fill_one_col(&priv_status.is_grantable, pool, - tuple->get_slot(_tuple_desc->slots()[5]->tuple_offset())); + tuple->get_slot(_tuple_desc->slots()[5]->tuple_offset())); if (!status.ok()) { return status; } @@ -109,14 +107,14 @@ Status SchemaTablePrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { return Status::OK(); } -Status SchemaTablePrivilegesScanner::fill_one_col(const std::string* src, - MemPool *pool, void* slot) { +Status SchemaTablePrivilegesScanner::fill_one_col(const std::string* src, MemPool* pool, + void* slot) { if (NULL == slot || NULL == pool || NULL == src) { return Status::InternalError("input pointer is NULL."); } StringValue* str_slot = reinterpret_cast(slot); str_slot->len = src->length(); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memcpy failed."); } @@ -141,8 +139,8 @@ Status SchemaTablePrivilegesScanner::get_new_table() { } if (NULL != _param->ip && 0 != _param->port) { - RETURN_IF_ERROR(SchemaHelper::list_table_privilege_status(*(_param->ip), - _param->port, table_params, &_priv_result)); + RETURN_IF_ERROR(SchemaHelper::list_table_privilege_status(*(_param->ip), _param->port, + table_params, &_priv_result)); } else { return Status::InternalError("IP or port doesn't exists"); } @@ -150,7 +148,7 @@ Status SchemaTablePrivilegesScanner::get_new_table() { return Status::OK(); } -Status SchemaTablePrivilegesScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eos) { +Status SchemaTablePrivilegesScanner::get_next_row(Tuple* tuple, MemPool* pool, bool* eos) { if (!_is_init) { return Status::InternalError("Used before initialized."); } @@ -165,4 +163,4 @@ Status SchemaTablePrivilegesScanner::get_next_row(Tuple *tuple, MemPool *pool, b return fill_one_row(tuple, pool); } -} \ No newline at end of file +} // namespace doris \ No newline at end of file diff --git 
a/be/src/exec/schema_scanner/schema_table_privileges_scanner.h b/be/src/exec/schema_scanner/schema_table_privileges_scanner.h index 1e5f2e6e864b34..89985788658711 100644 --- a/be/src/exec/schema_scanner/schema_table_privileges_scanner.h +++ b/be/src/exec/schema_scanner/schema_table_privileges_scanner.h @@ -28,19 +28,19 @@ class SchemaTablePrivilegesScanner : public SchemaScanner { SchemaTablePrivilegesScanner(); virtual ~SchemaTablePrivilegesScanner(); - virtual Status start(RuntimeState *state); - virtual Status get_next_row(Tuple *tuple, MemPool *pool, bool *eos); + virtual Status start(RuntimeState* state); + virtual Status get_next_row(Tuple* tuple, MemPool* pool, bool* eos); private: Status get_new_table(); - Status fill_one_row(Tuple *tuple, MemPool *pool); - Status fill_one_col(const std::string* src, MemPool *pool, void* slot); + Status fill_one_row(Tuple* tuple, MemPool* pool); + Status fill_one_col(const std::string* src, MemPool* pool, void* slot); int _priv_index; TListPrivilegesResult _priv_result; static SchemaScanner::ColumnDesc _s_tbls_columns[]; }; -} +} // namespace doris #endif diff --git a/be/src/exec/schema_scanner/schema_tables_scanner.cpp b/be/src/exec/schema_scanner/schema_tables_scanner.cpp index 51dd3f86181f39..178424e86fe1b3 100644 --- a/be/src/exec/schema_scanner/schema_tables_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_tables_scanner.cpp @@ -15,51 +15,49 @@ // specific language governing permissions and limitations // under the License. 
-#include "exec/schema_scanner/schema_helper.h" #include "exec/schema_scanner/schema_tables_scanner.h" + +#include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" #include "runtime/string_value.h" //#include "runtime/datetime_value.h" -namespace doris -{ +namespace doris { SchemaScanner::ColumnDesc SchemaTablesScanner::_s_tbls_columns[] = { - // name, type, size, is_null - { "TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, - { "TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, - { "TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - { "TABLE_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, - { "ENGINE", TYPE_VARCHAR, sizeof(StringValue), true}, - { "VERSION", TYPE_BIGINT, sizeof(int64_t), true}, - { "ROW_FORMAT", TYPE_VARCHAR, sizeof(StringValue), true}, - { "TABLE_ROWS", TYPE_BIGINT, sizeof(int64_t), true}, - { "AVG_ROW_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, - { "DATA_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, - { "MAX_DATA_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, - { "INDEX_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, - { "DATA_FREE", TYPE_BIGINT, sizeof(int64_t), true}, - { "AUTO_INCREMENT", TYPE_BIGINT, sizeof(int64_t), true}, - { "CREATE_TIME", TYPE_DATETIME, sizeof(DateTimeValue), true}, - { "UPDATE_TIME", TYPE_DATETIME, sizeof(DateTimeValue), true}, - { "CHECK_TIME", TYPE_DATETIME, sizeof(DateTimeValue), true}, - { "TABLE_COLLATION", TYPE_VARCHAR, sizeof(StringValue), true}, - { "CHECKSUM", TYPE_BIGINT, sizeof(int64_t), true}, - { "CREATE_OPTIONS", TYPE_VARCHAR, sizeof(StringValue), true}, - { "TABLE_COMMENT", TYPE_VARCHAR, sizeof(StringValue), false}, + // name, type, size, is_null + {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, + {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, + {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"TABLE_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, + {"ENGINE", TYPE_VARCHAR, sizeof(StringValue), true}, + {"VERSION", 
TYPE_BIGINT, sizeof(int64_t), true}, + {"ROW_FORMAT", TYPE_VARCHAR, sizeof(StringValue), true}, + {"TABLE_ROWS", TYPE_BIGINT, sizeof(int64_t), true}, + {"AVG_ROW_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, + {"DATA_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, + {"MAX_DATA_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, + {"INDEX_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, + {"DATA_FREE", TYPE_BIGINT, sizeof(int64_t), true}, + {"AUTO_INCREMENT", TYPE_BIGINT, sizeof(int64_t), true}, + {"CREATE_TIME", TYPE_DATETIME, sizeof(DateTimeValue), true}, + {"UPDATE_TIME", TYPE_DATETIME, sizeof(DateTimeValue), true}, + {"CHECK_TIME", TYPE_DATETIME, sizeof(DateTimeValue), true}, + {"TABLE_COLLATION", TYPE_VARCHAR, sizeof(StringValue), true}, + {"CHECKSUM", TYPE_BIGINT, sizeof(int64_t), true}, + {"CREATE_OPTIONS", TYPE_VARCHAR, sizeof(StringValue), true}, + {"TABLE_COMMENT", TYPE_VARCHAR, sizeof(StringValue), false}, }; SchemaTablesScanner::SchemaTablesScanner() : SchemaScanner(_s_tbls_columns, sizeof(_s_tbls_columns) / sizeof(SchemaScanner::ColumnDesc)), - _db_index(0), - _table_index(0) { -} + _db_index(0), + _table_index(0) {} -SchemaTablesScanner::~SchemaTablesScanner() { -} +SchemaTablesScanner::~SchemaTablesScanner() {} -Status SchemaTablesScanner::start(RuntimeState *state) { +Status SchemaTablesScanner::start(RuntimeState* state) { if (!_is_init) { return Status::InternalError("used before initialized."); } @@ -79,38 +77,36 @@ Status SchemaTablesScanner::start(RuntimeState *state) { } if (NULL != _param->ip && 0 != _param->port) { - RETURN_IF_ERROR(SchemaHelper::get_db_names(*(_param->ip), - _param->port, db_params, &_db_result)); + RETURN_IF_ERROR( + SchemaHelper::get_db_names(*(_param->ip), _param->port, db_params, &_db_result)); } else { return Status::InternalError("IP or port doesn't exists"); } return Status::OK(); } -Status SchemaTablesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { +Status SchemaTablesScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // 
set all bit to not null - memset((void *)tuple, 0, _tuple_desc->num_null_bytes()); + memset((void*)tuple, 0, _tuple_desc->num_null_bytes()); const TTableStatus& tbl_status = _table_result.tables[_table_index]; // catalog - { - tuple->set_null(_tuple_desc->slots()[0]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[0]->null_indicator_offset()); } // schema { - void *slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); std::string db_name = SchemaHelper::extract_db_name(_db_result.dbs[_db_index - 1]); - str_slot->ptr = (char *)pool->allocate(db_name.size()); + str_slot->ptr = (char*)pool->allocate(db_name.size()); str_slot->len = db_name.size(); memcpy(str_slot->ptr, db_name.c_str(), str_slot->len); } // name { - void *slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); const std::string* src = &tbl_status.name; str_slot->len = src->length(); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memcpy failed."); } @@ -118,11 +114,11 @@ Status SchemaTablesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // type { - void *slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); const std::string* src = &tbl_status.type; str_slot->len = src->length(); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memcpy failed."); } @@ -130,11 +126,11 @@ Status SchemaTablesScanner::fill_one_row(Tuple *tuple, 
MemPool *pool) { } // engine if (tbl_status.__isset.engine) { - void *slot = tuple->get_slot(_tuple_desc->slots()[4]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[4]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); const std::string* src = &tbl_status.engine; str_slot->len = src->length(); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memcpy failed."); } @@ -143,41 +139,23 @@ Status SchemaTablesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { tuple->set_null(_tuple_desc->slots()[4]->null_indicator_offset()); } // version - { - tuple->set_null(_tuple_desc->slots()[5]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[5]->null_indicator_offset()); } // row_format - { - tuple->set_null(_tuple_desc->slots()[6]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[6]->null_indicator_offset()); } // rows - { - tuple->set_null(_tuple_desc->slots()[7]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[7]->null_indicator_offset()); } // avg_row_length - { - tuple->set_null(_tuple_desc->slots()[8]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[8]->null_indicator_offset()); } // data_length - { - tuple->set_null(_tuple_desc->slots()[9]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[9]->null_indicator_offset()); } // max_data_length - { - tuple->set_null(_tuple_desc->slots()[10]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[10]->null_indicator_offset()); } // index_length - { - tuple->set_null(_tuple_desc->slots()[11]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[11]->null_indicator_offset()); } // data_free - { - tuple->set_null(_tuple_desc->slots()[12]->null_indicator_offset()); - } + { 
tuple->set_null(_tuple_desc->slots()[12]->null_indicator_offset()); } // auto_increment - { - tuple->set_null(_tuple_desc->slots()[13]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[13]->null_indicator_offset()); } // creation_time if (tbl_status.__isset.create_time) { int64_t create_time = tbl_status.create_time; @@ -185,16 +163,13 @@ Status SchemaTablesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { tuple->set_null(_tuple_desc->slots()[14]->null_indicator_offset()); } else { tuple->set_not_null(_tuple_desc->slots()[14]->null_indicator_offset()); - void *slot = tuple->get_slot(_tuple_desc->slots()[14]->tuple_offset()); - DateTimeValue *time_slot = reinterpret_cast(slot); + void* slot = tuple->get_slot(_tuple_desc->slots()[14]->tuple_offset()); + DateTimeValue* time_slot = reinterpret_cast(slot); time_slot->from_unixtime(create_time, TimezoneUtils::default_time_zone); } - } // update_time - { - tuple->set_null(_tuple_desc->slots()[15]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[15]->null_indicator_offset()); } // check_time if (tbl_status.__isset.last_check_time) { int64_t check_time = tbl_status.last_check_time; @@ -202,33 +177,27 @@ Status SchemaTablesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { tuple->set_null(_tuple_desc->slots()[16]->null_indicator_offset()); } else { tuple->set_not_null(_tuple_desc->slots()[16]->null_indicator_offset()); - void *slot = tuple->get_slot(_tuple_desc->slots()[16]->tuple_offset()); - DateTimeValue *time_slot = reinterpret_cast(slot); + void* slot = tuple->get_slot(_tuple_desc->slots()[16]->tuple_offset()); + DateTimeValue* time_slot = reinterpret_cast(slot); time_slot->from_unixtime(check_time, TimezoneUtils::default_time_zone); } } // collation - { - tuple->set_null(_tuple_desc->slots()[17]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[17]->null_indicator_offset()); } // checksum - { - 
tuple->set_null(_tuple_desc->slots()[18]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[18]->null_indicator_offset()); } // create_options - { - tuple->set_null(_tuple_desc->slots()[19]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[19]->null_indicator_offset()); } // create_comment { - void *slot = tuple->get_slot(_tuple_desc->slots()[20]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[20]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); const std::string* src = &tbl_status.comment; str_slot->len = src->length(); if (str_slot->len == 0) { str_slot->ptr = nullptr; } else { - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memcpy failed."); } @@ -257,8 +226,8 @@ Status SchemaTablesScanner::get_new_table() { } if (NULL != _param->ip && 0 != _param->port) { - RETURN_IF_ERROR(SchemaHelper::list_table_status(*(_param->ip), - _param->port, table_params, &_table_result)); + RETURN_IF_ERROR(SchemaHelper::list_table_status(*(_param->ip), _param->port, table_params, + &_table_result)); } else { return Status::InternalError("IP or port doesn't exists"); } @@ -266,7 +235,7 @@ Status SchemaTablesScanner::get_new_table() { return Status::OK(); } -Status SchemaTablesScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eos) { +Status SchemaTablesScanner::get_next_row(Tuple* tuple, MemPool* pool, bool* eos) { if (!_is_init) { return Status::InternalError("Used before initialized."); } @@ -285,4 +254,4 @@ Status SchemaTablesScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eos) return fill_one_row(tuple, pool); } -} +} // namespace doris diff --git a/be/src/exec/schema_scanner/schema_tables_scanner.h b/be/src/exec/schema_scanner/schema_tables_scanner.h index 2fa8160bc51c2f..60c38b3b7f0fc4 100644 --- a/be/src/exec/schema_scanner/schema_tables_scanner.h +++ 
b/be/src/exec/schema_scanner/schema_tables_scanner.h @@ -28,12 +28,12 @@ class SchemaTablesScanner : public SchemaScanner { SchemaTablesScanner(); virtual ~SchemaTablesScanner(); - virtual Status start(RuntimeState *state); - virtual Status get_next_row(Tuple *tuple, MemPool *pool, bool *eos); + virtual Status start(RuntimeState* state); + virtual Status get_next_row(Tuple* tuple, MemPool* pool, bool* eos); private: Status get_new_table(); - Status fill_one_row(Tuple *tuple, MemPool *pool); + Status fill_one_row(Tuple* tuple, MemPool* pool); int _db_index; int _table_index; @@ -42,6 +42,6 @@ class SchemaTablesScanner : public SchemaScanner { static SchemaScanner::ColumnDesc _s_tbls_columns[]; }; -} +} // namespace doris #endif diff --git a/be/src/exec/schema_scanner/schema_user_privileges_scanner.cpp b/be/src/exec/schema_scanner/schema_user_privileges_scanner.cpp index 3d56b89dd9f706..9436bc90e5b170 100644 --- a/be/src/exec/schema_scanner/schema_user_privileges_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_user_privileges_scanner.cpp @@ -15,33 +15,31 @@ // specific language governing permissions and limitations // under the License. 
-#include "exec/schema_scanner/schema_helper.h" #include "exec/schema_scanner/schema_user_privileges_scanner.h" + +#include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" #include "runtime/string_value.h" //#include "runtime/datetime_value.h" -namespace doris -{ +namespace doris { SchemaScanner::ColumnDesc SchemaUserPrivilegesScanner::_s_tbls_columns[] = { - // name, type, size, is_null - { "GRANTEE", TYPE_VARCHAR, sizeof(StringValue), true}, - { "TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, - { "PRIVILEGE_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, - { "IS_GRANTABLE", TYPE_VARCHAR, sizeof(StringValue), true}, + // name, type, size, is_null + {"GRANTEE", TYPE_VARCHAR, sizeof(StringValue), true}, + {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, + {"PRIVILEGE_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, + {"IS_GRANTABLE", TYPE_VARCHAR, sizeof(StringValue), true}, }; SchemaUserPrivilegesScanner::SchemaUserPrivilegesScanner() : SchemaScanner(_s_tbls_columns, sizeof(_s_tbls_columns) / sizeof(SchemaScanner::ColumnDesc)), - _priv_index(0) { -} + _priv_index(0) {} -SchemaUserPrivilegesScanner::~SchemaUserPrivilegesScanner() { -} +SchemaUserPrivilegesScanner::~SchemaUserPrivilegesScanner() {} -Status SchemaUserPrivilegesScanner::start(RuntimeState *state) { +Status SchemaUserPrivilegesScanner::start(RuntimeState* state) { if (!_is_init) { return Status::InternalError("used before initialized."); } @@ -49,14 +47,14 @@ Status SchemaUserPrivilegesScanner::start(RuntimeState *state) { return Status::OK(); } -Status SchemaUserPrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { +Status SchemaUserPrivilegesScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // set all bit to not null - memset((void *)tuple, 0, _tuple_desc->num_null_bytes()); + memset((void*)tuple, 0, _tuple_desc->num_null_bytes()); const TPrivilegeStatus& priv_status = _priv_result.privileges[_priv_index]; // grantee { Status status = 
fill_one_col(&priv_status.grantee, pool, - tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset())); + tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset())); if (!status.ok()) { return status; } @@ -66,7 +64,7 @@ Status SchemaUserPrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { { std::string definer = "def"; Status status = fill_one_col(&definer, pool, - tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset())); + tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset())); if (!status.ok()) { return status; } @@ -74,7 +72,7 @@ Status SchemaUserPrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { // privilege type { Status status = fill_one_col(&priv_status.privilege_type, pool, - tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset())); + tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset())); if (!status.ok()) { return status; } @@ -82,7 +80,7 @@ Status SchemaUserPrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { // is grantable { Status status = fill_one_col(&priv_status.is_grantable, pool, - tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset())); + tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset())); if (!status.ok()) { return status; } @@ -91,14 +89,14 @@ Status SchemaUserPrivilegesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { return Status::OK(); } -Status SchemaUserPrivilegesScanner::fill_one_col(const std::string* src, - MemPool *pool, void *slot) { +Status SchemaUserPrivilegesScanner::fill_one_col(const std::string* src, MemPool* pool, + void* slot) { if (NULL == slot || NULL == pool || NULL == src) { return Status::InternalError("input pointer is NULL."); } StringValue* str_slot = reinterpret_cast(slot); str_slot->len = src->length(); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memcpy failed."); } @@ -123,8 +121,8 @@ Status 
SchemaUserPrivilegesScanner::get_new_table() { } if (NULL != _param->ip && 0 != _param->port) { - RETURN_IF_ERROR(SchemaHelper::list_user_privilege_status(*(_param->ip), - _param->port, table_params, &_priv_result)); + RETURN_IF_ERROR(SchemaHelper::list_user_privilege_status(*(_param->ip), _param->port, + table_params, &_priv_result)); } else { return Status::InternalError("IP or port doesn't exists"); } @@ -132,7 +130,7 @@ Status SchemaUserPrivilegesScanner::get_new_table() { return Status::OK(); } -Status SchemaUserPrivilegesScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eos) { +Status SchemaUserPrivilegesScanner::get_next_row(Tuple* tuple, MemPool* pool, bool* eos) { if (!_is_init) { return Status::InternalError("Used before initialized."); } @@ -147,4 +145,4 @@ Status SchemaUserPrivilegesScanner::get_next_row(Tuple *tuple, MemPool *pool, bo return fill_one_row(tuple, pool); } -} +} // namespace doris diff --git a/be/src/exec/schema_scanner/schema_user_privileges_scanner.h b/be/src/exec/schema_scanner/schema_user_privileges_scanner.h index aada2e7b69c782..724f5cba8dc7d3 100644 --- a/be/src/exec/schema_scanner/schema_user_privileges_scanner.h +++ b/be/src/exec/schema_scanner/schema_user_privileges_scanner.h @@ -28,19 +28,19 @@ class SchemaUserPrivilegesScanner : public SchemaScanner { SchemaUserPrivilegesScanner(); virtual ~SchemaUserPrivilegesScanner(); - virtual Status start(RuntimeState *state); - virtual Status get_next_row(Tuple *tuple, MemPool *pool, bool *eos); + virtual Status start(RuntimeState* state); + virtual Status get_next_row(Tuple* tuple, MemPool* pool, bool* eos); private: Status get_new_table(); - Status fill_one_row(Tuple *tuple, MemPool *pool); - Status fill_one_col(const std::string* src, MemPool *pool, void* slot); + Status fill_one_row(Tuple* tuple, MemPool* pool); + Status fill_one_col(const std::string* src, MemPool* pool, void* slot); int _priv_index; TListPrivilegesResult _priv_result; static SchemaScanner::ColumnDesc 
_s_tbls_columns[]; }; -} +} // namespace doris #endif diff --git a/be/src/exec/schema_scanner/schema_variables_scanner.cpp b/be/src/exec/schema_scanner/schema_variables_scanner.cpp index d117475c48b0f3..27763c33754949 100644 --- a/be/src/exec/schema_scanner/schema_variables_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_variables_scanner.cpp @@ -16,29 +16,28 @@ // under the License. #include "exec/schema_scanner/schema_variables_scanner.h" + +#include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" #include "runtime/runtime_state.h" -#include "exec/schema_scanner/schema_helper.h" +#include "runtime/string_value.h" namespace doris { SchemaScanner::ColumnDesc SchemaVariablesScanner::_s_vars_columns[] = { - // name, type, size - { "VARIABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - { "VARIABLE_VALUE", TYPE_VARCHAR, sizeof(StringValue), false}, + // name, type, size + {"VARIABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"VARIABLE_VALUE", TYPE_VARCHAR, sizeof(StringValue), false}, }; -SchemaVariablesScanner::SchemaVariablesScanner(TVarType::type type) : - SchemaScanner(_s_vars_columns, - sizeof(_s_vars_columns) / sizeof(SchemaScanner::ColumnDesc)), - _type(type) { -} +SchemaVariablesScanner::SchemaVariablesScanner(TVarType::type type) + : SchemaScanner(_s_vars_columns, + sizeof(_s_vars_columns) / sizeof(SchemaScanner::ColumnDesc)), + _type(type) {} -SchemaVariablesScanner::~SchemaVariablesScanner() { -} +SchemaVariablesScanner::~SchemaVariablesScanner() {} -Status SchemaVariablesScanner::start(RuntimeState *state) { +Status SchemaVariablesScanner::start(RuntimeState* state) { TShowVariableRequest var_params; // Use db to save type if (_param->db != nullptr) { @@ -51,10 +50,10 @@ Status SchemaVariablesScanner::start(RuntimeState *state) { var_params.__set_varType(_type); } var_params.__set_threadId(_param->thread_id); - + if (NULL != _param->ip && 0 != _param->port) { - 
RETURN_IF_ERROR(SchemaHelper::show_variables(*(_param->ip), - _param->port, var_params, &_var_result)); + RETURN_IF_ERROR(SchemaHelper::show_variables(*(_param->ip), _param->port, var_params, + &_var_result)); } else { return Status::InternalError("IP or port doesn't exists"); } @@ -62,13 +61,13 @@ Status SchemaVariablesScanner::start(RuntimeState *state) { return Status::OK(); } -Status SchemaVariablesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { +Status SchemaVariablesScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // variables names { - void *slot = tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset()); - StringValue *str_slot = reinterpret_cast(slot); + void* slot = tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset()); + StringValue* str_slot = reinterpret_cast(slot); int len = strlen(_begin->first.c_str()); - str_slot->ptr = (char *)pool->allocate(len + 1); + str_slot->ptr = (char*)pool->allocate(len + 1); if (NULL == str_slot->ptr) { return Status::InternalError("No Memory."); } @@ -77,10 +76,10 @@ Status SchemaVariablesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // value { - void *slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); - StringValue *str_slot = reinterpret_cast(slot); + void* slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); + StringValue* str_slot = reinterpret_cast(slot); int len = strlen(_begin->second.c_str()); - str_slot->ptr = (char *)pool->allocate(len + 1); + str_slot->ptr = (char*)pool->allocate(len + 1); if (NULL == str_slot->ptr) { return Status::InternalError("No Memory."); } @@ -91,7 +90,7 @@ Status SchemaVariablesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { return Status::OK(); } -Status SchemaVariablesScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eos) { +Status SchemaVariablesScanner::get_next_row(Tuple* tuple, MemPool* pool, bool* eos) { if (!_is_init) { return Status::InternalError("call this before initial."); } @@ -106,4 +105,4 @@ Status 
SchemaVariablesScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *e return fill_one_row(tuple, pool); } -} +} // namespace doris diff --git a/be/src/exec/schema_scanner/schema_variables_scanner.h b/be/src/exec/schema_scanner/schema_variables_scanner.h index 09fb423e7d16ed..2abe8385c4c96d 100644 --- a/be/src/exec/schema_scanner/schema_variables_scanner.h +++ b/be/src/exec/schema_scanner/schema_variables_scanner.h @@ -18,8 +18,9 @@ #ifndef DORIS_BE_SRC_QUERY_EXEC_SCHEMA_SCANNER_SCHEMA_VARIABLES_SCANNER_H #define DORIS_BE_SRC_QUERY_EXEC_SCHEMA_SCANNER_SCHEMA_VARIABLES_SCANNER_H -#include #include +#include + #include "exec/schema_scanner.h" #include "gen_cpp/FrontendService_types.h" @@ -30,16 +31,16 @@ class SchemaVariablesScanner : public SchemaScanner { SchemaVariablesScanner(TVarType::type type); virtual ~SchemaVariablesScanner(); - virtual Status start(RuntimeState *state); - virtual Status get_next_row(Tuple *tuple, MemPool *pool, bool *eos); + virtual Status start(RuntimeState* state); + virtual Status get_next_row(Tuple* tuple, MemPool* pool, bool* eos); private: struct VariableStruct { - const char *name; - const char *value; + const char* name; + const char* value; }; - Status fill_one_row(Tuple *tuple, MemPool *pool); + Status fill_one_row(Tuple* tuple, MemPool* pool); int _index; static SchemaScanner::ColumnDesc _s_vars_columns[]; @@ -49,5 +50,5 @@ class SchemaVariablesScanner : public SchemaScanner { std::map::iterator _begin; }; -} +} // namespace doris #endif diff --git a/be/src/exec/schema_scanner/schema_views_scanner.cpp b/be/src/exec/schema_scanner/schema_views_scanner.cpp index ac41a45bc7f05c..f62658414829dc 100644 --- a/be/src/exec/schema_scanner/schema_views_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_views_scanner.cpp @@ -15,40 +15,38 @@ // specific language governing permissions and limitations // under the License. 
-#include "exec/schema_scanner/schema_helper.h" #include "exec/schema_scanner/schema_views_scanner.h" + +#include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" #include "runtime/string_value.h" //#include "runtime/datetime_value.h" -namespace doris -{ +namespace doris { SchemaScanner::ColumnDesc SchemaViewsScanner::_s_tbls_columns[] = { - // name, type, size, is_null - { "TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, - { "TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, - { "TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - { "VIEW_DEFINITION", TYPE_VARCHAR, sizeof(StringValue), true}, - { "CHECK_OPTION", TYPE_VARCHAR, sizeof(StringValue), true}, - { "IS_UPDATABLE", TYPE_VARCHAR, sizeof(StringValue), true}, - { "DEFINER", TYPE_VARCHAR, sizeof(StringValue), true}, - { "SECURITY_TYPE", TYPE_VARCHAR, sizeof(StringValue), true}, - { "CHARACTER_SET_CLIENT", TYPE_VARCHAR, sizeof(StringValue), true}, - { "COLLATION_CONNECTION", TYPE_VARCHAR, sizeof(StringValue), true}, + // name, type, size, is_null + {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, + {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, + {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"VIEW_DEFINITION", TYPE_VARCHAR, sizeof(StringValue), true}, + {"CHECK_OPTION", TYPE_VARCHAR, sizeof(StringValue), true}, + {"IS_UPDATABLE", TYPE_VARCHAR, sizeof(StringValue), true}, + {"DEFINER", TYPE_VARCHAR, sizeof(StringValue), true}, + {"SECURITY_TYPE", TYPE_VARCHAR, sizeof(StringValue), true}, + {"CHARACTER_SET_CLIENT", TYPE_VARCHAR, sizeof(StringValue), true}, + {"COLLATION_CONNECTION", TYPE_VARCHAR, sizeof(StringValue), true}, }; SchemaViewsScanner::SchemaViewsScanner() : SchemaScanner(_s_tbls_columns, sizeof(_s_tbls_columns) / sizeof(SchemaScanner::ColumnDesc)), - _db_index(0), - _table_index(0) { -} + _db_index(0), + _table_index(0) {} -SchemaViewsScanner::~SchemaViewsScanner() { -} +SchemaViewsScanner::~SchemaViewsScanner() {} 
-Status SchemaViewsScanner::start(RuntimeState *state) { +Status SchemaViewsScanner::start(RuntimeState* state) { if (!_is_init) { return Status::InternalError("used before initialized."); } @@ -68,38 +66,36 @@ Status SchemaViewsScanner::start(RuntimeState *state) { } if (NULL != _param->ip && 0 != _param->port) { - RETURN_IF_ERROR(SchemaHelper::get_db_names(*(_param->ip), - _param->port, db_params, &_db_result)); + RETURN_IF_ERROR( + SchemaHelper::get_db_names(*(_param->ip), _param->port, db_params, &_db_result)); } else { return Status::InternalError("IP or port doesn't exists"); } return Status::OK(); } -Status SchemaViewsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { +Status SchemaViewsScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // set all bit to not null - memset((void *)tuple, 0, _tuple_desc->num_null_bytes()); + memset((void*)tuple, 0, _tuple_desc->num_null_bytes()); const TTableStatus& tbl_status = _table_result.tables[_table_index]; // catalog - { - tuple->set_null(_tuple_desc->slots()[0]->null_indicator_offset()); - } + { tuple->set_null(_tuple_desc->slots()[0]->null_indicator_offset()); } // schema { - void *slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); std::string db_name = SchemaHelper::extract_db_name(_db_result.dbs[_db_index - 1]); - str_slot->ptr = (char *)pool->allocate(db_name.size()); + str_slot->ptr = (char*)pool->allocate(db_name.size()); str_slot->len = db_name.size(); memcpy(str_slot->ptr, db_name.c_str(), str_slot->len); } // name { - void *slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); const std::string* src = &tbl_status.name; str_slot->len = src->length(); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = 
(char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memcpy failed."); } @@ -107,11 +103,11 @@ Status SchemaViewsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // definition { - void *slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); const std::string* ddl_sql = &tbl_status.ddl_sql; str_slot->len = ddl_sql->length(); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memcpy failed."); } @@ -119,12 +115,12 @@ Status SchemaViewsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // check_option { - void *slot = tuple->get_slot(_tuple_desc->slots()[4]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[4]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); // This is from views in mysql const std::string check_option = "NONE"; str_slot->len = check_option.length(); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memcpy failed."); } @@ -132,12 +128,12 @@ Status SchemaViewsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // is_updatable { - void *slot = tuple->get_slot(_tuple_desc->slots()[5]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[5]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); // This is from views in mysql const std::string is_updatable = "NO"; str_slot->len = is_updatable.length(); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memcpy failed."); } @@ -145,12 +141,12 @@ Status 
SchemaViewsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // definer { - void *slot = tuple->get_slot(_tuple_desc->slots()[6]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[6]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); // This is from views in mysql const std::string definer = "root@%"; str_slot->len = definer.length(); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memcpy failed."); } @@ -158,12 +154,12 @@ Status SchemaViewsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // security_type { - void *slot = tuple->get_slot(_tuple_desc->slots()[7]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[7]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); // This is from views in mysql const std::string security_type = "DEFINER"; str_slot->len = security_type.length(); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memcpy failed."); } @@ -171,21 +167,19 @@ Status SchemaViewsScanner::fill_one_row(Tuple *tuple, MemPool *pool) { } // character_set_client { - void *slot = tuple->get_slot(_tuple_desc->slots()[8]->tuple_offset()); + void* slot = tuple->get_slot(_tuple_desc->slots()[8]->tuple_offset()); StringValue* str_slot = reinterpret_cast(slot); // This is from views in mysql const std::string encoding = "utf8"; str_slot->len = encoding.length(); - str_slot->ptr = (char *)pool->allocate(str_slot->len); + str_slot->ptr = (char*)pool->allocate(str_slot->len); if (NULL == str_slot->ptr) { return Status::InternalError("Allocate memcpy failed."); } memcpy(str_slot->ptr, encoding.c_str(), str_slot->len); } // collation_connection - { - tuple->set_null(_tuple_desc->slots()[9]->null_indicator_offset()); - } + { 
tuple->set_null(_tuple_desc->slots()[9]->null_indicator_offset()); } _table_index++; return Status::OK(); } @@ -209,8 +203,8 @@ Status SchemaViewsScanner::get_new_table() { table_params.__set_type("VIEW"); if (NULL != _param->ip && 0 != _param->port) { - RETURN_IF_ERROR(SchemaHelper::list_table_status(*(_param->ip), - _param->port, table_params, &_table_result)); + RETURN_IF_ERROR(SchemaHelper::list_table_status(*(_param->ip), _param->port, table_params, + &_table_result)); } else { return Status::InternalError("IP or port doesn't exists"); } @@ -218,7 +212,7 @@ Status SchemaViewsScanner::get_new_table() { return Status::OK(); } -Status SchemaViewsScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eos) { +Status SchemaViewsScanner::get_next_row(Tuple* tuple, MemPool* pool, bool* eos) { if (!_is_init) { return Status::InternalError("Used before initialized."); } @@ -237,4 +231,4 @@ Status SchemaViewsScanner::get_next_row(Tuple *tuple, MemPool *pool, bool *eos) return fill_one_row(tuple, pool); } -} +} // namespace doris diff --git a/be/src/exec/schema_scanner/schema_views_scanner.h b/be/src/exec/schema_scanner/schema_views_scanner.h index e674d5227a53a1..c768103fb6b145 100644 --- a/be/src/exec/schema_scanner/schema_views_scanner.h +++ b/be/src/exec/schema_scanner/schema_views_scanner.h @@ -28,12 +28,12 @@ class SchemaViewsScanner : public SchemaScanner { SchemaViewsScanner(); virtual ~SchemaViewsScanner(); - virtual Status start(RuntimeState *state); - virtual Status get_next_row(Tuple *tuple, MemPool *pool, bool *eos); + virtual Status start(RuntimeState* state); + virtual Status get_next_row(Tuple* tuple, MemPool* pool, bool* eos); private: Status get_new_table(); - Status fill_one_row(Tuple *tuple, MemPool *pool); + Status fill_one_row(Tuple* tuple, MemPool* pool); int _db_index; int _table_index; @@ -42,6 +42,6 @@ class SchemaViewsScanner : public SchemaScanner { static SchemaScanner::ColumnDesc _s_tbls_columns[]; }; -} +} // namespace doris #endif diff 
--git a/be/src/exec/select_node.cpp b/be/src/exec/select_node.cpp index 015713f17e5e43..1c8585d4c33f23 100644 --- a/be/src/exec/select_node.cpp +++ b/be/src/exec/select_node.cpp @@ -16,26 +16,25 @@ // under the License. #include "exec/select_node.h" + #include "exprs/expr.h" #include "gen_cpp/PlanNodes_types.h" +#include "runtime/raw_value.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" -#include "runtime/raw_value.h" namespace doris { -SelectNode::SelectNode( - ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), - _child_row_batch(NULL), - _child_row_idx(0), - _child_eos(false) { -} +SelectNode::SelectNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), + _child_row_batch(NULL), + _child_row_idx(0), + _child_eos(false) {} Status SelectNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); _child_row_batch.reset( - new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); + new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); return Status::OK(); } @@ -75,8 +74,8 @@ Status SelectNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) } if (copy_rows(row_batch)) { - *eos = reached_limit() - || (_child_row_idx == _child_row_batch->num_rows() && _child_eos); + *eos = reached_limit() || + (_child_row_idx == _child_row_batch->num_rows() && _child_eos); if (*eos) { _child_row_batch->transfer_resource_ownership(row_batch); } @@ -139,4 +138,4 @@ Status SelectNode::close(RuntimeState* state) { return ExecNode::close(state); } -} +} // namespace doris diff --git a/be/src/exec/select_node.h b/be/src/exec/select_node.h index b707bbb7c63c01..1d6c4682530939 100644 --- a/be/src/exec/select_node.h +++ b/be/src/exec/select_node.h @@ -55,6 +55,6 @@ class SelectNode : public ExecNode { bool copy_rows(RowBatch* output_batch); }; -} +} // namespace doris #endif diff --git 
a/be/src/exec/set_operation_node.h b/be/src/exec/set_operation_node.h index 52d178cdda6b84..8ec585b324e807 100644 --- a/be/src/exec/set_operation_node.h +++ b/be/src/exec/set_operation_node.h @@ -43,7 +43,6 @@ class SetOperationNode : public ExecNode { virtual Status close(RuntimeState* state); virtual Status open(RuntimeState* state); - protected: std::string get_row_output_string(TupleRow* row, const RowDescriptor& row_desc); void create_output_row(TupleRow* input_row, RowBatch* row_batch, uint8_t* tuple_buf); diff --git a/be/src/exec/sort_exec_exprs.cpp b/be/src/exec/sort_exec_exprs.cpp index 3c3c52e11bc143..35f8e63685e098 100644 --- a/be/src/exec/sort_exec_exprs.cpp +++ b/be/src/exec/sort_exec_exprs.cpp @@ -21,20 +21,17 @@ namespace doris { Status SortExecExprs::init(const TSortInfo& sort_info, ObjectPool* pool) { return init(sort_info.ordering_exprs, - sort_info.__isset.sort_tuple_slot_exprs ? &sort_info.sort_tuple_slot_exprs : NULL, - pool); + sort_info.__isset.sort_tuple_slot_exprs ? 
&sort_info.sort_tuple_slot_exprs : NULL, + pool); } -Status SortExecExprs::init( - const std::vector& ordering_exprs, - const std::vector* sort_tuple_slot_exprs, - ObjectPool* pool) { - RETURN_IF_ERROR(Expr::create_expr_trees( - pool, ordering_exprs, &_lhs_ordering_expr_ctxs)); +Status SortExecExprs::init(const std::vector& ordering_exprs, + const std::vector* sort_tuple_slot_exprs, ObjectPool* pool) { + RETURN_IF_ERROR(Expr::create_expr_trees(pool, ordering_exprs, &_lhs_ordering_expr_ctxs)); if (sort_tuple_slot_exprs != NULL) { _materialize_tuple = true; - RETURN_IF_ERROR(Expr::create_expr_trees( - pool, *sort_tuple_slot_exprs, &_sort_tuple_slot_expr_ctxs)); + RETURN_IF_ERROR( + Expr::create_expr_trees(pool, *sort_tuple_slot_exprs, &_sort_tuple_slot_expr_ctxs)); } else { _materialize_tuple = false; } @@ -42,7 +39,7 @@ Status SortExecExprs::init( } Status SortExecExprs::init(const std::vector& lhs_ordering_expr_ctxs, - const std::vector& rhs_ordering_expr_ctxs) { + const std::vector& rhs_ordering_expr_ctxs) { _lhs_ordering_expr_ctxs = lhs_ordering_expr_ctxs; _rhs_ordering_expr_ctxs = rhs_ordering_expr_ctxs; return Status::OK(); @@ -52,11 +49,11 @@ Status SortExecExprs::prepare(RuntimeState* state, const RowDescriptor& child_ro const RowDescriptor& output_row_desc, const std::shared_ptr& expr_mem_tracker) { if (_materialize_tuple) { - RETURN_IF_ERROR(Expr::prepare( - _sort_tuple_slot_expr_ctxs, state, child_row_desc, expr_mem_tracker)); + RETURN_IF_ERROR( + Expr::prepare(_sort_tuple_slot_expr_ctxs, state, child_row_desc, expr_mem_tracker)); } - RETURN_IF_ERROR(Expr::prepare( - _lhs_ordering_expr_ctxs, state, output_row_desc, expr_mem_tracker)); + RETURN_IF_ERROR( + Expr::prepare(_lhs_ordering_expr_ctxs, state, output_row_desc, expr_mem_tracker)); return Status::OK(); } @@ -65,8 +62,8 @@ Status SortExecExprs::open(RuntimeState* state) { RETURN_IF_ERROR(Expr::open(_sort_tuple_slot_expr_ctxs, state)); } RETURN_IF_ERROR(Expr::open(_lhs_ordering_expr_ctxs, state)); - 
RETURN_IF_ERROR(Expr::clone_if_not_exists( - _lhs_ordering_expr_ctxs, state, &_rhs_ordering_expr_ctxs)); + RETURN_IF_ERROR( + Expr::clone_if_not_exists(_lhs_ordering_expr_ctxs, state, &_rhs_ordering_expr_ctxs)); return Status::OK(); } diff --git a/be/src/exec/sort_exec_exprs.h b/be/src/exec/sort_exec_exprs.h index 91cb03ce0ffb41..03a6ea188c0022 100644 --- a/be/src/exec/sort_exec_exprs.h +++ b/be/src/exec/sort_exec_exprs.h @@ -84,9 +84,9 @@ class SortExecExprs { // Prepare(), Open(), and Close() on input ExprContexts (instead of calling the // analogous functions in this class). Used for testing. Status init(const std::vector& lhs_ordering_expr_ctxs, - const std::vector& rhs_ordering_expr_ctxs); + const std::vector& rhs_ordering_expr_ctxs); }; -} +} // namespace doris #endif diff --git a/be/src/exec/spill_sort_node.cc b/be/src/exec/spill_sort_node.cc index 4463fa8d3df76c..3003058d613124 100644 --- a/be/src/exec/spill_sort_node.cc +++ b/be/src/exec/spill_sort_node.cc @@ -16,6 +16,7 @@ // under the License. #include "exec/spill_sort_node.h" + #include "exec/sort_exec_exprs.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" @@ -24,15 +25,13 @@ namespace doris { -SpillSortNode::SpillSortNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : - ExecNode(pool, tnode, descs), - _offset(tnode.sort_node.__isset.offset ? tnode.sort_node.offset : 0), - _sorter(NULL), - _num_rows_skipped(0) { -} +SpillSortNode::SpillSortNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), + _offset(tnode.sort_node.__isset.offset ? 
tnode.sort_node.offset : 0), + _sorter(NULL), + _num_rows_skipped(0) {} -SpillSortNode::~SpillSortNode() { -} +SpillSortNode::~SpillSortNode() {} Status SpillSortNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(ExecNode::init(tnode, state)); @@ -45,8 +44,8 @@ Status SpillSortNode::init(const TPlanNode& tnode, RuntimeState* state) { Status SpillSortNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::prepare(state)); - RETURN_IF_ERROR(_sort_exec_exprs.prepare( - state, child(0)->row_desc(), _row_descriptor, expr_mem_tracker())); + RETURN_IF_ERROR(_sort_exec_exprs.prepare(state, child(0)->row_desc(), _row_descriptor, + expr_mem_tracker())); // AddExprCtxsToFree(_sort_exec_exprs); return Status::OK(); } @@ -64,9 +63,8 @@ Status SpillSortNode::open(RuntimeState* state) { if (_sorter.get() == NULL) { TupleRowComparator less_than(_sort_exec_exprs, _is_asc_order, _nulls_first); // Create and initialize the external sort impl object - _sorter.reset(new SpillSorter( - less_than, _sort_exec_exprs.sort_tuple_slot_expr_ctxs(), - &_row_descriptor, mem_tracker(), runtime_profile(), state)); + _sorter.reset(new SpillSorter(less_than, _sort_exec_exprs.sort_tuple_slot_expr_ctxs(), + &_row_descriptor, mem_tracker(), runtime_profile(), state)); RETURN_IF_ERROR(_sorter->init()); } @@ -145,12 +143,10 @@ Status SpillSortNode::close(RuntimeState* state) { void SpillSortNode::debug_string(int indentation_level, stringstream* out) const { *out << string(indentation_level * 2, ' '); - *out << "SpillSortNode(" - << Expr::debug_string(_sort_exec_exprs.lhs_ordering_expr_ctxs()); + *out << "SpillSortNode(" << Expr::debug_string(_sort_exec_exprs.lhs_ordering_expr_ctxs()); for (int i = 0; i < _is_asc_order.size(); ++i) { - *out << (i > 0 ? " " : "") - << (_is_asc_order[i] ? "asc" : "desc") - << " nulls " << (_nulls_first[i] ? "first" : "last"); + *out << (i > 0 ? " " : "") << (_is_asc_order[i] ? 
"asc" : "desc") << " nulls " + << (_nulls_first[i] ? "first" : "last"); } ExecNode::debug_string(indentation_level, out); *out << ")"; diff --git a/be/src/exec/spill_sort_node.h b/be/src/exec/spill_sort_node.h index b3b9157c6d3e02..c33c70e8eccf70 100644 --- a/be/src/exec/spill_sort_node.h +++ b/be/src/exec/spill_sort_node.h @@ -20,8 +20,8 @@ #include "exec/exec_node.h" #include "exec/sort_exec_exprs.h" -#include "runtime/spill_sorter.h" #include "runtime/buffered_block_mgr2.h" +#include "runtime/spill_sorter.h" namespace doris { diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index 54628e47557ab6..d734a4ca5c9afa 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -61,8 +61,8 @@ Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) { } std::sort(_indexes.begin(), _indexes.end(), - [] (const OlapTableIndexSchema* lhs, const OlapTableIndexSchema* rhs) { - return lhs->index_id < rhs->index_id; + [](const OlapTableIndexSchema* lhs, const OlapTableIndexSchema* rhs) { + return lhs->index_id < rhs->index_id; }); return Status::OK(); } @@ -95,8 +95,8 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) { } std::sort(_indexes.begin(), _indexes.end(), - [] (const OlapTableIndexSchema* lhs, const OlapTableIndexSchema* rhs) { - return lhs->index_id < rhs->index_id; + [](const OlapTableIndexSchema* lhs, const OlapTableIndexSchema* rhs) { + return lhs->index_id < rhs->index_id; }); return Status::OK(); } @@ -122,11 +122,9 @@ std::string OlapTableSchemaParam::debug_string() const { std::string OlapTablePartition::debug_string(TupleDescriptor* tuple_desc) const { std::stringstream ss; - ss << "(id=" << id - << ",start_key=" << Tuple::to_string(start_key, *tuple_desc) - << ",end_key=" << Tuple::to_string(end_key, *tuple_desc) - << ",num_buckets=" << num_buckets - << ",indexes=["; + ss << "(id=" << id << ",start_key=" << Tuple::to_string(start_key, *tuple_desc) + << ",end_key=" << 
Tuple::to_string(end_key, *tuple_desc) << ",num_buckets=" << num_buckets + << ",indexes=["; int idx = 0; for (auto& index : indexes) { if (idx++ > 0) { @@ -146,16 +144,14 @@ std::string OlapTablePartition::debug_string(TupleDescriptor* tuple_desc) const return ss.str(); } -OlapTablePartitionParam::OlapTablePartitionParam( - std::shared_ptr schema, - const TOlapTablePartitionParam& t_param) - : _schema(schema), _t_param(t_param), - _mem_tracker(MemTracker::CreateTracker(-1, "OlapTablePartitionParam")), - _mem_pool(new MemPool(_mem_tracker.get())) { -} +OlapTablePartitionParam::OlapTablePartitionParam(std::shared_ptr schema, + const TOlapTablePartitionParam& t_param) + : _schema(schema), + _t_param(t_param), + _mem_tracker(MemTracker::CreateTracker(-1, "OlapTablePartitionParam")), + _mem_pool(new MemPool(_mem_tracker.get())) {} -OlapTablePartitionParam::~OlapTablePartitionParam() { -} +OlapTablePartitionParam::~OlapTablePartitionParam() {} Status OlapTablePartitionParam::init() { std::map slots_map; @@ -182,8 +178,7 @@ Status OlapTablePartitionParam::init() { } } - _partitions_map.reset( - new std::map( + _partitions_map.reset(new std::map( OlapTablePartKeyComparator(_partition_slot_descs))); if (_t_param.__isset.distributed_columns) { for (auto& col : _t_param.distributed_columns) { @@ -204,14 +199,14 @@ Status OlapTablePartitionParam::init() { if (t_part.__isset.start_key) { // deprecated, use start_keys instead - std::vector exprs = { t_part.start_key }; + std::vector exprs = {t_part.start_key}; RETURN_IF_ERROR(_create_partition_keys(exprs, &part->start_key)); } else if (t_part.__isset.start_keys) { RETURN_IF_ERROR(_create_partition_keys(t_part.start_keys, &part->start_key)); } if (t_part.__isset.end_key) { // deprecated, use end_keys instead - std::vector exprs = { t_part.end_key }; + std::vector exprs = {t_part.end_key}; RETURN_IF_ERROR(_create_partition_keys(exprs, &part->end_key)); } else if (t_part.__isset.end_keys) { 
RETURN_IF_ERROR(_create_partition_keys(t_part.end_keys, &part->end_key)); @@ -222,22 +217,22 @@ Status OlapTablePartitionParam::init() { if (t_part.indexes.size() != num_indexes) { std::stringstream ss; ss << "number of partition's index is not equal with schema's" - << ", num_part_indexes=" << t_part.indexes.size() - << ", num_schema_indexes=" << num_indexes; + << ", num_part_indexes=" << t_part.indexes.size() + << ", num_schema_indexes=" << num_indexes; return Status::InternalError(ss.str()); } part->indexes = t_part.indexes; std::sort(part->indexes.begin(), part->indexes.end(), - [] (const OlapTableIndexTablets& lhs, const OlapTableIndexTablets& rhs) { - return lhs.index_id < rhs.index_id; + [](const OlapTableIndexTablets& lhs, const OlapTableIndexTablets& rhs) { + return lhs.index_id < rhs.index_id; }); // check index for (int j = 0; j < num_indexes; ++j) { if (part->indexes[j].index_id != _schema->indexes()[j]->index_id) { std::stringstream ss; ss << "partition's index is not equal with schema's" - << ", part_index=" << part->indexes[j].index_id - << ", schema_index=" << _schema->indexes()[j]->index_id; + << ", part_index=" << part->indexes[j].index_id + << ", schema_index=" << _schema->indexes()[j]->index_id; return Status::InternalError(ss.str()); } } @@ -247,8 +242,7 @@ Status OlapTablePartitionParam::init() { return Status::OK(); } -bool OlapTablePartitionParam::find_tablet(Tuple* tuple, - const OlapTablePartition** partition, +bool OlapTablePartitionParam::find_tablet(Tuple* tuple, const OlapTablePartition** partition, uint32_t* dist_hashes) const { auto it = _partitions_map->upper_bound(tuple); if (it == _partitions_map->end()) { @@ -262,7 +256,8 @@ bool OlapTablePartitionParam::find_tablet(Tuple* tuple, return false; } -Status OlapTablePartitionParam::_create_partition_keys(const std::vector& t_exprs, Tuple** part_key) { +Status OlapTablePartitionParam::_create_partition_keys(const std::vector& t_exprs, + Tuple** part_key) { Tuple* tuple = 
(Tuple*)_mem_pool->allocate(_schema->tuple_desc()->byte_size()); for (int i = 0; i < t_exprs.size(); i++) { const TExprNode& t_expr = t_exprs[i]; @@ -272,13 +267,14 @@ Status OlapTablePartitionParam::_create_partition_keys(const std::vectorget_slot(slot_desc->tuple_offset()); tuple->set_not_null(slot_desc->null_indicator_offset()); switch (t_expr.node_type) { case TExprNodeType::DATE_LITERAL: { if (!reinterpret_cast(slot)->from_date_str( - t_expr.date_literal.value.c_str(), t_expr.date_literal.value.size())) { + t_expr.date_literal.value.c_str(), t_expr.date_literal.value.size())) { std::stringstream ss; ss << "invalid date literal in partition column, date=" << t_expr.date_literal; return Status::InternalError(ss.str()); @@ -307,9 +303,9 @@ Status OlapTablePartitionParam::_create_partition_key(const TExprNode& t_expr, T } case TExprNodeType::LARGE_INT_LITERAL: { StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; - __int128 val = StringParser::string_to_int<__int128>( - t_expr.large_int_literal.value.c_str(), t_expr.large_int_literal.value.size(), - &parse_result); + __int128 val = StringParser::string_to_int<__int128>(t_expr.large_int_literal.value.c_str(), + t_expr.large_int_literal.value.size(), + &parse_result); if (parse_result != StringParser::PARSE_SUCCESS) { val = MAX_INT128; } @@ -358,4 +354,4 @@ uint32_t OlapTablePartitionParam::_compute_dist_hash(Tuple* key) const { return hash_val; } -} +} // namespace doris diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h index dece0bf830504a..194fd9079fbd84 100644 --- a/be/src/exec/tablet_info.h +++ b/be/src/exec/tablet_info.h @@ -28,8 +28,8 @@ #include "gen_cpp/Descriptors_types.h" #include "gen_cpp/descriptors.pb.h" #include "runtime/descriptors.h" -#include "runtime/tuple.h" #include "runtime/raw_value.h" +#include "runtime/tuple.h" namespace doris { @@ -47,8 +47,8 @@ struct OlapTableIndexSchema { class OlapTableSchemaParam { public: - OlapTableSchemaParam() { } - 
~OlapTableSchemaParam() noexcept { } + OlapTableSchemaParam() {} + ~OlapTableSchemaParam() noexcept {} Status init(const TOlapTableSchemaParam& tschema); Status init(const POlapTableSchemaParam& pschema); @@ -58,9 +58,7 @@ class OlapTableSchemaParam { int64_t version() const { return _version; } TupleDescriptor* tuple_desc() const { return _tuple_desc; } - const std::vector& indexes() const { - return _indexes; - } + const std::vector& indexes() const { return _indexes; } void to_protobuf(POlapTableSchemaParam* pschema) const; @@ -104,8 +102,8 @@ struct OlapTablePartition { class OlapTablePartKeyComparator { public: - OlapTablePartKeyComparator(const std::vector& slot_descs) : - _slot_descs(slot_descs) { } + OlapTablePartKeyComparator(const std::vector& slot_descs) + : _slot_descs(slot_descs) {} // return true if lhs < rhs // 'nullptr' is max value, but 'null' is min value bool operator()(const Tuple* lhs, const Tuple* rhs) const { @@ -118,18 +116,25 @@ class OlapTablePartKeyComparator { for (auto slot_desc : _slot_descs) { bool lhs_null = lhs->is_null(slot_desc->null_indicator_offset()); bool rhs_null = rhs->is_null(slot_desc->null_indicator_offset()); - if (lhs_null && rhs_null) { continue; } - if (lhs_null || rhs_null) { return !rhs_null; } + if (lhs_null && rhs_null) { + continue; + } + if (lhs_null || rhs_null) { + return !rhs_null; + } auto lhs_value = lhs->get_slot(slot_desc->tuple_offset()); auto rhs_value = rhs->get_slot(slot_desc->tuple_offset()); int res = RawValue::compare(lhs_value, rhs_value, slot_desc->type()); - if (res != 0) { return res < 0; } + if (res != 0) { + return res < 0; + } } // equal, return false return false; } + private: std::vector _slot_descs; }; @@ -137,9 +142,8 @@ class OlapTablePartKeyComparator { // store an olap table's tablet information class OlapTablePartitionParam { public: - OlapTablePartitionParam( - std::shared_ptr schema, - const TOlapTablePartitionParam& param); + OlapTablePartitionParam(std::shared_ptr schema, + const 
TOlapTablePartitionParam& param); ~OlapTablePartitionParam(); Status init(); @@ -149,14 +153,12 @@ class OlapTablePartitionParam { int64_t version() const { return _t_param.version; } // return true if we found this tuple in partition - bool find_tablet(Tuple* tuple, - const OlapTablePartition** partitions, + bool find_tablet(Tuple* tuple, const OlapTablePartition** partitions, uint32_t* dist_hash) const; - const std::vector& get_partitions() const { - return _partitions; - } + const std::vector& get_partitions() const { return _partitions; } std::string debug_string() const; + private: Status _create_partition_keys(const std::vector& t_exprs, Tuple** part_key); @@ -173,6 +175,7 @@ class OlapTablePartitionParam { OlapTablePartKeyComparator comparator(_partition_slot_descs); return !comparator(key, part->start_key); } + private: // this partition only valid in this schema std::shared_ptr _schema; @@ -185,8 +188,8 @@ class OlapTablePartitionParam { std::shared_ptr _mem_tracker; std::unique_ptr _mem_pool; std::vector _partitions; - std::unique_ptr< - std::map> _partitions_map; + std::unique_ptr> + _partitions_map; }; using TabletLocation = TTabletLocation; @@ -214,6 +217,7 @@ class OlapTableLocationParam { } return nullptr; } + private: TOlapTableLocationParam _t_param; @@ -227,11 +231,10 @@ struct NodeInfo { int32_t brpc_port; NodeInfo(const TNodeInfo& tnode) - : id(tnode.id), - option(tnode.option), - host(tnode.host), - brpc_port(tnode.async_internal_port) { - } + : id(tnode.id), + option(tnode.option), + host(tnode.host), + brpc_port(tnode.async_internal_port) {} }; class DorisNodesInfo { @@ -248,8 +251,9 @@ class DorisNodesInfo { } return nullptr; } + private: std::unordered_map _nodes; }; -} +} // namespace doris diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp index 5fa6d17e7fabfb..a080b44c27334c 100644 --- a/be/src/exec/tablet_sink.cpp +++ b/be/src/exec/tablet_sink.cpp @@ -20,12 +20,11 @@ #include #include "exprs/expr.h" +#include 
"olap/hll.h" #include "runtime/exec_env.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" #include "runtime/tuple_row.h" - -#include "olap/hll.h" #include "service/brpc.h" #include "util/brpc_stub_cache.h" #include "util/monotime.h" @@ -167,7 +166,8 @@ Status NodeChannel::open_wait() { std::lock_guard l(_cancel_msg_lock); if (_cancel_msg == "") { std::stringstream ss; - ss << "node=" << node_info()->host << ":" << node_info()->brpc_port << ", errmsg=" << status.get_error_msg(); + ss << "node=" << node_info()->host << ":" << node_info()->brpc_port + << ", errmsg=" << status.get_error_msg(); _cancel_msg = ss.str(); } } @@ -427,7 +427,9 @@ bool IndexChannel::has_intolerable_failure() { OlapTableSink::OlapTableSink(ObjectPool* pool, const RowDescriptor& row_desc, const std::vector& texprs, Status* status) - : _pool(pool), _input_row_desc(row_desc), _filter_bitmap(1024), + : _pool(pool), + _input_row_desc(row_desc), + _filter_bitmap(1024), _stop_background_threads_latch(1) { if (!texprs.empty()) { *status = Expr::create_expr_trees(_pool, texprs, &_output_expr_ctxs); @@ -609,9 +611,9 @@ Status OlapTableSink::open(RuntimeState* state) { } } - RETURN_IF_ERROR(Thread::create("OlapTableSink", "send_batch_process", - [this]() { this->_send_batch_process(); }, - &_sender_thread)); + RETURN_IF_ERROR(Thread::create( + "OlapTableSink", "send_batch_process", [this]() { this->_send_batch_process(); }, + &_sender_thread)); return Status::OK(); } @@ -932,7 +934,8 @@ void OlapTableSink::_send_batch_process() { "consumer thread exit."; return; } - } while (!_stop_background_threads_latch.wait_for(MonoDelta::FromMilliseconds(config::olap_table_sink_send_interval_ms))); + } while (!_stop_background_threads_latch.wait_for( + MonoDelta::FromMilliseconds(config::olap_table_sink_send_interval_ms))); } } // namespace stream_load diff --git a/be/src/exec/tablet_sink.h b/be/src/exec/tablet_sink.h index 09d7d3cf44f4ab..6a6e32e74812bd 100644 --- a/be/src/exec/tablet_sink.h +++ 
b/be/src/exec/tablet_sink.h @@ -32,11 +32,11 @@ #include "gen_cpp/Types_types.h" #include "gen_cpp/internal_service.pb.h" #include "util/bitmap.h" -#include "util/ref_count_closure.h" -#include "util/thrift_util.h" #include "util/countdown_latch.h" +#include "util/ref_count_closure.h" #include "util/spinlock.h" #include "util/thread.h" +#include "util/thrift_util.h" namespace doris { diff --git a/be/src/exec/text_converter.cpp b/be/src/exec/text_converter.cpp index 9ac2471c6ea871..7801f662085bf5 100644 --- a/be/src/exec/text_converter.cpp +++ b/be/src/exec/text_converter.cpp @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#include "text_converter.h" + #include #include "runtime/descriptors.h" @@ -22,14 +24,11 @@ #include "runtime/runtime_state.h" #include "runtime/string_value.h" #include "runtime/tuple.h" -#include "text_converter.h" #include "util/string_parser.hpp" namespace doris { -TextConverter::TextConverter(char escape_char) - : _escape_char(escape_char) { -} +TextConverter::TextConverter(char escape_char) : _escape_char(escape_char) {} void TextConverter::unescape_string(StringValue* value, MemPool* pool) { char* new_data = reinterpret_cast(pool->allocate(value->len)); @@ -60,4 +59,4 @@ void TextConverter::unescape_string(const char* src, char* dest, size_t* len) { *len = dest_ptr - dest_start; } -} +} // namespace doris diff --git a/be/src/exec/text_converter.h b/be/src/exec/text_converter.h index 3f8227969e300f..4d97b7c62bf8ee 100644 --- a/be/src/exec/text_converter.h +++ b/be/src/exec/text_converter.h @@ -45,8 +45,8 @@ class TextConverter { // 'pool' is unused. // Unsuccessful conversions are turned into NULLs. // Returns true if the value was written successfully. 
- bool write_slot(const SlotDescriptor* slot_desc, Tuple* tuple, - const char* data, int len, bool copy_string, bool need_escape, MemPool* pool); + bool write_slot(const SlotDescriptor* slot_desc, Tuple* tuple, const char* data, int len, + bool copy_string, bool need_escape, MemPool* pool); // Removes escape characters from len characters of the null-terminated string src, // and copies the unescaped string into dest, changing *len to the unescaped length. @@ -61,6 +61,6 @@ class TextConverter { char _escape_char; }; -} +} // namespace doris #endif diff --git a/be/src/exec/topn_node.cpp b/be/src/exec/topn_node.cpp index 29fd60e1c43e83..837d8bb7a14c63 100644 --- a/be/src/exec/topn_node.cpp +++ b/be/src/exec/topn_node.cpp @@ -17,6 +17,8 @@ #include "exec/topn_node.h" +#include + #include #include "exprs/expr.h" @@ -31,22 +33,19 @@ #include "runtime/tuple_row.h" #include "util/runtime_profile.h" #include "util/tuple_row_compare.h" -#include namespace doris { -TopNNode::TopNNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : - ExecNode(pool, tnode, descs), - _offset(tnode.sort_node.__isset.offset ? tnode.sort_node.offset : 0), - _materialized_tuple_desc(NULL), - _tuple_row_less_than(NULL), - _tuple_pool(NULL), - _num_rows_skipped(0), - _priority_queue(NULL) { -} +TopNNode::TopNNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), + _offset(tnode.sort_node.__isset.offset ? 
tnode.sort_node.offset : 0), + _materialized_tuple_desc(NULL), + _tuple_row_less_than(NULL), + _tuple_pool(NULL), + _num_rows_skipped(0), + _priority_queue(NULL) {} -TopNNode::~TopNNode() { -} +TopNNode::~TopNNode() {} Status TopNNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(ExecNode::init(tnode, state)); @@ -63,15 +62,15 @@ Status TopNNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::prepare(state)); _tuple_pool.reset(new MemPool(mem_tracker().get())); - RETURN_IF_ERROR(_sort_exec_exprs.prepare( - state, child(0)->row_desc(), _row_descriptor, expr_mem_tracker())); + RETURN_IF_ERROR(_sort_exec_exprs.prepare(state, child(0)->row_desc(), _row_descriptor, + expr_mem_tracker())); // AddExprCtxsToFree(_sort_exec_exprs); _tuple_row_less_than.reset( new TupleRowComparator(_sort_exec_exprs, _is_asc_order, _nulls_first)); - _abort_on_default_limit_exceeded = _abort_on_default_limit_exceeded && - state->abort_on_default_limit_exceeded(); + _abort_on_default_limit_exceeded = + _abort_on_default_limit_exceeded && state->abort_on_default_limit_exceeded(); _materialized_tuple_desc = _row_descriptor.tuple_descriptors()[0]; return Status::OK(); } @@ -88,13 +87,13 @@ Status TopNNode::open(RuntimeState* state) { // regression. Why?? if (_priority_queue.get() == NULL) { _priority_queue.reset( - new std::priority_queue, TupleRowComparator>( - *_tuple_row_less_than)); + new std::priority_queue, TupleRowComparator>( + *_tuple_row_less_than)); } // Allocate memory for a temporary tuple. - _tmp_tuple = reinterpret_cast( - _tuple_pool->allocate(_materialized_tuple_desc->byte_size())); + _tmp_tuple = + reinterpret_cast(_tuple_pool->allocate(_materialized_tuple_desc->byte_size())); RETURN_IF_ERROR(child(0)->open(state)); // Limit of 0, no need to fetch anything from children. 
@@ -187,12 +186,14 @@ void TopNNode::insert_tuple_row(TupleRow* input_row) { insert_tuple = reinterpret_cast( _tuple_pool->allocate(_materialized_tuple_desc->byte_size())); insert_tuple->materialize_exprs(input_row, *_materialized_tuple_desc, - _sort_exec_exprs.sort_tuple_slot_expr_ctxs(), _tuple_pool.get(), NULL, NULL); + _sort_exec_exprs.sort_tuple_slot_expr_ctxs(), + _tuple_pool.get(), NULL, NULL); } else { DCHECK(!_priority_queue->empty()); Tuple* top_tuple = _priority_queue->top(); _tmp_tuple->materialize_exprs(input_row, *_materialized_tuple_desc, - _sort_exec_exprs.sort_tuple_slot_expr_ctxs(), NULL, NULL, NULL); + _sort_exec_exprs.sort_tuple_slot_expr_ctxs(), NULL, + NULL, NULL); if ((*_tuple_row_less_than)(_tmp_tuple, top_tuple)) { // TODO: DeepCopy will allocate new buffers for the string data. This needs @@ -226,14 +227,12 @@ void TopNNode::prepare_for_output() { void TopNNode::debug_string(int indentation_level, std::stringstream* out) const { *out << std::string(indentation_level * 2, ' '); *out << "TopNNode(" - // << " ordering_exprs=" << Expr::debug_string(_lhs_ordering_expr_ctxs) - << Expr::debug_string(_sort_exec_exprs.lhs_ordering_expr_ctxs()) - << " sort_order=["; + // << " ordering_exprs=" << Expr::debug_string(_lhs_ordering_expr_ctxs) + << Expr::debug_string(_sort_exec_exprs.lhs_ordering_expr_ctxs()) << " sort_order=["; for (int i = 0; i < _is_asc_order.size(); ++i) { - *out << (i > 0 ? " " : "") - << (_is_asc_order[i] ? "asc" : "desc") - << " nulls " << (_nulls_first[i] ? "first" : "last"); + *out << (i > 0 ? " " : "") << (_is_asc_order[i] ? "asc" : "desc") << " nulls " + << (_nulls_first[i] ? 
"first" : "last"); } *out << "]"; @@ -241,8 +240,7 @@ void TopNNode::debug_string(int indentation_level, std::stringstream* out) const *out << ")"; } -void TopNNode::push_down_predicate( - RuntimeState *state, std::list *expr_ctxs) { +void TopNNode::push_down_predicate(RuntimeState* state, std::list* expr_ctxs) { std::list::iterator iter = expr_ctxs->begin(); while (iter != expr_ctxs->end()) { if ((*iter)->root()->is_bound(&_tuple_ids)) { @@ -258,4 +256,4 @@ void TopNNode::push_down_predicate( } } -} +} // namespace doris diff --git a/be/src/exec/topn_node.h b/be/src/exec/topn_node.h index cf37f974481325..2077da1be06828 100644 --- a/be/src/exec/topn_node.h +++ b/be/src/exec/topn_node.h @@ -46,8 +46,7 @@ class TopNNode : public ExecNode { virtual Status open(RuntimeState* state); virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); virtual Status close(RuntimeState* state); - virtual void push_down_predicate( - RuntimeState *state, std::list *expr_ctxs); + virtual void push_down_predicate(RuntimeState* state, std::list* expr_ctxs); protected: virtual void debug_string(int indentation_level, std::stringstream* out) const; @@ -106,15 +105,13 @@ class TopNNode : public ExecNode { // priority queue doesn't support a max size, so to get that functionality, the order // of the queue is the opposite of what the ORDER BY clause specifies, such that the top // of the queue is the last sorted element. 
- boost::scoped_ptr< - std::priority_queue< - Tuple*, std::vector, TupleRowComparator>> _priority_queue; + boost::scoped_ptr, TupleRowComparator>> + _priority_queue; // END: Members that must be Reset() ///////////////////////////////////////// }; -}; +}; // namespace doris #endif - diff --git a/be/src/exec/union_node.cpp b/be/src/exec/union_node.cpp index c8455bfd3e2b2f..e860f1296b44f6 100644 --- a/be/src/exec/union_node.cpp +++ b/be/src/exec/union_node.cpp @@ -24,26 +24,24 @@ #include "runtime/tuple.h" #include "runtime/tuple_row.h" // #include "util/runtime_profile_counters.h" -#include "util/runtime_profile.h" #include "gen_cpp/PlanNodes_types.h" +#include "util/runtime_profile.h" // namespace doris { -UnionNode::UnionNode(ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), - _tuple_id(tnode.union_node.tuple_id), - _tuple_desc(nullptr), - _first_materialized_child_idx(tnode.union_node.first_materialized_child_idx), - _child_idx(0), - _child_batch(nullptr), - _child_row_idx(0), - _child_eos(false), - _const_expr_list_idx(0), - _to_close_child_idx(-1) { -} +UnionNode::UnionNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), + _tuple_id(tnode.union_node.tuple_id), + _tuple_desc(nullptr), + _first_materialized_child_idx(tnode.union_node.first_materialized_child_idx), + _child_idx(0), + _child_batch(nullptr), + _child_row_idx(0), + _child_eos(false), + _const_expr_list_idx(0), + _to_close_child_idx(-1) {} Status UnionNode::init(const TPlanNode& tnode, RuntimeState* state) { // TODO(zc): @@ -73,7 +71,8 @@ Status UnionNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); DCHECK(_tuple_desc != nullptr); - _materialize_exprs_evaluate_timer = ADD_TIMER(_runtime_profile, "MaterializeExprsEvaluateTimer"); + _materialize_exprs_evaluate_timer = + ADD_TIMER(_runtime_profile, 
"MaterializeExprsEvaluateTimer"); _codegend_union_materialize_batch_fns.resize(_child_expr_lists.size()); // Prepare const expr lists. for (const std::vector& exprs : _const_expr_lists) { @@ -142,7 +141,7 @@ Status UnionNode::get_next_materialized(RuntimeState* state, RowBatch* row_batch int64_t tuple_buf_size; uint8_t* tuple_buf; RETURN_IF_ERROR( - row_batch->resize_and_allocate_tuple_buffer(state, &tuple_buf_size, &tuple_buf)); + row_batch->resize_and_allocate_tuple_buffer(state, &tuple_buf_size, &tuple_buf)); memset(tuple_buf, 0, tuple_buf_size); while (has_more_materialized() && !row_batch->at_capacity()) { @@ -152,15 +151,14 @@ Status UnionNode::get_next_materialized(RuntimeState* state, RowBatch* row_batch // Child row batch was either never set or we're moving on to a different child. if (_child_batch.get() == nullptr) { DCHECK_LT(_child_idx, _children.size()); - _child_batch.reset(new RowBatch( - child(_child_idx)->row_desc(), state->batch_size(), mem_tracker().get())); + _child_batch.reset(new RowBatch(child(_child_idx)->row_desc(), state->batch_size(), + mem_tracker().get())); _child_row_idx = 0; // open the current child unless it's the first child, which was already opened in // UnionNode::open(). if (_child_eos) RETURN_IF_ERROR(child(_child_idx)->open(state)); // The first batch from each child is always fetched here. - RETURN_IF_ERROR(child(_child_idx)->get_next( - state, _child_batch.get(), &_child_eos)); + RETURN_IF_ERROR(child(_child_idx)->get_next(state, _child_batch.get(), &_child_eos)); } while (!row_batch->at_capacity()) { @@ -173,8 +171,8 @@ Status UnionNode::get_next_materialized(RuntimeState* state, RowBatch* row_batch _child_batch->reset(); _child_row_idx = 0; // All batches except the first batch from each child are fetched here. 
- RETURN_IF_ERROR(child(_child_idx)->get_next( - state, _child_batch.get(), &_child_eos)); + RETURN_IF_ERROR( + child(_child_idx)->get_next(state, _child_batch.get(), &_child_eos)); // If we fetched an empty batch, go back to the beginning of this while loop, and // try again. if (_child_batch->num_rows() == 0) continue; @@ -215,12 +213,11 @@ Status UnionNode::get_next_const(RuntimeState* state, RowBatch* row_batch) { int64_t tuple_buf_size; uint8_t* tuple_buf; RETURN_IF_ERROR( - row_batch->resize_and_allocate_tuple_buffer(state, &tuple_buf_size, &tuple_buf)); + row_batch->resize_and_allocate_tuple_buffer(state, &tuple_buf_size, &tuple_buf)); memset(tuple_buf, 0, tuple_buf_size); while (_const_expr_list_idx < _const_expr_lists.size() && !row_batch->at_capacity()) { - materialize_exprs( - _const_expr_lists[_const_expr_list_idx], nullptr, tuple_buf, row_batch); + materialize_exprs(_const_expr_lists[_const_expr_list_idx], nullptr, tuple_buf, row_batch); RETURN_IF_ERROR(get_error_msg(_const_expr_lists[_const_expr_list_idx])); tuple_buf += _tuple_desc->byte_size(); ++_const_expr_list_idx; @@ -268,7 +265,7 @@ Status UnionNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) _num_rows_returned += num_rows_added; *eos = reached_limit() || - (!has_more_passthrough() && !has_more_materialized() && !has_more_const(state)); + (!has_more_passthrough() && !has_more_materialized() && !has_more_const(state)); COUNTER_SET(_rows_returned_counter, _num_rows_returned); return Status::OK(); @@ -303,7 +300,7 @@ Status UnionNode::close(RuntimeState* state) { void UnionNode::debug_string(int indentation_level, std::stringstream* out) const { *out << string(indentation_level * 2, ' '); *out << "_union(_first_materialized_child_idx=" << _first_materialized_child_idx - << " _row_descriptor=[" << row_desc().debug_string() << "] " + << " _row_descriptor=[" << row_desc().debug_string() << "] " << " _child_expr_lists=["; for (int i = 0; i < _child_expr_lists.size(); ++i) { *out << 
Expr::debug_string(_child_expr_lists[i]) << ", "; @@ -313,5 +310,4 @@ void UnionNode::debug_string(int indentation_level, std::stringstream* out) cons *out << ")" << std::endl; } -} - +} // namespace doris diff --git a/be/src/exec/union_node.h b/be/src/exec/union_node.h index d05349f227572a..756ff87c1973ff 100644 --- a/be/src/exec/union_node.h +++ b/be/src/exec/union_node.h @@ -129,8 +129,8 @@ class UnionNode : public ExecNode { /// Evaluates 'exprs' over 'row', materializes the results in 'tuple_buf'. /// and appends the new tuple to 'dst_batch'. Increments '_num_rows_returned'. - void materialize_exprs(const std::vector& exprs, - TupleRow* row, uint8_t* tuple_buf, RowBatch* dst_batch); + void materialize_exprs(const std::vector& exprs, TupleRow* row, + uint8_t* tuple_buf, RowBatch* dst_batch); Status get_error_msg(const std::vector& exprs); @@ -141,24 +141,19 @@ class UnionNode : public ExecNode { } /// Returns true if there are still rows to be returned from passthrough children. - bool has_more_passthrough() const { - return _child_idx < _first_materialized_child_idx; - } + bool has_more_passthrough() const { return _child_idx < _first_materialized_child_idx; } /// Returns true if there are still rows to be returned from children that need /// materialization. bool has_more_materialized() const { - return _first_materialized_child_idx != _children.size() && - _child_idx < _children.size(); + return _first_materialized_child_idx != _children.size() && _child_idx < _children.size(); } /// Returns true if there are still rows to be returned from constant expressions. 
bool has_more_const(const RuntimeState* state) const { return state->per_fragment_instance_idx() == 0 && - _const_expr_list_idx < _const_expr_lists.size(); + _const_expr_list_idx < _const_expr_lists.size(); } - }; -} - +} // namespace doris diff --git a/be/src/exec/union_node_ir.cpp b/be/src/exec/union_node_ir.cpp index 32198201c7230e..f29d534aae1434 100644 --- a/be/src/exec/union_node_ir.cpp +++ b/be/src/exec/union_node_ir.cpp @@ -22,13 +22,14 @@ namespace doris { void IR_ALWAYS_INLINE UnionNode::materialize_exprs(const std::vector& exprs, - TupleRow* row, uint8_t* tuple_buf, RowBatch* dst_batch) { + TupleRow* row, uint8_t* tuple_buf, + RowBatch* dst_batch) { DCHECK(!dst_batch->at_capacity()); Tuple* dst_tuple = reinterpret_cast(tuple_buf); TupleRow* dst_row = dst_batch->get_row(dst_batch->add_row()); // dst_tuple->materialize_exprs(row, *_tuple_desc, exprs, - dst_tuple->materialize_exprs(row, *_tuple_desc, exprs, - dst_batch->tuple_data_pool(), nullptr, nullptr); + dst_tuple->materialize_exprs(row, *_tuple_desc, exprs, dst_batch->tuple_data_pool(), + nullptr, nullptr); dst_row->set_tuple(0, dst_tuple); dst_batch->commit_last_row(); } @@ -54,7 +55,7 @@ void UnionNode::materialize_batch(RowBatch* dst_batch, uint8_t** tuple_buf) { } Status UnionNode::get_error_msg(const std::vector& exprs) { - for (auto expr_ctx: exprs) { + for (auto expr_ctx : exprs) { std::string expr_error = expr_ctx->get_error_msg(); if (!expr_error.empty()) { return Status::RuntimeError(expr_error); @@ -63,4 +64,4 @@ Status UnionNode::get_error_msg(const std::vector& exprs) { return Status::OK(); } -} +} // namespace doris diff --git a/be/src/exprs/agg_fn.cc b/be/src/exprs/agg_fn.cc index c739f43cb5c1ab..dcecab5867a6c9 100644 --- a/be/src/exprs/agg_fn.cc +++ b/be/src/exprs/agg_fn.cc @@ -19,177 +19,172 @@ #include "exprs/anyval_util.h" #include "runtime/descriptors.h" -#include "runtime/user_function_cache.h" #include "runtime/runtime_state.h" +#include "runtime/user_function_cache.h" using 
namespace doris_udf; namespace doris { AggFn::AggFn(const TExprNode& tnode, const SlotDescriptor& intermediate_slot_desc, - const SlotDescriptor& output_slot_desc) - : Expr(tnode), - is_merge_(tnode.agg_expr.is_merge_agg), - intermediate_slot_desc_(intermediate_slot_desc), - output_slot_desc_(output_slot_desc), - _vararg_start_idx(tnode.__isset.vararg_start_idx ? tnode.vararg_start_idx : -1) { - // TODO(pengyubing) arg_type_descs_ is used for codegen - // arg_type_descs_(AnyValUtil::column_type_to_type_desc( - // TypeDescriptor::from_thrift(tnode.agg_expr.arg_types))) { - DCHECK(tnode.__isset.fn); - DCHECK(tnode.fn.__isset.aggregate_fn); - // TODO chenhao - DCHECK_EQ(tnode.node_type, TExprNodeType::AGG_EXPR); - DCHECK_EQ(TypeDescriptor::from_thrift(tnode.type).type, - TypeDescriptor::from_thrift(_fn.ret_type).type); - const std::string& fn_name = _fn.name.function_name; - if (fn_name == "count") { - agg_op_ = COUNT; - } else if (fn_name == "min") { - agg_op_ = MIN; - } else if (fn_name == "max") { - agg_op_ = MAX; - } else if (fn_name == "sum" || fn_name == "sum_init_zero") { - agg_op_ = SUM; - } else if (fn_name == "avg") { - agg_op_ = AVG; - } else if (fn_name == "ndv" || fn_name == "ndv_no_finalize") { - agg_op_ = NDV; + const SlotDescriptor& output_slot_desc) + : Expr(tnode), + is_merge_(tnode.agg_expr.is_merge_agg), + intermediate_slot_desc_(intermediate_slot_desc), + output_slot_desc_(output_slot_desc), + _vararg_start_idx(tnode.__isset.vararg_start_idx ? 
tnode.vararg_start_idx : -1) { + // TODO(pengyubing) arg_type_descs_ is used for codegen + // arg_type_descs_(AnyValUtil::column_type_to_type_desc( + // TypeDescriptor::from_thrift(tnode.agg_expr.arg_types))) { + DCHECK(tnode.__isset.fn); + DCHECK(tnode.fn.__isset.aggregate_fn); + // TODO chenhao + DCHECK_EQ(tnode.node_type, TExprNodeType::AGG_EXPR); + DCHECK_EQ(TypeDescriptor::from_thrift(tnode.type).type, + TypeDescriptor::from_thrift(_fn.ret_type).type); + const std::string& fn_name = _fn.name.function_name; + if (fn_name == "count") { + agg_op_ = COUNT; + } else if (fn_name == "min") { + agg_op_ = MIN; + } else if (fn_name == "max") { + agg_op_ = MAX; + } else if (fn_name == "sum" || fn_name == "sum_init_zero") { + agg_op_ = SUM; + } else if (fn_name == "avg") { + agg_op_ = AVG; + } else if (fn_name == "ndv" || fn_name == "ndv_no_finalize") { + agg_op_ = NDV; } else if (fn_name == "multi_distinct_count") { agg_op_ = COUNT_DISTINCT; } else if (fn_name == "multi_distinct_sum") { agg_op_ = SUM_DISTINCT; - } else { - agg_op_ = OTHER; - } + } else { + agg_op_ = OTHER; + } } Status AggFn::Init(const RowDescriptor& row_desc, RuntimeState* state) { - // TODO chenhao , calling expr's prepare in NewAggFnEvaluator create - // Initialize all children (i.e. input exprs to this aggregate expr). - //for (Expr* input_expr : children()) { - // RETURN_IF_ERROR(input_expr->prepare(row_desc, state)); - //} - - // Initialize the aggregate expressions' internals. - const TAggregateFunction& aggregate_fn = _fn.aggregate_fn; - DCHECK_EQ(intermediate_slot_desc_.type().type, - TypeDescriptor::from_thrift(aggregate_fn.intermediate_type).type); - DCHECK_EQ(output_slot_desc_.type().type, TypeDescriptor::from_thrift(_fn.ret_type).type); - - // Load the function pointers. Must have init() and update(). 
- if (aggregate_fn.init_fn_symbol.empty() || - aggregate_fn.update_fn_symbol.empty() || - (aggregate_fn.merge_fn_symbol.empty() && !aggregate_fn.is_analytic_only_fn)) { - // This path is only for partially implemented builtins. - DCHECK_EQ(_fn.binary_type, TFunctionBinaryType::BUILTIN); - std::stringstream ss; - ss << "Function " << _fn.name.function_name << " is not implemented."; - return Status::InternalError(ss.str()); - } - - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, aggregate_fn.init_fn_symbol, - _fn.hdfs_location, _fn.checksum, &init_fn_, &_cache_entry)); - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, aggregate_fn.update_fn_symbol, - _fn.hdfs_location, _fn.checksum, &update_fn_, &_cache_entry)); - - // Merge() is not defined for purely analytic function. - if (!aggregate_fn.is_analytic_only_fn) { - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, aggregate_fn.merge_fn_symbol, - _fn.hdfs_location, _fn.checksum, &merge_fn_, &_cache_entry)); - } - // Serialize(), GetValue(), Remove() and Finalize() are optional - if (!aggregate_fn.serialize_fn_symbol.empty()) { - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, aggregate_fn.serialize_fn_symbol, - _fn.hdfs_location, _fn.checksum, - &serialize_fn_, &_cache_entry)); - } - if (!aggregate_fn.get_value_fn_symbol.empty()) { - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, aggregate_fn.get_value_fn_symbol, _fn.hdfs_location, _fn.checksum, - &get_value_fn_, &_cache_entry)); - } - if (!aggregate_fn.remove_fn_symbol.empty()) { + // TODO chenhao , calling expr's prepare in NewAggFnEvaluator create + // Initialize all children (i.e. input exprs to this aggregate expr). + //for (Expr* input_expr : children()) { + // RETURN_IF_ERROR(input_expr->prepare(row_desc, state)); + //} + + // Initialize the aggregate expressions' internals. 
+ const TAggregateFunction& aggregate_fn = _fn.aggregate_fn; + DCHECK_EQ(intermediate_slot_desc_.type().type, + TypeDescriptor::from_thrift(aggregate_fn.intermediate_type).type); + DCHECK_EQ(output_slot_desc_.type().type, TypeDescriptor::from_thrift(_fn.ret_type).type); + + // Load the function pointers. Must have init() and update(). + if (aggregate_fn.init_fn_symbol.empty() || aggregate_fn.update_fn_symbol.empty() || + (aggregate_fn.merge_fn_symbol.empty() && !aggregate_fn.is_analytic_only_fn)) { + // This path is only for partially implemented builtins. + DCHECK_EQ(_fn.binary_type, TFunctionBinaryType::BUILTIN); + std::stringstream ss; + ss << "Function " << _fn.name.function_name << " is not implemented."; + return Status::InternalError(ss.str()); + } + RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, aggregate_fn.remove_fn_symbol, - _fn.hdfs_location, _fn.checksum, - &remove_fn_, &_cache_entry)); - } - if (!aggregate_fn.finalize_fn_symbol.empty()) { + _fn.id, aggregate_fn.init_fn_symbol, _fn.hdfs_location, _fn.checksum, &init_fn_, + &_cache_entry)); RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.finalize_fn_symbol, - _fn.hdfs_location, _fn.checksum, - &finalize_fn_, &_cache_entry)); - } - return Status::OK(); + _fn.id, aggregate_fn.update_fn_symbol, _fn.hdfs_location, _fn.checksum, &update_fn_, + &_cache_entry)); + + // Merge() is not defined for purely analytic function. 
+ if (!aggregate_fn.is_analytic_only_fn) { + RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( + _fn.id, aggregate_fn.merge_fn_symbol, _fn.hdfs_location, _fn.checksum, &merge_fn_, + &_cache_entry)); + } + // Serialize(), GetValue(), Remove() and Finalize() are optional + if (!aggregate_fn.serialize_fn_symbol.empty()) { + RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( + _fn.id, aggregate_fn.serialize_fn_symbol, _fn.hdfs_location, _fn.checksum, + &serialize_fn_, &_cache_entry)); + } + if (!aggregate_fn.get_value_fn_symbol.empty()) { + RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( + _fn.id, aggregate_fn.get_value_fn_symbol, _fn.hdfs_location, _fn.checksum, + &get_value_fn_, &_cache_entry)); + } + if (!aggregate_fn.remove_fn_symbol.empty()) { + RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( + _fn.id, aggregate_fn.remove_fn_symbol, _fn.hdfs_location, _fn.checksum, &remove_fn_, + &_cache_entry)); + } + if (!aggregate_fn.finalize_fn_symbol.empty()) { + RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( + _fn.id, _fn.aggregate_fn.finalize_fn_symbol, _fn.hdfs_location, _fn.checksum, + &finalize_fn_, &_cache_entry)); + } + return Status::OK(); } Status AggFn::Create(const TExpr& texpr, const RowDescriptor& row_desc, - const SlotDescriptor& intermediate_slot_desc, const SlotDescriptor& output_slot_desc, - RuntimeState* state, AggFn** agg_fn) { - *agg_fn = nullptr; - ObjectPool* pool = state->obj_pool(); - const TExprNode& texpr_node = texpr.nodes[0]; - //TODO chenhao - DCHECK_EQ(texpr_node.node_type, TExprNodeType::AGG_EXPR); - if (!texpr_node.__isset.fn) { - return Status::InternalError("Function not set in thrift AGGREGATE_EXPR node"); - } - AggFn* new_agg_fn = - pool->add(new AggFn(texpr_node, intermediate_slot_desc, output_slot_desc)); - RETURN_IF_ERROR(Expr::create_tree(texpr, pool, new_agg_fn)); - Status status = new_agg_fn->Init(row_desc, state); - if (UNLIKELY(!status.ok())) { - 
new_agg_fn->Close(); - return status; - } - for (Expr* input_expr : new_agg_fn->children()) { - int fn_ctx_idx = 0; - input_expr->assign_fn_ctx_idx(&fn_ctx_idx); - } - *agg_fn = new_agg_fn; - return Status::OK(); + const SlotDescriptor& intermediate_slot_desc, + const SlotDescriptor& output_slot_desc, RuntimeState* state, AggFn** agg_fn) { + *agg_fn = nullptr; + ObjectPool* pool = state->obj_pool(); + const TExprNode& texpr_node = texpr.nodes[0]; + //TODO chenhao + DCHECK_EQ(texpr_node.node_type, TExprNodeType::AGG_EXPR); + if (!texpr_node.__isset.fn) { + return Status::InternalError("Function not set in thrift AGGREGATE_EXPR node"); + } + AggFn* new_agg_fn = pool->add(new AggFn(texpr_node, intermediate_slot_desc, output_slot_desc)); + RETURN_IF_ERROR(Expr::create_tree(texpr, pool, new_agg_fn)); + Status status = new_agg_fn->Init(row_desc, state); + if (UNLIKELY(!status.ok())) { + new_agg_fn->Close(); + return status; + } + for (Expr* input_expr : new_agg_fn->children()) { + int fn_ctx_idx = 0; + input_expr->assign_fn_ctx_idx(&fn_ctx_idx); + } + *agg_fn = new_agg_fn; + return Status::OK(); } FunctionContext::TypeDesc AggFn::GetIntermediateTypeDesc() const { - return AnyValUtil::column_type_to_type_desc(intermediate_slot_desc_.type()); + return AnyValUtil::column_type_to_type_desc(intermediate_slot_desc_.type()); } FunctionContext::TypeDesc AggFn::GetOutputTypeDesc() const { - return AnyValUtil::column_type_to_type_desc(output_slot_desc_.type()); + return AnyValUtil::column_type_to_type_desc(output_slot_desc_.type()); } void AggFn::Close() { - // This also closes all the input expressions. - Expr::close(); + // This also closes all the input expressions. 
+ Expr::close(); } void AggFn::Close(const std::vector& exprs) { - for (AggFn* expr : exprs) expr->Close(); + for (AggFn* expr : exprs) expr->Close(); } std::string AggFn::DebugString() const { - std::stringstream out; - out << "AggFn(op=" << agg_op_; - for (Expr* input_expr : children()) { - out << " " << input_expr->debug_string() << ")"; - } - out << ")"; - return out.str(); + std::stringstream out; + out << "AggFn(op=" << agg_op_; + for (Expr* input_expr : children()) { + out << " " << input_expr->debug_string() << ")"; + } + out << ")"; + return out.str(); } std::string AggFn::DebugString(const std::vector& agg_fns) { - std::stringstream out; - out << "["; - for (int i = 0; i < agg_fns.size(); ++i) { - out << (i == 0 ? "" : " ") << agg_fns[i]->DebugString(); - } - out << "]"; - return out.str(); + std::stringstream out; + out << "["; + for (int i = 0; i < agg_fns.size(); ++i) { + out << (i == 0 ? "" : " ") << agg_fns[i]->DebugString(); + } + out << "]"; + return out.str(); } -} +} // namespace doris diff --git a/be/src/exprs/agg_fn.h b/be/src/exprs/agg_fn.h index 97abd20f1a2ebc..4c2a9c4c298be4 100644 --- a/be/src/exprs/agg_fn.h +++ b/be/src/exprs/agg_fn.h @@ -74,113 +74,106 @@ class TExprNode; /// by an input row as it falls out of a sliding window. /// class AggFn : public Expr { - public: - - /// Override the base class' implementation. - virtual bool IsAggFn() const { return true; } - - /// Enum for some built-in aggregation ops. - enum AggregationOp { - COUNT, - MIN, - MAX, - SUM, - AVG, - NDV, - SUM_DISTINCT, - COUNT_DISTINCT, - HLL_UNION_AGG, - OTHER, - }; - - /// Creates and initializes an aggregate function from 'texpr' and returns it in - /// 'agg_fn'. The returned AggFn lives in the ObjectPool of 'state'. 'row_desc' is - /// the row descriptor of the input tuple row; 'intermediate_slot_desc' is the slot - /// descriptor of the intermediate value; 'output_slot_desc' is the slot descriptor - /// of the output value. 
On failure, returns error status and sets 'agg_fn' to NULL. - static Status Create(const TExpr& texpr, const RowDescriptor& row_desc, - const SlotDescriptor& intermediate_slot_desc, - const SlotDescriptor& output_slot_desc, RuntimeState* state, AggFn** agg_fn) - WARN_UNUSED_RESULT; - - bool is_merge() const { return is_merge_; } - AggregationOp agg_op() const { return agg_op_; } - bool is_count_star() const { return agg_op_ == COUNT && _children.empty(); } - bool is_count_distinct() const { return agg_op_ == COUNT_DISTINCT; } - bool is_sum_distinct() const { return agg_op_ == SUM_DISTINCT; } - bool is_builtin() const { return _fn.binary_type == TFunctionBinaryType::BUILTIN; } - const std::string& fn_name() const { return _fn.name.function_name; } - const TypeDescriptor& intermediate_type() const { return intermediate_slot_desc_.type(); } - const SlotDescriptor& intermediate_slot_desc() const { return intermediate_slot_desc_; } - // Output type is the same as Expr::type(). - const SlotDescriptor& output_slot_desc() const { return output_slot_desc_; } - void* remove_fn() const { return remove_fn_; } - void* merge_or_update_fn() const { return is_merge_ ? merge_fn_ : update_fn_; } - void* serialize_fn() const { return serialize_fn_; } - void* get_value_fn() const { return get_value_fn_; } - void* finalize_fn() const { return finalize_fn_; } - bool SupportsRemove() const { return remove_fn_ != nullptr; } - bool SupportsSerialize() const { return serialize_fn_ != nullptr; } - FunctionContext::TypeDesc GetIntermediateTypeDesc() const; - FunctionContext::TypeDesc GetOutputTypeDesc() const; - const std::vector& arg_type_descs() const { - return arg_type_descs_; - } - - /// Releases all cache entries to libCache for all nodes in the expr tree. 
- virtual void Close(); - static void Close(const std::vector& exprs); - - Expr* clone(ObjectPool* pool) const { - return nullptr; - } - - virtual std::string DebugString() const; - static std::string DebugString(const std::vector& exprs); - - const int get_vararg_start_idx() const { - return _vararg_start_idx; - } +public: + /// Override the base class' implementation. + virtual bool IsAggFn() const { return true; } + + /// Enum for some built-in aggregation ops. + enum AggregationOp { + COUNT, + MIN, + MAX, + SUM, + AVG, + NDV, + SUM_DISTINCT, + COUNT_DISTINCT, + HLL_UNION_AGG, + OTHER, + }; + + /// Creates and initializes an aggregate function from 'texpr' and returns it in + /// 'agg_fn'. The returned AggFn lives in the ObjectPool of 'state'. 'row_desc' is + /// the row descriptor of the input tuple row; 'intermediate_slot_desc' is the slot + /// descriptor of the intermediate value; 'output_slot_desc' is the slot descriptor + /// of the output value. On failure, returns error status and sets 'agg_fn' to NULL. + static Status Create(const TExpr& texpr, const RowDescriptor& row_desc, + const SlotDescriptor& intermediate_slot_desc, + const SlotDescriptor& output_slot_desc, RuntimeState* state, + AggFn** agg_fn) WARN_UNUSED_RESULT; + + bool is_merge() const { return is_merge_; } + AggregationOp agg_op() const { return agg_op_; } + bool is_count_star() const { return agg_op_ == COUNT && _children.empty(); } + bool is_count_distinct() const { return agg_op_ == COUNT_DISTINCT; } + bool is_sum_distinct() const { return agg_op_ == SUM_DISTINCT; } + bool is_builtin() const { return _fn.binary_type == TFunctionBinaryType::BUILTIN; } + const std::string& fn_name() const { return _fn.name.function_name; } + const TypeDescriptor& intermediate_type() const { return intermediate_slot_desc_.type(); } + const SlotDescriptor& intermediate_slot_desc() const { return intermediate_slot_desc_; } + // Output type is the same as Expr::type(). 
+ const SlotDescriptor& output_slot_desc() const { return output_slot_desc_; } + void* remove_fn() const { return remove_fn_; } + void* merge_or_update_fn() const { return is_merge_ ? merge_fn_ : update_fn_; } + void* serialize_fn() const { return serialize_fn_; } + void* get_value_fn() const { return get_value_fn_; } + void* finalize_fn() const { return finalize_fn_; } + bool SupportsRemove() const { return remove_fn_ != nullptr; } + bool SupportsSerialize() const { return serialize_fn_ != nullptr; } + FunctionContext::TypeDesc GetIntermediateTypeDesc() const; + FunctionContext::TypeDesc GetOutputTypeDesc() const; + const std::vector& arg_type_descs() const { return arg_type_descs_; } + + /// Releases all cache entries to libCache for all nodes in the expr tree. + virtual void Close(); + static void Close(const std::vector& exprs); + + Expr* clone(ObjectPool* pool) const { return nullptr; } + + virtual std::string DebugString() const; + static std::string DebugString(const std::vector& exprs); + + const int get_vararg_start_idx() const { return _vararg_start_idx; } private: - friend class Expr; - friend class NewAggFnEvaluator; - - /// True if this is a merging aggregation. - const bool is_merge_; - - /// Slot into which Update()/Merge()/Serialize() write their result. Not owned. - const SlotDescriptor& intermediate_slot_desc_; - - /// Slot into which Finalize() results are written. Not owned. Identical to - /// intermediate_slot_desc_ if this agg fn has the same intermediate and result type. - const SlotDescriptor& output_slot_desc_; - - /// The types of the arguments to the aggregate function. - const std::vector arg_type_descs_; - - /// The aggregation operation. - AggregationOp agg_op_; - - /// Function pointers for the different phases of the aggregate function. 
- void* init_fn_ = nullptr; - void* update_fn_ = nullptr; - void* remove_fn_ = nullptr; - void* merge_fn_ = nullptr; - void* serialize_fn_ = nullptr; - void* get_value_fn_ = nullptr; - void* finalize_fn_ = nullptr; - - int _vararg_start_idx; - - AggFn(const TExprNode& node, const SlotDescriptor& intermediate_slot_desc, - const SlotDescriptor& output_slot_desc); - - /// Initializes the AggFn and its input expressions. May load the UDAF from LibCache - /// if necessary. - virtual Status Init(const RowDescriptor& desc, RuntimeState* state) WARN_UNUSED_RESULT; + friend class Expr; + friend class NewAggFnEvaluator; + + /// True if this is a merging aggregation. + const bool is_merge_; + + /// Slot into which Update()/Merge()/Serialize() write their result. Not owned. + const SlotDescriptor& intermediate_slot_desc_; + + /// Slot into which Finalize() results are written. Not owned. Identical to + /// intermediate_slot_desc_ if this agg fn has the same intermediate and result type. + const SlotDescriptor& output_slot_desc_; + + /// The types of the arguments to the aggregate function. + const std::vector arg_type_descs_; + + /// The aggregation operation. + AggregationOp agg_op_; + + /// Function pointers for the different phases of the aggregate function. + void* init_fn_ = nullptr; + void* update_fn_ = nullptr; + void* remove_fn_ = nullptr; + void* merge_fn_ = nullptr; + void* serialize_fn_ = nullptr; + void* get_value_fn_ = nullptr; + void* finalize_fn_ = nullptr; + + int _vararg_start_idx; + + AggFn(const TExprNode& node, const SlotDescriptor& intermediate_slot_desc, + const SlotDescriptor& output_slot_desc); + + /// Initializes the AggFn and its input expressions. May load the UDAF from LibCache + /// if necessary. 
+ virtual Status Init(const RowDescriptor& desc, RuntimeState* state) WARN_UNUSED_RESULT; }; -} +} // namespace doris #endif diff --git a/be/src/exprs/agg_fn_evaluator.cpp b/be/src/exprs/agg_fn_evaluator.cpp old mode 100755 new mode 100644 index dc0558e10a4691..f91475ca1d306f --- a/be/src/exprs/agg_fn_evaluator.cpp +++ b/be/src/exprs/agg_fn_evaluator.cpp @@ -23,13 +23,13 @@ #include "exec/aggregation_node.h" #include "exprs/aggregate_functions.h" #include "exprs/anyval_util.h" -#include "runtime/user_function_cache.h" -#include "udf/udf_internal.h" -#include "util/debug_util.h" #include "runtime/datetime_value.h" #include "runtime/mem_tracker.h" -#include "thrift/protocol/TDebugProtocol.h" #include "runtime/raw_value.h" +#include "runtime/user_function_cache.h" +#include "thrift/protocol/TDebugProtocol.h" +#include "udf/udf_internal.h" +#include "util/debug_util.h" namespace doris { using doris_udf::FunctionContext; @@ -56,100 +56,89 @@ typedef void (*InitFn)(FunctionContext*, AnyVal*); typedef void (*UpdateFn0)(FunctionContext*, AnyVal*); typedef void (*UpdateFn1)(FunctionContext*, const AnyVal&, AnyVal*); typedef void (*UpdateFn2)(FunctionContext*, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn3)(FunctionContext*, const AnyVal&, const AnyVal&, +typedef void (*UpdateFn3)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); +typedef void (*UpdateFn4)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn4)(FunctionContext*, const AnyVal&, const AnyVal&, +typedef void (*UpdateFn5)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn5)(FunctionContext*, const AnyVal&, const AnyVal&, +typedef void (*UpdateFn6)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn6)(FunctionContext*, const AnyVal&, const AnyVal&, 
+typedef void (*UpdateFn7)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn7)(FunctionContext*, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn8)(FunctionContext*, const AnyVal&, const AnyVal&, +typedef void (*UpdateFn8)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, AnyVal*); -typedef StringVal(*SerializeFn)(FunctionContext*, const StringVal&); -typedef AnyVal(*GetValueFn)(FunctionContext*, const AnyVal&); -typedef AnyVal(*FinalizeFn)(FunctionContext*, const AnyVal&); + AnyVal*); +typedef StringVal (*SerializeFn)(FunctionContext*, const StringVal&); +typedef AnyVal (*GetValueFn)(FunctionContext*, const AnyVal&); +typedef AnyVal (*FinalizeFn)(FunctionContext*, const AnyVal&); -Status AggFnEvaluator::create(ObjectPool* pool, - const TExpr& desc, - AggFnEvaluator** result) { +Status AggFnEvaluator::create(ObjectPool* pool, const TExpr& desc, AggFnEvaluator** result) { return create(pool, desc, false, result); } -Status AggFnEvaluator::create( - ObjectPool* pool, - const TExpr& desc, - bool is_analytic_fn, - AggFnEvaluator** result) { +Status AggFnEvaluator::create(ObjectPool* pool, const TExpr& desc, bool is_analytic_fn, + AggFnEvaluator** result) { *result = pool->add(new AggFnEvaluator(desc.nodes[0], is_analytic_fn)); int node_idx = 0; for (int i = 0; i < desc.nodes[0].num_children; ++i) { ++node_idx; Expr* expr = NULL; ExprContext* ctx = NULL; - RETURN_IF_ERROR(Expr::create_tree_from_thrift( - pool, desc.nodes, NULL, &node_idx, &expr, &ctx)); + RETURN_IF_ERROR( + Expr::create_tree_from_thrift(pool, desc.nodes, NULL, &node_idx, &expr, &ctx)); (*result)->_input_exprs_ctxs.push_back(ctx); } return Status::OK(); } -AggFnEvaluator::AggFnEvaluator(const 
TExprNode& desc, bool is_analytic_fn) : - _fn(desc.fn), - _is_merge(desc.agg_expr.is_merge_agg), - _is_analytic_fn(is_analytic_fn), - _return_type(TypeDescriptor::from_thrift(desc.fn.ret_type)), - _intermediate_type(TypeDescriptor::from_thrift(desc.fn.aggregate_fn.intermediate_type)), - _function_type(desc.fn.binary_type), - _total_mem_consumption(0), - _accumulated_mem_consumption(0), - _intermediate_slot_desc(NULL), - _output_slot_desc(NULL), - _init_fn(NULL), - _update_fn(NULL), - _remove_fn(NULL), - _merge_fn(NULL), - _serialize_fn(NULL), - _get_value_fn(NULL), - _finalize_fn(NULL) { - if (_fn.name.function_name == "count") { - _agg_op = COUNT; - } else if (_fn.name.function_name == "min") { - _agg_op = MIN; - } else if (_fn.name.function_name == "max") { - _agg_op = MAX; - } else if (_fn.name.function_name == "sum") { - _agg_op = SUM; - } else if (_fn.name.function_name == "avg") { - _agg_op = AVG; - } else if (_fn.name.function_name == "ndv" || - _fn.name.function_name == "ndv_no_finalize") { - _agg_op = NDV; - } else if (_fn.name.function_name == "count_distinct" || - _fn.name.function_name == "count_distinct") { - _agg_op = COUNT_DISTINCT; - } else if (_fn.name.function_name == "sum_distinct" || - _fn.name.function_name == "sum_distinct") { - _agg_op = SUM_DISTINCT; - } else if (_fn.name.function_name == "hll_union_agg") { - _agg_op = HLL_UNION_AGG; - } else { - _agg_op = OTHER; - } +AggFnEvaluator::AggFnEvaluator(const TExprNode& desc, bool is_analytic_fn) + : _fn(desc.fn), + _is_merge(desc.agg_expr.is_merge_agg), + _is_analytic_fn(is_analytic_fn), + _return_type(TypeDescriptor::from_thrift(desc.fn.ret_type)), + _intermediate_type(TypeDescriptor::from_thrift(desc.fn.aggregate_fn.intermediate_type)), + _function_type(desc.fn.binary_type), + _total_mem_consumption(0), + _accumulated_mem_consumption(0), + _intermediate_slot_desc(NULL), + _output_slot_desc(NULL), + _init_fn(NULL), + _update_fn(NULL), + _remove_fn(NULL), + _merge_fn(NULL), + 
_serialize_fn(NULL), + _get_value_fn(NULL), + _finalize_fn(NULL) { + if (_fn.name.function_name == "count") { + _agg_op = COUNT; + } else if (_fn.name.function_name == "min") { + _agg_op = MIN; + } else if (_fn.name.function_name == "max") { + _agg_op = MAX; + } else if (_fn.name.function_name == "sum") { + _agg_op = SUM; + } else if (_fn.name.function_name == "avg") { + _agg_op = AVG; + } else if (_fn.name.function_name == "ndv" || _fn.name.function_name == "ndv_no_finalize") { + _agg_op = NDV; + } else if (_fn.name.function_name == "count_distinct" || + _fn.name.function_name == "count_distinct") { + _agg_op = COUNT_DISTINCT; + } else if (_fn.name.function_name == "sum_distinct" || + _fn.name.function_name == "sum_distinct") { + _agg_op = SUM_DISTINCT; + } else if (_fn.name.function_name == "hll_union_agg") { + _agg_op = HLL_UNION_AGG; + } else { + _agg_op = OTHER; + } } -Status AggFnEvaluator::prepare( - RuntimeState* state, - const RowDescriptor& desc, - MemPool* pool, - const SlotDescriptor* intermediate_slot_desc, - const SlotDescriptor* output_slot_desc, - const std::shared_ptr& mem_tracker, - FunctionContext** agg_fn_ctx) { +Status AggFnEvaluator::prepare(RuntimeState* state, const RowDescriptor& desc, MemPool* pool, + const SlotDescriptor* intermediate_slot_desc, + const SlotDescriptor* output_slot_desc, + const std::shared_ptr& mem_tracker, + FunctionContext** agg_fn_ctx) { DCHECK(pool != NULL); DCHECK(intermediate_slot_desc != NULL); DCHECK(_intermediate_slot_desc == NULL); @@ -166,8 +155,8 @@ Status AggFnEvaluator::prepare( ObjectPool* obj_pool = state->obj_pool(); for (int i = 0; i < _input_exprs_ctxs.size(); ++i) { - _staging_input_vals.push_back(create_any_val( - obj_pool, input_expr_ctxs()[i]->root()->type())); + _staging_input_vals.push_back( + create_any_val(obj_pool, input_expr_ctxs()[i]->root()->type())); } // window has intermediate_slot_type @@ -193,8 +182,7 @@ Status AggFnEvaluator::prepare( // Load the function pointers. 
Merge is not required if this is evaluating an // analytic function. - if (_fn.aggregate_fn.init_fn_symbol.empty() || - _fn.aggregate_fn.update_fn_symbol.empty() || + if (_fn.aggregate_fn.init_fn_symbol.empty() || _fn.aggregate_fn.update_fn_symbol.empty() || (!_is_analytic_fn && _fn.aggregate_fn.merge_fn_symbol.empty())) { // This path is only for partially implemented builtins. DCHECK_EQ(_fn.binary_type, TFunctionBinaryType::BUILTIN); @@ -205,58 +193,56 @@ Status AggFnEvaluator::prepare( // Load the function pointers. RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.init_fn_symbol, - _fn.hdfs_location, _fn.checksum, &_init_fn, NULL)); + _fn.id, _fn.aggregate_fn.init_fn_symbol, _fn.hdfs_location, _fn.checksum, &_init_fn, + NULL)); RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.update_fn_symbol, - _fn.hdfs_location, _fn.checksum, &_update_fn, NULL)); + _fn.id, _fn.aggregate_fn.update_fn_symbol, _fn.hdfs_location, _fn.checksum, &_update_fn, + NULL)); // Merge() is not loaded if evaluating the agg fn as an analytic function. 
if (!_is_analytic_fn) { - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.merge_fn_symbol, - _fn.hdfs_location, _fn.checksum, &_merge_fn, NULL)); + RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( + _fn.id, _fn.aggregate_fn.merge_fn_symbol, _fn.hdfs_location, _fn.checksum, + &_merge_fn, NULL)); } // Serialize and Finalize are optional if (!_fn.aggregate_fn.serialize_fn_symbol.empty()) { RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.serialize_fn_symbol, - _fn.hdfs_location, _fn.checksum, &_serialize_fn, NULL)); + _fn.id, _fn.aggregate_fn.serialize_fn_symbol, _fn.hdfs_location, _fn.checksum, + &_serialize_fn, NULL)); } if (!_fn.aggregate_fn.finalize_fn_symbol.empty()) { RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.finalize_fn_symbol, - _fn.hdfs_location, _fn.checksum, &_finalize_fn, NULL)); + _fn.id, _fn.aggregate_fn.finalize_fn_symbol, _fn.hdfs_location, _fn.checksum, + &_finalize_fn, NULL)); } if (!_fn.aggregate_fn.get_value_fn_symbol.empty()) { RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.get_value_fn_symbol, - _fn.hdfs_location, _fn.checksum, &_get_value_fn, - NULL)); + _fn.id, _fn.aggregate_fn.get_value_fn_symbol, _fn.hdfs_location, _fn.checksum, + &_get_value_fn, NULL)); } if (!_fn.aggregate_fn.remove_fn_symbol.empty()) { RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.remove_fn_symbol, - _fn.hdfs_location, _fn.checksum, &_remove_fn, - NULL)); + _fn.id, _fn.aggregate_fn.remove_fn_symbol, _fn.hdfs_location, _fn.checksum, + &_remove_fn, NULL)); } std::vector arg_types; for (int j = 0; j < _input_exprs_ctxs.size(); ++j) { arg_types.push_back( - AnyValUtil::column_type_to_type_desc(_input_exprs_ctxs[j]->root()->type())); + AnyValUtil::column_type_to_type_desc(_input_exprs_ctxs[j]->root()->type())); } FunctionContext::TypeDesc 
intermediate_type = - AnyValUtil::column_type_to_type_desc(_intermediate_type); + AnyValUtil::column_type_to_type_desc(_intermediate_type); FunctionContext::TypeDesc output_type = - AnyValUtil::column_type_to_type_desc(_output_slot_desc->type()); + AnyValUtil::column_type_to_type_desc(_output_slot_desc->type()); - *agg_fn_ctx = FunctionContextImpl::create_context(state, pool, - intermediate_type, output_type, arg_types, 0, false); + *agg_fn_ctx = FunctionContextImpl::create_context(state, pool, intermediate_type, output_type, + arg_types, 0, false); return Status::OK(); } @@ -281,9 +267,7 @@ void AggFnEvaluator::close(RuntimeState* state) { } // Utility to put val into an AnyVal struct -inline void AggFnEvaluator::set_any_val( - const void* slot, - const TypeDescriptor& type, AnyVal* dst) { +inline void AggFnEvaluator::set_any_val(const void* slot, const TypeDescriptor& type, AnyVal* dst) { if (slot == NULL) { dst->is_null = true; return; @@ -343,8 +327,8 @@ inline void AggFnEvaluator::set_any_val( return; case TYPE_DECIMALV2: - reinterpret_cast(dst)->val - = reinterpret_cast(slot)->value; + reinterpret_cast(dst)->val = + reinterpret_cast(slot)->value; return; case TYPE_LARGEINT: @@ -356,8 +340,8 @@ inline void AggFnEvaluator::set_any_val( } } -inline void AggFnEvaluator::set_output_slot(const AnyVal* src, - const SlotDescriptor* dst_slot_desc, Tuple* dst) { +inline void AggFnEvaluator::set_output_slot(const AnyVal* src, const SlotDescriptor* dst_slot_desc, + Tuple* dst) { if (src->is_null) { dst->set_null(dst_slot_desc->null_indicator_offset()); return; @@ -403,23 +387,22 @@ inline void AggFnEvaluator::set_output_slot(const AnyVal* src, case TYPE_HLL: case TYPE_OBJECT: *reinterpret_cast(slot) = - StringValue::from_string_val(*reinterpret_cast(src)); + StringValue::from_string_val(*reinterpret_cast(src)); return; case TYPE_DATE: case TYPE_DATETIME: - *reinterpret_cast(slot) = DateTimeValue::from_datetime_val( - *reinterpret_cast(src)); + *reinterpret_cast(slot) = + 
DateTimeValue::from_datetime_val(*reinterpret_cast(src)); return; case TYPE_DECIMAL: - *reinterpret_cast(slot) = DecimalValue::from_decimal_val( - *reinterpret_cast(src)); + *reinterpret_cast(slot) = + DecimalValue::from_decimal_val(*reinterpret_cast(src)); return; case TYPE_DECIMALV2: - *reinterpret_cast(slot) = - reinterpret_cast(src)->val; + *reinterpret_cast(slot) = reinterpret_cast(src)->val; return; case TYPE_LARGEINT: { @@ -452,7 +435,6 @@ void AggFnEvaluator::init(FunctionContext* agg_fn_ctx, Tuple* dst) { set_output_slot(_staging_intermediate_val, _intermediate_slot_desc, dst); agg_fn_ctx->impl()->set_num_updates(0); agg_fn_ctx->impl()->set_num_removes(0); - } void AggFnEvaluator::update_mem_limlits(int len) { @@ -465,8 +447,7 @@ void AggFnEvaluator::update_mem_limlits(int len) { } } -AggFnEvaluator::~AggFnEvaluator() { -} +AggFnEvaluator::~AggFnEvaluator() {} inline void AggFnEvaluator::update_mem_trackers(bool is_filter, bool is_add_buckets, int len) { if (!is_filter) { @@ -509,7 +490,7 @@ bool AggFnEvaluator::count_distinct_data_filter(TupleRow* row, Tuple* dst) { // 2. 
merge multi parameter into one parameter(StringVal) if (_string_buffer_len < total_len) { - _string_buffer_len = ((total_len << 10) + 1) >> 10 ; // (len/1024+1)*1024 + _string_buffer_len = ((total_len << 10) + 1) >> 10; // (len/1024+1)*1024 _string_buffer.reset(new char[_string_buffer_len]); } @@ -530,21 +511,21 @@ bool AggFnEvaluator::count_distinct_data_filter(TupleRow* row, Tuple* dst) { return true; case TYPE_BOOLEAN: { - *begin = (uint8_t)reinterpret_cast(_staging_input_vals[i])->val; + *begin = (uint8_t) reinterpret_cast(_staging_input_vals[i])->val; begin += TINYINT_SIZE; break; } case TYPE_TINYINT: { - memcpy(begin, - &reinterpret_cast(_staging_input_vals[i])->val, TINYINT_SIZE); + memcpy(begin, &reinterpret_cast(_staging_input_vals[i])->val, + TINYINT_SIZE); begin += TINYINT_SIZE; break; } case TYPE_SMALLINT: { - memcpy(begin, - &reinterpret_cast(_staging_input_vals[i])->val, SMALLINT_SIZE); + memcpy(begin, &reinterpret_cast(_staging_input_vals[i])->val, + SMALLINT_SIZE); begin += SMALLINT_SIZE; break; } @@ -621,7 +602,7 @@ bool AggFnEvaluator::count_distinct_data_filter(TupleRow* row, Tuple* dst) { DCHECK(begin == string_val.ptr + string_val.len) << "COUNT_DISTINCT: StringVal's len doesn't match"; bool is_add_buckets = false; - bool is_filter = is_in_hybridmap(&string_val, dst, &is_add_buckets); + bool is_filter = is_in_hybridmap(&string_val, dst, &is_add_buckets); update_mem_trackers(is_filter, is_add_buckets, string_val.len); return is_filter; } @@ -646,21 +627,21 @@ bool AggFnEvaluator::sum_distinct_data_filter(TupleRow* row, Tuple* dst) { case TYPE_BIGINT: { const BigIntVal* value = reinterpret_cast(_staging_input_vals[0]); - is_filter = is_in_hybridmap((void*) & (value->val), dst, &is_add_buckets); + is_filter = is_in_hybridmap((void*)&(value->val), dst, &is_add_buckets); update_mem_trackers(is_filter, is_add_buckets, BIGINT_SIZE); return is_filter; } case TYPE_FLOAT: { const FloatVal* value = reinterpret_cast(_staging_input_vals[0]); - is_filter = 
is_in_hybridmap((void*) & (value->val), dst, &is_add_buckets); + is_filter = is_in_hybridmap((void*)&(value->val), dst, &is_add_buckets); update_mem_trackers(is_filter, is_add_buckets, FLOAT_SIZE); return is_filter; } case TYPE_DOUBLE: { const DoubleVal* value = reinterpret_cast(_staging_input_vals[0]); - is_filter = is_in_hybridmap((void*) & (value->val), dst, &is_add_buckets); + is_filter = is_in_hybridmap((void*)&(value->val), dst, &is_add_buckets); update_mem_trackers(is_filter, is_add_buckets, DOUBLE_SIZE); return is_filter; } @@ -668,7 +649,7 @@ bool AggFnEvaluator::sum_distinct_data_filter(TupleRow* row, Tuple* dst) { case TYPE_DECIMAL: { const DecimalVal* value = reinterpret_cast(_staging_input_vals[0]); DecimalValue temp_value = DecimalValue::from_decimal_val(*value); - is_filter = is_in_hybridmap((void*) & (temp_value), dst, &is_add_buckets); + is_filter = is_in_hybridmap((void*)&(temp_value), dst, &is_add_buckets); update_mem_trackers(is_filter, is_add_buckets, DECIMAL_SIZE); return is_filter; } @@ -676,14 +657,14 @@ bool AggFnEvaluator::sum_distinct_data_filter(TupleRow* row, Tuple* dst) { case TYPE_DECIMALV2: { const DecimalV2Val* value = reinterpret_cast(_staging_input_vals[0]); DecimalV2Value temp_value = DecimalV2Value::from_decimal_val(*value); - is_filter = is_in_hybridmap((void*) & (temp_value), dst, &is_add_buckets); + is_filter = is_in_hybridmap((void*)&(temp_value), dst, &is_add_buckets); update_mem_trackers(is_filter, is_add_buckets, DECIMALV2_SIZE); return is_filter; } case TYPE_LARGEINT: { const LargeIntVal* value = reinterpret_cast(_staging_input_vals[0]); - is_filter = is_in_hybridmap((void*) & (value->val), dst, &is_add_buckets); + is_filter = is_in_hybridmap((void*)&(value->val), dst, &is_add_buckets); update_mem_trackers(is_filter, is_add_buckets, LARGEINT_SIZE); return is_filter; } @@ -696,8 +677,8 @@ bool AggFnEvaluator::sum_distinct_data_filter(TupleRow* row, Tuple* dst) { return false; } -void 
AggFnEvaluator::update_or_merge(FunctionContext* agg_fn_ctx, TupleRow* row, - Tuple* dst, void* fn) { +void AggFnEvaluator::update_or_merge(FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst, + void* fn) { if (fn == NULL) { return; } @@ -741,8 +722,8 @@ void AggFnEvaluator::update_or_merge(FunctionContext* agg_fn_ctx, TupleRow* row, // if _agg_op is TAggregationOp::COUNT_DISTINCT, it has only one // input parameter, we consider the first parameter as the only input parameter if (_is_multi_distinct && _agg_op == AggregationOp::COUNT_DISTINCT) { - reinterpret_cast(fn)(agg_fn_ctx, - *_staging_input_vals[0], _staging_intermediate_val); + reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], + _staging_intermediate_val); } else { switch (input_expr_ctxs().size()) { case 0: @@ -750,61 +731,54 @@ void AggFnEvaluator::update_or_merge(FunctionContext* agg_fn_ctx, TupleRow* row, break; case 1: - reinterpret_cast(fn)(agg_fn_ctx, - *_staging_input_vals[0], _staging_intermediate_val); + reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], + _staging_intermediate_val); break; case 2: - reinterpret_cast(fn)( - agg_fn_ctx, - *_staging_input_vals[0], - *_staging_input_vals[1], - _staging_intermediate_val); + reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], + *_staging_input_vals[1], _staging_intermediate_val); break; case 3: - reinterpret_cast(fn)(agg_fn_ctx, - *_staging_input_vals[0], *_staging_input_vals[1], - *_staging_input_vals[2], _staging_intermediate_val); + reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], + *_staging_input_vals[1], *_staging_input_vals[2], + _staging_intermediate_val); break; case 4: - reinterpret_cast(fn)(agg_fn_ctx, - *_staging_input_vals[0], *_staging_input_vals[1], - *_staging_input_vals[2], *_staging_input_vals[3], - _staging_intermediate_val); + reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], + *_staging_input_vals[1], *_staging_input_vals[2], + *_staging_input_vals[3], _staging_intermediate_val); break; case 
5: - reinterpret_cast(fn)(agg_fn_ctx, - *_staging_input_vals[0], *_staging_input_vals[1], - *_staging_input_vals[2], *_staging_input_vals[3], - *_staging_input_vals[4], _staging_intermediate_val); + reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], + *_staging_input_vals[1], *_staging_input_vals[2], + *_staging_input_vals[3], *_staging_input_vals[4], + _staging_intermediate_val); break; case 6: - reinterpret_cast(fn)(agg_fn_ctx, - *_staging_input_vals[0], *_staging_input_vals[1], - *_staging_input_vals[2], *_staging_input_vals[3], - *_staging_input_vals[4], *_staging_input_vals[5], - _staging_intermediate_val); + reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], + *_staging_input_vals[1], *_staging_input_vals[2], + *_staging_input_vals[3], *_staging_input_vals[4], + *_staging_input_vals[5], _staging_intermediate_val); break; case 7: - reinterpret_cast(fn)(agg_fn_ctx, - *_staging_input_vals[0], *_staging_input_vals[1], - *_staging_input_vals[2], *_staging_input_vals[3], - *_staging_input_vals[4], *_staging_input_vals[5], - *_staging_input_vals[6], _staging_intermediate_val); + reinterpret_cast(fn)( + agg_fn_ctx, *_staging_input_vals[0], *_staging_input_vals[1], + *_staging_input_vals[2], *_staging_input_vals[3], *_staging_input_vals[4], + *_staging_input_vals[5], *_staging_input_vals[6], _staging_intermediate_val); break; case 8: - reinterpret_cast(fn)(agg_fn_ctx, - *_staging_input_vals[0], *_staging_input_vals[1], - *_staging_input_vals[2], *_staging_input_vals[3], - *_staging_input_vals[4], *_staging_input_vals[5], - *_staging_input_vals[6], *_staging_input_vals[7], - _staging_intermediate_val); + reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], + *_staging_input_vals[1], *_staging_input_vals[2], + *_staging_input_vals[3], *_staging_input_vals[4], + *_staging_input_vals[5], *_staging_input_vals[6], + *_staging_input_vals[7], _staging_intermediate_val); break; default: @@ -815,13 +789,12 @@ void 
AggFnEvaluator::update_or_merge(FunctionContext* agg_fn_ctx, TupleRow* row, set_output_slot(_staging_intermediate_val, _intermediate_slot_desc, dst); } -void AggFnEvaluator::update( - FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst, void* fn, MemPool* pool) { +void AggFnEvaluator::update(FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst, void* fn, + MemPool* pool) { return update_or_merge(agg_fn_ctx, row, dst, fn); } -void AggFnEvaluator::merge( - FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst, MemPool* pool) { +void AggFnEvaluator::merge(FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst, MemPool* pool) { return update_or_merge(agg_fn_ctx, row, dst, _merge_fn); } @@ -841,14 +814,14 @@ void AggFnEvaluator::merge(FunctionContext* agg_fn_ctx, Tuple* src, Tuple* dst) set_any_val2(_intermediate_slot_desc, src, _staging_merge_input_val); // The merge fn always takes one input argument. - reinterpret_cast(_merge_fn)(agg_fn_ctx, - *_staging_merge_input_val, _staging_intermediate_val); + reinterpret_cast(_merge_fn)(agg_fn_ctx, *_staging_merge_input_val, + _staging_intermediate_val); set_output_slot(_staging_intermediate_val, _intermediate_slot_desc, dst); } -void AggFnEvaluator::choose_update_or_merge( - FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst) { +void AggFnEvaluator::choose_update_or_merge(FunctionContext* agg_fn_ctx, TupleRow* row, + Tuple* dst) { if (_is_merge) { return update_or_merge(agg_fn_ctx, row, dst, _merge_fn); } else { @@ -857,7 +830,8 @@ void AggFnEvaluator::choose_update_or_merge( } void AggFnEvaluator::serialize_or_finalize(FunctionContext* agg_fn_ctx, Tuple* src, - const SlotDescriptor* dst_slot_desc, Tuple* dst, void* fn) { + const SlotDescriptor* dst_slot_desc, Tuple* dst, + void* fn) { // DCHECK_EQ(dst_slot_desc->type().type, _return_type.type); if (src == NULL) { src = dst; @@ -876,72 +850,71 @@ void AggFnEvaluator::serialize_or_finalize(FunctionContext* agg_fn_ctx, Tuple* s // not same // if (_is_analytic_fn) { - 
// No fn was given but the src and dst tuples are different (doing a finalize()). - // Just copy the src slot into the dst tuple. - if (fn == NULL) { - DCHECK_EQ(_intermediate_slot_desc->type(), dst_slot_desc->type()); - RawValue::write(src_slot, dst, dst_slot_desc, NULL); - return; - } + // No fn was given but the src and dst tuples are different (doing a finalize()). + // Just copy the src slot into the dst tuple. + if (fn == NULL) { + DCHECK_EQ(_intermediate_slot_desc->type(), dst_slot_desc->type()); + RawValue::write(src_slot, dst, dst_slot_desc, NULL); + return; + } // } - set_any_val(src_slot, _intermediate_slot_desc->type(), - _staging_intermediate_val); + set_any_val(src_slot, _intermediate_slot_desc->type(), _staging_intermediate_val); switch (dst_slot_desc->type().type) { case TYPE_BOOLEAN: { - typedef BooleanVal(*Fn)(FunctionContext*, AnyVal*); + typedef BooleanVal (*Fn)(FunctionContext*, AnyVal*); BooleanVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); set_output_slot(&v, dst_slot_desc, dst); break; } case TYPE_TINYINT: { - typedef TinyIntVal(*Fn)(FunctionContext*, AnyVal*); + typedef TinyIntVal (*Fn)(FunctionContext*, AnyVal*); TinyIntVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); set_output_slot(&v, dst_slot_desc, dst); break; } case TYPE_SMALLINT: { - typedef SmallIntVal(*Fn)(FunctionContext*, AnyVal*); + typedef SmallIntVal (*Fn)(FunctionContext*, AnyVal*); SmallIntVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); set_output_slot(&v, dst_slot_desc, dst); break; } case TYPE_INT: { - typedef IntVal(*Fn)(FunctionContext*, AnyVal*); + typedef IntVal (*Fn)(FunctionContext*, AnyVal*); IntVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); set_output_slot(&v, dst_slot_desc, dst); break; } case TYPE_BIGINT: { - typedef BigIntVal(*Fn)(FunctionContext*, AnyVal*); + typedef BigIntVal (*Fn)(FunctionContext*, AnyVal*); BigIntVal v = reinterpret_cast(fn)(agg_fn_ctx, 
_staging_intermediate_val); set_output_slot(&v, dst_slot_desc, dst); break; } case TYPE_FLOAT: { - typedef FloatVal(*Fn)(FunctionContext*, AnyVal*); + typedef FloatVal (*Fn)(FunctionContext*, AnyVal*); FloatVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); set_output_slot(&v, dst_slot_desc, dst); break; } case TYPE_DOUBLE: { - typedef DoubleVal(*Fn)(FunctionContext*, AnyVal*); + typedef DoubleVal (*Fn)(FunctionContext*, AnyVal*); DoubleVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); set_output_slot(&v, dst_slot_desc, dst); break; } case TYPE_CHAR: - case TYPE_VARCHAR: + case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: { - typedef StringVal(*Fn)(FunctionContext*, AnyVal*); + typedef StringVal (*Fn)(FunctionContext*, AnyVal*); StringVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); set_output_slot(&v, dst_slot_desc, dst); break; @@ -949,21 +922,21 @@ void AggFnEvaluator::serialize_or_finalize(FunctionContext* agg_fn_ctx, Tuple* s case TYPE_DATE: case TYPE_DATETIME: { - typedef DateTimeVal(*Fn)(FunctionContext*, AnyVal*); + typedef DateTimeVal (*Fn)(FunctionContext*, AnyVal*); DateTimeVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); set_output_slot(&v, dst_slot_desc, dst); break; } case TYPE_DECIMAL: { - typedef DecimalVal(*Fn)(FunctionContext*, AnyVal*); + typedef DecimalVal (*Fn)(FunctionContext*, AnyVal*); DecimalVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); set_output_slot(&v, dst_slot_desc, dst); break; } case TYPE_DECIMALV2: { - typedef DecimalV2Val(*Fn)(FunctionContext*, AnyVal*); + typedef DecimalV2Val (*Fn)(FunctionContext*, AnyVal*); DecimalV2Val v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); set_output_slot(&v, dst_slot_desc, dst); break; @@ -1007,4 +980,4 @@ std::string AggFnEvaluator::debug_string() const { return out.str(); } -} +} // namespace doris diff --git a/be/src/exprs/agg_fn_evaluator.h b/be/src/exprs/agg_fn_evaluator.h old 
mode 100755 new mode 100644 index 1c9bd598a2b53c..855ca15526d082 --- a/be/src/exprs/agg_fn_evaluator.h +++ b/be/src/exprs/agg_fn_evaluator.h @@ -18,19 +18,20 @@ #ifndef DORIS_BE_SRC_QUERY_EXPRS_AGG_FN_EVALUATOR_H #define DORIS_BE_SRC_QUERY_EXPRS_AGG_FN_EVALUATOR_H +#include +#include #include #include -#include "udf/udf.h" -#include -#include + #include "gen_cpp/Exprs_types.h" +#include "udf/udf.h" //#include "exprs/opcode_registry.h" -#include "util/hash_util.hpp" +#include "exprs/expr_context.h" #include "exprs/hybrid_map.h" -#include "runtime/runtime_state.h" #include "runtime/descriptors.h" -#include "exprs/expr_context.h" +#include "runtime/runtime_state.h" #include "runtime/tuple.h" +#include "util/hash_util.hpp" namespace doris { @@ -66,11 +67,10 @@ class AggFnEvaluator { // and returned in *result. This constructs the input Expr trees for // this aggregate function as specified in desc. The result is returned in // *result. - static Status create(ObjectPool* pool, const TExpr& desc, - AggFnEvaluator** result); + static Status create(ObjectPool* pool, const TExpr& desc, AggFnEvaluator** result); static Status create(ObjectPool* pool, const TExpr& desc, bool is_analytic_fn, - AggFnEvaluator** result); + AggFnEvaluator** result); // Initializes the agg expr. 'desc' must be the row descriptor for the input TupleRow. // It is used to get the input values in the Update() and Merge() functions. @@ -79,42 +79,26 @@ class AggFnEvaluator { // either string data for intermediate results or whatever memory the UDA might // need. // TODO: should we give them their own pool? 
- Status prepare( - RuntimeState* state, - const RowDescriptor& desc, - MemPool* pool, - const SlotDescriptor* intermediate_slot_desc, - const SlotDescriptor* output_slot_desc, - const std::shared_ptr& mem_tracker, - FunctionContext** agg_fn_ctx); + Status prepare(RuntimeState* state, const RowDescriptor& desc, MemPool* pool, + const SlotDescriptor* intermediate_slot_desc, + const SlotDescriptor* output_slot_desc, + const std::shared_ptr& mem_tracker, FunctionContext** agg_fn_ctx); Status open(RuntimeState* state, FunctionContext* agg_fn_ctx); void close(RuntimeState* state); - const TypeDescriptor& intermediate_type() const { - return _intermediate_slot_desc->type(); - } + const TypeDescriptor& intermediate_type() const { return _intermediate_slot_desc->type(); } //PrimitiveType type() const { return _type.type; } - AggregationOp agg_op() const { - return _agg_op; - } - const std::vector& input_expr_ctxs() const { - return _input_exprs_ctxs; - } - bool is_merge() const { - return _is_merge; - } + AggregationOp agg_op() const { return _agg_op; } + const std::vector& input_expr_ctxs() const { return _input_exprs_ctxs; } + bool is_merge() const { return _is_merge; } bool is_count_star() const { return _agg_op == AggregationOp::COUNT && _input_exprs_ctxs.empty(); } - bool is_builtin() const { - return _function_type == TFunctionBinaryType::BUILTIN; - } - bool supports_serialize() const { - return _serialize_fn != NULL; - } + bool is_builtin() const { return _function_type == TFunctionBinaryType::BUILTIN; } + bool supports_serialize() const { return _serialize_fn != NULL; } static std::string debug_string(const std::vector& exprs); std::string debug_string() const; @@ -133,11 +117,9 @@ class AggFnEvaluator { // builtins. void get_value(doris_udf::FunctionContext* agg_fn_ctx, Tuple* src, Tuple* dst); - // Functions for different phases of the aggregation. 
void init(FunctionContext* agg_fn_ctx, Tuple* dst); - void update(FunctionContext* agg_fn_ctx, TupleRow* src, Tuple* dst, - void* fn, MemPool* pool); + void update(FunctionContext* agg_fn_ctx, TupleRow* src, Tuple* dst, void* fn, MemPool* pool); void merge(FunctionContext* agg_fn_ctx, TupleRow* src, Tuple* dst, MemPool* pool); // Explicitly does a merge, even if this evaluator is not marked as merging. // This is used by the partitioned agg node when it needs to merge spill results. @@ -167,39 +149,36 @@ class AggFnEvaluator { // DATETIME VAL has two part: packet_time is 8 byte, and type is 4 byte // MySQL packet time : int64_t packed_time; // Indicate which type of this value : int type; - static const size_t DATETIME_SIZE = 16; + static const size_t DATETIME_SIZE = 16; inline void update_mem_limlits(int len); inline void update_mem_trackers(bool is_filter, bool is_add_buckets, int len); bool count_distinct_data_filter(TupleRow* row, Tuple* dst); bool sum_distinct_data_filter(TupleRow* row, Tuple* dst); - bool is_multi_distinct() { - return _is_multi_distinct; - } + bool is_multi_distinct() { return _is_multi_distinct; } bool is_in_hybridmap(void* input_val, Tuple* dst, bool* is_add_buckets); void choose_update_or_merge(FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst); static void add(const std::vector& evaluators, - const std::vector& fn_ctxs, TupleRow* src, Tuple* dst); + const std::vector& fn_ctxs, TupleRow* src, + Tuple* dst); static void remove(const std::vector& evaluators, - const std::vector& fn_ctxs, TupleRow* src, Tuple* dst); + const std::vector& fn_ctxs, TupleRow* src, + Tuple* dst); static void get_value(const std::vector& evaluators, - const std::vector& fn_ctxs, Tuple* src, Tuple* dst); + const std::vector& fn_ctxs, Tuple* src, + Tuple* dst); static void finalize(const std::vector& evaluators, - const std::vector& fn_ctxs, Tuple* - src, Tuple* dst); + const std::vector& fn_ctxs, Tuple* src, + Tuple* dst); static void init(const std::vector& 
evaluators, - const std::vector& fn_ctxs, Tuple* dst); + const std::vector& fn_ctxs, Tuple* dst); static void serialize(const std::vector& evaluators, - const std::vector& fn_ctxs, Tuple* dst); + const std::vector& fn_ctxs, Tuple* dst); - const std::string& fn_name() const { - return _fn.name.function_name; - } + const std::string& fn_name() const { return _fn.name.function_name; } - const SlotDescriptor* output_slot_desc() const { - return _output_slot_desc; - } + const SlotDescriptor* output_slot_desc() const { return _output_slot_desc; } private: const TFunction _fn; @@ -212,8 +191,8 @@ class AggFnEvaluator { bool _is_multi_distinct; std::vector _input_exprs_ctxs; boost::scoped_array _string_buffer; //for count distinct - int _string_buffer_len; //for count distinct - std::shared_ptr _mem_tracker; // saved c'tor param + int _string_buffer_len; //for count distinct + std::shared_ptr _mem_tracker; // saved c'tor param const TypeDescriptor _return_type; const TypeDescriptor _intermediate_type; @@ -278,44 +257,43 @@ class AggFnEvaluator { // Sets up the arguments to call fn. This converts from the agg-expr signature, // taking TupleRow to the UDA signature taking AnvVals. - void update_or_merge(FunctionContext* agg_fn_ctx, - TupleRow* row, Tuple* dst, void* fn); + void update_or_merge(FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst, void* fn); // Sets up the arguments to call fn. This converts from the agg-expr signature, // taking TupleRow to the UDA signature taking AnvVals. 
// void serialize_or_finalize(FunctionContext* agg_fn_ctx, const SlotDescriptor* dst_slot_desc, Tuple* dst, void* fn); void serialize_or_finalize(FunctionContext* agg_fn_ctx, Tuple* src, - const SlotDescriptor* dst_slot_desc, Tuple* dst, void* fn); + const SlotDescriptor* dst_slot_desc, Tuple* dst, void* fn); // Writes the result in src into dst pointed to by _output_slot_desc void set_output_slot(const doris_udf::AnyVal* src, const SlotDescriptor* dst_slot_desc, - Tuple* dst); + Tuple* dst); // Sets 'dst' to the value from 'slot'. void set_any_val(const void* slot, const TypeDescriptor& type, doris_udf::AnyVal* dst); }; -inline void AggFnEvaluator::add( - doris_udf::FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst) { +inline void AggFnEvaluator::add(doris_udf::FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst) { agg_fn_ctx->impl()->increment_num_updates(); update(agg_fn_ctx, row, dst, _is_merge ? _merge_fn : _update_fn, NULL); } -inline void AggFnEvaluator::remove( - doris_udf::FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst) { +inline void AggFnEvaluator::remove(doris_udf::FunctionContext* agg_fn_ctx, TupleRow* row, + Tuple* dst) { agg_fn_ctx->impl()->increment_num_removes(); update(agg_fn_ctx, row, dst, _remove_fn, NULL); } -inline void AggFnEvaluator::finalize( - doris_udf::FunctionContext* agg_fn_ctx, Tuple* src, Tuple* dst) { +inline void AggFnEvaluator::finalize(doris_udf::FunctionContext* agg_fn_ctx, Tuple* src, + Tuple* dst) { serialize_or_finalize(agg_fn_ctx, src, _output_slot_desc, dst, _finalize_fn); } -inline void AggFnEvaluator::get_value( - doris_udf::FunctionContext* agg_fn_ctx, Tuple* src, Tuple* dst) { +inline void AggFnEvaluator::get_value(doris_udf::FunctionContext* agg_fn_ctx, Tuple* src, + Tuple* dst) { serialize_or_finalize(agg_fn_ctx, src, _output_slot_desc, dst, _get_value_fn); } inline void AggFnEvaluator::init(const std::vector& evaluators, - const std::vector& fn_ctxs, Tuple* dst) { + const std::vector& fn_ctxs, + 
Tuple* dst) { DCHECK_EQ(evaluators.size(), fn_ctxs.size()); for (int i = 0; i < evaluators.size(); ++i) { @@ -323,7 +301,8 @@ inline void AggFnEvaluator::init(const std::vector& evaluators, } } inline void AggFnEvaluator::add(const std::vector& evaluators, - const std::vector& fn_ctxs, TupleRow* src, Tuple* dst) { + const std::vector& fn_ctxs, + TupleRow* src, Tuple* dst) { DCHECK_EQ(evaluators.size(), fn_ctxs.size()); for (int i = 0; i < evaluators.size(); ++i) { @@ -331,7 +310,8 @@ inline void AggFnEvaluator::add(const std::vector& evaluators, } } inline void AggFnEvaluator::remove(const std::vector& evaluators, - const std::vector& fn_ctxs, TupleRow* src, Tuple* dst) { + const std::vector& fn_ctxs, + TupleRow* src, Tuple* dst) { DCHECK_EQ(evaluators.size(), fn_ctxs.size()); for (int i = 0; i < evaluators.size(); ++i) { @@ -339,7 +319,8 @@ inline void AggFnEvaluator::remove(const std::vector& evaluator } } inline void AggFnEvaluator::serialize(const std::vector& evaluators, - const std::vector& fn_ctxs, Tuple* dst) { + const std::vector& fn_ctxs, + Tuple* dst) { DCHECK_EQ(evaluators.size(), fn_ctxs.size()); for (int i = 0; i < evaluators.size(); ++i) { @@ -347,15 +328,17 @@ inline void AggFnEvaluator::serialize(const std::vector& evalua } } inline void AggFnEvaluator::get_value(const std::vector& evaluators, - const std::vector& fn_ctxs, Tuple* src, Tuple* dst) { + const std::vector& fn_ctxs, + Tuple* src, Tuple* dst) { DCHECK_EQ(evaluators.size(), fn_ctxs.size()); for (int i = 0; i < evaluators.size(); ++i) { evaluators[i]->get_value(fn_ctxs[i], src, dst); } } -inline void AggFnEvaluator::finalize(const std::vector& evaluators, - const std::vector& fn_ctxs, Tuple* src, Tuple* dst) { +inline void AggFnEvaluator::finalize(const std::vector& evaluators, + const std::vector& fn_ctxs, + Tuple* src, Tuple* dst) { DCHECK_EQ(evaluators.size(), fn_ctxs.size()); for (int i = 0; i < evaluators.size(); ++i) { @@ -363,6 +346,6 @@ inline void AggFnEvaluator::finalize(const 
std::vector& evalua } } -} +} // namespace doris #endif diff --git a/be/src/exprs/aggregate_functions.cpp b/be/src/exprs/aggregate_functions.cpp index 164552aea27bcc..885da8f8c59da8 100644 --- a/be/src/exprs/aggregate_functions.cpp +++ b/be/src/exprs/aggregate_functions.cpp @@ -19,17 +19,18 @@ #include "exprs/aggregate_functions.h" #include + #include #include #include "common/logging.h" -#include "runtime/string_value.h" -#include "runtime/datetime_value.h" -#include "runtime/runtime_state.h" #include "exprs/anyval_util.h" #include "exprs/hybrid_set.h" -#include "util/tdigest.h" +#include "runtime/datetime_value.h" +#include "runtime/runtime_state.h" +#include "runtime/string_value.h" #include "util/debug_util.h" +#include "util/tdigest.h" // TODO: this file should be cross compiled and then all of the builtin // aggregate functions will have a codegen enabled path. Then we can remove @@ -57,25 +58,24 @@ void AggregateFunctions::init_null(FunctionContext*, AnyVal* dst) { dst->is_null = true; } -template +template void AggregateFunctions::init_zero(FunctionContext*, T* dst) { dst->is_null = false; dst->val = 0; } -template<> +template <> void AggregateFunctions::init_zero(FunctionContext*, DecimalVal* dst) { dst->set_to_zero(); } -template<> +template <> void AggregateFunctions::init_zero(FunctionContext*, DecimalV2Val* dst) { dst->set_to_zero(); } -template -void AggregateFunctions::sum_remove(FunctionContext* ctx, const SRC_VAL& src, - DST_VAL* dst) { +template +void AggregateFunctions::sum_remove(FunctionContext* ctx, const SRC_VAL& src, DST_VAL* dst) { // Do not count null values towards the number of removes if (src.is_null) { ctx->impl()->increment_num_removes(-1); @@ -93,9 +93,8 @@ void AggregateFunctions::sum_remove(FunctionContext* ctx, const SRC_VAL& src, dst->val -= src.val; } -template<> -void AggregateFunctions::sum_remove(FunctionContext* ctx, const DecimalVal& src, - DecimalVal* dst) { +template <> +void 
AggregateFunctions::sum_remove(FunctionContext* ctx, const DecimalVal& src, DecimalVal* dst) { if (ctx->impl()->num_removes() >= ctx->impl()->num_updates()) { *dst = DecimalVal::null(); return; @@ -113,9 +112,9 @@ void AggregateFunctions::sum_remove(FunctionContext* ctx, const DecimalVal& src, new_dst.to_decimal_val(dst); } -template<> +template <> void AggregateFunctions::sum_remove(FunctionContext* ctx, const DecimalV2Val& src, - DecimalV2Val* dst) { + DecimalV2Val* dst) { if (ctx->impl()->num_removes() >= ctx->impl()->num_updates()) { *dst = DecimalV2Val::null(); return; @@ -133,9 +132,7 @@ void AggregateFunctions::sum_remove(FunctionContext* ctx, const DecimalV2Val& sr new_dst.to_decimal_val(dst); } - -StringVal AggregateFunctions::string_val_get_value( - FunctionContext* ctx, const StringVal& src) { +StringVal AggregateFunctions::string_val_get_value(FunctionContext* ctx, const StringVal& src) { if (src.is_null) { return src; } @@ -144,8 +141,8 @@ StringVal AggregateFunctions::string_val_get_value( return result; } -StringVal AggregateFunctions::string_val_serialize_or_finalize( - FunctionContext* ctx, const StringVal& src) { +StringVal AggregateFunctions::string_val_serialize_or_finalize(FunctionContext* ctx, + const StringVal& src) { StringVal result = string_val_get_value(ctx, src); if (!src.is_null) { ctx->free(src.ptr); @@ -153,8 +150,7 @@ StringVal AggregateFunctions::string_val_serialize_or_finalize( return result; } -void AggregateFunctions::count_update( - FunctionContext*, const AnyVal& src, BigIntVal* dst) { +void AggregateFunctions::count_update(FunctionContext*, const AnyVal& src, BigIntVal* dst) { DCHECK(!dst->is_null); if (!src.is_null) { @@ -162,15 +158,13 @@ void AggregateFunctions::count_update( } } -void AggregateFunctions::count_merge(FunctionContext*, const BigIntVal& src, - BigIntVal* dst) { +void AggregateFunctions::count_merge(FunctionContext*, const BigIntVal& src, BigIntVal* dst) { DCHECK(!dst->is_null); DCHECK(!src.is_null); dst->val 
+= src.val; } -void AggregateFunctions::count_remove( - FunctionContext*, const AnyVal& src, BigIntVal* dst) { +void AggregateFunctions::count_remove(FunctionContext*, const AnyVal& src, BigIntVal* dst) { DCHECK(!dst->is_null); if (!src.is_null) { --dst->val; @@ -182,11 +176,9 @@ struct PercentileApproxState { public: PercentileApproxState() : digest(new TDigest()) {} PercentileApproxState(double compression) : digest(new TDigest(compression)) {} - ~PercentileApproxState() { - delete digest; - } + ~PercentileApproxState() { delete digest; } - TDigest *digest = nullptr; + TDigest* digest = nullptr; double targetQuantile = -1.0; }; @@ -197,16 +189,17 @@ void AggregateFunctions::percentile_approx_init(FunctionContext* ctx, StringVal* if (digest_compression != nullptr) { double compression = reinterpret_cast(digest_compression)->val; if (compression >= 2048 && compression <= 10000) { - dst->ptr = (uint8_t*) new PercentileApproxState(compression); + dst->ptr = (uint8_t*)new PercentileApproxState(compression); return; } } - dst->ptr = (uint8_t*) new PercentileApproxState(); + dst->ptr = (uint8_t*)new PercentileApproxState(); }; -template -void AggregateFunctions::percentile_approx_update(FunctionContext* ctx, const T& src, const DoubleVal& quantile, StringVal* dst) { +template +void AggregateFunctions::percentile_approx_update(FunctionContext* ctx, const T& src, + const DoubleVal& quantile, StringVal* dst) { if (src.is_null) { return; } @@ -218,9 +211,11 @@ void AggregateFunctions::percentile_approx_update(FunctionContext* ctx, const T& percentile->targetQuantile = quantile.val; } -template -void AggregateFunctions::percentile_approx_update(FunctionContext* ctx, const T& src, const DoubleVal& quantile, - const DoubleVal& digest_compression, StringVal* dst) { +template +void AggregateFunctions::percentile_approx_update(FunctionContext* ctx, const T& src, + const DoubleVal& quantile, + const DoubleVal& digest_compression, + StringVal* dst) { if (src.is_null) { return; } @@ 
-232,7 +227,8 @@ void AggregateFunctions::percentile_approx_update(FunctionContext* ctx, const T& percentile->targetQuantile = quantile.val; } -StringVal AggregateFunctions::percentile_approx_serialize(FunctionContext* ctx, const StringVal& src) { +StringVal AggregateFunctions::percentile_approx_serialize(FunctionContext* ctx, + const StringVal& src) { DCHECK(!src.is_null); PercentileApproxState* percentile = reinterpret_cast(src.ptr); @@ -245,14 +241,15 @@ StringVal AggregateFunctions::percentile_approx_serialize(FunctionContext* ctx, return result; } -void AggregateFunctions::percentile_approx_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst) { +void AggregateFunctions::percentile_approx_merge(FunctionContext* ctx, const StringVal& src, + StringVal* dst) { DCHECK(dst->ptr != NULL); DCHECK_EQ(sizeof(PercentileApproxState), dst->len); double quantile; memcpy(&quantile, src.ptr, sizeof(double)); - PercentileApproxState *src_percentile = new PercentileApproxState(); + PercentileApproxState* src_percentile = new PercentileApproxState(); src_percentile->targetQuantile = quantile; src_percentile->digest->unserialize(src.ptr + sizeof(double)); @@ -263,10 +260,11 @@ void AggregateFunctions::percentile_approx_merge(FunctionContext* ctx, const Str delete src_percentile; } -DoubleVal AggregateFunctions::percentile_approx_finalize(FunctionContext* ctx, const StringVal& src) { +DoubleVal AggregateFunctions::percentile_approx_finalize(FunctionContext* ctx, + const StringVal& src) { DCHECK(!src.is_null); - PercentileApproxState* percentile = reinterpret_cast(src.ptr); + PercentileApproxState* percentile = reinterpret_cast(src.ptr); double quantile = percentile->targetQuantile; double result = percentile->digest->quantile(quantile); @@ -312,7 +310,7 @@ void AggregateFunctions::decimalv2_avg_init(FunctionContext* ctx, StringVal* dst // The memory for int128 need to be aligned by 16. // So the constructor has been used instead of allocating memory. 
// Also, it will be release in finalize. - dst->ptr = (uint8_t*) new DecimalV2AvgState; + dst->ptr = (uint8_t*)new DecimalV2AvgState; } template @@ -327,9 +325,8 @@ void AggregateFunctions::avg_update(FunctionContext* ctx, const T& src, StringVa ++avg->count; } -void AggregateFunctions::decimal_avg_update(FunctionContext* ctx, - const DecimalVal& src, - StringVal* dst) { +void AggregateFunctions::decimal_avg_update(FunctionContext* ctx, const DecimalVal& src, + StringVal* dst) { if (src.is_null) { return; } @@ -345,9 +342,8 @@ void AggregateFunctions::decimal_avg_update(FunctionContext* ctx, ++avg->count; } -void AggregateFunctions::decimalv2_avg_update(FunctionContext* ctx, - const DecimalV2Val& src, - StringVal* dst) { +void AggregateFunctions::decimalv2_avg_update(FunctionContext* ctx, const DecimalV2Val& src, + StringVal* dst) { if (src.is_null) { return; } @@ -363,8 +359,7 @@ void AggregateFunctions::decimalv2_avg_update(FunctionContext* ctx, ++avg->count; } -StringVal AggregateFunctions::decimalv2_avg_serialize( - FunctionContext* ctx, const StringVal& src) { +StringVal AggregateFunctions::decimalv2_avg_serialize(FunctionContext* ctx, const StringVal& src) { DCHECK(!src.is_null); StringVal result(ctx, src.len); memcpy(result.ptr, src.ptr, src.len); @@ -387,9 +382,8 @@ void AggregateFunctions::avg_remove(FunctionContext* ctx, const T& src, StringVa DCHECK_GE(avg->count, 0); } -void AggregateFunctions::decimal_avg_remove(doris_udf::FunctionContext* ctx, - const DecimalVal& src, - StringVal* dst) { +void AggregateFunctions::decimal_avg_remove(doris_udf::FunctionContext* ctx, const DecimalVal& src, + StringVal* dst) { // Remove doesn't need to explicitly check the number of calls to Update() or Remove() // because Finalize() returns NULL if count is 0. 
if (src.is_null) { @@ -409,8 +403,7 @@ void AggregateFunctions::decimal_avg_remove(doris_udf::FunctionContext* ctx, } void AggregateFunctions::decimalv2_avg_remove(doris_udf::FunctionContext* ctx, - const DecimalV2Val& src, - StringVal* dst) { + const DecimalV2Val& src, StringVal* dst) { // Remove doesn't need to explicitly check the number of calls to Update() or Remove() // because Finalize() returns NULL if count is 0. if (src.is_null) { @@ -429,8 +422,7 @@ void AggregateFunctions::decimalv2_avg_remove(doris_udf::FunctionContext* ctx, DCHECK_GE(avg->count, 0); } -void AggregateFunctions::avg_merge(FunctionContext* ctx, const StringVal& src, - StringVal* dst) { +void AggregateFunctions::avg_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst) { const AvgState* src_struct = reinterpret_cast(src.ptr); DCHECK(dst->ptr != NULL); DCHECK_EQ(sizeof(AvgState), dst->len); @@ -440,7 +432,7 @@ void AggregateFunctions::avg_merge(FunctionContext* ctx, const StringVal& src, } void AggregateFunctions::decimal_avg_merge(FunctionContext* ctx, const StringVal& src, - StringVal* dst) { + StringVal* dst) { const DecimalAvgState* src_struct = reinterpret_cast(src.ptr); DCHECK(dst->ptr != NULL); DCHECK_EQ(sizeof(DecimalAvgState), dst->len); @@ -454,7 +446,7 @@ void AggregateFunctions::decimal_avg_merge(FunctionContext* ctx, const StringVal } void AggregateFunctions::decimalv2_avg_merge(FunctionContext* ctx, const StringVal& src, - StringVal* dst) { + StringVal* dst) { DecimalV2AvgState src_struct; memcpy(&src_struct, src.ptr, sizeof(DecimalV2AvgState)); DCHECK(dst->ptr != NULL); @@ -489,7 +481,8 @@ DecimalVal AggregateFunctions::decimal_avg_get_value(FunctionContext* ctx, const return res; } -DecimalV2Val AggregateFunctions::decimalv2_avg_get_value(FunctionContext* ctx, const StringVal& src) { +DecimalV2Val AggregateFunctions::decimalv2_avg_get_value(FunctionContext* ctx, + const StringVal& src) { DecimalV2AvgState* val_struct = reinterpret_cast(src.ptr); if 
(val_struct->count == 0) { return DecimalV2Val::null(); @@ -520,14 +513,15 @@ DecimalVal AggregateFunctions::decimal_avg_finalize(FunctionContext* ctx, const return result; } -DecimalV2Val AggregateFunctions::decimalv2_avg_finalize(FunctionContext* ctx, const StringVal& src) { +DecimalV2Val AggregateFunctions::decimalv2_avg_finalize(FunctionContext* ctx, + const StringVal& src) { DecimalV2Val result = decimalv2_avg_get_value(ctx, src); delete (DecimalV2AvgState*)src.ptr; return result; } -void AggregateFunctions::timestamp_avg_update(FunctionContext* ctx, - const DateTimeVal& src, StringVal* dst) { +void AggregateFunctions::timestamp_avg_update(FunctionContext* ctx, const DateTimeVal& src, + StringVal* dst) { if (src.is_null) { return; } @@ -539,8 +533,8 @@ void AggregateFunctions::timestamp_avg_update(FunctionContext* ctx, ++avg->count; } -void AggregateFunctions::timestamp_avg_remove(FunctionContext* ctx, - const DateTimeVal& src, StringVal* dst) { +void AggregateFunctions::timestamp_avg_remove(FunctionContext* ctx, const DateTimeVal& src, + StringVal* dst) { if (src.is_null) { return; } @@ -554,7 +548,7 @@ void AggregateFunctions::timestamp_avg_remove(FunctionContext* ctx, } DateTimeVal AggregateFunctions::timestamp_avg_get_value(FunctionContext* ctx, - const StringVal& src) { + const StringVal& src) { AvgState* val_struct = reinterpret_cast(src.ptr); if (val_struct->count == 0) { return DateTimeVal::null(); @@ -565,8 +559,7 @@ DateTimeVal AggregateFunctions::timestamp_avg_get_value(FunctionContext* ctx, return result; } -DateTimeVal AggregateFunctions::timestamp_avg_finalize(FunctionContext* ctx, - const StringVal& src) { +DateTimeVal AggregateFunctions::timestamp_avg_finalize(FunctionContext* ctx, const StringVal& src) { if (src.is_null) { return DateTimeVal::null(); } @@ -586,7 +579,7 @@ void AggregateFunctions::count_star_remove(FunctionContext*, BigIntVal* dst) { DCHECK_GE(dst->val, 0); } -template +template void AggregateFunctions::sum(FunctionContext* 
ctx, const SRC_VAL& src, DST_VAL* dst) { if (src.is_null) { return; @@ -599,7 +592,7 @@ void AggregateFunctions::sum(FunctionContext* ctx, const SRC_VAL& src, DST_VAL* dst->val += src.val; } -template<> +template <> void AggregateFunctions::sum(FunctionContext* ctx, const DecimalVal& src, DecimalVal* dst) { if (src.is_null) { return; @@ -616,7 +609,7 @@ void AggregateFunctions::sum(FunctionContext* ctx, const DecimalVal& src, Decima new_dst.to_decimal_val(dst); } -template<> +template <> void AggregateFunctions::sum(FunctionContext* ctx, const DecimalV2Val& src, DecimalV2Val* dst) { if (src.is_null) { return; @@ -633,7 +626,7 @@ void AggregateFunctions::sum(FunctionContext* ctx, const DecimalV2Val& src, Deci new_dst.to_decimal_val(dst); } -template<> +template <> void AggregateFunctions::sum(FunctionContext* ctx, const LargeIntVal& src, LargeIntVal* dst) { if (src.is_null) { return; @@ -647,7 +640,7 @@ void AggregateFunctions::sum(FunctionContext* ctx, const LargeIntVal& src, Large dst->val += src.val; } -template +template void AggregateFunctions::min(FunctionContext*, const T& src, T* dst) { if (src.is_null) { return; @@ -658,7 +651,7 @@ void AggregateFunctions::min(FunctionContext*, const T& src, T* dst) { } } -template +template void AggregateFunctions::max(FunctionContext*, const T& src, T* dst) { if (src.is_null) { return; @@ -669,7 +662,7 @@ void AggregateFunctions::max(FunctionContext*, const T& src, T* dst) { } } -template<> +template <> void AggregateFunctions::min(FunctionContext*, const DecimalVal& src, DecimalVal* dst) { if (src.is_null) { return; @@ -687,7 +680,7 @@ void AggregateFunctions::min(FunctionContext*, const DecimalVal& src, DecimalVal } } -template<> +template <> void AggregateFunctions::min(FunctionContext*, const DecimalV2Val& src, DecimalV2Val* dst) { if (src.is_null) { return; @@ -705,8 +698,7 @@ void AggregateFunctions::min(FunctionContext*, const DecimalV2Val& src, DecimalV } } - -template<> +template <> void 
AggregateFunctions::min(FunctionContext*, const LargeIntVal& src, LargeIntVal* dst) { if (src.is_null) { return; @@ -722,7 +714,7 @@ void AggregateFunctions::min(FunctionContext*, const LargeIntVal& src, LargeIntV } } -template<> +template <> void AggregateFunctions::max(FunctionContext*, const DecimalVal& src, DecimalVal* dst) { if (src.is_null) { return; @@ -740,7 +732,7 @@ void AggregateFunctions::max(FunctionContext*, const DecimalVal& src, DecimalVal } } -template<> +template <> void AggregateFunctions::max(FunctionContext*, const DecimalV2Val& src, DecimalV2Val* dst) { if (src.is_null) { return; @@ -758,8 +750,7 @@ void AggregateFunctions::max(FunctionContext*, const DecimalV2Val& src, DecimalV } } - -template<> +template <> void AggregateFunctions::max(FunctionContext*, const LargeIntVal& src, LargeIntVal* dst) { if (src.is_null) { return; @@ -781,14 +772,13 @@ void AggregateFunctions::init_null_string(FunctionContext* c, StringVal* dst) { dst->len = 0; } -template<> +template <> void AggregateFunctions::min(FunctionContext* ctx, const StringVal& src, StringVal* dst) { if (src.is_null) { return; } - if (dst->is_null || - StringValue::from_string_val(src) < StringValue::from_string_val(*dst)) { + if (dst->is_null || StringValue::from_string_val(src) < StringValue::from_string_val(*dst)) { if (!dst->is_null) { ctx->free(dst->ptr); } @@ -798,14 +788,13 @@ void AggregateFunctions::min(FunctionContext* ctx, const StringVal& src, StringV } } -template<> +template <> void AggregateFunctions::max(FunctionContext* ctx, const StringVal& src, StringVal* dst) { if (src.is_null) { return; } - if (dst->is_null || - StringValue::from_string_val(src) > StringValue::from_string_val(*dst)) { + if (dst->is_null || StringValue::from_string_val(src) > StringValue::from_string_val(*dst)) { if (!dst->is_null) { ctx->free(dst->ptr); } @@ -815,9 +804,8 @@ void AggregateFunctions::max(FunctionContext* ctx, const StringVal& src, StringV } } -template<> -void 
AggregateFunctions::min(FunctionContext*, - const DateTimeVal& src, DateTimeVal* dst) { +template <> +void AggregateFunctions::min(FunctionContext*, const DateTimeVal& src, DateTimeVal* dst) { if (src.is_null) { return; } @@ -835,9 +823,8 @@ void AggregateFunctions::min(FunctionContext*, } } -template<> -void AggregateFunctions::max(FunctionContext*, - const DateTimeVal& src, DateTimeVal* dst) { +template <> +void AggregateFunctions::max(FunctionContext*, const DateTimeVal& src, DateTimeVal* dst) { if (src.is_null) { return; } @@ -856,7 +843,7 @@ void AggregateFunctions::max(FunctionContext*, } void AggregateFunctions::string_concat(FunctionContext* ctx, const StringVal& src, - const StringVal& separator, StringVal* result) { + const StringVal& separator, StringVal* result) { if (src.is_null) { return; } @@ -868,8 +855,7 @@ void AggregateFunctions::string_concat(FunctionContext* ctx, const StringVal& sr return; } - const StringVal* sep_ptr = separator.is_null ? &DEFAULT_STRING_CONCAT_DELIM : - &separator; + const StringVal* sep_ptr = separator.is_null ? &DEFAULT_STRING_CONCAT_DELIM : &separator; int new_size = result->len + sep_ptr->len + src.len; result->ptr = ctx->reallocate(result->ptr, new_size); @@ -886,13 +872,13 @@ void AggregateFunctions::string_concat(FunctionContext* ctx, const StringVal& sr using StringConcatHeader = int64_t; // Delimiter to use if the separator is NULL. 
-void AggregateFunctions::string_concat_update(FunctionContext* ctx, - const StringVal& src, StringVal* result) { +void AggregateFunctions::string_concat_update(FunctionContext* ctx, const StringVal& src, + StringVal* result) { string_concat_update(ctx, src, DEFAULT_STRING_CONCAT_DELIM, result); } -void AggregateFunctions::string_concat_update(FunctionContext* ctx, - const StringVal& src, const StringVal& separator, StringVal* result) { +void AggregateFunctions::string_concat_update(FunctionContext* ctx, const StringVal& src, + const StringVal& separator, StringVal* result) { if (src.is_null) { return; } @@ -907,27 +893,26 @@ void AggregateFunctions::string_concat_update(FunctionContext* ctx, result->append(ctx, sep->ptr, sep->len, src.ptr, src.len); } -void AggregateFunctions::string_concat_merge(FunctionContext* ctx, - const StringVal& src, StringVal* result) { +void AggregateFunctions::string_concat_merge(FunctionContext* ctx, const StringVal& src, + StringVal* result) { if (src.is_null) { return; } const auto header_len = sizeof(StringConcatHeader); if (result->is_null) { - // Copy the header from the first intermediate value. + // Copy the header from the first intermediate value. *result = StringVal(ctx->allocate(header_len), header_len); if (result->is_null) { return; } *reinterpret_cast(result->ptr) = - *reinterpret_cast(src.ptr); + *reinterpret_cast(src.ptr); } // Append the string portion of the intermediate src to result (omit src's header). result->append(ctx, src.ptr + header_len, src.len - header_len); } -StringVal AggregateFunctions::string_concat_finalize(FunctionContext* ctx, - const StringVal& src) { +StringVal AggregateFunctions::string_concat_finalize(FunctionContext* ctx, const StringVal& src) { if (src.is_null) { return src; } @@ -937,12 +922,11 @@ StringVal AggregateFunctions::string_concat_finalize(FunctionContext* ctx, DCHECK(src.len >= header_len + sep_len); // Remove the header and the first separator. 
StringVal result = StringVal::copy_from(ctx, src.ptr + header_len + sep_len, - src.len - header_len - sep_len); + src.len - header_len - sep_len); ctx->free(src.ptr); return result; } - // Compute distinctpc and distinctpcsa using Flajolet and Martin's algorithm // (Probabilistic Counting Algorithms for Data Base Applications) // We have implemented two variants here: one with stochastic averaging (with PCSA @@ -954,7 +938,7 @@ StringVal AggregateFunctions::string_concat_finalize(FunctionContext* ctx, // (UpdateMergeEstimateSlot) // 4. compute the estimate using the bitmaps when all the rows are processed // (FinalizeEstimateSlot) -const static int NUM_PC_BITMAPS = 64; // number of bitmaps +const static int NUM_PC_BITMAPS = 64; // number of bitmaps const static int PC_BITMAP_LENGTH = 32; // the length of each bit map const static float PC_THETA = 0.77351f; // the magic number to compute the final result @@ -984,8 +968,8 @@ void AggregateFunctions::pc_init(FunctionContext* c, StringVal* dst) { memset(dst->ptr, 0, str_len); } -static inline void set_distinct_estimate_bit(uint8_t* bitmap, - uint32_t row_index, uint32_t bit_index) { +static inline void set_distinct_estimate_bit(uint8_t* bitmap, uint32_t row_index, + uint32_t bit_index) { // We need to convert Bitmap[alpha,index] into the index of the string. // alpha tells which of the 32bit we've to jump to. // index then lead us to the byte and bit. 
@@ -993,13 +977,13 @@ static inline void set_distinct_estimate_bit(uint8_t* bitmap, int_bitmap[row_index] |= (1 << bit_index); } -static inline bool get_distinct_estimate_bit(uint8_t* bitmap, - uint32_t row_index, uint32_t bit_index) { +static inline bool get_distinct_estimate_bit(uint8_t* bitmap, uint32_t row_index, + uint32_t bit_index) { uint32_t* int_bitmap = reinterpret_cast(bitmap); return ((int_bitmap[row_index] & (1 << bit_index)) > 0); } -template +template void AggregateFunctions::pc_update(FunctionContext* c, const T& input, StringVal* dst) { if (input.is_null) { return; @@ -1022,7 +1006,7 @@ void AggregateFunctions::pc_update(FunctionContext* c, const T& input, StringVal } } -template +template void AggregateFunctions::pcsa_update(FunctionContext* c, const T& input, StringVal* dst) { if (input.is_null) { return; @@ -1063,8 +1047,7 @@ std::string distinct_estimate_bitmap_to_string(uint8_t* v) { return debugstr.str(); } -void AggregateFunctions::pc_merge(FunctionContext* c, - const StringVal& src, StringVal* dst) { +void AggregateFunctions::pc_merge(FunctionContext* c, const StringVal& src, StringVal* dst) { DCHECK(!src.is_null); DCHECK(!dst->is_null); DCHECK_EQ(src.len, NUM_PC_BITMAPS * PC_BITMAP_LENGTH / 8); @@ -1085,8 +1068,7 @@ void AggregateFunctions::pc_merge(FunctionContext* c, double distinct_estimate_finalize(const StringVal& src) { DCHECK(!src.is_null); DCHECK_EQ(src.len, NUM_PC_BITMAPS * PC_BITMAP_LENGTH / 8); - VLOG_ROW << "FinalizeEstimateSlot Bit map:\n" - << distinct_estimate_bitmap_to_string(src.ptr); + VLOG_ROW << "FinalizeEstimateSlot Bit map:\n" << distinct_estimate_bitmap_to_string(src.ptr); // We haven't processed any rows if none of the bits are set. Therefore, we have zero // distinct rows. 
We're overwriting the result in the same string buffer we've @@ -1118,8 +1100,8 @@ double distinct_estimate_finalize(const StringVal& src) { // Count the number of leading ones for each row in the bitmap // We could have used the build in __builtin_clz to count of number of leading zeros // but we first need to invert the 1 and 0. - while (get_distinct_estimate_bit(src.ptr, i, row_bit_count) - && row_bit_count < PC_BITMAP_LENGTH) { + while (get_distinct_estimate_bit(src.ptr, i, row_bit_count) && + row_bit_count < PC_BITMAP_LENGTH) { ++row_bit_count; } @@ -1183,8 +1165,7 @@ void AggregateFunctions::hll_update(FunctionContext* ctx, const T& src, StringVa } } -void AggregateFunctions::hll_merge(FunctionContext* ctx, const StringVal& src, - StringVal* dst) { +void AggregateFunctions::hll_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst) { DCHECK(!dst->is_null); DCHECK(!src.is_null); DCHECK_EQ(dst->len, std::pow(2, HLL_COLUMN_PRECISION)); @@ -1210,15 +1191,15 @@ void AggregateFunctions::hll_union_agg_init(FunctionContext* ctx, HllVal* dst) { dst->init(ctx); } -void AggregateFunctions::hll_union_agg_update(FunctionContext* ctx, - const HllVal& src, HllVal* dst) { +void AggregateFunctions::hll_union_agg_update(FunctionContext* ctx, const HllVal& src, + HllVal* dst) { if (src.is_null) { return; } DCHECK(!dst->is_null); dst->agg_parse_and_cal(ctx, src); - return ; + return; } void AggregateFunctions::hll_union_agg_merge(FunctionContext* ctx, const HllVal& src, HllVal* dst) { @@ -1231,13 +1212,13 @@ void AggregateFunctions::hll_union_agg_merge(FunctionContext* ctx, const HllVal& } doris_udf::BigIntVal AggregateFunctions::hll_union_agg_finalize(doris_udf::FunctionContext* ctx, - const HllVal& src) { + const HllVal& src) { double estimate = hll_algorithm(src); BigIntVal result((int64_t)estimate); return result; } -int64_t AggregateFunctions::hll_algorithm(uint8_t *pdata, int data_len) { +int64_t AggregateFunctions::hll_algorithm(uint8_t* pdata, int data_len) { 
DCHECK_EQ(data_len, HLL_REGISTERS_COUNT); const int num_streams = HLL_REGISTERS_COUNT; @@ -1278,39 +1259,29 @@ int64_t AggregateFunctions::hll_algorithm(uint8_t *pdata, int data_len) { } else if (num_streams == 16384 && estimate < 72000) { // when Linear Count change to HyperLoglog according to HyperLogLog Correction, // there are relatively large fluctuations, we fixed the problem refer to redis. - double bias = 5.9119 * 1.0e-18 * (estimate * estimate * estimate * estimate) - - 1.4253 * 1.0e-12 * (estimate * estimate * estimate) + - 1.2940 * 1.0e-7 * (estimate * estimate) - - 5.2921 * 1.0e-3 * estimate + - 83.3216; + double bias = 5.9119 * 1.0e-18 * (estimate * estimate * estimate * estimate) - + 1.4253 * 1.0e-12 * (estimate * estimate * estimate) + + 1.2940 * 1.0e-7 * (estimate * estimate) - 5.2921 * 1.0e-3 * estimate + + 83.3216; estimate -= estimate * (bias / 100); } return (int64_t)(estimate + 0.5); } -void AggregateFunctions::hll_raw_agg_init( - FunctionContext* ctx, - HllVal* dst) { +void AggregateFunctions::hll_raw_agg_init(FunctionContext* ctx, HllVal* dst) { hll_union_agg_init(ctx, dst); } -void AggregateFunctions::hll_raw_agg_update( - FunctionContext* ctx, - const HllVal& src, - HllVal* dst) { +void AggregateFunctions::hll_raw_agg_update(FunctionContext* ctx, const HllVal& src, HllVal* dst) { hll_union_agg_update(ctx, src, dst); } -void AggregateFunctions::hll_raw_agg_merge( - FunctionContext* ctx, - const HllVal& src, - HllVal* dst) { +void AggregateFunctions::hll_raw_agg_merge(FunctionContext* ctx, const HllVal& src, HllVal* dst) { hll_union_agg_merge(ctx, src, dst); } -doris_udf::HllVal AggregateFunctions::hll_raw_agg_finalize( - doris_udf::FunctionContext* ctx, - const HllVal& src) { +doris_udf::HllVal AggregateFunctions::hll_raw_agg_finalize(doris_udf::FunctionContext* ctx, + const HllVal& src) { DCHECK(!src.is_null); DCHECK_EQ(src.len, HLL_COLUMN_DEFAULT_LEN); @@ -1326,7 +1297,6 @@ doris_udf::HllVal AggregateFunctions::hll_raw_agg_finalize( 
template class MultiDistinctNumericState { public: - static void create(StringVal* dst) { dst->is_null = false; const int state_size = sizeof(MultiDistinctNumericState); @@ -1352,13 +1322,9 @@ class MultiDistinctNumericState { dst->ptr = (uint8_t*)state; } - static void destroy(const StringVal& dst) { - delete (MultiDistinctNumericState*)dst.ptr; - } + static void destroy(const StringVal& dst) { delete (MultiDistinctNumericState*)dst.ptr; } - void update(T& t) { - _set.insert(t); - } + void update(T& t) { _set.insert(t); } // type:one byte value:sizeof(T) StringVal serialize(FunctionContext* ctx) { @@ -1400,9 +1366,7 @@ class MultiDistinctNumericState { } // count - BigIntVal count_finalize() { - return BigIntVal(_set.size()); - } + BigIntVal count_finalize() { return BigIntVal(_set.size()); } // sum for double, decimal DoubleVal sum_finalize_double() { @@ -1431,12 +1395,9 @@ class MultiDistinctNumericState { return BigIntVal(sum); } - FunctionContext::Type set_type() { - return _type; - } + FunctionContext::Type set_type() { return _type; } private: - class NumericHashHelper { public: size_t operator()(const T& obj) const { @@ -1457,7 +1418,6 @@ class MultiDistinctNumericState { // serialize order type:len:value:len:value ... 
class MultiDistinctStringCountState { public: - static void create(StringVal* dst) { dst->is_null = false; const int state_size = sizeof(MultiDistinctStringCountState); @@ -1468,21 +1428,16 @@ class MultiDistinctStringCountState { dst->ptr = (uint8_t*)state; } - static void destroy(const StringVal& dst) { - delete (MultiDistinctStringCountState*)dst.ptr; - } + static void destroy(const StringVal& dst) { delete (MultiDistinctStringCountState*)dst.ptr; } - inline void update(StringValue* sv) { - _set.insert(sv); - } + inline void update(StringValue* sv) { _set.insert(sv); } StringVal serialize(FunctionContext* ctx) { // calculate total serialize buffer length int total_serialized_set_length = 1; HybridSetBase::IteratorBase* iterator = _set.begin(); while (iterator->has_next()) { - const StringValue* value = - reinterpret_cast(iterator->get_value()); + const StringValue* value = reinterpret_cast(iterator->get_value()); total_serialized_set_length += STRING_LENGTH_RECORD_LENGTH + value->len; iterator->next(); } @@ -1490,11 +1445,10 @@ class MultiDistinctStringCountState { uint8_t* writer = result.ptr; // type *writer = _type; - writer ++; + writer++; iterator = _set.begin(); while (iterator->has_next()) { - const StringValue* value = reinterpret_cast - (iterator->get_value()); + const StringValue* value = reinterpret_cast(iterator->get_value()); // length, it is unnecessary to consider little or big endian for // all running in little-endian. 
*(int*)writer = value->len; @@ -1512,7 +1466,7 @@ class MultiDistinctStringCountState { // skip type ,no used now _type = (FunctionContext::Type)*reader; DCHECK(_type == FunctionContext::TYPE_STRING); - reader ++; + reader++; const uint8_t* end = src.ptr + src.len; while (reader < end) { const int length = *(int*)reader; @@ -1525,21 +1479,15 @@ class MultiDistinctStringCountState { } // merge set - void merge(MultiDistinctStringCountState& state) { - _set.insert(&(state._set)); - } + void merge(MultiDistinctStringCountState& state) { _set.insert(&(state._set)); } - BigIntVal finalize() { - return BigIntVal(_set.size()); - } + BigIntVal finalize() { return BigIntVal(_set.size()); } - FunctionContext::Type set_type() { - return _type; - } + FunctionContext::Type set_type() { return _type; } static const int STRING_LENGTH_RECORD_LENGTH = 4; -private: +private: StringValueSet _set; // _type is serialized into buffer by one byte FunctionContext::Type _type; @@ -1549,7 +1497,6 @@ class MultiDistinctStringCountState { // serialize order type:int_len:frac_len:sign:int_len ... 
class MultiDistinctDecimalState { public: - static void create(StringVal* dst) { dst->is_null = false; const int state_size = sizeof(MultiDistinctDecimalState); @@ -1559,21 +1506,16 @@ class MultiDistinctDecimalState { dst->ptr = (uint8_t*)state; } - static void destroy(const StringVal& dst) { - delete (MultiDistinctDecimalState*)dst.ptr; - } + static void destroy(const StringVal& dst) { delete (MultiDistinctDecimalState*)dst.ptr; } - void update(DecimalVal& t) { - _set.insert(DecimalValue::from_decimal_val(t)); - } + void update(DecimalVal& t) { _set.insert(DecimalValue::from_decimal_val(t)); } // type:one byte value:sizeof(T) StringVal serialize(FunctionContext* ctx) { - const int serialized_set_length = sizeof(uint8_t) - + (DECIMAL_INT_LEN_BYTE_SIZE - + DECIMAL_FRAC_BYTE_SIZE - + DECIMAL_SIGN_BYTE_SIZE - + DECIMAL_BUFFER_BYTE_SIZE) * _set.size(); + const int serialized_set_length = + sizeof(uint8_t) + (DECIMAL_INT_LEN_BYTE_SIZE + DECIMAL_FRAC_BYTE_SIZE + + DECIMAL_SIGN_BYTE_SIZE + DECIMAL_BUFFER_BYTE_SIZE) * + _set.size(); StringVal result(ctx, serialized_set_length); uint8_t* writer = result.ptr; *writer = (uint8_t)_type; @@ -1614,9 +1556,7 @@ class MultiDistinctDecimalState { } } - FunctionContext::Type set_type() { - return _type; - } + FunctionContext::Type set_type() { return _type; } // merge set void merge(MultiDistinctDecimalState& state) { @@ -1624,14 +1564,12 @@ class MultiDistinctDecimalState { } // count - BigIntVal count_finalize() { - return BigIntVal(_set.size()); - } + BigIntVal count_finalize() { return BigIntVal(_set.size()); } DecimalVal sum_finalize() { DecimalValue sum; for (auto& value : _set) { - sum += value; + sum += value; } DecimalVal result; sum.to_decimal_val(&result); @@ -1639,7 +1577,6 @@ class MultiDistinctDecimalState { } private: - const int DECIMAL_INT_LEN_BYTE_SIZE = 1; const int DECIMAL_FRAC_BYTE_SIZE = 1; const int DECIMAL_SIGN_BYTE_SIZE = 1; @@ -1651,7 +1588,6 @@ class MultiDistinctDecimalState { class 
MultiDistinctDecimalV2State { public: - static void create(StringVal* dst) { dst->is_null = false; const int state_size = sizeof(MultiDistinctDecimalV2State); @@ -1661,18 +1597,13 @@ class MultiDistinctDecimalV2State { dst->ptr = (uint8_t*)state; } - static void destroy(const StringVal& dst) { - delete (MultiDistinctDecimalV2State*)dst.ptr; - } + static void destroy(const StringVal& dst) { delete (MultiDistinctDecimalV2State*)dst.ptr; } - void update(DecimalV2Val& t) { - _set.insert(DecimalV2Value::from_decimal_val(t)); - } + void update(DecimalV2Val& t) { _set.insert(DecimalV2Value::from_decimal_val(t)); } // type:one byte value:sizeof(T) StringVal serialize(FunctionContext* ctx) { - const int serialized_set_length = sizeof(uint8_t) - + DECIMAL_BYTE_SIZE * _set.size(); + const int serialized_set_length = sizeof(uint8_t) + DECIMAL_BYTE_SIZE * _set.size(); StringVal result(ctx, serialized_set_length); uint8_t* writer = result.ptr; *writer = (uint8_t)_type; @@ -1702,9 +1633,7 @@ class MultiDistinctDecimalV2State { } } - FunctionContext::Type set_type() { - return _type; - } + FunctionContext::Type set_type() { return _type; } // merge set void merge(MultiDistinctDecimalV2State& state) { @@ -1712,14 +1641,12 @@ class MultiDistinctDecimalV2State { } // count - BigIntVal count_finalize() { - return BigIntVal(_set.size()); - } + BigIntVal count_finalize() { return BigIntVal(_set.size()); } DecimalV2Val sum_finalize() { DecimalV2Value sum; for (auto& value : _set) { - sum += value; + sum += value; } DecimalV2Val result; sum.to_decimal_val(&result); @@ -1737,7 +1664,6 @@ class MultiDistinctDecimalV2State { // serialize order type:packed_time:type:packed_time:type ... 
class MultiDistinctCountDateState { public: - static void create(StringVal* dst) { dst->is_null = false; const int state_size = sizeof(MultiDistinctCountDateState); @@ -1747,18 +1673,15 @@ class MultiDistinctCountDateState { dst->ptr = (uint8_t*)state; } - static void destroy(const StringVal& dst) { - delete (MultiDistinctCountDateState*)dst.ptr; - } + static void destroy(const StringVal& dst) { delete (MultiDistinctCountDateState*)dst.ptr; } - void update(DateTimeVal& t) { - _set.insert(t); - } + void update(DateTimeVal& t) { _set.insert(t); } // type:one byte value:sizeof(T) StringVal serialize(FunctionContext* ctx) { - const int serialized_set_length = sizeof(uint8_t) + - (DATETIME_PACKED_TIME_BYTE_SIZE + DATETIME_TYPE_BYTE_SIZE) * _set.size(); + const int serialized_set_length = + sizeof(uint8_t) + + (DATETIME_PACKED_TIME_BYTE_SIZE + DATETIME_TYPE_BYTE_SIZE) * _set.size(); StringVal result(ctx, serialized_set_length); uint8_t* writer = result.ptr; // type @@ -1802,16 +1725,11 @@ class MultiDistinctCountDateState { } // count - BigIntVal count_finalize() { - return BigIntVal(_set.size()); - } + BigIntVal count_finalize() { return BigIntVal(_set.size()); } - FunctionContext::Type set_type() { - return _type; - } + FunctionContext::Type set_type() { return _type; } private: - class DateTimeHashHelper { public: size_t operator()(const DateTimeVal& obj) const { @@ -1840,7 +1758,8 @@ void AggregateFunctions::count_or_sum_distinct_decimal_init(FunctionContext* ctx MultiDistinctDecimalState::create(dst); } -void AggregateFunctions::count_or_sum_distinct_decimalv2_init(FunctionContext* ctx, StringVal* dst) { +void AggregateFunctions::count_or_sum_distinct_decimalv2_init(FunctionContext* ctx, + StringVal* dst) { MultiDistinctDecimalV2State::create(dst); } @@ -1850,7 +1769,7 @@ void AggregateFunctions::count_distinct_date_init(FunctionContext* ctx, StringVa template void AggregateFunctions::count_or_sum_distinct_numeric_update(FunctionContext* ctx, T& src, - StringVal* 
dst) { + StringVal* dst) { DCHECK(!dst->is_null); if (src.is_null) return; MultiDistinctNumericState* state = reinterpret_cast*>(dst->ptr); @@ -1858,10 +1777,11 @@ void AggregateFunctions::count_or_sum_distinct_numeric_update(FunctionContext* c } void AggregateFunctions::count_distinct_string_update(FunctionContext* ctx, StringVal& src, - StringVal* dst) { + StringVal* dst) { DCHECK(!dst->is_null); if (src.is_null) return; - MultiDistinctStringCountState* state = reinterpret_cast(dst->ptr); + MultiDistinctStringCountState* state = + reinterpret_cast(dst->ptr); StringValue sv = StringValue::from_string_val(src); state->update(&sv); } @@ -1874,8 +1794,8 @@ void AggregateFunctions::count_or_sum_distinct_decimal_update(FunctionContext* c state->update(src); } -void AggregateFunctions::count_or_sum_distinct_decimalv2_update(FunctionContext* ctx, DecimalV2Val& src, - StringVal* dst) { +void AggregateFunctions::count_or_sum_distinct_decimalv2_update(FunctionContext* ctx, + DecimalV2Val& src, StringVal* dst) { DCHECK(!dst->is_null); if (src.is_null) return; MultiDistinctDecimalV2State* state = reinterpret_cast(dst->ptr); @@ -1883,7 +1803,7 @@ void AggregateFunctions::count_or_sum_distinct_decimalv2_update(FunctionContext* } void AggregateFunctions::count_distinct_date_update(FunctionContext* ctx, DateTimeVal& src, - StringVal* dst) { + StringVal* dst) { DCHECK(!dst->is_null); if (src.is_null) return; MultiDistinctCountDateState* state = reinterpret_cast(dst->ptr); @@ -1892,36 +1812,39 @@ void AggregateFunctions::count_distinct_date_update(FunctionContext* ctx, DateTi template void AggregateFunctions::count_or_sum_distinct_numeric_merge(FunctionContext* ctx, StringVal& src, - StringVal* dst) { - DCHECK(!dst->is_null); - DCHECK(!src.is_null); - MultiDistinctNumericState* dst_state = reinterpret_cast*>(dst->ptr); - // unserialize src - StringVal src_state_val; - MultiDistinctNumericState::create(&src_state_val); - MultiDistinctNumericState* src_state = 
reinterpret_cast*>(src_state_val.ptr); - src_state->unserialize(src); - DCHECK(dst_state->set_type() == src_state->set_type()); - dst_state->merge(*src_state); - MultiDistinctNumericState::destroy(src_state_val); + StringVal* dst) { + DCHECK(!dst->is_null); + DCHECK(!src.is_null); + MultiDistinctNumericState* dst_state = + reinterpret_cast*>(dst->ptr); + // unserialize src + StringVal src_state_val; + MultiDistinctNumericState::create(&src_state_val); + MultiDistinctNumericState* src_state = + reinterpret_cast*>(src_state_val.ptr); + src_state->unserialize(src); + DCHECK(dst_state->set_type() == src_state->set_type()); + dst_state->merge(*src_state); + MultiDistinctNumericState::destroy(src_state_val); } void AggregateFunctions::count_distinct_string_merge(FunctionContext* ctx, StringVal& src, - StringVal* dst) { + StringVal* dst) { DCHECK(!dst->is_null); DCHECK(!src.is_null); - MultiDistinctStringCountState* dst_state = reinterpret_cast(dst->ptr); + MultiDistinctStringCountState* dst_state = + reinterpret_cast(dst->ptr); // unserialize src StringVal src_state_val; MultiDistinctStringCountState::create(&src_state_val); - MultiDistinctStringCountState* src_state = reinterpret_cast(src_state_val.ptr); + MultiDistinctStringCountState* src_state = + reinterpret_cast(src_state_val.ptr); src_state->unserialize(src); DCHECK(dst_state->set_type() == src_state->set_type()); dst_state->merge(*src_state); MultiDistinctStringCountState::destroy(src_state_val); } - void AggregateFunctions::count_or_sum_distinct_decimal_merge(FunctionContext* ctx, StringVal& src, StringVal* dst) { DCHECK(!dst->is_null); @@ -1930,7 +1853,8 @@ void AggregateFunctions::count_or_sum_distinct_decimal_merge(FunctionContext* ct // unserialize src StringVal src_state_val; MultiDistinctDecimalState::create(&src_state_val); - MultiDistinctDecimalState* src_state = reinterpret_cast(src_state_val.ptr); + MultiDistinctDecimalState* src_state = + reinterpret_cast(src_state_val.ptr); 
src_state->unserialize(src); DCHECK(dst_state->set_type() == src_state->set_type()); dst_state->merge(*src_state); @@ -1938,14 +1862,16 @@ void AggregateFunctions::count_or_sum_distinct_decimal_merge(FunctionContext* ct } void AggregateFunctions::count_or_sum_distinct_decimalv2_merge(FunctionContext* ctx, StringVal& src, - StringVal* dst) { + StringVal* dst) { DCHECK(!dst->is_null); DCHECK(!src.is_null); - MultiDistinctDecimalV2State* dst_state = reinterpret_cast(dst->ptr); + MultiDistinctDecimalV2State* dst_state = + reinterpret_cast(dst->ptr); // unserialize src StringVal src_state_val; MultiDistinctDecimalV2State::create(&src_state_val); - MultiDistinctDecimalV2State* src_state = reinterpret_cast(src_state_val.ptr); + MultiDistinctDecimalV2State* src_state = + reinterpret_cast(src_state_val.ptr); src_state->unserialize(src); DCHECK(dst_state->set_type() == src_state->set_type()); dst_state->merge(*src_state); @@ -1953,14 +1879,16 @@ void AggregateFunctions::count_or_sum_distinct_decimalv2_merge(FunctionContext* } void AggregateFunctions::count_distinct_date_merge(FunctionContext* ctx, StringVal& src, - StringVal* dst) { + StringVal* dst) { DCHECK(!dst->is_null); DCHECK(!src.is_null); - MultiDistinctCountDateState* dst_state = reinterpret_cast(dst->ptr); + MultiDistinctCountDateState* dst_state = + reinterpret_cast(dst->ptr); // unserialize src StringVal src_state_val; MultiDistinctCountDateState::create(&src_state_val); - MultiDistinctCountDateState* src_state = reinterpret_cast(src_state_val.ptr); + MultiDistinctCountDateState* src_state = + reinterpret_cast(src_state_val.ptr); src_state->unserialize(src); DCHECK(dst_state->set_type() == src_state->set_type()); dst_state->merge(*src_state); @@ -1968,25 +1896,30 @@ void AggregateFunctions::count_distinct_date_merge(FunctionContext* ctx, StringV } template -StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize(FunctionContext* ctx, const StringVal& state_sv) { +StringVal 
AggregateFunctions::count_or_sum_distinct_numeric_serialize(FunctionContext* ctx, + const StringVal& state_sv) { DCHECK(!state_sv.is_null); - MultiDistinctNumericState* state = reinterpret_cast*>(state_sv.ptr); + MultiDistinctNumericState* state = + reinterpret_cast*>(state_sv.ptr); StringVal result = state->serialize(ctx); // release original object MultiDistinctNumericState::destroy(state_sv); return result; } -StringVal AggregateFunctions::count_distinct_string_serialize(FunctionContext* ctx, const StringVal& state_sv) { +StringVal AggregateFunctions::count_distinct_string_serialize(FunctionContext* ctx, + const StringVal& state_sv) { DCHECK(!state_sv.is_null); - MultiDistinctStringCountState* state = reinterpret_cast(state_sv.ptr); + MultiDistinctStringCountState* state = + reinterpret_cast(state_sv.ptr); StringVal result = state->serialize(ctx); // release original object MultiDistinctStringCountState::destroy(state_sv); return result; } -StringVal AggregateFunctions::count_or_sum_distinct_decimal_serialize(FunctionContext* ctx, const StringVal& state_sv) { +StringVal AggregateFunctions::count_or_sum_distinct_decimal_serialize(FunctionContext* ctx, + const StringVal& state_sv) { DCHECK(!state_sv.is_null); MultiDistinctDecimalState* state = reinterpret_cast(state_sv.ptr); StringVal result = state->serialize(ctx); @@ -1995,18 +1928,22 @@ StringVal AggregateFunctions::count_or_sum_distinct_decimal_serialize(FunctionCo return result; } -StringVal AggregateFunctions::count_or_sum_distinct_decimalv2_serialize(FunctionContext* ctx, const StringVal& state_sv) { +StringVal AggregateFunctions::count_or_sum_distinct_decimalv2_serialize(FunctionContext* ctx, + const StringVal& state_sv) { DCHECK(!state_sv.is_null); - MultiDistinctDecimalV2State* state = reinterpret_cast(state_sv.ptr); + MultiDistinctDecimalV2State* state = + reinterpret_cast(state_sv.ptr); StringVal result = state->serialize(ctx); // release original object MultiDistinctDecimalV2State::destroy(state_sv); 
return result; } -StringVal AggregateFunctions::count_distinct_date_serialize(FunctionContext* ctx, const StringVal& state_sv) { +StringVal AggregateFunctions::count_distinct_date_serialize(FunctionContext* ctx, + const StringVal& state_sv) { DCHECK(!state_sv.is_null); - MultiDistinctCountDateState* state = reinterpret_cast(state_sv.ptr); + MultiDistinctCountDateState* state = + reinterpret_cast(state_sv.ptr); StringVal result = state->serialize(ctx); // release original object MultiDistinctCountDateState::destroy(state_sv); @@ -2014,50 +1951,61 @@ StringVal AggregateFunctions::count_distinct_date_serialize(FunctionContext* ctx } template -BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize(FunctionContext* ctx, const StringVal& state_sv) { +BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize(FunctionContext* ctx, + const StringVal& state_sv) { DCHECK(!state_sv.is_null); - MultiDistinctNumericState* state = reinterpret_cast*>(state_sv.ptr); + MultiDistinctNumericState* state = + reinterpret_cast*>(state_sv.ptr); BigIntVal result = state->count_finalize(); MultiDistinctNumericState::destroy(state_sv); return result; } -BigIntVal AggregateFunctions::count_distinct_string_finalize(FunctionContext* ctx, const StringVal& state_sv) { +BigIntVal AggregateFunctions::count_distinct_string_finalize(FunctionContext* ctx, + const StringVal& state_sv) { DCHECK(!state_sv.is_null); - MultiDistinctStringCountState* state = reinterpret_cast(state_sv.ptr); + MultiDistinctStringCountState* state = + reinterpret_cast(state_sv.ptr); BigIntVal result = state->finalize(); MultiDistinctStringCountState::destroy(state_sv); return result; } template -DoubleVal AggregateFunctions::sum_distinct_double_finalize(FunctionContext* ctx, const StringVal& state_sv) { +DoubleVal AggregateFunctions::sum_distinct_double_finalize(FunctionContext* ctx, + const StringVal& state_sv) { DCHECK(!state_sv.is_null); - MultiDistinctNumericState* state = 
reinterpret_cast*>(state_sv.ptr); + MultiDistinctNumericState* state = + reinterpret_cast*>(state_sv.ptr); DoubleVal result = state->sum_finalize_double(); MultiDistinctNumericState::destroy(state_sv); return result; } template -LargeIntVal AggregateFunctions::sum_distinct_largeint_finalize(FunctionContext* ctx, const StringVal& state_sv) { +LargeIntVal AggregateFunctions::sum_distinct_largeint_finalize(FunctionContext* ctx, + const StringVal& state_sv) { DCHECK(!state_sv.is_null); - MultiDistinctNumericState* state = reinterpret_cast*>(state_sv.ptr); + MultiDistinctNumericState* state = + reinterpret_cast*>(state_sv.ptr); LargeIntVal result = state->sum_finalize_largeint(); MultiDistinctNumericState::destroy(state_sv); return result; } template -BigIntVal AggregateFunctions::sum_distinct_bigint_finalize(FunctionContext* ctx, const StringVal& state_sv) { +BigIntVal AggregateFunctions::sum_distinct_bigint_finalize(FunctionContext* ctx, + const StringVal& state_sv) { DCHECK(!state_sv.is_null); - MultiDistinctNumericState* state = reinterpret_cast*>(state_sv.ptr); + MultiDistinctNumericState* state = + reinterpret_cast*>(state_sv.ptr); BigIntVal result = state->sum_finalize_bigint(); MultiDistinctNumericState::destroy(state_sv); return result; } -BigIntVal AggregateFunctions::count_distinct_decimal_finalize(FunctionContext* ctx, const StringVal& state_sv) { +BigIntVal AggregateFunctions::count_distinct_decimal_finalize(FunctionContext* ctx, + const StringVal& state_sv) { DCHECK(!state_sv.is_null); MultiDistinctDecimalState* state = reinterpret_cast(state_sv.ptr); BigIntVal result = state->count_finalize(); @@ -2065,15 +2013,18 @@ BigIntVal AggregateFunctions::count_distinct_decimal_finalize(FunctionContext* c return result; } -BigIntVal AggregateFunctions::count_distinct_decimalv2_finalize(FunctionContext* ctx, const StringVal& state_sv) { +BigIntVal AggregateFunctions::count_distinct_decimalv2_finalize(FunctionContext* ctx, + const StringVal& state_sv) { 
DCHECK(!state_sv.is_null); - MultiDistinctDecimalV2State* state = reinterpret_cast(state_sv.ptr); + MultiDistinctDecimalV2State* state = + reinterpret_cast(state_sv.ptr); BigIntVal result = state->count_finalize(); MultiDistinctDecimalV2State::destroy(state_sv); return result; } -DecimalVal AggregateFunctions::sum_distinct_decimal_finalize(FunctionContext* ctx, const StringVal& state_sv) { +DecimalVal AggregateFunctions::sum_distinct_decimal_finalize(FunctionContext* ctx, + const StringVal& state_sv) { DCHECK(!state_sv.is_null); MultiDistinctDecimalState* state = reinterpret_cast(state_sv.ptr); DecimalVal result = state->sum_finalize(); @@ -2081,17 +2032,21 @@ DecimalVal AggregateFunctions::sum_distinct_decimal_finalize(FunctionContext* ct return result; } -DecimalV2Val AggregateFunctions::sum_distinct_decimalv2_finalize(FunctionContext* ctx, const StringVal& state_sv) { +DecimalV2Val AggregateFunctions::sum_distinct_decimalv2_finalize(FunctionContext* ctx, + const StringVal& state_sv) { DCHECK(!state_sv.is_null); - MultiDistinctDecimalV2State* state = reinterpret_cast(state_sv.ptr); + MultiDistinctDecimalV2State* state = + reinterpret_cast(state_sv.ptr); DecimalV2Val result = state->sum_finalize(); MultiDistinctDecimalV2State::destroy(state_sv); return result; } -BigIntVal AggregateFunctions::count_distinct_date_finalize(FunctionContext* ctx, const StringVal& state_sv) { +BigIntVal AggregateFunctions::count_distinct_date_finalize(FunctionContext* ctx, + const StringVal& state_sv) { DCHECK(!state_sv.is_null); - MultiDistinctCountDateState* state = reinterpret_cast(state_sv.ptr); + MultiDistinctCountDateState* state = + reinterpret_cast(state_sv.ptr); BigIntVal result = state->count_finalize(); MultiDistinctCountDateState::destroy(state_sv); return result; @@ -2125,8 +2080,7 @@ void AggregateFunctions::knuth_var_init(FunctionContext* ctx, StringVal* dst) { } template -void AggregateFunctions::knuth_var_update(FunctionContext* ctx, const T& src, - StringVal* dst) { 
+void AggregateFunctions::knuth_var_update(FunctionContext* ctx, const T& src, StringVal* dst) { DCHECK(!dst->is_null); DCHECK_EQ(dst->len, sizeof(KnuthVarianceState)); if (src.is_null) return; @@ -2140,7 +2094,7 @@ void AggregateFunctions::knuth_var_update(FunctionContext* ctx, const T& src, } void AggregateFunctions::knuth_var_merge(FunctionContext* ctx, const StringVal& src, - StringVal* dst) { + StringVal* dst) { DCHECK(!dst->is_null); DCHECK_EQ(dst->len, sizeof(KnuthVarianceState)); DCHECK(!src.is_null); @@ -2154,7 +2108,7 @@ void AggregateFunctions::knuth_var_merge(FunctionContext* ctx, const StringVal& double sum_count = dst_state->count + src_state->count; dst_state->mean = src_state->mean + delta * (dst_state->count / sum_count); dst_state->m2 = (src_state->m2) + dst_state->m2 + - (delta * delta) * (src_state->count * dst_state->count / sum_count); + (delta * delta) * (src_state->count * dst_state->count / sum_count); dst_state->count = sum_count; } @@ -2168,7 +2122,7 @@ DoubleVal AggregateFunctions::knuth_var_finalize(FunctionContext* ctx, const Str } DoubleVal AggregateFunctions::knuth_var_pop_finalize(FunctionContext* ctx, - const StringVal& state_sv) { + const StringVal& state_sv) { DCHECK(!state_sv.is_null); DCHECK_EQ(state_sv.len, sizeof(KnuthVarianceState)); KnuthVarianceState* state = reinterpret_cast(state_sv.ptr); @@ -2179,7 +2133,7 @@ DoubleVal AggregateFunctions::knuth_var_pop_finalize(FunctionContext* ctx, } DoubleVal AggregateFunctions::knuth_stddev_finalize(FunctionContext* ctx, - const StringVal& state_sv) { + const StringVal& state_sv) { DCHECK(!state_sv.is_null); DCHECK_EQ(state_sv.len, sizeof(KnuthVarianceState)); KnuthVarianceState* state = reinterpret_cast(state_sv.ptr); @@ -2190,7 +2144,7 @@ DoubleVal AggregateFunctions::knuth_stddev_finalize(FunctionContext* ctx, } DoubleVal AggregateFunctions::knuth_stddev_pop_finalize(FunctionContext* ctx, - const StringVal& state_sv) { + const StringVal& state_sv) { DCHECK(!state_sv.is_null); 
DCHECK_EQ(state_sv.len, sizeof(KnuthVarianceState)); KnuthVarianceState* state = reinterpret_cast(state_sv.ptr); @@ -2203,7 +2157,7 @@ DoubleVal AggregateFunctions::knuth_stddev_pop_finalize(FunctionContext* ctx, struct RankState { int64_t rank; int64_t count; - RankState() : rank(1), count(0) { } + RankState() : rank(1), count(0) {} }; void AggregateFunctions::rank_init(FunctionContext* ctx, StringVal* dst) { @@ -2221,10 +2175,9 @@ void AggregateFunctions::rank_update(FunctionContext* ctx, StringVal* dst) { ++state->count; } -void AggregateFunctions::dense_rank_update(FunctionContext* ctx, StringVal* dst) { } +void AggregateFunctions::dense_rank_update(FunctionContext* ctx, StringVal* dst) {} -BigIntVal AggregateFunctions::rank_get_value(FunctionContext* ctx, - StringVal& src_val) { +BigIntVal AggregateFunctions::rank_get_value(FunctionContext* ctx, StringVal& src_val) { DCHECK(!src_val.is_null); DCHECK_EQ(src_val.len, sizeof(RankState)); RankState* state = reinterpret_cast(src_val.ptr); @@ -2238,8 +2191,7 @@ BigIntVal AggregateFunctions::rank_get_value(FunctionContext* ctx, return BigIntVal(result); } -BigIntVal AggregateFunctions::dense_rank_get_value(FunctionContext* ctx, - StringVal& src_val) { +BigIntVal AggregateFunctions::dense_rank_get_value(FunctionContext* ctx, StringVal& src_val) { DCHECK(!src_val.is_null); DCHECK_EQ(src_val.len, sizeof(RankState)); RankState* state = reinterpret_cast(src_val.ptr); @@ -2252,8 +2204,7 @@ BigIntVal AggregateFunctions::dense_rank_get_value(FunctionContext* ctx, return BigIntVal(result); } -BigIntVal AggregateFunctions::rank_finalize(FunctionContext* ctx, - StringVal& src_val) { +BigIntVal AggregateFunctions::rank_finalize(FunctionContext* ctx, StringVal& src_val) { DCHECK(!src_val.is_null); DCHECK_EQ(src_val.len, sizeof(RankState)); RankState* state = reinterpret_cast(src_val.ptr); @@ -2269,7 +2220,7 @@ void AggregateFunctions::last_val_update(FunctionContext* ctx, const T& src, T* template <> void 
AggregateFunctions::last_val_update(FunctionContext* ctx, const StringVal& src, - StringVal* dst) { + StringVal* dst) { if (src.is_null) { if (!dst->is_null) { ctx->free(dst->ptr); @@ -2297,7 +2248,7 @@ void AggregateFunctions::last_val_remove(FunctionContext* ctx, const T& src, T* template <> void AggregateFunctions::last_val_remove(FunctionContext* ctx, const StringVal& src, - StringVal* dst) { + StringVal* dst) { if (ctx->impl()->num_removes() >= ctx->impl()->num_updates()) { if (!dst->is_null) { ctx->free(dst->ptr); @@ -2334,7 +2285,7 @@ void AggregateFunctions::first_val_update(FunctionContext* ctx, const IntVal& sr template <> void AggregateFunctions::first_val_update(FunctionContext* ctx, const StringVal& src, - StringVal* dst) { + StringVal* dst) { if (ctx->impl()->num_updates() > 1) { return; } @@ -2350,7 +2301,7 @@ void AggregateFunctions::first_val_update(FunctionContext* ctx, const StringVal& template void AggregateFunctions::first_val_rewrite_update(FunctionContext* ctx, const T& src, - const BigIntVal&, T* dst) { + const BigIntVal&, T* dst) { last_val_update(ctx, src, dst); } @@ -2402,444 +2353,474 @@ void AggregateFunctions::offset_fn_init(FunctionContext* ctx, IntVal* dst) { } */ template -void AggregateFunctions::offset_fn_update(FunctionContext* ctx, const T& src, - const BigIntVal&, const T& default_value, T* dst) { +void AggregateFunctions::offset_fn_update(FunctionContext* ctx, const T& src, const BigIntVal&, + const T& default_value, T* dst) { *dst = src; } template <> -void AggregateFunctions::offset_fn_update(FunctionContext* ctx, const IntVal& src, - const BigIntVal&, const IntVal& default_value, IntVal* dst) { +void AggregateFunctions::offset_fn_update(FunctionContext* ctx, const IntVal& src, const BigIntVal&, + const IntVal& default_value, IntVal* dst) { *dst = src; } // Stamp out the templates for the types we need. 
template void AggregateFunctions::init_zero(FunctionContext*, BigIntVal* dst); -template void AggregateFunctions::sum_remove( - FunctionContext*, const BooleanVal& src, BigIntVal* dst); -template void AggregateFunctions::sum_remove( - FunctionContext*, const TinyIntVal& src, BigIntVal* dst); -template void AggregateFunctions::sum_remove( - FunctionContext*, const SmallIntVal& src, BigIntVal* dst); -template void AggregateFunctions::sum_remove( - FunctionContext*, const IntVal& src, BigIntVal* dst); -template void AggregateFunctions::sum_remove( - FunctionContext*, const BigIntVal& src, BigIntVal* dst); -template void AggregateFunctions::sum_remove( - FunctionContext*, const FloatVal& src, DoubleVal* dst); -template void AggregateFunctions::sum_remove( - FunctionContext*, const DoubleVal& src, DoubleVal* dst); -template void AggregateFunctions::sum_remove( - FunctionContext*, const DecimalVal& src, DecimalVal* dst); -template void AggregateFunctions::sum_remove( - FunctionContext*, const DecimalV2Val& src, DecimalV2Val* dst); -template void AggregateFunctions::sum_remove( - FunctionContext*, const LargeIntVal& src, LargeIntVal* dst); - -template void AggregateFunctions::avg_update( - doris_udf::FunctionContext*, doris_udf::BooleanVal const&, doris_udf::StringVal*); -template void AggregateFunctions::avg_update( - doris_udf::FunctionContext*, doris_udf::IntVal const&, doris_udf::StringVal*); -template void AggregateFunctions::avg_remove( - doris_udf::FunctionContext*, doris_udf::IntVal const&, doris_udf::StringVal*); -template void AggregateFunctions::avg_update( - doris_udf::FunctionContext*, doris_udf::BigIntVal const&, doris_udf::StringVal*); -template void AggregateFunctions::avg_remove( - doris_udf::FunctionContext*, doris_udf::BigIntVal const&, doris_udf::StringVal*); -template void AggregateFunctions::avg_update( - doris_udf::FunctionContext*, doris_udf::FloatVal const&, doris_udf::StringVal*); -template void AggregateFunctions::avg_remove( - 
doris_udf::FunctionContext*, doris_udf::FloatVal const&, doris_udf::StringVal*); -template void AggregateFunctions::avg_update( - doris_udf::FunctionContext*, doris_udf::DoubleVal const&, doris_udf::StringVal*); -template void AggregateFunctions::avg_remove( - doris_udf::FunctionContext*, doris_udf::DoubleVal const&, doris_udf::StringVal*); +template void AggregateFunctions::sum_remove(FunctionContext*, + const BooleanVal& src, + BigIntVal* dst); +template void AggregateFunctions::sum_remove(FunctionContext*, + const TinyIntVal& src, + BigIntVal* dst); +template void AggregateFunctions::sum_remove(FunctionContext*, + const SmallIntVal& src, + BigIntVal* dst); +template void AggregateFunctions::sum_remove(FunctionContext*, const IntVal& src, + BigIntVal* dst); +template void AggregateFunctions::sum_remove(FunctionContext*, + const BigIntVal& src, + BigIntVal* dst); +template void AggregateFunctions::sum_remove(FunctionContext*, + const FloatVal& src, + DoubleVal* dst); +template void AggregateFunctions::sum_remove(FunctionContext*, + const DoubleVal& src, + DoubleVal* dst); +template void AggregateFunctions::sum_remove(FunctionContext*, + const DecimalVal& src, + DecimalVal* dst); +template void AggregateFunctions::sum_remove(FunctionContext*, + const DecimalV2Val& src, + DecimalV2Val* dst); +template void AggregateFunctions::sum_remove(FunctionContext*, + const LargeIntVal& src, + LargeIntVal* dst); + +template void AggregateFunctions::avg_update(doris_udf::FunctionContext*, + doris_udf::BooleanVal const&, + doris_udf::StringVal*); +template void AggregateFunctions::avg_update(doris_udf::FunctionContext*, + doris_udf::IntVal const&, + doris_udf::StringVal*); +template void AggregateFunctions::avg_remove(doris_udf::FunctionContext*, + doris_udf::IntVal const&, + doris_udf::StringVal*); +template void AggregateFunctions::avg_update(doris_udf::FunctionContext*, + doris_udf::BigIntVal const&, + doris_udf::StringVal*); +template void 
AggregateFunctions::avg_remove(doris_udf::FunctionContext*, + doris_udf::BigIntVal const&, + doris_udf::StringVal*); +template void AggregateFunctions::avg_update(doris_udf::FunctionContext*, + doris_udf::FloatVal const&, + doris_udf::StringVal*); +template void AggregateFunctions::avg_remove(doris_udf::FunctionContext*, + doris_udf::FloatVal const&, + doris_udf::StringVal*); +template void AggregateFunctions::avg_update(doris_udf::FunctionContext*, + doris_udf::DoubleVal const&, + doris_udf::StringVal*); +template void AggregateFunctions::avg_remove(doris_udf::FunctionContext*, + doris_udf::DoubleVal const&, + doris_udf::StringVal*); //template void AggregateFunctions::AvgUpdate( //doris_udf::FunctionContext*, doris_udf::LargeIntVal const&, doris_udf::StringVal*); //template void AggregateFunctions::AvgRemove( //doris_udf::FunctionContext*, doris_udf::LargeIntVal const&, doris_udf::StringVal*); -template void AggregateFunctions::sum( - FunctionContext*, const BooleanVal& src, BigIntVal* dst); -template void AggregateFunctions::sum( - FunctionContext*, const TinyIntVal& src, BigIntVal* dst); -template void AggregateFunctions::sum( - FunctionContext*, const SmallIntVal& src, BigIntVal* dst); -template void AggregateFunctions::sum( - FunctionContext*, const IntVal& src, BigIntVal* dst); -template void AggregateFunctions::sum( - FunctionContext*, const BigIntVal& src, BigIntVal* dst); -template void AggregateFunctions::sum( - FunctionContext*, const FloatVal& src, DoubleVal* dst); -template void AggregateFunctions::sum( - FunctionContext*, const DoubleVal& src, DoubleVal* dst); - -template void AggregateFunctions::min( - FunctionContext*, const BooleanVal& src, BooleanVal* dst); -template void AggregateFunctions::min( - FunctionContext*, const TinyIntVal& src, TinyIntVal* dst); -template void AggregateFunctions::min( - FunctionContext*, const SmallIntVal& src, SmallIntVal* dst); -template void AggregateFunctions::min( - FunctionContext*, const IntVal& src, IntVal* 
dst); -template void AggregateFunctions::min( - FunctionContext*, const BigIntVal& src, BigIntVal* dst); -template void AggregateFunctions::min( - FunctionContext*, const FloatVal& src, FloatVal* dst); -template void AggregateFunctions::min( - FunctionContext*, const DoubleVal& src, DoubleVal* dst); -template void AggregateFunctions::min( - FunctionContext*, const StringVal& src, StringVal* dst); - -template void AggregateFunctions::avg_remove( - doris_udf::FunctionContext*, doris_udf::BooleanVal const&, doris_udf::StringVal*); -template void AggregateFunctions::avg_update( - doris_udf::FunctionContext*, doris_udf::TinyIntVal const&, doris_udf::StringVal*); -template void AggregateFunctions::avg_remove( - doris_udf::FunctionContext*, doris_udf::TinyIntVal const&, doris_udf::StringVal*); -template void AggregateFunctions::avg_update( - doris_udf::FunctionContext*, doris_udf::SmallIntVal const&, doris_udf::StringVal*); -template void AggregateFunctions::avg_remove( - doris_udf::FunctionContext*, doris_udf::SmallIntVal const&, doris_udf::StringVal*); - -template void AggregateFunctions::max( - FunctionContext*, const BooleanVal& src, BooleanVal* dst); -template void AggregateFunctions::max( - FunctionContext*, const TinyIntVal& src, TinyIntVal* dst); -template void AggregateFunctions::max( - FunctionContext*, const SmallIntVal& src, SmallIntVal* dst); -template void AggregateFunctions::max( - FunctionContext*, const IntVal& src, IntVal* dst); -template void AggregateFunctions::max( - FunctionContext*, const BigIntVal& src, BigIntVal* dst); -template void AggregateFunctions::max( - FunctionContext*, const FloatVal& src, FloatVal* dst); -template void AggregateFunctions::max( - FunctionContext*, const DoubleVal& src, DoubleVal* dst); -template void AggregateFunctions::max( - FunctionContext*, const StringVal& src, StringVal* dst); - -template void AggregateFunctions::pc_update( - FunctionContext*, const BooleanVal&, StringVal*); -template void 
AggregateFunctions::pc_update( - FunctionContext*, const TinyIntVal&, StringVal*); -template void AggregateFunctions::pc_update( - FunctionContext*, const SmallIntVal&, StringVal*); -template void AggregateFunctions::pc_update( - FunctionContext*, const IntVal&, StringVal*); -template void AggregateFunctions::pc_update( - FunctionContext*, const BigIntVal&, StringVal*); -template void AggregateFunctions::pc_update( - FunctionContext*, const FloatVal&, StringVal*); -template void AggregateFunctions::pc_update( - FunctionContext*, const DoubleVal&, StringVal*); -template void AggregateFunctions::pc_update( - FunctionContext*, const StringVal&, StringVal*); -template void AggregateFunctions::pc_update( - FunctionContext*, const DateTimeVal&, StringVal*); - -template void AggregateFunctions::pcsa_update( - FunctionContext*, const BooleanVal&, StringVal*); -template void AggregateFunctions::pcsa_update( - FunctionContext*, const TinyIntVal&, StringVal*); -template void AggregateFunctions::pcsa_update( - FunctionContext*, const SmallIntVal&, StringVal*); -template void AggregateFunctions::pcsa_update( - FunctionContext*, const IntVal&, StringVal*); -template void AggregateFunctions::pcsa_update( - FunctionContext*, const BigIntVal&, StringVal*); -template void AggregateFunctions::pcsa_update( - FunctionContext*, const FloatVal&, StringVal*); -template void AggregateFunctions::pcsa_update( - FunctionContext*, const DoubleVal&, StringVal*); -template void AggregateFunctions::pcsa_update( - FunctionContext*, const StringVal&, StringVal*); -template void AggregateFunctions::pcsa_update( - FunctionContext*, const DateTimeVal&, StringVal*); - -template void AggregateFunctions::hll_update( - FunctionContext*, const BooleanVal&, StringVal*); -template void AggregateFunctions::hll_update( - FunctionContext*, const TinyIntVal&, StringVal*); -template void AggregateFunctions::hll_update( - FunctionContext*, const SmallIntVal&, StringVal*); -template void 
AggregateFunctions::hll_update( - FunctionContext*, const IntVal&, StringVal*); -template void AggregateFunctions::hll_update( - FunctionContext*, const BigIntVal&, StringVal*); -template void AggregateFunctions::hll_update( - FunctionContext*, const FloatVal&, StringVal*); -template void AggregateFunctions::hll_update( - FunctionContext*, const DoubleVal&, StringVal*); -template void AggregateFunctions::hll_update( - FunctionContext*, const StringVal&, StringVal*); -template void AggregateFunctions::hll_update( - FunctionContext*, const DateTimeVal&, StringVal*); -template void AggregateFunctions::hll_update( - FunctionContext*, const LargeIntVal&, StringVal*); -template void AggregateFunctions::hll_update( - FunctionContext*, const DecimalVal&, StringVal*); -template void AggregateFunctions::hll_update( - FunctionContext*, const DecimalV2Val&, StringVal*); +template void AggregateFunctions::sum(FunctionContext*, + const BooleanVal& src, BigIntVal* dst); +template void AggregateFunctions::sum(FunctionContext*, + const TinyIntVal& src, BigIntVal* dst); +template void AggregateFunctions::sum(FunctionContext*, + const SmallIntVal& src, + BigIntVal* dst); +template void AggregateFunctions::sum(FunctionContext*, const IntVal& src, + BigIntVal* dst); +template void AggregateFunctions::sum(FunctionContext*, const BigIntVal& src, + BigIntVal* dst); +template void AggregateFunctions::sum(FunctionContext*, const FloatVal& src, + DoubleVal* dst); +template void AggregateFunctions::sum(FunctionContext*, const DoubleVal& src, + DoubleVal* dst); + +template void AggregateFunctions::min(FunctionContext*, const BooleanVal& src, + BooleanVal* dst); +template void AggregateFunctions::min(FunctionContext*, const TinyIntVal& src, + TinyIntVal* dst); +template void AggregateFunctions::min(FunctionContext*, const SmallIntVal& src, + SmallIntVal* dst); +template void AggregateFunctions::min(FunctionContext*, const IntVal& src, IntVal* dst); +template void 
AggregateFunctions::min(FunctionContext*, const BigIntVal& src, + BigIntVal* dst); +template void AggregateFunctions::min(FunctionContext*, const FloatVal& src, + FloatVal* dst); +template void AggregateFunctions::min(FunctionContext*, const DoubleVal& src, + DoubleVal* dst); +template void AggregateFunctions::min(FunctionContext*, const StringVal& src, + StringVal* dst); + +template void AggregateFunctions::avg_remove(doris_udf::FunctionContext*, + doris_udf::BooleanVal const&, + doris_udf::StringVal*); +template void AggregateFunctions::avg_update(doris_udf::FunctionContext*, + doris_udf::TinyIntVal const&, + doris_udf::StringVal*); +template void AggregateFunctions::avg_remove(doris_udf::FunctionContext*, + doris_udf::TinyIntVal const&, + doris_udf::StringVal*); +template void AggregateFunctions::avg_update(doris_udf::FunctionContext*, + doris_udf::SmallIntVal const&, + doris_udf::StringVal*); +template void AggregateFunctions::avg_remove(doris_udf::FunctionContext*, + doris_udf::SmallIntVal const&, + doris_udf::StringVal*); + +template void AggregateFunctions::max(FunctionContext*, const BooleanVal& src, + BooleanVal* dst); +template void AggregateFunctions::max(FunctionContext*, const TinyIntVal& src, + TinyIntVal* dst); +template void AggregateFunctions::max(FunctionContext*, const SmallIntVal& src, + SmallIntVal* dst); +template void AggregateFunctions::max(FunctionContext*, const IntVal& src, IntVal* dst); +template void AggregateFunctions::max(FunctionContext*, const BigIntVal& src, + BigIntVal* dst); +template void AggregateFunctions::max(FunctionContext*, const FloatVal& src, + FloatVal* dst); +template void AggregateFunctions::max(FunctionContext*, const DoubleVal& src, + DoubleVal* dst); +template void AggregateFunctions::max(FunctionContext*, const StringVal& src, + StringVal* dst); + +template void AggregateFunctions::pc_update(FunctionContext*, const BooleanVal&, StringVal*); +template void AggregateFunctions::pc_update(FunctionContext*, const 
TinyIntVal&, StringVal*); +template void AggregateFunctions::pc_update(FunctionContext*, const SmallIntVal&, StringVal*); +template void AggregateFunctions::pc_update(FunctionContext*, const IntVal&, StringVal*); +template void AggregateFunctions::pc_update(FunctionContext*, const BigIntVal&, StringVal*); +template void AggregateFunctions::pc_update(FunctionContext*, const FloatVal&, StringVal*); +template void AggregateFunctions::pc_update(FunctionContext*, const DoubleVal&, StringVal*); +template void AggregateFunctions::pc_update(FunctionContext*, const StringVal&, StringVal*); +template void AggregateFunctions::pc_update(FunctionContext*, const DateTimeVal&, StringVal*); + +template void AggregateFunctions::pcsa_update(FunctionContext*, const BooleanVal&, StringVal*); +template void AggregateFunctions::pcsa_update(FunctionContext*, const TinyIntVal&, StringVal*); +template void AggregateFunctions::pcsa_update(FunctionContext*, const SmallIntVal&, StringVal*); +template void AggregateFunctions::pcsa_update(FunctionContext*, const IntVal&, StringVal*); +template void AggregateFunctions::pcsa_update(FunctionContext*, const BigIntVal&, StringVal*); +template void AggregateFunctions::pcsa_update(FunctionContext*, const FloatVal&, StringVal*); +template void AggregateFunctions::pcsa_update(FunctionContext*, const DoubleVal&, StringVal*); +template void AggregateFunctions::pcsa_update(FunctionContext*, const StringVal&, StringVal*); +template void AggregateFunctions::pcsa_update(FunctionContext*, const DateTimeVal&, StringVal*); + +template void AggregateFunctions::hll_update(FunctionContext*, const BooleanVal&, StringVal*); +template void AggregateFunctions::hll_update(FunctionContext*, const TinyIntVal&, StringVal*); +template void AggregateFunctions::hll_update(FunctionContext*, const SmallIntVal&, StringVal*); +template void AggregateFunctions::hll_update(FunctionContext*, const IntVal&, StringVal*); +template void AggregateFunctions::hll_update(FunctionContext*, 
const BigIntVal&, StringVal*); +template void AggregateFunctions::hll_update(FunctionContext*, const FloatVal&, StringVal*); +template void AggregateFunctions::hll_update(FunctionContext*, const DoubleVal&, StringVal*); +template void AggregateFunctions::hll_update(FunctionContext*, const StringVal&, StringVal*); +template void AggregateFunctions::hll_update(FunctionContext*, const DateTimeVal&, StringVal*); +template void AggregateFunctions::hll_update(FunctionContext*, const LargeIntVal&, StringVal*); +template void AggregateFunctions::hll_update(FunctionContext*, const DecimalVal&, StringVal*); +template void AggregateFunctions::hll_update(FunctionContext*, const DecimalV2Val&, StringVal*); template void AggregateFunctions::count_or_sum_distinct_numeric_init( - FunctionContext* ctx, StringVal* dst); + FunctionContext* ctx, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_init( - FunctionContext* ctx, StringVal* dst); -template void AggregateFunctions::count_or_sum_distinct_numeric_init( - FunctionContext* ctx, StringVal* dst); + FunctionContext* ctx, StringVal* dst); +template void AggregateFunctions::count_or_sum_distinct_numeric_init(FunctionContext* ctx, + StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_init( - FunctionContext* ctx, StringVal* dst); -template void AggregateFunctions::count_or_sum_distinct_numeric_init( - FunctionContext* ctx, StringVal* dst); + FunctionContext* ctx, StringVal* dst); +template void AggregateFunctions::count_or_sum_distinct_numeric_init(FunctionContext* ctx, + StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_init( - FunctionContext* ctx, StringVal* dst); + FunctionContext* ctx, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_init( - FunctionContext* ctx, StringVal* dst); - + FunctionContext* ctx, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_update( - 
FunctionContext* ctx, TinyIntVal& src, StringVal* dst); + FunctionContext* ctx, TinyIntVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_update( - FunctionContext* ctx, SmallIntVal& src, StringVal* dst); -template void AggregateFunctions::count_or_sum_distinct_numeric_update( - FunctionContext* ctx, IntVal& src, StringVal* dst); + FunctionContext* ctx, SmallIntVal& src, StringVal* dst); +template void AggregateFunctions::count_or_sum_distinct_numeric_update(FunctionContext* ctx, + IntVal& src, + StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_update( - FunctionContext* ctx, BigIntVal& src, StringVal* dst); + FunctionContext* ctx, BigIntVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_update( - FunctionContext* ctx, FloatVal& src, StringVal* dst); + FunctionContext* ctx, FloatVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_update( - FunctionContext* ctx, DoubleVal& src, StringVal* dst); + FunctionContext* ctx, DoubleVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_update( - FunctionContext* ctx, LargeIntVal& src, StringVal* dst); + FunctionContext* ctx, LargeIntVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_merge( - FunctionContext* ctx, StringVal& src, StringVal* dst); + FunctionContext* ctx, StringVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_merge( - FunctionContext* ctx, StringVal& src, StringVal* dst); -template void AggregateFunctions::count_or_sum_distinct_numeric_merge( - FunctionContext* ctx, StringVal& src, StringVal* dst); + FunctionContext* ctx, StringVal& src, StringVal* dst); +template void AggregateFunctions::count_or_sum_distinct_numeric_merge(FunctionContext* ctx, + StringVal& src, + StringVal* dst); template void 
AggregateFunctions::count_or_sum_distinct_numeric_merge( - FunctionContext* ctx, StringVal& src, StringVal* dst); + FunctionContext* ctx, StringVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_merge( - FunctionContext* ctx, StringVal& src, StringVal* dst); + FunctionContext* ctx, StringVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_merge( - FunctionContext* ctx, StringVal& src, StringVal* dst); + FunctionContext* ctx, StringVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_merge( - FunctionContext* ctx, StringVal& src, StringVal* dst); + FunctionContext* ctx, StringVal& src, StringVal* dst); template StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const 
StringVal& state_sv); template BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::sum_distinct_bigint_finalize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::sum_distinct_bigint_finalize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::sum_distinct_bigint_finalize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::sum_distinct_bigint_finalize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); 
template DoubleVal AggregateFunctions::sum_distinct_double_finalize( - FunctionContext* ctx, const StringVal& state_sv); + FunctionContext* ctx, const StringVal& state_sv); template LargeIntVal AggregateFunctions::sum_distinct_largeint_finalize( - FunctionContext* ctx, const StringVal& state_sv); - -template void AggregateFunctions::knuth_var_update( - FunctionContext*, const TinyIntVal&, StringVal*); -template void AggregateFunctions::knuth_var_update( - FunctionContext*, const SmallIntVal&, StringVal*); -template void AggregateFunctions::knuth_var_update( - FunctionContext*, const IntVal&, StringVal*); -template void AggregateFunctions::knuth_var_update( - FunctionContext*, const BigIntVal&, StringVal*); -template void AggregateFunctions::knuth_var_update( - FunctionContext*, const FloatVal&, StringVal*); -template void AggregateFunctions::knuth_var_update( - FunctionContext*, const DoubleVal&, StringVal*); - -template void AggregateFunctions::first_val_update( - FunctionContext*, const BooleanVal& src, BooleanVal* dst); -template void AggregateFunctions::first_val_update( - FunctionContext*, const TinyIntVal& src, TinyIntVal* dst); -template void AggregateFunctions::first_val_update( - FunctionContext*, const SmallIntVal& src, SmallIntVal* dst); -template void AggregateFunctions::first_val_update( - FunctionContext*, const IntVal& src, IntVal* dst); -template void AggregateFunctions::first_val_update( - FunctionContext*, const BigIntVal& src, BigIntVal* dst); -template void AggregateFunctions::first_val_update( - FunctionContext*, const FloatVal& src, FloatVal* dst); -template void AggregateFunctions::first_val_update( - FunctionContext*, const DoubleVal& src, DoubleVal* dst); -template void AggregateFunctions::first_val_update( - FunctionContext*, const StringVal& src, StringVal* dst); -template void AggregateFunctions::first_val_update( - FunctionContext*, const DateTimeVal& src, DateTimeVal* dst); - -template void AggregateFunctions::first_val_rewrite_update( 
- FunctionContext*, const BooleanVal& src, const BigIntVal&, BooleanVal* dst); -template void AggregateFunctions::first_val_rewrite_update( - FunctionContext*, const TinyIntVal& src, const BigIntVal&, TinyIntVal* dst); -template void AggregateFunctions::first_val_rewrite_update( - FunctionContext*, const SmallIntVal& src, const BigIntVal&, SmallIntVal* dst); -template void AggregateFunctions::first_val_rewrite_update( - FunctionContext*, const IntVal& src, const BigIntVal&, IntVal* dst); -template void AggregateFunctions::first_val_rewrite_update( - FunctionContext*, const BigIntVal& src, const BigIntVal&, BigIntVal* dst); -template void AggregateFunctions::first_val_rewrite_update( - FunctionContext*, const FloatVal& src, const BigIntVal&, FloatVal* dst); -template void AggregateFunctions::first_val_rewrite_update( - FunctionContext*, const DoubleVal& src, const BigIntVal&, DoubleVal* dst); -template void AggregateFunctions::first_val_rewrite_update( - FunctionContext*, const StringVal& src, const BigIntVal&, StringVal* dst); -template void AggregateFunctions::first_val_rewrite_update( - FunctionContext*, const DateTimeVal& src, const BigIntVal&, DateTimeVal* dst); -template void AggregateFunctions::first_val_rewrite_update( - FunctionContext*, const DecimalVal& src, const BigIntVal&, DecimalVal* dst); -template void AggregateFunctions::first_val_rewrite_update( - FunctionContext*, const DecimalV2Val& src, const BigIntVal&, DecimalV2Val* dst); + FunctionContext* ctx, const StringVal& state_sv); + +template void AggregateFunctions::knuth_var_update(FunctionContext*, const TinyIntVal&, StringVal*); +template void AggregateFunctions::knuth_var_update(FunctionContext*, const SmallIntVal&, + StringVal*); +template void AggregateFunctions::knuth_var_update(FunctionContext*, const IntVal&, StringVal*); +template void AggregateFunctions::knuth_var_update(FunctionContext*, const BigIntVal&, StringVal*); +template void AggregateFunctions::knuth_var_update(FunctionContext*, 
const FloatVal&, StringVal*); +template void AggregateFunctions::knuth_var_update(FunctionContext*, const DoubleVal&, StringVal*); + +template void AggregateFunctions::first_val_update(FunctionContext*, + const BooleanVal& src, + BooleanVal* dst); +template void AggregateFunctions::first_val_update(FunctionContext*, + const TinyIntVal& src, + TinyIntVal* dst); +template void AggregateFunctions::first_val_update(FunctionContext*, + const SmallIntVal& src, + SmallIntVal* dst); +template void AggregateFunctions::first_val_update(FunctionContext*, const IntVal& src, + IntVal* dst); +template void AggregateFunctions::first_val_update(FunctionContext*, + const BigIntVal& src, BigIntVal* dst); +template void AggregateFunctions::first_val_update(FunctionContext*, const FloatVal& src, + FloatVal* dst); +template void AggregateFunctions::first_val_update(FunctionContext*, + const DoubleVal& src, DoubleVal* dst); +template void AggregateFunctions::first_val_update(FunctionContext*, + const StringVal& src, StringVal* dst); +template void AggregateFunctions::first_val_update(FunctionContext*, + const DateTimeVal& src, + DateTimeVal* dst); + +template void AggregateFunctions::first_val_rewrite_update(FunctionContext*, + const BooleanVal& src, + const BigIntVal&, + BooleanVal* dst); +template void AggregateFunctions::first_val_rewrite_update(FunctionContext*, + const TinyIntVal& src, + const BigIntVal&, + TinyIntVal* dst); +template void AggregateFunctions::first_val_rewrite_update(FunctionContext*, + const SmallIntVal& src, + const BigIntVal&, + SmallIntVal* dst); +template void AggregateFunctions::first_val_rewrite_update(FunctionContext*, + const IntVal& src, + const BigIntVal&, IntVal* dst); +template void AggregateFunctions::first_val_rewrite_update(FunctionContext*, + const BigIntVal& src, + const BigIntVal&, + BigIntVal* dst); +template void AggregateFunctions::first_val_rewrite_update(FunctionContext*, + const FloatVal& src, + const BigIntVal&, + FloatVal* dst); +template 
void AggregateFunctions::first_val_rewrite_update(FunctionContext*, + const DoubleVal& src, + const BigIntVal&, + DoubleVal* dst); +template void AggregateFunctions::first_val_rewrite_update(FunctionContext*, + const StringVal& src, + const BigIntVal&, + StringVal* dst); +template void AggregateFunctions::first_val_rewrite_update(FunctionContext*, + const DateTimeVal& src, + const BigIntVal&, + DateTimeVal* dst); +template void AggregateFunctions::first_val_rewrite_update(FunctionContext*, + const DecimalVal& src, + const BigIntVal&, + DecimalVal* dst); +template void AggregateFunctions::first_val_rewrite_update(FunctionContext*, + const DecimalV2Val& src, + const BigIntVal&, + DecimalV2Val* dst); //template void AggregateFunctions::FirstValUpdate( // doris_udf::FunctionContext*, impala::StringValue const&, impala::StringValue*); template void AggregateFunctions::first_val_update( - doris_udf::FunctionContext*, doris_udf::DecimalVal const&, doris_udf::DecimalVal*); + doris_udf::FunctionContext*, doris_udf::DecimalVal const&, doris_udf::DecimalVal*); template void AggregateFunctions::first_val_update( - doris_udf::FunctionContext*, doris_udf::DecimalV2Val const&, doris_udf::DecimalV2Val*); - -template void AggregateFunctions::last_val_update( - FunctionContext*, const BooleanVal& src, BooleanVal* dst); -template void AggregateFunctions::last_val_update( - FunctionContext*, const TinyIntVal& src, TinyIntVal* dst); -template void AggregateFunctions::last_val_update( - FunctionContext*, const SmallIntVal& src, SmallIntVal* dst); -template void AggregateFunctions::last_val_update( - FunctionContext*, const IntVal& src, IntVal* dst); -template void AggregateFunctions::last_val_update( - FunctionContext*, const BigIntVal& src, BigIntVal* dst); -template void AggregateFunctions::last_val_update( - FunctionContext*, const FloatVal& src, FloatVal* dst); -template void AggregateFunctions::last_val_update( - FunctionContext*, const DoubleVal& src, DoubleVal* dst); -template 
void AggregateFunctions::last_val_update( - FunctionContext*, const StringVal& src, StringVal* dst); -template void AggregateFunctions::last_val_update( - FunctionContext*, const DateTimeVal& src, DateTimeVal* dst); -template void AggregateFunctions::last_val_update( - FunctionContext*, const DecimalVal& src, DecimalVal* dst); -template void AggregateFunctions::last_val_update( - FunctionContext*, const DecimalV2Val& src, DecimalV2Val* dst); - -template void AggregateFunctions::last_val_remove( - FunctionContext*, const BooleanVal& src, BooleanVal* dst); -template void AggregateFunctions::last_val_remove( - FunctionContext*, const TinyIntVal& src, TinyIntVal* dst); -template void AggregateFunctions::last_val_remove( - FunctionContext*, const SmallIntVal& src, SmallIntVal* dst); -template void AggregateFunctions::last_val_remove( - FunctionContext*, const IntVal& src, IntVal* dst); -template void AggregateFunctions::last_val_remove( - FunctionContext*, const BigIntVal& src, BigIntVal* dst); -template void AggregateFunctions::last_val_remove( - FunctionContext*, const FloatVal& src, FloatVal* dst); -template void AggregateFunctions::last_val_remove( - FunctionContext*, const DoubleVal& src, DoubleVal* dst); -template void AggregateFunctions::last_val_remove( - FunctionContext*, const StringVal& src, StringVal* dst); -template void AggregateFunctions::last_val_remove( - FunctionContext*, const DateTimeVal& src, DateTimeVal* dst); -template void AggregateFunctions::last_val_remove( - FunctionContext*, const DecimalVal& src, DecimalVal* dst); -template void AggregateFunctions::last_val_remove( - FunctionContext*, const DecimalV2Val& src, DecimalV2Val* dst); - -template void AggregateFunctions::offset_fn_init( - FunctionContext*, BooleanVal*); -template void AggregateFunctions::offset_fn_init( - FunctionContext*, TinyIntVal*); -template void AggregateFunctions::offset_fn_init( - FunctionContext*, SmallIntVal*); -template void AggregateFunctions::offset_fn_init( - 
FunctionContext*, IntVal*); -template void AggregateFunctions::offset_fn_init( - FunctionContext*, BigIntVal*); -template void AggregateFunctions::offset_fn_init( - FunctionContext*, FloatVal*); -template void AggregateFunctions::offset_fn_init( - FunctionContext*, DoubleVal*); -template void AggregateFunctions::offset_fn_init( - FunctionContext*, DateTimeVal*); -template void AggregateFunctions::offset_fn_init( - FunctionContext*, DecimalVal*); -template void AggregateFunctions::offset_fn_init( - FunctionContext*, DecimalV2Val*); - -template void AggregateFunctions::offset_fn_update( - FunctionContext*, const BooleanVal& src, const BigIntVal&, const BooleanVal&, - BooleanVal* dst); -template void AggregateFunctions::offset_fn_update( - FunctionContext*, const TinyIntVal& src, const BigIntVal&, const TinyIntVal&, - TinyIntVal* dst); -template void AggregateFunctions::offset_fn_update( - FunctionContext*, const SmallIntVal& src, const BigIntVal&, const SmallIntVal&, - SmallIntVal* dst); -template void AggregateFunctions::offset_fn_update( - FunctionContext*, const IntVal& src, const BigIntVal&, const IntVal&, IntVal* dst); -template void AggregateFunctions::offset_fn_update( - FunctionContext*, const BigIntVal& src, const BigIntVal&, const BigIntVal&, - BigIntVal* dst); -template void AggregateFunctions::offset_fn_update( - FunctionContext*, const FloatVal& src, const BigIntVal&, const FloatVal&, - FloatVal* dst); -template void AggregateFunctions::offset_fn_update( - FunctionContext*, const DoubleVal& src, const BigIntVal&, const DoubleVal&, - DoubleVal* dst); -template void AggregateFunctions::offset_fn_update( - FunctionContext*, const StringVal& src, const BigIntVal&, const StringVal&, - StringVal* dst); -template void AggregateFunctions::offset_fn_update( - FunctionContext*, const DateTimeVal& src, const BigIntVal&, const DateTimeVal&, - DateTimeVal* dst); -template void AggregateFunctions::offset_fn_update( - FunctionContext*, const DecimalVal& src, const 
BigIntVal&, const DecimalVal&, - DecimalVal* dst); -template void AggregateFunctions::offset_fn_update( - FunctionContext*, const DecimalV2Val& src, const BigIntVal&, const DecimalV2Val&, - DecimalV2Val* dst); + doris_udf::FunctionContext*, doris_udf::DecimalV2Val const&, doris_udf::DecimalV2Val*); + +template void AggregateFunctions::last_val_update(FunctionContext*, + const BooleanVal& src, + BooleanVal* dst); +template void AggregateFunctions::last_val_update(FunctionContext*, + const TinyIntVal& src, + TinyIntVal* dst); +template void AggregateFunctions::last_val_update(FunctionContext*, + const SmallIntVal& src, + SmallIntVal* dst); +template void AggregateFunctions::last_val_update(FunctionContext*, const IntVal& src, + IntVal* dst); +template void AggregateFunctions::last_val_update(FunctionContext*, const BigIntVal& src, + BigIntVal* dst); +template void AggregateFunctions::last_val_update(FunctionContext*, const FloatVal& src, + FloatVal* dst); +template void AggregateFunctions::last_val_update(FunctionContext*, const DoubleVal& src, + DoubleVal* dst); +template void AggregateFunctions::last_val_update(FunctionContext*, const StringVal& src, + StringVal* dst); +template void AggregateFunctions::last_val_update(FunctionContext*, + const DateTimeVal& src, + DateTimeVal* dst); +template void AggregateFunctions::last_val_update(FunctionContext*, + const DecimalVal& src, + DecimalVal* dst); +template void AggregateFunctions::last_val_update(FunctionContext*, + const DecimalV2Val& src, + DecimalV2Val* dst); + +template void AggregateFunctions::last_val_remove(FunctionContext*, + const BooleanVal& src, + BooleanVal* dst); +template void AggregateFunctions::last_val_remove(FunctionContext*, + const TinyIntVal& src, + TinyIntVal* dst); +template void AggregateFunctions::last_val_remove(FunctionContext*, + const SmallIntVal& src, + SmallIntVal* dst); +template void AggregateFunctions::last_val_remove(FunctionContext*, const IntVal& src, + IntVal* dst); +template 
void AggregateFunctions::last_val_remove(FunctionContext*, const BigIntVal& src, + BigIntVal* dst); +template void AggregateFunctions::last_val_remove(FunctionContext*, const FloatVal& src, + FloatVal* dst); +template void AggregateFunctions::last_val_remove(FunctionContext*, const DoubleVal& src, + DoubleVal* dst); +template void AggregateFunctions::last_val_remove(FunctionContext*, const StringVal& src, + StringVal* dst); +template void AggregateFunctions::last_val_remove(FunctionContext*, + const DateTimeVal& src, + DateTimeVal* dst); +template void AggregateFunctions::last_val_remove(FunctionContext*, + const DecimalVal& src, + DecimalVal* dst); +template void AggregateFunctions::last_val_remove(FunctionContext*, + const DecimalV2Val& src, + DecimalV2Val* dst); + +template void AggregateFunctions::offset_fn_init(FunctionContext*, BooleanVal*); +template void AggregateFunctions::offset_fn_init(FunctionContext*, TinyIntVal*); +template void AggregateFunctions::offset_fn_init(FunctionContext*, SmallIntVal*); +template void AggregateFunctions::offset_fn_init(FunctionContext*, IntVal*); +template void AggregateFunctions::offset_fn_init(FunctionContext*, BigIntVal*); +template void AggregateFunctions::offset_fn_init(FunctionContext*, FloatVal*); +template void AggregateFunctions::offset_fn_init(FunctionContext*, DoubleVal*); +template void AggregateFunctions::offset_fn_init(FunctionContext*, DateTimeVal*); +template void AggregateFunctions::offset_fn_init(FunctionContext*, DecimalVal*); +template void AggregateFunctions::offset_fn_init(FunctionContext*, DecimalV2Val*); + +template void AggregateFunctions::offset_fn_update(FunctionContext*, + const BooleanVal& src, + const BigIntVal&, const BooleanVal&, + BooleanVal* dst); +template void AggregateFunctions::offset_fn_update(FunctionContext*, + const TinyIntVal& src, + const BigIntVal&, const TinyIntVal&, + TinyIntVal* dst); +template void AggregateFunctions::offset_fn_update(FunctionContext*, + const SmallIntVal& src, 
+ const BigIntVal&, + const SmallIntVal&, + SmallIntVal* dst); +template void AggregateFunctions::offset_fn_update(FunctionContext*, const IntVal& src, + const BigIntVal&, const IntVal&, + IntVal* dst); +template void AggregateFunctions::offset_fn_update(FunctionContext*, + const BigIntVal& src, + const BigIntVal&, const BigIntVal&, + BigIntVal* dst); +template void AggregateFunctions::offset_fn_update(FunctionContext*, const FloatVal& src, + const BigIntVal&, const FloatVal&, + FloatVal* dst); +template void AggregateFunctions::offset_fn_update(FunctionContext*, + const DoubleVal& src, + const BigIntVal&, const DoubleVal&, + DoubleVal* dst); +template void AggregateFunctions::offset_fn_update(FunctionContext*, + const StringVal& src, + const BigIntVal&, const StringVal&, + StringVal* dst); +template void AggregateFunctions::offset_fn_update(FunctionContext*, + const DateTimeVal& src, + const BigIntVal&, + const DateTimeVal&, + DateTimeVal* dst); +template void AggregateFunctions::offset_fn_update(FunctionContext*, + const DecimalVal& src, + const BigIntVal&, const DecimalVal&, + DecimalVal* dst); +template void AggregateFunctions::offset_fn_update(FunctionContext*, + const DecimalV2Val& src, + const BigIntVal&, + const DecimalV2Val&, + DecimalV2Val* dst); template void AggregateFunctions::percentile_approx_update( - FunctionContext* ctx, const doris_udf::DoubleVal&, const doris_udf::DoubleVal&, doris_udf::StringVal*); + FunctionContext* ctx, const doris_udf::DoubleVal&, const doris_udf::DoubleVal&, + doris_udf::StringVal*); template void AggregateFunctions::percentile_approx_update( - FunctionContext* ctx, const doris_udf::DoubleVal&, const doris_udf::DoubleVal&, const doris_udf::DoubleVal&, doris_udf::StringVal*); -} + FunctionContext* ctx, const doris_udf::DoubleVal&, const doris_udf::DoubleVal&, + const doris_udf::DoubleVal&, doris_udf::StringVal*); +} // namespace doris diff --git a/be/src/exprs/aggregate_functions.h b/be/src/exprs/aggregate_functions.h index 
3264beff59073a..eb3e865f032f9f 100644 --- a/be/src/exprs/aggregate_functions.h +++ b/be/src/exprs/aggregate_functions.h @@ -19,9 +19,9 @@ #define DORIS_BE_SRC_QUERY_EXPRS_AGGREGATE_FUNCTIONS_H //#include "exprs/opcode_registry.h" +#include "olap/hll.h" #include "udf/udf.h" #include "udf/udf_internal.h" -#include "olap/hll.h" namespace doris { @@ -45,22 +45,22 @@ class AggregateFunctions { template static void init_zero(doris_udf::FunctionContext*, T* dst); - template + template static void sum_remove(doris_udf::FunctionContext* ctx, const SRC_VAL& src, DST_VAL* dst); // doris_udf::StringVal GetValue() function that returns a copy of src static doris_udf::StringVal string_val_get_value(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& src); + const doris_udf::StringVal& src); static doris_udf::StringVal string_val_serialize_or_finalize(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& src); + const doris_udf::StringVal& src); // Implementation of Count and Count(*) static void count_update(doris_udf::FunctionContext*, const doris_udf::AnyVal& src, - doris_udf::BigIntVal* dst); - static void count_merge(doris_udf::FunctionContext*, - const doris_udf::BigIntVal& src, doris_udf::BigIntVal* dst); - static void count_remove(doris_udf::FunctionContext*, - const doris_udf::AnyVal& src, doris_udf::BigIntVal* dst); + doris_udf::BigIntVal* dst); + static void count_merge(doris_udf::FunctionContext*, const doris_udf::BigIntVal& src, + doris_udf::BigIntVal* dst); + static void count_remove(doris_udf::FunctionContext*, const doris_udf::AnyVal& src, + doris_udf::BigIntVal* dst); static void count_star_update(doris_udf::FunctionContext*, doris_udf::BigIntVal* dst); static void count_star_remove(FunctionContext*, BigIntVal* dst); @@ -69,11 +69,13 @@ class AggregateFunctions { static void percentile_approx_init(doris_udf::FunctionContext* ctx, doris_udf::StringVal* dst); template - static void percentile_approx_update(FunctionContext* ctx, const T& src, 
const DoubleVal& quantile, StringVal* dst); + static void percentile_approx_update(FunctionContext* ctx, const T& src, + const DoubleVal& quantile, StringVal* dst); template - static void percentile_approx_update(FunctionContext* ctx, const T& src, const DoubleVal& quantile, - const DoubleVal& digest_compression, StringVal* dst); + static void percentile_approx_update(FunctionContext* ctx, const T& src, + const DoubleVal& quantile, + const DoubleVal& digest_compression, StringVal* dst); static void percentile_approx_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst); @@ -86,59 +88,52 @@ class AggregateFunctions { static void avg_init(doris_udf::FunctionContext* ctx, doris_udf::StringVal* dst); template static void avg_update(doris_udf::FunctionContext* ctx, const T& src, - doris_udf::StringVal* dst); + doris_udf::StringVal* dst); template static void avg_remove(doris_udf::FunctionContext* ctx, const T& src, - doris_udf::StringVal* dst); - static void avg_merge(FunctionContext* ctx, const StringVal& src, StringVal* -dst); + doris_udf::StringVal* dst); + static void avg_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst); static doris_udf::DoubleVal avg_get_value(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& val); + const doris_udf::StringVal& val); static doris_udf::DoubleVal avg_finalize(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& val); + const doris_udf::StringVal& val); // Avg for timestamp. Uses avg_init() and AvgMerge(). 
static void timestamp_avg_update(doris_udf::FunctionContext* ctx, - const doris_udf::DateTimeVal& src, - doris_udf::StringVal* dst); + const doris_udf::DateTimeVal& src, doris_udf::StringVal* dst); static void timestamp_avg_remove(doris_udf::FunctionContext* ctx, - const doris_udf::DateTimeVal& src, - doris_udf::StringVal* dst); + const doris_udf::DateTimeVal& src, doris_udf::StringVal* dst); static doris_udf::DateTimeVal timestamp_avg_get_value(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& val); + const doris_udf::StringVal& val); static doris_udf::DateTimeVal timestamp_avg_finalize(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& val); + const doris_udf::StringVal& val); // Avg for decimals. static void decimal_avg_init(doris_udf::FunctionContext* ctx, doris_udf::StringVal* dst); static void decimalv2_avg_init(doris_udf::FunctionContext* ctx, doris_udf::StringVal* dst); static void decimal_avg_update(doris_udf::FunctionContext* ctx, - const doris_udf::DecimalVal& src, - doris_udf::StringVal* dst); + const doris_udf::DecimalVal& src, doris_udf::StringVal* dst); static void decimalv2_avg_update(doris_udf::FunctionContext* ctx, - const doris_udf::DecimalV2Val& src, - doris_udf::StringVal* dst); + const doris_udf::DecimalV2Val& src, doris_udf::StringVal* dst); static void decimal_avg_merge(FunctionContext* ctx, const doris_udf::StringVal& src, - doris_udf::StringVal* dst); + doris_udf::StringVal* dst); static void decimalv2_avg_merge(FunctionContext* ctx, const doris_udf::StringVal& src, - doris_udf::StringVal* dst); + doris_udf::StringVal* dst); static doris_udf::StringVal decimalv2_avg_serialize(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& src); + const doris_udf::StringVal& src); static void decimal_avg_remove(doris_udf::FunctionContext* ctx, - const doris_udf::DecimalVal& src, - doris_udf::StringVal* dst); + const doris_udf::DecimalVal& src, doris_udf::StringVal* dst); static void 
decimalv2_avg_remove(doris_udf::FunctionContext* ctx, - const doris_udf::DecimalV2Val& src, - doris_udf::StringVal* dst); + const doris_udf::DecimalV2Val& src, doris_udf::StringVal* dst); static doris_udf::DecimalVal decimal_avg_get_value(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& val); + const doris_udf::StringVal& val); static doris_udf::DecimalV2Val decimalv2_avg_get_value(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& val); + const doris_udf::StringVal& val); static doris_udf::DecimalVal decimal_avg_finalize(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& val); + const doris_udf::StringVal& val); static doris_udf::DecimalV2Val decimalv2_avg_finalize(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& val); + const doris_udf::StringVal& val); // SumUpdate, SumMerge template static void sum(doris_udf::FunctionContext*, const SRC_VAL& src, DST_VAL* dst); @@ -151,23 +146,16 @@ dst); template static void max(doris_udf::FunctionContext*, const T& src, T* dst); - // String concat - static void string_concat( - doris_udf::FunctionContext*, - const doris_udf::StringVal& src, - const doris_udf::StringVal& separator, - doris_udf::StringVal* result); - - /// String concat - static void string_concat_update(FunctionContext*, - const StringVal& src, StringVal* result); - static void string_concat_update(FunctionContext*, - const StringVal& src, const StringVal& separator, StringVal* result); - static void string_concat_merge(FunctionContext*, - const StringVal& src, StringVal* result); - static StringVal string_concat_finalize(FunctionContext*, - const StringVal& src); + static void string_concat(doris_udf::FunctionContext*, const doris_udf::StringVal& src, + const doris_udf::StringVal& separator, doris_udf::StringVal* result); + + /// String concat + static void string_concat_update(FunctionContext*, const StringVal& src, StringVal* result); + static void string_concat_update(FunctionContext*, const StringVal& 
src, + const StringVal& separator, StringVal* result); + static void string_concat_merge(FunctionContext*, const StringVal& src, StringVal* result); + static StringVal string_concat_finalize(FunctionContext*, const StringVal& src); // Probabilistic Counting (PC), a distinct estimate algorithms. // Probabilistic Counting with Stochastic Averaging (PCSA) is a variant @@ -179,54 +167,70 @@ dst); template static void pcsa_update(doris_udf::FunctionContext*, const T& src, doris_udf::StringVal* dst); - static void pc_merge( - doris_udf::FunctionContext*, - const doris_udf::StringVal& src, - doris_udf::StringVal* dst); + static void pc_merge(doris_udf::FunctionContext*, const doris_udf::StringVal& src, + doris_udf::StringVal* dst); - static doris_udf::StringVal pc_finalize( - doris_udf::FunctionContext*, - const doris_udf::StringVal& src); + static doris_udf::StringVal pc_finalize(doris_udf::FunctionContext*, + const doris_udf::StringVal& src); - static doris_udf::StringVal pcsa_finalize( - doris_udf::FunctionContext*, - const doris_udf::StringVal& src); + static doris_udf::StringVal pcsa_finalize(doris_udf::FunctionContext*, + const doris_udf::StringVal& src); // count and sum distinct algorithm in multi distinct template - static void count_or_sum_distinct_numeric_init(doris_udf::FunctionContext* ctx, doris_udf::StringVal* dst); + static void count_or_sum_distinct_numeric_init(doris_udf::FunctionContext* ctx, + doris_udf::StringVal* dst); template static void count_or_sum_distinct_numeric_update(FunctionContext* ctx, T& src, StringVal* dst); template - static void count_or_sum_distinct_numeric_merge(FunctionContext* ctx, StringVal& src, StringVal* dst); + static void count_or_sum_distinct_numeric_merge(FunctionContext* ctx, StringVal& src, + StringVal* dst); template - static StringVal count_or_sum_distinct_numeric_serialize(FunctionContext* ctx, const StringVal& state_sv); + static StringVal count_or_sum_distinct_numeric_serialize(FunctionContext* ctx, + const 
StringVal& state_sv); template - static BigIntVal count_or_sum_distinct_numeric_finalize(FunctionContext* ctx, const StringVal& state_sv); + static BigIntVal count_or_sum_distinct_numeric_finalize(FunctionContext* ctx, + const StringVal& state_sv); // count distinct in multi distinct for string - static void count_distinct_string_init(doris_udf::FunctionContext* ctx, doris_udf::StringVal* dst); + static void count_distinct_string_init(doris_udf::FunctionContext* ctx, + doris_udf::StringVal* dst); static void count_distinct_string_update(FunctionContext* ctx, StringVal& src, StringVal* dst); static void count_distinct_string_merge(FunctionContext* ctx, StringVal& src, StringVal* dst); - static StringVal count_distinct_string_serialize(FunctionContext* ctx, const StringVal& state_sv); - static BigIntVal count_distinct_string_finalize(FunctionContext* ctx, const StringVal& state_sv); + static StringVal count_distinct_string_serialize(FunctionContext* ctx, + const StringVal& state_sv); + static BigIntVal count_distinct_string_finalize(FunctionContext* ctx, + const StringVal& state_sv); // count distinct in multi distinct for decimal - static void count_or_sum_distinct_decimal_init(doris_udf::FunctionContext* ctx, doris_udf::StringVal* dst); - static void count_or_sum_distinct_decimalv2_init(doris_udf::FunctionContext* ctx, doris_udf::StringVal* dst); - static void count_or_sum_distinct_decimal_update(FunctionContext* ctx, DecimalVal& src, StringVal* dst); - static void count_or_sum_distinct_decimalv2_update(FunctionContext* ctx, DecimalV2Val& src, StringVal* dst); - static void count_or_sum_distinct_decimal_merge(FunctionContext* ctx, StringVal& src, StringVal* dst); - static void count_or_sum_distinct_decimalv2_merge(FunctionContext* ctx, StringVal& src, StringVal* dst); - static StringVal count_or_sum_distinct_decimal_serialize(FunctionContext* ctx, const StringVal& state_sv); - static StringVal count_or_sum_distinct_decimalv2_serialize(FunctionContext* ctx, const 
StringVal& state_sv); - static BigIntVal count_distinct_decimal_finalize(FunctionContext* ctx, const StringVal& state_sv); - static BigIntVal count_distinct_decimalv2_finalize(FunctionContext* ctx, const StringVal& state_sv); - static DecimalVal sum_distinct_decimal_finalize(FunctionContext* ctx, const StringVal& state_sv); - static DecimalV2Val sum_distinct_decimalv2_finalize(FunctionContext* ctx, const StringVal& state_sv); + static void count_or_sum_distinct_decimal_init(doris_udf::FunctionContext* ctx, + doris_udf::StringVal* dst); + static void count_or_sum_distinct_decimalv2_init(doris_udf::FunctionContext* ctx, + doris_udf::StringVal* dst); + static void count_or_sum_distinct_decimal_update(FunctionContext* ctx, DecimalVal& src, + StringVal* dst); + static void count_or_sum_distinct_decimalv2_update(FunctionContext* ctx, DecimalV2Val& src, + StringVal* dst); + static void count_or_sum_distinct_decimal_merge(FunctionContext* ctx, StringVal& src, + StringVal* dst); + static void count_or_sum_distinct_decimalv2_merge(FunctionContext* ctx, StringVal& src, + StringVal* dst); + static StringVal count_or_sum_distinct_decimal_serialize(FunctionContext* ctx, + const StringVal& state_sv); + static StringVal count_or_sum_distinct_decimalv2_serialize(FunctionContext* ctx, + const StringVal& state_sv); + static BigIntVal count_distinct_decimal_finalize(FunctionContext* ctx, + const StringVal& state_sv); + static BigIntVal count_distinct_decimalv2_finalize(FunctionContext* ctx, + const StringVal& state_sv); + static DecimalVal sum_distinct_decimal_finalize(FunctionContext* ctx, + const StringVal& state_sv); + static DecimalV2Val sum_distinct_decimalv2_finalize(FunctionContext* ctx, + const StringVal& state_sv); // count distinct in multi distinct for Date - static void count_distinct_date_init(doris_udf::FunctionContext* ctx, doris_udf::StringVal* dst); + static void count_distinct_date_init(doris_udf::FunctionContext* ctx, + doris_udf::StringVal* dst); static void 
count_distinct_date_update(FunctionContext* ctx, DateTimeVal& src, StringVal* dst); static void count_distinct_date_merge(FunctionContext* ctx, StringVal& src, StringVal* dst); static StringVal count_distinct_date_serialize(FunctionContext* ctx, const StringVal& state_sv); @@ -235,7 +239,8 @@ dst); template static BigIntVal sum_distinct_bigint_finalize(FunctionContext* ctx, const StringVal& state_sv); template - static LargeIntVal sum_distinct_largeint_finalize(FunctionContext* ctx, const StringVal& state_sv); + static LargeIntVal sum_distinct_largeint_finalize(FunctionContext* ctx, + const StringVal& state_sv); template static DoubleVal sum_distinct_double_finalize(FunctionContext* ctx, const StringVal& state_sv); @@ -244,9 +249,8 @@ dst); /// http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm static void knuth_var_init(FunctionContext* context, StringVal* val); template - static void knuth_var_update(FunctionContext* context, const T& input, StringVal* val); - static void knuth_var_merge(FunctionContext* context, const StringVal& src, - StringVal* dst); + static void knuth_var_update(FunctionContext* context, const T& input, StringVal* val); + static void knuth_var_merge(FunctionContext* context, const StringVal& src, StringVal* dst); static DoubleVal knuth_var_finalize(FunctionContext* context, const StringVal& val); /// Calculates the biased variance, uses KnuthVar Init-Update-Merge functions @@ -276,17 +280,17 @@ dst); // Returns the result for RANK and prepares the state for the next Update(). static doris_udf::BigIntVal rank_get_value(doris_udf::FunctionContext*, - doris_udf::StringVal& src); + doris_udf::StringVal& src); // Returns the result for DENSE_RANK and prepares the state for the next Update(). // TODO: Implement DENSE_RANK with a single doris_udf::BigIntVal. Requires src can be modified, // AggFnEvaluator would need to handle copying the src doris_udf::AnyVal back into the src slot. 
static doris_udf::BigIntVal dense_rank_get_value(doris_udf::FunctionContext*, - doris_udf::StringVal& src); + doris_udf::StringVal& src); // Returns the result for RANK and DENSE_RANK and cleans up intermediate state in src. static doris_udf::BigIntVal rank_finalize(doris_udf::FunctionContext*, - doris_udf::StringVal& src); + doris_udf::StringVal& src); // Implements LAST_VALUE. template @@ -302,7 +306,7 @@ dst); // AnalyticEvalNode). template static void first_val_rewrite_update(doris_udf::FunctionContext*, const T& src, - const doris_udf::BigIntVal&, T* dst); + const doris_udf::BigIntVal&, T* dst); // OffsetFn*() implement LAG and LEAD. Init() sets the default value (the last // constant parameter) as dst. @@ -315,63 +319,47 @@ dst); // in Init(). template static void offset_fn_update(doris_udf::FunctionContext*, const T& src, - const doris_udf::BigIntVal&, const T&, T* dst); + const doris_udf::BigIntVal&, const T&, T* dst); // todo(kks): keep following HLL methods only for backward compatibility, we should remove these methods // when doris 0.12 release static void hll_init(doris_udf::FunctionContext*, doris_udf::StringVal* slot); template static void hll_update(doris_udf::FunctionContext*, const T& src, doris_udf::StringVal* dst); - static void hll_merge( - doris_udf::FunctionContext*, - const doris_udf::StringVal& src, - doris_udf::StringVal* dst); - static doris_udf::StringVal hll_finalize( - doris_udf::FunctionContext*, - const doris_udf::StringVal& src); + static void hll_merge(doris_udf::FunctionContext*, const doris_udf::StringVal& src, + doris_udf::StringVal* dst); + static doris_udf::StringVal hll_finalize(doris_udf::FunctionContext*, + const doris_udf::StringVal& src); static void hll_union_agg_init(doris_udf::FunctionContext*, doris_udf::HllVal* slot); // fill all register according to hll set type static void hll_union_agg_update(doris_udf::FunctionContext*, const doris_udf::HllVal& src, doris_udf::HllVal* dst); // merge the register value - static 
void hll_union_agg_merge( - doris_udf::FunctionContext*, - const doris_udf::HllVal& src, - doris_udf::HllVal* dst); + static void hll_union_agg_merge(doris_udf::FunctionContext*, const doris_udf::HllVal& src, + doris_udf::HllVal* dst); // return result - static doris_udf::BigIntVal hll_union_agg_finalize( - doris_udf::FunctionContext*, - const doris_udf::HllVal& src); + static doris_udf::BigIntVal hll_union_agg_finalize(doris_udf::FunctionContext*, + const doris_udf::HllVal& src); // calculate result - static int64_t hll_algorithm(uint8_t *pdata, int data_len); - static int64_t hll_algorithm(const StringVal &dst) { - return hll_algorithm(dst.ptr, dst.len); - } - static int64_t hll_algorithm(const HllVal &dst) { + static int64_t hll_algorithm(uint8_t* pdata, int data_len); + static int64_t hll_algorithm(const StringVal& dst) { return hll_algorithm(dst.ptr, dst.len); } + static int64_t hll_algorithm(const HllVal& dst) { return hll_algorithm(dst.ptr + 1, dst.len - 1); } // HLL value type aggregate to HLL value type - static void hll_raw_agg_init( - doris_udf::FunctionContext*, - doris_udf::HllVal* slot); - static void hll_raw_agg_update( - doris_udf::FunctionContext*, - const doris_udf::HllVal& src, - doris_udf::HllVal* dst); - static void hll_raw_agg_merge( - doris_udf::FunctionContext*, - const doris_udf::HllVal& src, - doris_udf::HllVal* dst); + static void hll_raw_agg_init(doris_udf::FunctionContext*, doris_udf::HllVal* slot); + static void hll_raw_agg_update(doris_udf::FunctionContext*, const doris_udf::HllVal& src, + doris_udf::HllVal* dst); + static void hll_raw_agg_merge(doris_udf::FunctionContext*, const doris_udf::HllVal& src, + doris_udf::HllVal* dst); // return result which is HLL type - static doris_udf::HllVal hll_raw_agg_finalize( - doris_udf::FunctionContext*, - const doris_udf::HllVal& src); + static doris_udf::HllVal hll_raw_agg_finalize(doris_udf::FunctionContext*, + const doris_udf::HllVal& src); }; -} +} // namespace doris #endif - diff --git 
a/be/src/exprs/anyval_util.cpp b/be/src/exprs/anyval_util.cpp old mode 100755 new mode 100644 index ebe6ebfda4a443..e23999c80d5ccf --- a/be/src/exprs/anyval_util.cpp +++ b/be/src/exprs/anyval_util.cpp @@ -17,7 +17,6 @@ #include "exprs/anyval_util.h" -#include "exprs/anyval_util.h" #include "runtime/mem_pool.h" #include "runtime/mem_tracker.h" @@ -37,17 +36,15 @@ using doris_udf::StringVal; using doris_udf::AnyVal; Status allocate_any_val(RuntimeState* state, MemPool* pool, const TypeDescriptor& type, - const std::string& mem_limit_exceeded_msg, AnyVal** result) { - const int anyval_size = AnyValUtil::any_val_size(type); - const int anyval_alignment = AnyValUtil::any_val_alignment(type); - *result = - reinterpret_cast(pool->try_allocate_aligned(anyval_size, anyval_alignment)); - if (*result == NULL) { - return pool->mem_tracker()->MemLimitExceeded( - state, mem_limit_exceeded_msg, anyval_size); - } - memset(*result, 0, anyval_size); - return Status::OK(); + const std::string& mem_limit_exceeded_msg, AnyVal** result) { + const int anyval_size = AnyValUtil::any_val_size(type); + const int anyval_alignment = AnyValUtil::any_val_alignment(type); + *result = reinterpret_cast(pool->try_allocate_aligned(anyval_size, anyval_alignment)); + if (*result == NULL) { + return pool->mem_tracker()->MemLimitExceeded(state, mem_limit_exceeded_msg, anyval_size); + } + memset(*result, 0, anyval_size); + return Status::OK(); } AnyVal* create_any_val(ObjectPool* pool, const TypeDescriptor& type) { @@ -170,4 +167,4 @@ FunctionContext::TypeDesc AnyValUtil::column_type_to_type_desc(const TypeDescrip return out; } -} +} // namespace doris diff --git a/be/src/exprs/anyval_util.h b/be/src/exprs/anyval_util.h old mode 100755 new mode 100644 index d6d96fcf87f3db..e841cb16bf532e --- a/be/src/exprs/anyval_util.h +++ b/be/src/exprs/anyval_util.h @@ -18,12 +18,12 @@ #ifndef DORIS_BE_SRC_QUERY_EXPRS_ANYVAL_UTIL_H #define DORIS_BE_SRC_QUERY_EXPRS_ANYVAL_UTIL_H +#include "common/status.h" +#include 
"exprs/expr.h" #include "runtime/primitive_type.h" #include "udf/udf.h" #include "util/hash_util.hpp" #include "util/types.h" -#include "common/status.h" -#include "exprs/expr.h" namespace doris { @@ -120,7 +120,7 @@ class AnyValUtil { } // TODO(lingbin): fix this method. can not use sizeof directly, because there are a lot of - // storage way for one value. + // storage way for one value. static uint64_t hash64(const doris_udf::DecimalVal& v, int64_t seed) { DecimalValue tv = DecimalValue::from_decimal_val(v); return HashUtil::fnv_hash64(&tv, sizeof(DecimalValue), seed); @@ -235,25 +235,35 @@ class AnyValUtil { /// Returns the byte alignment of *Val for type t. static int any_val_alignment(const TypeDescriptor& t) { - switch (t.type) { - case TYPE_BOOLEAN: return alignof(BooleanVal); - case TYPE_TINYINT: return alignof(TinyIntVal); - case TYPE_SMALLINT: return alignof(SmallIntVal); - case TYPE_INT: return alignof(IntVal); - case TYPE_BIGINT: return alignof(BigIntVal); - case TYPE_LARGEINT: return alignof(LargeIntVal); - case TYPE_FLOAT: return alignof(FloatVal); - case TYPE_DOUBLE: return alignof(DoubleVal); + switch (t.type) { + case TYPE_BOOLEAN: + return alignof(BooleanVal); + case TYPE_TINYINT: + return alignof(TinyIntVal); + case TYPE_SMALLINT: + return alignof(SmallIntVal); + case TYPE_INT: + return alignof(IntVal); + case TYPE_BIGINT: + return alignof(BigIntVal); + case TYPE_LARGEINT: + return alignof(LargeIntVal); + case TYPE_FLOAT: + return alignof(FloatVal); + case TYPE_DOUBLE: + return alignof(DoubleVal); case TYPE_OBJECT: case TYPE_HLL: case TYPE_VARCHAR: case TYPE_CHAR: - return alignof(StringVal); - case TYPE_DATETIME: + return alignof(StringVal); + case TYPE_DATETIME: case TYPE_DATE: - return alignof(DateTimeVal); - case TYPE_DECIMAL: return alignof(DecimalVal); - case TYPE_DECIMALV2: return alignof(DecimalV2Val); + return alignof(DateTimeVal); + case TYPE_DECIMAL: + return alignof(DecimalVal); + case TYPE_DECIMALV2: + return alignof(DecimalV2Val); 
default: DCHECK(false) << t; return 0; @@ -269,13 +279,12 @@ class AnyValUtil { return val; } - static void TruncateIfNecessary(const FunctionContext::TypeDesc& type, StringVal *val) { - if (type.type == FunctionContext::TYPE_VARCHAR - || type.type == FunctionContext::TYPE_CHAR) { - DCHECK(type.len >= 0); + static void TruncateIfNecessary(const FunctionContext::TypeDesc& type, StringVal* val) { + if (type.type == FunctionContext::TYPE_VARCHAR || type.type == FunctionContext::TYPE_CHAR) { + DCHECK(type.len >= 0); val->len = std::min(val->len, (int64_t)type.len); - } - } + } + } static StringVal from_buffer(FunctionContext* ctx, const char* ptr, int len) { StringVal result(ctx, len); @@ -305,103 +314,104 @@ class AnyValUtil { dst->is_null = false; switch (type.type) { - case TYPE_NULL: return; + case TYPE_NULL: + return; case TYPE_BOOLEAN: - reinterpret_cast(dst)->val = - *reinterpret_cast(slot); + reinterpret_cast(dst)->val = + *reinterpret_cast(slot); return; case TYPE_TINYINT: reinterpret_cast(dst)->val = - *reinterpret_cast(slot); + *reinterpret_cast(slot); return; case TYPE_SMALLINT: reinterpret_cast(dst)->val = - *reinterpret_cast(slot); + *reinterpret_cast(slot); return; case TYPE_INT: reinterpret_cast(dst)->val = - *reinterpret_cast(slot); + *reinterpret_cast(slot); return; case TYPE_BIGINT: - reinterpret_cast(dst)->val = - *reinterpret_cast(slot); + reinterpret_cast(dst)->val = + *reinterpret_cast(slot); return; case TYPE_LARGEINT: memcpy(&reinterpret_cast(dst)->val, slot, sizeof(__int128)); return; case TYPE_FLOAT: - reinterpret_cast(dst)->val = - *reinterpret_cast(slot); + reinterpret_cast(dst)->val = + *reinterpret_cast(slot); return; case TYPE_TIME: case TYPE_DOUBLE: - reinterpret_cast(dst)->val = - *reinterpret_cast(slot); + reinterpret_cast(dst)->val = + *reinterpret_cast(slot); return; case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: reinterpret_cast(slot)->to_string_val( - reinterpret_cast(dst)); + reinterpret_cast(dst)); return; 
case TYPE_DECIMAL: reinterpret_cast(slot)->to_decimal_val( - reinterpret_cast(dst)); - return; + reinterpret_cast(dst)); + return; case TYPE_DECIMALV2: - reinterpret_cast(dst)->val = - reinterpret_cast(slot)->value; - return; + reinterpret_cast(dst)->val = + reinterpret_cast(slot)->value; + return; case TYPE_DATE: reinterpret_cast(slot)->to_datetime_val( - reinterpret_cast(dst)); - return; + reinterpret_cast(dst)); + return; case TYPE_DATETIME: reinterpret_cast(slot)->to_datetime_val( - reinterpret_cast(dst)); + reinterpret_cast(dst)); return; default: - DCHECK(false) << "NYI"; + DCHECK(false) << "NYI"; } } /// Templated equality functions. These assume the input values are not NULL. - template + template static inline bool equals(const PrimitiveType& type, const T& x, const T& y) { return equals_internal(x, y); } /// Templated equality functions. These assume the input values are not NULL. - template + template static inline bool equals(const T& x, const T& y) { return equals_internal(x, y); } - template + template static inline bool equals(const TypeDescriptor& type, const T& x, const T& y) { return equals_internal(x, y); } - template + template static inline bool equals(const FunctionContext::TypeDesc& type, const T& x, const T& y) { return equals_internal(x, y); } + private: /// Implementations of Equals(). 
- template + template static inline bool equals_internal(const T& x, const T& y); - }; -template +template inline bool AnyValUtil::equals_internal(const T& x, const T& y) { DCHECK(!x.is_null); DCHECK(!y.is_null); return x.val == y.val; } -template<> +template <> inline bool AnyValUtil::equals_internal(const StringVal& x, const StringVal& y) { DCHECK(!x.is_null); DCHECK(!y.is_null); @@ -410,7 +420,7 @@ inline bool AnyValUtil::equals_internal(const StringVal& x, const StringVal& y) return x_sv == y_sv; } -template<> +template <> inline bool AnyValUtil::equals_internal(const DateTimeVal& x, const DateTimeVal& y) { DCHECK(!x.is_null); DCHECK(!y.is_null); @@ -419,14 +429,14 @@ inline bool AnyValUtil::equals_internal(const DateTimeVal& x, const DateTimeVal& return x_tv == y_tv; } -template<> +template <> inline bool AnyValUtil::equals_internal(const DecimalVal& x, const DecimalVal& y) { DCHECK(!x.is_null); DCHECK(!y.is_null); return x == y; } -template<> +template <> inline bool AnyValUtil::equals_internal(const DecimalV2Val& x, const DecimalV2Val& y) { DCHECK(!x.is_null); DCHECK(!y.is_null); @@ -439,9 +449,9 @@ doris_udf::AnyVal* create_any_val(ObjectPool* pool, const TypeDescriptor& type); /// Allocates an AnyVal subclass of 'type' from 'pool'. The AnyVal's memory is /// initialized to all 0's. Returns a MemLimitExceeded() error with message /// 'mem_limit_exceeded_msg' if the allocation cannot be made because of a memory -/// limit. +/// limit. 
Status allocate_any_val(RuntimeState* state, MemPool* pool, const TypeDescriptor& type, - const std::string& mem_limit_exceeded_msg, AnyVal** result); + const std::string& mem_limit_exceeded_msg, AnyVal** result); -} +} // namespace doris #endif diff --git a/be/src/exprs/arithmetic_expr.cpp b/be/src/exprs/arithmetic_expr.cpp index 2fd10436bb4de8..90cce5a3649eb6 100644 --- a/be/src/exprs/arithmetic_expr.cpp +++ b/be/src/exprs/arithmetic_expr.cpp @@ -48,116 +48,115 @@ Expr* ArithmeticExpr::from_thrift(const TExprNode& node) { return NULL; } -#define BINARY_OP_CHECK_ZERO_FN(TYPE, CLASS, FN, OP) \ +#define BINARY_OP_CHECK_ZERO_FN(TYPE, CLASS, FN, OP) \ TYPE CLASS::FN(ExprContext* context, TupleRow* row) { \ - TYPE v1 = _children[0]->FN(context, row); \ - if (v1.is_null) { \ - return TYPE::null(); \ - } \ - TYPE v2 = _children[1]->FN(context, row); \ - if (v2.is_null || v2.val == 0) { \ - return TYPE::null(); \ - } \ - return TYPE(v1.val OP v2.val); \ + TYPE v1 = _children[0]->FN(context, row); \ + if (v1.is_null) { \ + return TYPE::null(); \ + } \ + TYPE v2 = _children[1]->FN(context, row); \ + if (v2.is_null || v2.val == 0) { \ + return TYPE::null(); \ + } \ + return TYPE(v1.val OP v2.val); \ } -#define BINARY_OP_FN(TYPE, CLASS, FN, OP) \ +#define BINARY_OP_FN(TYPE, CLASS, FN, OP) \ TYPE CLASS::FN(ExprContext* context, TupleRow* row) { \ - TYPE v1 = _children[0]->FN(context, row); \ - if (v1.is_null) { \ - return TYPE::null(); \ - } \ - TYPE v2 = _children[1]->FN(context, row); \ - if (v2.is_null) { \ - return TYPE::null(); \ - } \ - return TYPE(v1.val OP v2.val); \ + TYPE v1 = _children[0]->FN(context, row); \ + if (v1.is_null) { \ + return TYPE::null(); \ + } \ + TYPE v2 = _children[1]->FN(context, row); \ + if (v2.is_null) { \ + return TYPE::null(); \ + } \ + return TYPE(v1.val OP v2.val); \ } -#define BINARY_ARITH_FNS(CLASS, OP) \ - BINARY_OP_FN(TinyIntVal, CLASS, get_tiny_int_val, OP) \ +#define BINARY_ARITH_FNS(CLASS, OP) \ + BINARY_OP_FN(TinyIntVal, CLASS, 
get_tiny_int_val, OP) \ BINARY_OP_FN(SmallIntVal, CLASS, get_small_int_val, OP) \ - BINARY_OP_FN(IntVal, CLASS, get_int_val, OP) \ - BINARY_OP_FN(BigIntVal, CLASS, get_big_int_val, OP) \ + BINARY_OP_FN(IntVal, CLASS, get_int_val, OP) \ + BINARY_OP_FN(BigIntVal, CLASS, get_big_int_val, OP) \ BINARY_OP_FN(LargeIntVal, CLASS, get_large_int_val, OP) \ - BINARY_OP_FN(FloatVal, CLASS, get_float_val, OP) \ - BINARY_OP_FN(DoubleVal, CLASS, get_double_val, OP) \ + BINARY_OP_FN(FloatVal, CLASS, get_float_val, OP) \ + BINARY_OP_FN(DoubleVal, CLASS, get_double_val, OP) BINARY_ARITH_FNS(AddExpr, +) BINARY_ARITH_FNS(SubExpr, -) BINARY_ARITH_FNS(MulExpr, *) -#define BINARY_DIV_FNS() \ - BINARY_OP_CHECK_ZERO_FN(TinyIntVal, DivExpr, get_tiny_int_val, /) \ +#define BINARY_DIV_FNS() \ + BINARY_OP_CHECK_ZERO_FN(TinyIntVal, DivExpr, get_tiny_int_val, /) \ BINARY_OP_CHECK_ZERO_FN(SmallIntVal, DivExpr, get_small_int_val, /) \ - BINARY_OP_CHECK_ZERO_FN(IntVal, DivExpr, get_int_val, /) \ - BINARY_OP_CHECK_ZERO_FN(BigIntVal, DivExpr, get_big_int_val, /) \ + BINARY_OP_CHECK_ZERO_FN(IntVal, DivExpr, get_int_val, /) \ + BINARY_OP_CHECK_ZERO_FN(BigIntVal, DivExpr, get_big_int_val, /) \ BINARY_OP_CHECK_ZERO_FN(LargeIntVal, DivExpr, get_large_int_val, /) \ - BINARY_OP_CHECK_ZERO_FN(FloatVal, DivExpr, get_float_val, /) \ - BINARY_OP_CHECK_ZERO_FN(DoubleVal, DivExpr, get_double_val, /) \ + BINARY_OP_CHECK_ZERO_FN(FloatVal, DivExpr, get_float_val, /) \ + BINARY_OP_CHECK_ZERO_FN(DoubleVal, DivExpr, get_double_val, /) BINARY_DIV_FNS() -#define BINARY_MOD_FNS() \ - BINARY_OP_CHECK_ZERO_FN(TinyIntVal, ModExpr, get_tiny_int_val, %) \ +#define BINARY_MOD_FNS() \ + BINARY_OP_CHECK_ZERO_FN(TinyIntVal, ModExpr, get_tiny_int_val, %) \ BINARY_OP_CHECK_ZERO_FN(SmallIntVal, ModExpr, get_small_int_val, %) \ - BINARY_OP_CHECK_ZERO_FN(IntVal, ModExpr, get_int_val, %) \ - BINARY_OP_CHECK_ZERO_FN(BigIntVal, ModExpr, get_big_int_val, %) \ - BINARY_OP_CHECK_ZERO_FN(LargeIntVal, ModExpr, get_large_int_val, %) \ + 
BINARY_OP_CHECK_ZERO_FN(IntVal, ModExpr, get_int_val, %) \ + BINARY_OP_CHECK_ZERO_FN(BigIntVal, ModExpr, get_big_int_val, %) \ + BINARY_OP_CHECK_ZERO_FN(LargeIntVal, ModExpr, get_large_int_val, %) BINARY_MOD_FNS() -FloatVal ModExpr::get_float_val(ExprContext* context, TupleRow* row) { - FloatVal v1 = _children[0]->get_float_val(context, row); +FloatVal ModExpr::get_float_val(ExprContext* context, TupleRow* row) { + FloatVal v1 = _children[0]->get_float_val(context, row); if (v1.is_null) { return FloatVal::null(); } - FloatVal v2 = _children[1]->get_float_val(context, row); + FloatVal v2 = _children[1]->get_float_val(context, row); if (v2.is_null) { return FloatVal::null(); } return FloatVal(fmod(v1.val, v2.val)); } -DoubleVal ModExpr::get_double_val(ExprContext* context, TupleRow* row) { - DoubleVal v1 = _children[0]->get_double_val(context, row); +DoubleVal ModExpr::get_double_val(ExprContext* context, TupleRow* row) { + DoubleVal v1 = _children[0]->get_double_val(context, row); if (v1.is_null) { return DoubleVal::null(); } - DoubleVal v2 = _children[1]->get_double_val(context, row); + DoubleVal v2 = _children[1]->get_double_val(context, row); if (v2.is_null) { return DoubleVal::null(); } return DoubleVal(fmod(v1.val, v2.val)); } -#define BINARY_BIT_FNS(CLASS, OP) \ - BINARY_OP_FN(TinyIntVal, CLASS, get_tiny_int_val, OP) \ +#define BINARY_BIT_FNS(CLASS, OP) \ + BINARY_OP_FN(TinyIntVal, CLASS, get_tiny_int_val, OP) \ BINARY_OP_FN(SmallIntVal, CLASS, get_small_int_val, OP) \ - BINARY_OP_FN(IntVal, CLASS, get_int_val, OP) \ - BINARY_OP_FN(BigIntVal, CLASS, get_big_int_val, OP) \ - BINARY_OP_FN(LargeIntVal, CLASS, get_large_int_val, OP) \ + BINARY_OP_FN(IntVal, CLASS, get_int_val, OP) \ + BINARY_OP_FN(BigIntVal, CLASS, get_big_int_val, OP) \ + BINARY_OP_FN(LargeIntVal, CLASS, get_large_int_val, OP) BINARY_BIT_FNS(BitAndExpr, &) BINARY_BIT_FNS(BitOrExpr, |) BINARY_BIT_FNS(BitXorExpr, ^) -#define BITNOT_OP_FN(TYPE, FN) \ +#define BITNOT_OP_FN(TYPE, FN) \ TYPE 
BitNotExpr::FN(ExprContext* context, TupleRow* row) { \ - TYPE v = _children[0]->FN(context, row); \ - if (v.is_null) { \ - return TYPE::null(); \ - } \ - return TYPE(~v.val); \ + TYPE v = _children[0]->FN(context, row); \ + if (v.is_null) { \ + return TYPE::null(); \ + } \ + return TYPE(~v.val); \ } -#define BITNOT_FNS() \ - BITNOT_OP_FN(TinyIntVal, get_tiny_int_val) \ +#define BITNOT_FNS() \ + BITNOT_OP_FN(TinyIntVal, get_tiny_int_val) \ BITNOT_OP_FN(SmallIntVal, get_small_int_val) \ - BITNOT_OP_FN(IntVal, get_int_val) \ - BITNOT_OP_FN(BigIntVal, get_big_int_val) \ - BITNOT_OP_FN(LargeIntVal, get_large_int_val) \ + BITNOT_OP_FN(IntVal, get_int_val) \ + BITNOT_OP_FN(BigIntVal, get_big_int_val) \ + BITNOT_OP_FN(LargeIntVal, get_large_int_val) BITNOT_FNS() -} - +} // namespace doris diff --git a/be/src/exprs/arithmetic_expr.h b/be/src/exprs/arithmetic_expr.h index 9af571b4510297..2c6c790f2e9670 100644 --- a/be/src/exprs/arithmetic_expr.h +++ b/be/src/exprs/arithmetic_expr.h @@ -26,6 +26,7 @@ namespace doris { class ArithmeticExpr : public Expr { public: static Expr* from_thrift(const TExprNode& node); + protected: enum BinaryOpType { ADD, @@ -39,17 +40,15 @@ class ArithmeticExpr : public Expr { BIT_NOT, }; - ArithmeticExpr(const TExprNode& node) : Expr(node) { } - virtual ~ArithmeticExpr() { } + ArithmeticExpr(const TExprNode& node) : Expr(node) {} + virtual ~ArithmeticExpr() {} }; class AddExpr : public ArithmeticExpr { public: - AddExpr(const TExprNode& node) : ArithmeticExpr(node) { } - virtual ~AddExpr() { } - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new AddExpr(*this)); - } + AddExpr(const TExprNode& node) : ArithmeticExpr(node) {} + virtual ~AddExpr() {} + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new AddExpr(*this)); } virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*); virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*); virtual IntVal get_int_val(ExprContext* 
context, TupleRow*); @@ -61,11 +60,9 @@ class AddExpr : public ArithmeticExpr { class SubExpr : public ArithmeticExpr { public: - SubExpr(const TExprNode& node) : ArithmeticExpr(node) { } - virtual ~SubExpr() { } - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new SubExpr(*this)); - } + SubExpr(const TExprNode& node) : ArithmeticExpr(node) {} + virtual ~SubExpr() {} + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new SubExpr(*this)); } virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*); virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*); virtual IntVal get_int_val(ExprContext* context, TupleRow*); @@ -77,11 +74,9 @@ class SubExpr : public ArithmeticExpr { class MulExpr : public ArithmeticExpr { public: - MulExpr(const TExprNode& node) : ArithmeticExpr(node) { } - virtual ~MulExpr() { } - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new MulExpr(*this)); - } + MulExpr(const TExprNode& node) : ArithmeticExpr(node) {} + virtual ~MulExpr() {} + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new MulExpr(*this)); } virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*); virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*); virtual IntVal get_int_val(ExprContext* context, TupleRow*); @@ -93,11 +88,9 @@ class MulExpr : public ArithmeticExpr { class DivExpr : public ArithmeticExpr { public: - DivExpr(const TExprNode& node) : ArithmeticExpr(node) { } - virtual ~DivExpr() { } - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new DivExpr(*this)); - } + DivExpr(const TExprNode& node) : ArithmeticExpr(node) {} + virtual ~DivExpr() {} + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new DivExpr(*this)); } virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*); virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*); virtual 
IntVal get_int_val(ExprContext* context, TupleRow*); @@ -109,11 +102,9 @@ class DivExpr : public ArithmeticExpr { class ModExpr : public ArithmeticExpr { public: - ModExpr(const TExprNode& node) : ArithmeticExpr(node) { } - virtual ~ModExpr() { } - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new ModExpr(*this)); - } + ModExpr(const TExprNode& node) : ArithmeticExpr(node) {} + virtual ~ModExpr() {} + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new ModExpr(*this)); } virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*); virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*); virtual IntVal get_int_val(ExprContext* context, TupleRow*); @@ -125,10 +116,10 @@ class ModExpr : public ArithmeticExpr { class BitAndExpr : public ArithmeticExpr { public: - BitAndExpr(const TExprNode& node) : ArithmeticExpr(node) { } - virtual ~BitAndExpr() { } - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new BitAndExpr(*this)); + BitAndExpr(const TExprNode& node) : ArithmeticExpr(node) {} + virtual ~BitAndExpr() {} + virtual Expr* clone(ObjectPool* pool) const override { + return pool->add(new BitAndExpr(*this)); } virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*); virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*); @@ -139,11 +130,9 @@ class BitAndExpr : public ArithmeticExpr { class BitOrExpr : public ArithmeticExpr { public: - BitOrExpr(const TExprNode& node) : ArithmeticExpr(node) { } - virtual ~BitOrExpr() { } - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new BitOrExpr(*this)); - } + BitOrExpr(const TExprNode& node) : ArithmeticExpr(node) {} + virtual ~BitOrExpr() {} + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new BitOrExpr(*this)); } virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*); virtual SmallIntVal get_small_int_val(ExprContext* context, 
TupleRow*); virtual IntVal get_int_val(ExprContext* context, TupleRow*); @@ -153,10 +142,10 @@ class BitOrExpr : public ArithmeticExpr { class BitXorExpr : public ArithmeticExpr { public: - BitXorExpr(const TExprNode& node) : ArithmeticExpr(node) { } - virtual ~BitXorExpr() { } - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new BitXorExpr(*this)); + BitXorExpr(const TExprNode& node) : ArithmeticExpr(node) {} + virtual ~BitXorExpr() {} + virtual Expr* clone(ObjectPool* pool) const override { + return pool->add(new BitXorExpr(*this)); } virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*); virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*); @@ -167,10 +156,10 @@ class BitXorExpr : public ArithmeticExpr { class BitNotExpr : public ArithmeticExpr { public: - BitNotExpr(const TExprNode& node) : ArithmeticExpr(node) { } - virtual ~BitNotExpr() { } - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new BitNotExpr(*this)); + BitNotExpr(const TExprNode& node) : ArithmeticExpr(node) {} + virtual ~BitNotExpr() {} + virtual Expr* clone(ObjectPool* pool) const override { + return pool->add(new BitNotExpr(*this)); } virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*); virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*); @@ -179,6 +168,6 @@ class BitNotExpr : public ArithmeticExpr { virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow*); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/binary_predicate.cpp b/be/src/exprs/binary_predicate.cpp index 3e3187e8a2de70..51e889e7b13fde 100644 --- a/be/src/exprs/binary_predicate.cpp +++ b/be/src/exprs/binary_predicate.cpp @@ -19,13 +19,13 @@ #include -#include "util/debug_util.h" #include "gen_cpp/Exprs_types.h" -#include "runtime/runtime_state.h" -#include "runtime/string_value.h" #include "runtime/datetime_value.h" #include "runtime/decimal_value.h" #include "runtime/decimalv2_value.h" 
+#include "runtime/runtime_state.h" +#include "runtime/string_value.h" +#include "util/debug_util.h" namespace doris { @@ -267,26 +267,26 @@ std::string BinaryPredicate::debug_string() const { return out.str(); } -#define BINARY_PRED_FN(CLASS, TYPE, FN, OP, LLVM_PRED) \ +#define BINARY_PRED_FN(CLASS, TYPE, FN, OP, LLVM_PRED) \ BooleanVal CLASS::get_boolean_val(ExprContext* ctx, TupleRow* row) { \ - TYPE v1 = _children[0]->FN(ctx, row); \ - if (v1.is_null) { \ - return BooleanVal::null(); \ - } \ - TYPE v2 = _children[1]->FN(ctx, row); \ - if (v2.is_null) { \ - return BooleanVal::null(); \ - } \ - return BooleanVal(v1.val OP v2.val); \ + TYPE v1 = _children[0]->FN(ctx, row); \ + if (v1.is_null) { \ + return BooleanVal::null(); \ + } \ + TYPE v2 = _children[1]->FN(ctx, row); \ + if (v2.is_null) { \ + return BooleanVal::null(); \ + } \ + return BooleanVal(v1.val OP v2.val); \ } // add '/**/' to pass code style check of cooder -#define BINARY_PRED_INT_FNS(TYPE, FN) \ - BINARY_PRED_FN(Eq##TYPE##Pred, TYPE, FN, /**/ == /**/, CmpInst::ICMP_EQ) \ - BINARY_PRED_FN(Ne##TYPE##Pred, TYPE, FN, /**/ != /**/, CmpInst::ICMP_NE) \ - BINARY_PRED_FN(Lt##TYPE##Pred, TYPE, FN, /**/ < /**/, CmpInst::ICMP_SLT) \ +#define BINARY_PRED_INT_FNS(TYPE, FN) \ + BINARY_PRED_FN(Eq##TYPE##Pred, TYPE, FN, /**/ == /**/, CmpInst::ICMP_EQ) \ + BINARY_PRED_FN(Ne##TYPE##Pred, TYPE, FN, /**/ != /**/, CmpInst::ICMP_NE) \ + BINARY_PRED_FN(Lt##TYPE##Pred, TYPE, FN, /**/ < /**/, CmpInst::ICMP_SLT) \ BINARY_PRED_FN(Le##TYPE##Pred, TYPE, FN, /**/ <= /**/, CmpInst::ICMP_SLE) \ - BINARY_PRED_FN(Gt##TYPE##Pred, TYPE, FN, /**/ > /**/, CmpInst::ICMP_SGT) \ + BINARY_PRED_FN(Gt##TYPE##Pred, TYPE, FN, /**/ > /**/, CmpInst::ICMP_SGT) \ BINARY_PRED_FN(Ge##TYPE##Pred, TYPE, FN, /**/ >= /**/, CmpInst::ICMP_SGE) BINARY_PRED_INT_FNS(BooleanVal, get_boolean_val); @@ -296,87 +296,86 @@ BINARY_PRED_INT_FNS(IntVal, get_int_val); BINARY_PRED_INT_FNS(BigIntVal, get_big_int_val); BINARY_PRED_INT_FNS(LargeIntVal, 
get_large_int_val); -#define BINARY_PRED_FLOAT_FNS(TYPE, FN) \ +#define BINARY_PRED_FLOAT_FNS(TYPE, FN) \ BINARY_PRED_FN(Eq##TYPE##Pred, TYPE, FN, ==, CmpInst::FCMP_OEQ) \ BINARY_PRED_FN(Ne##TYPE##Pred, TYPE, FN, !=, CmpInst::FCMP_UNE) \ - BINARY_PRED_FN(Lt##TYPE##Pred, TYPE, FN, <, CmpInst::FCMP_OLT) \ + BINARY_PRED_FN(Lt##TYPE##Pred, TYPE, FN, <, CmpInst::FCMP_OLT) \ BINARY_PRED_FN(Le##TYPE##Pred, TYPE, FN, <=, CmpInst::FCMP_OLE) \ - BINARY_PRED_FN(Gt##TYPE##Pred, TYPE, FN, >, CmpInst::FCMP_OGT) \ + BINARY_PRED_FN(Gt##TYPE##Pred, TYPE, FN, >, CmpInst::FCMP_OGT) \ BINARY_PRED_FN(Ge##TYPE##Pred, TYPE, FN, >=, CmpInst::FCMP_OGE) BINARY_PRED_FLOAT_FNS(FloatVal, get_float_val); BINARY_PRED_FLOAT_FNS(DoubleVal, get_double_val); #define COMPLICATE_BINARY_PRED_FN(CLASS, TYPE, FN, DORIS_TYPE, FROM_FUNC, OP) \ - BooleanVal CLASS::get_boolean_val(ExprContext* ctx, TupleRow* row) { \ - TYPE v1 = _children[0]->FN(ctx, row); \ - if (v1.is_null) { \ - return BooleanVal::null(); \ - } \ - TYPE v2 = _children[1]->FN(ctx, row); \ - if (v2.is_null) { \ - return BooleanVal::null(); \ - } \ - DORIS_TYPE pv1 = DORIS_TYPE::FROM_FUNC(v1); \ - DORIS_TYPE pv2 = DORIS_TYPE::FROM_FUNC(v2); \ - return BooleanVal(pv1 OP pv2); \ + BooleanVal CLASS::get_boolean_val(ExprContext* ctx, TupleRow* row) { \ + TYPE v1 = _children[0]->FN(ctx, row); \ + if (v1.is_null) { \ + return BooleanVal::null(); \ + } \ + TYPE v2 = _children[1]->FN(ctx, row); \ + if (v2.is_null) { \ + return BooleanVal::null(); \ + } \ + DORIS_TYPE pv1 = DORIS_TYPE::FROM_FUNC(v1); \ + DORIS_TYPE pv2 = DORIS_TYPE::FROM_FUNC(v2); \ + return BooleanVal(pv1 OP pv2); \ } - -#define COMPLICATE_BINARY_PRED_FNS(TYPE, FN, DORIS_TYPE, FROM_FUNC) \ +#define COMPLICATE_BINARY_PRED_FNS(TYPE, FN, DORIS_TYPE, FROM_FUNC) \ COMPLICATE_BINARY_PRED_FN(Eq##TYPE##Pred, TYPE, FN, DORIS_TYPE, FROM_FUNC, ==) \ COMPLICATE_BINARY_PRED_FN(Ne##TYPE##Pred, TYPE, FN, DORIS_TYPE, FROM_FUNC, !=) \ - COMPLICATE_BINARY_PRED_FN(Lt##TYPE##Pred, TYPE, FN, DORIS_TYPE, 
FROM_FUNC, <) \ + COMPLICATE_BINARY_PRED_FN(Lt##TYPE##Pred, TYPE, FN, DORIS_TYPE, FROM_FUNC, <) \ COMPLICATE_BINARY_PRED_FN(Le##TYPE##Pred, TYPE, FN, DORIS_TYPE, FROM_FUNC, <=) \ - COMPLICATE_BINARY_PRED_FN(Gt##TYPE##Pred, TYPE, FN, DORIS_TYPE, FROM_FUNC, >) \ + COMPLICATE_BINARY_PRED_FN(Gt##TYPE##Pred, TYPE, FN, DORIS_TYPE, FROM_FUNC, >) \ COMPLICATE_BINARY_PRED_FN(Ge##TYPE##Pred, TYPE, FN, DORIS_TYPE, FROM_FUNC, >=) COMPLICATE_BINARY_PRED_FNS(DecimalVal, get_decimal_val, DecimalValue, from_decimal_val) COMPLICATE_BINARY_PRED_FNS(DecimalV2Val, get_decimalv2_val, DecimalV2Value, from_decimal_val) -#define DATETIME_BINARY_PRED_FN(CLASS, OP, LLVM_PRED) \ +#define DATETIME_BINARY_PRED_FN(CLASS, OP, LLVM_PRED) \ BooleanVal CLASS::get_boolean_val(ExprContext* ctx, TupleRow* row) { \ - DateTimeVal v1 = _children[0]->get_datetime_val(ctx, row); \ - if (v1.is_null) { \ - return BooleanVal::null(); \ - } \ - DateTimeVal v2 = _children[1]->get_datetime_val(ctx, row); \ - if (v2.is_null) { \ - return BooleanVal::null(); \ - } \ - return BooleanVal(v1.packed_time OP v2.packed_time); \ + DateTimeVal v1 = _children[0]->get_datetime_val(ctx, row); \ + if (v1.is_null) { \ + return BooleanVal::null(); \ + } \ + DateTimeVal v2 = _children[1]->get_datetime_val(ctx, row); \ + if (v2.is_null) { \ + return BooleanVal::null(); \ + } \ + return BooleanVal(v1.packed_time OP v2.packed_time); \ } -#define DATETIME_BINARY_PRED_FNS() \ - DATETIME_BINARY_PRED_FN(Eq##DateTimeVal##Pred, ==, CmpInst::ICMP_EQ) \ - DATETIME_BINARY_PRED_FN(Ne##DateTimeVal##Pred, !=, CmpInst::ICMP_NE) \ - DATETIME_BINARY_PRED_FN(Lt##DateTimeVal##Pred, <, CmpInst::ICMP_SLT) \ +#define DATETIME_BINARY_PRED_FNS() \ + DATETIME_BINARY_PRED_FN(Eq##DateTimeVal##Pred, ==, CmpInst::ICMP_EQ) \ + DATETIME_BINARY_PRED_FN(Ne##DateTimeVal##Pred, !=, CmpInst::ICMP_NE) \ + DATETIME_BINARY_PRED_FN(Lt##DateTimeVal##Pred, <, CmpInst::ICMP_SLT) \ DATETIME_BINARY_PRED_FN(Le##DateTimeVal##Pred, <=, CmpInst::ICMP_SLE) \ - 
DATETIME_BINARY_PRED_FN(Gt##DateTimeVal##Pred, >, CmpInst::ICMP_SGT) \ + DATETIME_BINARY_PRED_FN(Gt##DateTimeVal##Pred, >, CmpInst::ICMP_SGT) \ DATETIME_BINARY_PRED_FN(Ge##DateTimeVal##Pred, >=, CmpInst::ICMP_SGE) DATETIME_BINARY_PRED_FNS() -#define STRING_BINARY_PRED_FN(CLASS, OP) \ +#define STRING_BINARY_PRED_FN(CLASS, OP) \ BooleanVal CLASS::get_boolean_val(ExprContext* ctx, TupleRow* row) { \ - StringVal v1 = _children[0]->get_string_val(ctx, row); \ - if (v1.is_null) { \ - return BooleanVal::null(); \ - } \ - StringVal v2 = _children[1]->get_string_val(ctx, row); \ - if (v2.is_null) { \ - return BooleanVal::null(); \ - } \ - StringValue pv1 = StringValue::from_string_val(v1); \ - StringValue pv2 = StringValue::from_string_val(v2); \ - return BooleanVal(pv1 OP pv2); \ + StringVal v1 = _children[0]->get_string_val(ctx, row); \ + if (v1.is_null) { \ + return BooleanVal::null(); \ + } \ + StringVal v2 = _children[1]->get_string_val(ctx, row); \ + if (v2.is_null) { \ + return BooleanVal::null(); \ + } \ + StringValue pv1 = StringValue::from_string_val(v1); \ + StringValue pv2 = StringValue::from_string_val(v2); \ + return BooleanVal(pv1 OP pv2); \ } -#define STRING_BINARY_PRED_FNS() \ +#define STRING_BINARY_PRED_FNS() \ STRING_BINARY_PRED_FN(Ne##StringVal##Pred, !=) \ - STRING_BINARY_PRED_FN(Lt##StringVal##Pred, <) \ + STRING_BINARY_PRED_FN(Lt##StringVal##Pred, <) \ STRING_BINARY_PRED_FN(Le##StringVal##Pred, <=) \ - STRING_BINARY_PRED_FN(Gt##StringVal##Pred, >) \ + STRING_BINARY_PRED_FN(Gt##StringVal##Pred, >) \ STRING_BINARY_PRED_FN(Ge##StringVal##Pred, >=) STRING_BINARY_PRED_FNS() @@ -396,16 +395,16 @@ BooleanVal EqStringValPred::get_boolean_val(ExprContext* ctx, TupleRow* row) { return BooleanVal(string_compare((char*)v1.ptr, v1.len, (char*)v2.ptr, v2.len, v1.len) == 0); } -#define BINARY_PRED_FOR_NULL_FN(CLASS, TYPE, FN, OP, LLVM_PRED) \ +#define BINARY_PRED_FOR_NULL_FN(CLASS, TYPE, FN, OP, LLVM_PRED) \ BooleanVal CLASS::get_boolean_val(ExprContext* ctx, 
TupleRow* row) { \ - TYPE v1 = _children[0]->FN(ctx, row); \ - TYPE v2 = _children[1]->FN(ctx, row); \ - if (v1.is_null && v2.is_null) { \ - return BooleanVal(true); \ - } else if (v1.is_null || v2.is_null) { \ - return BooleanVal(false); \ - } \ - return BooleanVal(v1.val OP v2.val); \ + TYPE v1 = _children[0]->FN(ctx, row); \ + TYPE v2 = _children[1]->FN(ctx, row); \ + if (v1.is_null && v2.is_null) { \ + return BooleanVal(true); \ + } else if (v1.is_null || v2.is_null) { \ + return BooleanVal(false); \ + } \ + return BooleanVal(v1.val OP v2.val); \ } // add '/**/' to pass code style check of cooder @@ -425,37 +424,37 @@ BINARY_PRED_FOR_NULL_INT_FNS(LargeIntVal, get_large_int_val); BINARY_PRED_FOR_NULL_FLOAT_FNS(FloatVal, get_float_val); BINARY_PRED_FOR_NULL_FLOAT_FNS(DoubleVal, get_double_val); - #define COMPLICATE_BINARY_FOR_NULL_PRED_FN(CLASS, TYPE, FN, DORIS_TYPE, FROM_FUNC, OP) \ - BooleanVal CLASS::get_boolean_val(ExprContext* ctx, TupleRow* row) { \ - TYPE v1 = _children[0]->FN(ctx, row); \ - TYPE v2 = _children[1]->FN(ctx, row); \ - if (v1.is_null && v2.is_null) { \ - return BooleanVal(true); \ - } else if (v1.is_null || v2.is_null) { \ - return BooleanVal(false); \ - } \ - DORIS_TYPE pv1 = DORIS_TYPE::FROM_FUNC(v1); \ - DORIS_TYPE pv2 = DORIS_TYPE::FROM_FUNC(v2); \ - return BooleanVal(pv1 OP pv2); \ + BooleanVal CLASS::get_boolean_val(ExprContext* ctx, TupleRow* row) { \ + TYPE v1 = _children[0]->FN(ctx, row); \ + TYPE v2 = _children[1]->FN(ctx, row); \ + if (v1.is_null && v2.is_null) { \ + return BooleanVal(true); \ + } else if (v1.is_null || v2.is_null) { \ + return BooleanVal(false); \ + } \ + DORIS_TYPE pv1 = DORIS_TYPE::FROM_FUNC(v1); \ + DORIS_TYPE pv2 = DORIS_TYPE::FROM_FUNC(v2); \ + return BooleanVal(pv1 OP pv2); \ } #define COMPLICATE_BINARY_FOR_NULL_PRED_FNS(TYPE, FN, DORIS_TYPE, FROM_FUNC) \ COMPLICATE_BINARY_FOR_NULL_PRED_FN(EqForNull##TYPE##Pred, TYPE, FN, DORIS_TYPE, FROM_FUNC, ==) COMPLICATE_BINARY_FOR_NULL_PRED_FNS(DecimalVal, 
get_decimal_val, DecimalValue, from_decimal_val) -COMPLICATE_BINARY_FOR_NULL_PRED_FNS(DecimalV2Val, get_decimalv2_val, DecimalV2Value, from_decimal_val) +COMPLICATE_BINARY_FOR_NULL_PRED_FNS(DecimalV2Val, get_decimalv2_val, DecimalV2Value, + from_decimal_val) -#define DATETIME_BINARY_FOR_NULL_PRED_FN(CLASS, OP, LLVM_PRED) \ +#define DATETIME_BINARY_FOR_NULL_PRED_FN(CLASS, OP, LLVM_PRED) \ BooleanVal CLASS::get_boolean_val(ExprContext* ctx, TupleRow* row) { \ - DateTimeVal v1 = _children[0]->get_datetime_val(ctx, row); \ - DateTimeVal v2 = _children[1]->get_datetime_val(ctx, row); \ - if (v1.is_null && v2.is_null) { \ - return BooleanVal(true); \ - } else if (v1.is_null || v2.is_null) { \ - return BooleanVal(false); \ - } \ - return BooleanVal(v1.packed_time OP v2.packed_time); \ + DateTimeVal v1 = _children[0]->get_datetime_val(ctx, row); \ + DateTimeVal v2 = _children[1]->get_datetime_val(ctx, row); \ + if (v1.is_null && v2.is_null) { \ + return BooleanVal(true); \ + } else if (v1.is_null || v2.is_null) { \ + return BooleanVal(false); \ + } \ + return BooleanVal(v1.packed_time OP v2.packed_time); \ } #define DATETIME_BINARY_FOR_NULL_PRED_FNS() \ @@ -478,4 +477,4 @@ BooleanVal EqForNullStringValPred::get_boolean_val(ExprContext* ctx, TupleRow* r return BooleanVal(string_compare((char*)v1.ptr, v1.len, (char*)v2.ptr, v2.len, v1.len) == 0); } -} +} // namespace doris diff --git a/be/src/exprs/binary_predicate.h b/be/src/exprs/binary_predicate.h index 893f446f933de2..22863629852e59 100644 --- a/be/src/exprs/binary_predicate.h +++ b/be/src/exprs/binary_predicate.h @@ -18,8 +18,8 @@ #ifndef DORIS_BE_SRC_QUERY_EXPRS_BINARY_PREDICATE_H #define DORIS_BE_SRC_QUERY_EXPRS_BINARY_PREDICATE_H -#include #include +#include #include "common/object_pool.h" #include "exprs/predicate.h" @@ -30,9 +30,8 @@ namespace doris { class BinaryPredicate : public Predicate { public: static Expr* from_thrift(const TExprNode& node); - BinaryPredicate(const TExprNode& node) : Predicate(node) { - } - 
virtual ~BinaryPredicate() { } + BinaryPredicate(const TExprNode& node) : Predicate(node) {} + virtual ~BinaryPredicate() {} protected: friend class Expr; @@ -41,18 +40,19 @@ class BinaryPredicate : public Predicate { virtual std::string debug_string() const; }; -#define BIN_PRED_CLASS_DEFINE(CLASS) \ - class CLASS : public BinaryPredicate { \ - public: \ - CLASS(const TExprNode& node) : BinaryPredicate(node) { } \ - virtual ~CLASS() { } \ - virtual Expr* clone(ObjectPool* pool) const override { \ - return pool->add(new CLASS(*this)); } \ - \ +#define BIN_PRED_CLASS_DEFINE(CLASS) \ + class CLASS : public BinaryPredicate { \ + public: \ + CLASS(const TExprNode& node) : BinaryPredicate(node) {} \ + virtual ~CLASS() {} \ + virtual Expr* clone(ObjectPool* pool) const override { \ + return pool->add(new CLASS(*this)); \ + } \ + \ virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow* row); \ }; -#define BIN_PRED_CLASSES_DEFINE(TYPE) \ +#define BIN_PRED_CLASSES_DEFINE(TYPE) \ BIN_PRED_CLASS_DEFINE(Eq##TYPE##Pred) \ BIN_PRED_CLASS_DEFINE(Ne##TYPE##Pred) \ BIN_PRED_CLASS_DEFINE(Lt##TYPE##Pred) \ @@ -73,21 +73,19 @@ BIN_PRED_CLASSES_DEFINE(DateTimeVal) BIN_PRED_CLASSES_DEFINE(DecimalVal) BIN_PRED_CLASSES_DEFINE(DecimalV2Val) - -#define BIN_PRED_FOR_NULL_CLASS_DEFINE(CLASS) \ - class CLASS : public BinaryPredicate { \ - public: \ - CLASS(const TExprNode& node) : BinaryPredicate(node) { } \ - virtual ~CLASS() { } \ - virtual Expr* clone(ObjectPool* pool) const override { \ - return pool->add(new CLASS(*this)); } \ - \ +#define BIN_PRED_FOR_NULL_CLASS_DEFINE(CLASS) \ + class CLASS : public BinaryPredicate { \ + public: \ + CLASS(const TExprNode& node) : BinaryPredicate(node) {} \ + virtual ~CLASS() {} \ + virtual Expr* clone(ObjectPool* pool) const override { \ + return pool->add(new CLASS(*this)); \ + } \ + \ virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow* row); \ }; -#define BIN_PRED_FOR_NULL_CLASSES_DEFINE(TYPE) \ - 
BIN_PRED_FOR_NULL_CLASS_DEFINE(EqForNull##TYPE##Pred) - +#define BIN_PRED_FOR_NULL_CLASSES_DEFINE(TYPE) BIN_PRED_FOR_NULL_CLASS_DEFINE(EqForNull##TYPE##Pred) BIN_PRED_FOR_NULL_CLASSES_DEFINE(BooleanVal) BIN_PRED_FOR_NULL_CLASSES_DEFINE(TinyIntVal) @@ -101,5 +99,5 @@ BIN_PRED_FOR_NULL_CLASSES_DEFINE(StringVal) BIN_PRED_FOR_NULL_CLASSES_DEFINE(DateTimeVal) BIN_PRED_FOR_NULL_CLASSES_DEFINE(DecimalVal) BIN_PRED_FOR_NULL_CLASSES_DEFINE(DecimalV2Val) -} +} // namespace doris #endif diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp index 4b0a307f6591c2..76267bb0519b70 100644 --- a/be/src/exprs/bitmap_function.cpp +++ b/be/src/exprs/bitmap_function.cpp @@ -18,10 +18,10 @@ #include "exprs/bitmap_function.h" #include "exprs/anyval_util.h" +#include "gutil/strings/numbers.h" +#include "gutil/strings/split.h" #include "util/bitmap_value.h" #include "util/string_parser.hpp" -#include "gutil/strings/split.h" -#include "gutil/strings/numbers.h" namespace doris { @@ -33,53 +33,53 @@ const int DATETIME_TYPE_BYTE_SIZE = 4; const int DECIMAL_BYTE_SIZE = 16; // get_val start -template +template T get_val(const ValType& x) { DCHECK(!x.is_null); return x.val; } -template<> +template <> StringValue get_val(const StringVal& x) { DCHECK(!x.is_null); return StringValue::from_string_val(x); } -template<> +template <> DateTimeValue get_val(const DateTimeVal& x) { return DateTimeValue::from_datetime_val(x); } -template<> +template <> DecimalV2Value get_val(const DecimalV2Val& x) { return DecimalV2Value::from_decimal_val(x); } // get_val end // serialize_size start -template +template int32_t serialize_size(const T& v) { return sizeof(T); } -template<> +template <> int32_t serialize_size(const DateTimeValue& v) { return DATETIME_PACKED_TIME_BYTE_SIZE + DATETIME_TYPE_BYTE_SIZE; } -template<> +template <> int32_t serialize_size(const DecimalV2Value& v) { return DECIMAL_BYTE_SIZE; } -template<> +template <> int32_t serialize_size(const StringValue& v) { return v.len 
+ 4; } // serialize_size end // write_to start -template +template char* write_to(const T& v, char* dest) { size_t type_size = sizeof(T); memcpy(dest, &v, type_size); @@ -87,7 +87,7 @@ char* write_to(const T& v, char* dest) { return dest; } -template<> +template <> char* write_to(const DateTimeValue& v, char* dest) { DateTimeVal value; v.to_datetime_val(&value); @@ -98,7 +98,7 @@ char* write_to(const DateTimeValue& v, char* dest) { return dest; } -template<> +template <> char* write_to(const DecimalV2Value& v, char* dest) { __int128 value = v.value(); memcpy(dest, &value, DECIMAL_BYTE_SIZE); @@ -106,7 +106,7 @@ char* write_to(const DecimalV2Value& v, char* dest) { return dest; } -template<> +template <> char* write_to(const StringValue& v, char* dest) { *(int32_t*)dest = v.len; dest += 4; @@ -117,14 +117,14 @@ char* write_to(const StringValue& v, char* dest) { // write_to end // read_from start -template +template void read_from(const char** src, T* result) { size_t type_size = sizeof(T); memcpy(result, *src, type_size); *src += type_size; } -template<> +template <> void read_from(const char** src, DateTimeValue* result) { DateTimeVal value; value.is_null = false; @@ -132,10 +132,11 @@ void read_from(const char** src, DateTimeValue* result) { *src += DATETIME_PACKED_TIME_BYTE_SIZE; value.type = *(int*)(*src); *src += DATETIME_TYPE_BYTE_SIZE; - *result = DateTimeValue::from_datetime_val(value);; + *result = DateTimeValue::from_datetime_val(value); + ; } -template<> +template <> void read_from(const char** src, DecimalV2Value* result) { __int128 v = 0; memcpy(&v, *src, DECIMAL_BYTE_SIZE); @@ -143,11 +144,11 @@ void read_from(const char** src, DecimalV2Value* result) { *result = DecimalV2Value(v); } -template<> +template <> void read_from(const char** src, StringValue* result) { int32_t length = *(int32_t*)(*src); *src += 4; - *result = StringValue((char *)*src, length); + *result = StringValue((char*)*src, length); *src += length; } // read_from end @@ -156,7 +157,7 
@@ void read_from(const char** src, StringValue* result) { static StringVal serialize(FunctionContext* ctx, BitmapValue* value) { StringVal result(ctx, value->getSizeInBytes()); - value->write((char*) result.ptr); + value->write((char*)result.ptr); return result; } @@ -164,14 +165,12 @@ static StringVal serialize(FunctionContext* ctx, BitmapValue* value) { // Usage: intersect_count(bitmap_column_to_count, filter_column, filter_values ...) // Example: intersect_count(user_id, event, 'A', 'B', 'C'), meaning find the intersect count of user_id in all A/B/C 3 bitmaps // Todo(kks) Use Array type instead of variable arguments -template +template struct BitmapIntersect { public: BitmapIntersect() {} - explicit BitmapIntersect(const char* src) { - deserialize(src); - } + explicit BitmapIntersect(const char* src) { deserialize(src); } void add_key(const T key) { BitmapValue empty_bitmap; @@ -185,7 +184,7 @@ struct BitmapIntersect { } void merge(const BitmapIntersect& other) { - for (auto& kv: other._bitmaps) { + for (auto& kv : other._bitmaps) { if (_bitmaps.find(kv.first) != _bitmaps.end()) { _bitmaps[kv.first] |= kv.second; } else { @@ -204,7 +203,7 @@ struct BitmapIntersect { auto it = _bitmaps.begin(); result |= it->second; it++; - for (;it != _bitmaps.end(); it++) { + for (; it != _bitmaps.end(); it++) { result &= it->second; } @@ -214,9 +213,10 @@ struct BitmapIntersect { // the serialize size size_t size() { size_t size = 4; - for (auto& kv: _bitmaps) { - size += detail::serialize_size(kv.first);; - size += kv.second.getSizeInBytes(); + for (auto& kv : _bitmaps) { + size += detail::serialize_size(kv.first); + ; + size += kv.second.getSizeInBytes(); } return size; } @@ -226,7 +226,7 @@ struct BitmapIntersect { char* writer = dest; *(int32_t*)writer = _bitmaps.size(); writer += 4; - for (auto& kv: _bitmaps) { + for (auto& kv : _bitmaps) { writer = detail::write_to(kv.first, writer); kv.second.write(writer); writer += kv.second.getSizeInBytes(); @@ -250,8 +250,7 @@ 
struct BitmapIntersect { std::map _bitmaps; }; -void BitmapFunctions::init() { -} +void BitmapFunctions::init() {} void BitmapFunctions::bitmap_init(FunctionContext* ctx, StringVal* dst) { dst->is_null = false; @@ -298,7 +297,7 @@ void BitmapFunctions::bitmap_union(FunctionContext* ctx, const StringVal& src, S if (src.len == 0) { (*dst_bitmap) |= *reinterpret_cast(src.ptr); } else { - (*dst_bitmap) |= BitmapValue((char*) src.ptr); + (*dst_bitmap) |= BitmapValue((char*)src.ptr); } } @@ -315,7 +314,7 @@ void BitmapFunctions::bitmap_intersect(FunctionContext* ctx, const StringVal& sr if (dst->is_null) { dst->is_null = false; dst->len = sizeof(BitmapValue); - dst->ptr = (uint8_t*)new BitmapValue((char*) src.ptr); + dst->ptr = (uint8_t*)new BitmapValue((char*)src.ptr); return; } auto dst_bitmap = reinterpret_cast(dst->ptr); @@ -323,7 +322,7 @@ void BitmapFunctions::bitmap_intersect(FunctionContext* ctx, const StringVal& sr if (src.len == 0) { (*dst_bitmap) &= *reinterpret_cast(src.ptr); } else { - (*dst_bitmap) &= BitmapValue((char*) src.ptr); + (*dst_bitmap) &= BitmapValue((char*)src.ptr); } } @@ -334,22 +333,25 @@ BigIntVal BitmapFunctions::bitmap_count(FunctionContext* ctx, const StringVal& s // zero size means the src input is a agg object if (src.len == 0) { auto bitmap = reinterpret_cast(src.ptr); - return { bitmap->cardinality() }; + return {bitmap->cardinality()}; } else { BitmapValue bitmap((char*)src.ptr); - return { bitmap.cardinality() }; + return {bitmap.cardinality()}; } } -StringVal BitmapFunctions::to_bitmap(doris_udf::FunctionContext* ctx, const doris_udf::StringVal& src) { +StringVal BitmapFunctions::to_bitmap(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& src) { BitmapValue bitmap; if (!src.is_null) { StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; - uint64_t int_value = StringParser::string_to_unsigned_int(reinterpret_cast(src.ptr), src.len, &parse_result); + uint64_t int_value = 
StringParser::string_to_unsigned_int( + reinterpret_cast(src.ptr), src.len, &parse_result); if (UNLIKELY(parse_result != StringParser::PARSE_SUCCESS)) { std::stringstream error_msg; error_msg << "The input: " << std::string(reinterpret_cast(src.ptr), src.len) - << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + << " is not valid, to_bitmap only support bigint value from 0 to " + "18446744073709551615 currently"; ctx->set_error(error_msg.str().c_str()); return StringVal::null(); } @@ -358,10 +360,12 @@ StringVal BitmapFunctions::to_bitmap(doris_udf::FunctionContext* ctx, const dori return serialize(ctx, &bitmap); } -StringVal BitmapFunctions::bitmap_hash(doris_udf::FunctionContext* ctx, const doris_udf::StringVal& src) { +StringVal BitmapFunctions::bitmap_hash(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& src) { BitmapValue bitmap; if (!src.is_null) { - uint32_t hash_value = HashUtil::murmur_hash3_32(src.ptr, src.len, HashUtil::MURMUR3_32_SEED); + uint32_t hash_value = + HashUtil::murmur_hash3_32(src.ptr, src.len, HashUtil::MURMUR3_32_SEED); bitmap.add(hash_value); } return serialize(ctx, &bitmap); @@ -379,7 +383,7 @@ StringVal BitmapFunctions::bitmap_serialize(FunctionContext* ctx, const StringVa } // This is a init function for intersect_count not for bitmap_intersect. 
-template +template void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, StringVal* dst) { dst->is_null = false; dst->len = sizeof(BitmapIntersect); @@ -395,25 +399,28 @@ void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, StringVal* dst dst->ptr = (uint8_t*)intersect; } -template -void BitmapFunctions::bitmap_intersect_update(FunctionContext* ctx, const StringVal& src, const ValType& key, - int num_key, const ValType* keys, const StringVal* dst) { +template +void BitmapFunctions::bitmap_intersect_update(FunctionContext* ctx, const StringVal& src, + const ValType& key, int num_key, const ValType* keys, + const StringVal* dst) { auto* dst_bitmap = reinterpret_cast*>(dst->ptr); // zero size means the src input is a agg object if (src.len == 0) { - dst_bitmap->update(detail::get_val(key), *reinterpret_cast(src.ptr)); + dst_bitmap->update(detail::get_val(key), + *reinterpret_cast(src.ptr)); } else { dst_bitmap->update(detail::get_val(key), BitmapValue((char*)src.ptr)); } } -template -void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, const StringVal& src, const StringVal* dst) { +template +void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, const StringVal& src, + const StringVal* dst) { auto* dst_bitmap = reinterpret_cast*>(dst->ptr); dst_bitmap->merge(BitmapIntersect((char*)src.ptr)); } -template +template StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, const StringVal& src) { auto* src_bitmap = reinterpret_cast*>(src.ptr); StringVal result(ctx, src_bitmap->size()); @@ -422,7 +429,7 @@ StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, cons return result; } -template +template BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, const StringVal& src) { auto* src_bitmap = reinterpret_cast*>(src.ptr); BigIntVal result = BigIntVal(src_bitmap->intersect_count()); @@ -430,7 +437,8 @@ BigIntVal 
BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, const return result; } -StringVal BitmapFunctions::bitmap_or(FunctionContext* ctx, const StringVal& lhs, const StringVal& rhs){ +StringVal BitmapFunctions::bitmap_or(FunctionContext* ctx, const StringVal& lhs, + const StringVal& rhs) { if (lhs.is_null || rhs.is_null) { return StringVal::null(); } @@ -449,7 +457,8 @@ StringVal BitmapFunctions::bitmap_or(FunctionContext* ctx, const StringVal& lhs, return serialize(ctx, &bitmap); } -StringVal BitmapFunctions::bitmap_and(FunctionContext* ctx, const StringVal& lhs, const StringVal& rhs){ +StringVal BitmapFunctions::bitmap_and(FunctionContext* ctx, const StringVal& lhs, + const StringVal& rhs) { if (lhs.is_null || rhs.is_null) { return StringVal::null(); } @@ -489,8 +498,8 @@ StringVal BitmapFunctions::bitmap_from_string(FunctionContext* ctx, const String std::vector bits; // The constructor of `stringPiece` only support int type. - if ((input.len > INT32_MAX) - || !SplitStringAndParse({(const char*)input.ptr, (int)input.len}, ",", &safe_strtou64, &bits)) { + if ((input.len > INT32_MAX) || !SplitStringAndParse({(const char*)input.ptr, (int)input.len}, + ",", &safe_strtou64, &bits)) { return StringVal::null(); } @@ -498,21 +507,23 @@ StringVal BitmapFunctions::bitmap_from_string(FunctionContext* ctx, const String return serialize(ctx, &bitmap); } -BooleanVal BitmapFunctions::bitmap_contains(FunctionContext* ctx, const StringVal& src, const BigIntVal& input) { +BooleanVal BitmapFunctions::bitmap_contains(FunctionContext* ctx, const StringVal& src, + const BigIntVal& input) { if (src.is_null || input.is_null) { return BooleanVal::null(); } if (src.len == 0) { auto bitmap = reinterpret_cast(src.ptr); - return { bitmap->contains(input.val) }; + return {bitmap->contains(input.val)}; } BitmapValue bitmap((char*)src.ptr); - return { bitmap.contains(input.val) }; + return {bitmap.contains(input.val)}; } -BooleanVal BitmapFunctions::bitmap_has_any(FunctionContext* 
ctx, const StringVal& lhs, const StringVal& rhs) { +BooleanVal BitmapFunctions::bitmap_has_any(FunctionContext* ctx, const StringVal& lhs, + const StringVal& rhs) { if (lhs.is_null || rhs.is_null) { return BooleanVal::null(); } @@ -530,134 +541,143 @@ BooleanVal BitmapFunctions::bitmap_has_any(FunctionContext* ctx, const StringVal bitmap &= BitmapValue((char*)rhs.ptr); } - return { bitmap.cardinality() != 0 }; + return {bitmap.cardinality() != 0}; } -template void BitmapFunctions::bitmap_update_int( - FunctionContext* ctx, const TinyIntVal& src, StringVal* dst); -template void BitmapFunctions::bitmap_update_int( - FunctionContext* ctx, const SmallIntVal& src, StringVal* dst); -template void BitmapFunctions::bitmap_update_int( - FunctionContext* ctx, const IntVal& src, StringVal* dst); -template void BitmapFunctions::bitmap_update_int( - FunctionContext* ctx, const BigIntVal& src, StringVal* dst); +template void BitmapFunctions::bitmap_update_int(FunctionContext* ctx, + const TinyIntVal& src, StringVal* dst); +template void BitmapFunctions::bitmap_update_int(FunctionContext* ctx, + const SmallIntVal& src, + StringVal* dst); +template void BitmapFunctions::bitmap_update_int(FunctionContext* ctx, const IntVal& src, + StringVal* dst); +template void BitmapFunctions::bitmap_update_int(FunctionContext* ctx, + const BigIntVal& src, StringVal* dst); // this is init function for intersect_count not for bitmap_intersect -template void BitmapFunctions::bitmap_intersect_init( - FunctionContext* ctx, StringVal* dst); -template void BitmapFunctions::bitmap_intersect_init( - FunctionContext* ctx, StringVal* dst); -template void BitmapFunctions::bitmap_intersect_init( - FunctionContext* ctx, StringVal* dst); -template void BitmapFunctions::bitmap_intersect_init( - FunctionContext* ctx, StringVal* dst); -template void BitmapFunctions::bitmap_intersect_init<__int128, LargeIntVal>( - FunctionContext* ctx, StringVal* dst); -template void BitmapFunctions::bitmap_intersect_init( - 
FunctionContext* ctx, StringVal* dst); -template void BitmapFunctions::bitmap_intersect_init( - FunctionContext* ctx, StringVal* dst); +template void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, + StringVal* dst); +template void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, + StringVal* dst); +template void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, + StringVal* dst); +template void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, + StringVal* dst); +template void BitmapFunctions::bitmap_intersect_init<__int128, LargeIntVal>(FunctionContext* ctx, + StringVal* dst); +template void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, + StringVal* dst); +template void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, + StringVal* dst); template void BitmapFunctions::bitmap_intersect_init( - FunctionContext* ctx, StringVal* dst); + FunctionContext* ctx, StringVal* dst); template void BitmapFunctions::bitmap_intersect_init( - FunctionContext* ctx, StringVal* dst); -template void BitmapFunctions::bitmap_intersect_init( - FunctionContext* ctx, StringVal* dst); - + FunctionContext* ctx, StringVal* dst); +template void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, + StringVal* dst); template void BitmapFunctions::bitmap_intersect_update( - FunctionContext* ctx, const StringVal& src, const TinyIntVal& key, - int num_key, const TinyIntVal* keys, const StringVal* dst); + FunctionContext* ctx, const StringVal& src, const TinyIntVal& key, int num_key, + const TinyIntVal* keys, const StringVal* dst); template void BitmapFunctions::bitmap_intersect_update( - FunctionContext* ctx, const StringVal& src, const SmallIntVal& key, - int num_key, const SmallIntVal* keys, const StringVal* dst); + FunctionContext* ctx, const StringVal& src, const SmallIntVal& key, int num_key, + const SmallIntVal* keys, const StringVal* dst); template void BitmapFunctions::bitmap_intersect_update( - 
FunctionContext* ctx, const StringVal& src, const IntVal& key, - int num_key, const IntVal* keys, const StringVal* dst); + FunctionContext* ctx, const StringVal& src, const IntVal& key, int num_key, + const IntVal* keys, const StringVal* dst); template void BitmapFunctions::bitmap_intersect_update( - FunctionContext* ctx, const StringVal& src, const BigIntVal& key, - int num_key, const BigIntVal* keys, const StringVal* dst); + FunctionContext* ctx, const StringVal& src, const BigIntVal& key, int num_key, + const BigIntVal* keys, const StringVal* dst); template void BitmapFunctions::bitmap_intersect_update<__int128, LargeIntVal>( - FunctionContext* ctx, const StringVal& src, const LargeIntVal& key, - int num_key, const LargeIntVal* keys, const StringVal* dst); + FunctionContext* ctx, const StringVal& src, const LargeIntVal& key, int num_key, + const LargeIntVal* keys, const StringVal* dst); template void BitmapFunctions::bitmap_intersect_update( - FunctionContext* ctx, const StringVal& src, const FloatVal& key, - int num_key, const FloatVal* keys, const StringVal* dst); + FunctionContext* ctx, const StringVal& src, const FloatVal& key, int num_key, + const FloatVal* keys, const StringVal* dst); template void BitmapFunctions::bitmap_intersect_update( - FunctionContext* ctx, const StringVal& src, const DoubleVal& key, - int num_key, const DoubleVal* keys, const StringVal* dst); + FunctionContext* ctx, const StringVal& src, const DoubleVal& key, int num_key, + const DoubleVal* keys, const StringVal* dst); template void BitmapFunctions::bitmap_intersect_update( - FunctionContext* ctx, const StringVal& src, const DateTimeVal& key, - int num_key, const DateTimeVal* keys, const StringVal* dst); + FunctionContext* ctx, const StringVal& src, const DateTimeVal& key, int num_key, + const DateTimeVal* keys, const StringVal* dst); template void BitmapFunctions::bitmap_intersect_update( - FunctionContext* ctx, const StringVal& src, const DecimalV2Val& key, - int num_key, const 
DecimalV2Val* keys, const StringVal* dst); + FunctionContext* ctx, const StringVal& src, const DecimalV2Val& key, int num_key, + const DecimalV2Val* keys, const StringVal* dst); template void BitmapFunctions::bitmap_intersect_update( - FunctionContext* ctx, const StringVal& src, const StringVal& key, - int num_key, const StringVal* keys, const StringVal* dst); - - -template void BitmapFunctions::bitmap_intersect_merge( - FunctionContext* ctx, const StringVal& src, const StringVal* dst); -template void BitmapFunctions::bitmap_intersect_merge( - FunctionContext* ctx, const StringVal& src, const StringVal* dst); -template void BitmapFunctions::bitmap_intersect_merge( - FunctionContext* ctx, const StringVal& src, const StringVal* dst); -template void BitmapFunctions::bitmap_intersect_merge( - FunctionContext* ctx, const StringVal& src, const StringVal* dst); -template void BitmapFunctions::bitmap_intersect_merge<__int128>( - FunctionContext* ctx, const StringVal& src, const StringVal* dst); -template void BitmapFunctions::bitmap_intersect_merge( - FunctionContext* ctx, const StringVal& src, const StringVal* dst); -template void BitmapFunctions::bitmap_intersect_merge( - FunctionContext* ctx, const StringVal& src, const StringVal* dst); -template void BitmapFunctions::bitmap_intersect_merge( - FunctionContext* ctx, const StringVal& src, const StringVal* dst); -template void BitmapFunctions::bitmap_intersect_merge( - FunctionContext* ctx, const StringVal& src, const StringVal* dst); -template void BitmapFunctions::bitmap_intersect_merge( - FunctionContext* ctx, const StringVal& src, const StringVal* dst); - -template StringVal BitmapFunctions::bitmap_intersect_serialize( - FunctionContext* ctx, const StringVal& src); -template StringVal BitmapFunctions::bitmap_intersect_serialize( - FunctionContext* ctx, const StringVal& src); -template StringVal BitmapFunctions::bitmap_intersect_serialize( - FunctionContext* ctx, const StringVal& src); -template StringVal 
BitmapFunctions::bitmap_intersect_serialize( - FunctionContext* ctx, const StringVal& src); -template StringVal BitmapFunctions::bitmap_intersect_serialize<__int128>( - FunctionContext* ctx, const StringVal& src); -template StringVal BitmapFunctions::bitmap_intersect_serialize( - FunctionContext* ctx, const StringVal& src); -template StringVal BitmapFunctions::bitmap_intersect_serialize( - FunctionContext* ctx, const StringVal& src); -template StringVal BitmapFunctions::bitmap_intersect_serialize( - FunctionContext* ctx, const StringVal& src); + FunctionContext* ctx, const StringVal& src, const StringVal& key, int num_key, + const StringVal* keys, const StringVal* dst); + +template void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, + const StringVal& src, + const StringVal* dst); +template void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, + const StringVal& src, + const StringVal* dst); +template void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, + const StringVal& src, + const StringVal* dst); +template void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, + const StringVal& src, + const StringVal* dst); +template void BitmapFunctions::bitmap_intersect_merge<__int128>(FunctionContext* ctx, + const StringVal& src, + const StringVal* dst); +template void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, + const StringVal& src, + const StringVal* dst); +template void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, + const StringVal& src, + const StringVal* dst); +template void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, + const StringVal& src, + const StringVal* dst); +template void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, + const StringVal& src, + const StringVal* dst); +template void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, + const StringVal& src, + const StringVal* dst); + +template StringVal 
BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, + const StringVal& src); +template StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, + const StringVal& src); +template StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, + const StringVal& src); +template StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, + const StringVal& src); +template StringVal BitmapFunctions::bitmap_intersect_serialize<__int128>(FunctionContext* ctx, + const StringVal& src); +template StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, + const StringVal& src); +template StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, + const StringVal& src); +template StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, + const StringVal& src); template StringVal BitmapFunctions::bitmap_intersect_serialize( - FunctionContext* ctx, const StringVal& src); -template StringVal BitmapFunctions::bitmap_intersect_serialize( - FunctionContext* ctx, const StringVal& src); - -template BigIntVal BitmapFunctions::bitmap_intersect_finalize( - FunctionContext* ctx, const StringVal& src); -template BigIntVal BitmapFunctions::bitmap_intersect_finalize( - FunctionContext* ctx, const StringVal& src); -template BigIntVal BitmapFunctions::bitmap_intersect_finalize( - FunctionContext* ctx, const StringVal& src); -template BigIntVal BitmapFunctions::bitmap_intersect_finalize( - FunctionContext* ctx, const StringVal& src); -template BigIntVal BitmapFunctions::bitmap_intersect_finalize<__int128>( - FunctionContext* ctx, const StringVal& src); -template BigIntVal BitmapFunctions::bitmap_intersect_finalize( - FunctionContext* ctx, const StringVal& src); -template BigIntVal BitmapFunctions::bitmap_intersect_finalize( - FunctionContext* ctx, const StringVal& src); -template BigIntVal BitmapFunctions::bitmap_intersect_finalize( - FunctionContext* ctx, const StringVal& 
src); -template BigIntVal BitmapFunctions::bitmap_intersect_finalize( - FunctionContext* ctx, const StringVal& src); -template BigIntVal BitmapFunctions::bitmap_intersect_finalize( - FunctionContext* ctx, const StringVal& src); - -} + FunctionContext* ctx, const StringVal& src); +template StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, + const StringVal& src); + +template BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, + const StringVal& src); +template BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, + const StringVal& src); +template BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, + const StringVal& src); +template BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, + const StringVal& src); +template BigIntVal BitmapFunctions::bitmap_intersect_finalize<__int128>(FunctionContext* ctx, + const StringVal& src); +template BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, + const StringVal& src); +template BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, + const StringVal& src); +template BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, + const StringVal& src); +template BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, + const StringVal& src); +template BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, + const StringVal& src); + +} // namespace doris diff --git a/be/src/exprs/bitmap_function.h b/be/src/exprs/bitmap_function.h index 5d86228ff79795..fa110cb3c637b0 100644 --- a/be/src/exprs/bitmap_function.h +++ b/be/src/exprs/bitmap_function.h @@ -59,8 +59,8 @@ class BitmapFunctions { static StringVal bitmap_serialize(FunctionContext* ctx, const StringVal& src); static StringVal to_bitmap(FunctionContext* ctx, const StringVal& src); static StringVal bitmap_hash(FunctionContext* ctx, const StringVal& src); - static 
StringVal bitmap_or(FunctionContext* ctx, const StringVal& src,const StringVal& dst); - static StringVal bitmap_and(FunctionContext* ctx, const StringVal& src,const StringVal& dst); + static StringVal bitmap_or(FunctionContext* ctx, const StringVal& src, const StringVal& dst); + static StringVal bitmap_and(FunctionContext* ctx, const StringVal& src, const StringVal& dst); static StringVal bitmap_to_string(FunctionContext* ctx, const StringVal& input); // Convert a comma separated string to a Bitmap // Example: @@ -68,22 +68,26 @@ class BitmapFunctions { // "1,2,3" will be converted to Bitmap with its Bit 1, 2, 3 set. // "-1, 1" will get NULL, because -1 is not a valid bit for Bitmap static StringVal bitmap_from_string(FunctionContext* ctx, const StringVal& input); - static BooleanVal bitmap_contains(FunctionContext* ctx, const StringVal& src, const BigIntVal& input); - static BooleanVal bitmap_has_any(FunctionContext* ctx, const StringVal& lhs, const StringVal& rhs); + static BooleanVal bitmap_contains(FunctionContext* ctx, const StringVal& src, + const BigIntVal& input); + static BooleanVal bitmap_has_any(FunctionContext* ctx, const StringVal& lhs, + const StringVal& rhs); // intersect count - template + template // this is init function for intersect_count not for bitmap_intersect static void bitmap_intersect_init(FunctionContext* ctx, StringVal* dst); - template - static void bitmap_intersect_update(FunctionContext* ctx, const StringVal& src, const ValType& key, - int num_key, const ValType* keys, const StringVal* dst); - template - static void bitmap_intersect_merge(FunctionContext* ctx, const StringVal& src, const StringVal* dst); - template + template + static void bitmap_intersect_update(FunctionContext* ctx, const StringVal& src, + const ValType& key, int num_key, const ValType* keys, + const StringVal* dst); + template + static void bitmap_intersect_merge(FunctionContext* ctx, const StringVal& src, + const StringVal* dst); + template static StringVal 
bitmap_intersect_serialize(FunctionContext* ctx, const StringVal& src); - template + template static BigIntVal bitmap_intersect_finalize(FunctionContext* ctx, const StringVal& src); }; -} +} // namespace doris #endif //DORIS_BE_SRC_QUERY_EXPRS_BITMAP_FUNCTION_H diff --git a/be/src/exprs/case_expr.cpp b/be/src/exprs/case_expr.cpp index 95a5b9e65b1784..caf0ee9b0b5069 100644 --- a/be/src/exprs/case_expr.cpp +++ b/be/src/exprs/case_expr.cpp @@ -18,8 +18,8 @@ #include "exprs/case_expr.h" #include "exprs/anyval_util.h" -#include "runtime/runtime_state.h" #include "gen_cpp/Exprs_types.h" +#include "runtime/runtime_state.h" namespace doris { @@ -31,29 +31,25 @@ struct CaseExprState { AnyVal* when_val; }; -CaseExpr::CaseExpr(const TExprNode& node) : - Expr(node), - _has_case_expr(node.case_expr.has_case_expr), - _has_else_expr(node.case_expr.has_else_expr) { -} +CaseExpr::CaseExpr(const TExprNode& node) + : Expr(node), + _has_case_expr(node.case_expr.has_case_expr), + _has_else_expr(node.case_expr.has_else_expr) {} -CaseExpr::~CaseExpr() { -} +CaseExpr::~CaseExpr() {} -Status CaseExpr::prepare( - RuntimeState* state, const RowDescriptor& desc, ExprContext* ctx) { +Status CaseExpr::prepare(RuntimeState* state, const RowDescriptor& desc, ExprContext* ctx) { RETURN_IF_ERROR(Expr::prepare(state, desc, ctx)); register_function_context(ctx, state, 0); return Status::OK(); } -Status CaseExpr::open( - RuntimeState* state, ExprContext* ctx, - FunctionContext::FunctionStateScope scope) { +Status CaseExpr::open(RuntimeState* state, ExprContext* ctx, + FunctionContext::FunctionStateScope scope) { RETURN_IF_ERROR(Expr::open(state, ctx, scope)); FunctionContext* fn_ctx = ctx->fn_context(_fn_context_index); CaseExprState* case_state = - reinterpret_cast(fn_ctx->allocate(sizeof(CaseExprState))); + reinterpret_cast(fn_ctx->allocate(sizeof(CaseExprState))); fn_ctx->set_function_state(FunctionContext::THREAD_LOCAL, case_state); if (_has_case_expr) { case_state->case_val = 
create_any_val(state->obj_pool(), _children[0]->type()); @@ -65,9 +61,8 @@ Status CaseExpr::open( return Status::OK(); } -void CaseExpr::close( - RuntimeState* state, ExprContext* ctx, - FunctionContext::FunctionStateScope scope) { +void CaseExpr::close(RuntimeState* state, ExprContext* ctx, + FunctionContext::FunctionStateScope scope) { if (_fn_context_index != -1) { FunctionContext* fn_ctx = ctx->fn_context(_fn_context_index); void* case_state = fn_ctx->get_function_state(FunctionContext::THREAD_LOCAL); @@ -78,9 +73,8 @@ void CaseExpr::close( std::string CaseExpr::debug_string() const { std::stringstream out; - out << "CaseExpr(has_case_expr=" << _has_case_expr - << " has_else_expr=" << _has_else_expr - << " " << Expr::debug_string() << ")"; + out << "CaseExpr(has_case_expr=" << _has_case_expr << " has_else_expr=" << _has_else_expr << " " + << Expr::debug_string() << ")"; return out.str(); } @@ -179,49 +173,48 @@ bool CaseExpr::any_val_eq(const TypeDescriptor& type, const AnyVal* v1, const An } } -#define CASE_COMPUTE_FN(THEN_TYPE, TYPE_NAME) \ - THEN_TYPE CaseExpr::get_##TYPE_NAME(ExprContext* ctx, TupleRow* row) { \ - FunctionContext* fn_ctx = ctx->fn_context(_fn_context_index); \ - CaseExprState* state = reinterpret_cast( \ - fn_ctx->get_function_state(FunctionContext::THREAD_LOCAL)); \ - DCHECK(state->case_val != NULL); \ - DCHECK(state->when_val != NULL); \ - int num_children = _children.size(); \ - if (has_case_expr()) { \ - /* All case and when exprs return the same type */ \ - /* (we guaranteed that during analysis). */ \ - get_child_val(0, ctx, row, state->case_val); \ - } else { \ - /* If there's no case expression, compare the when values to "true". */ \ - *reinterpret_cast(state->case_val) = BooleanVal(true); \ - } \ - if (state->case_val->is_null) { \ - if (has_else_expr()) { \ - /* Return else value. 
*/ \ - return _children[num_children - 1]->get_##TYPE_NAME(ctx, row); \ - } else { \ - return THEN_TYPE::null(); \ - } \ - } \ - int loop_start = has_case_expr() ? 1 : 0; \ - int loop_end = (has_else_expr()) ? num_children - 1 : num_children; \ - for (int i = loop_start; i < loop_end; i += 2) { \ - get_child_val(i, ctx, row, state->when_val); \ - if (state->when_val->is_null) continue; \ - if (any_val_eq(_children[0]->type(), state->case_val, state->when_val)) { \ - /* Return then value. */ \ - return _children[i + 1]->get_##TYPE_NAME(ctx, row); \ - } \ - } \ - if (has_else_expr()) { \ - /* Return else value. */ \ - return _children[num_children - 1]->get_##TYPE_NAME(ctx, row); \ - } \ - return THEN_TYPE::null(); \ +#define CASE_COMPUTE_FN(THEN_TYPE, TYPE_NAME) \ + THEN_TYPE CaseExpr::get_##TYPE_NAME(ExprContext* ctx, TupleRow* row) { \ + FunctionContext* fn_ctx = ctx->fn_context(_fn_context_index); \ + CaseExprState* state = reinterpret_cast( \ + fn_ctx->get_function_state(FunctionContext::THREAD_LOCAL)); \ + DCHECK(state->case_val != NULL); \ + DCHECK(state->when_val != NULL); \ + int num_children = _children.size(); \ + if (has_case_expr()) { \ + /* All case and when exprs return the same type */ \ + /* (we guaranteed that during analysis). */ \ + get_child_val(0, ctx, row, state->case_val); \ + } else { \ + /* If there's no case expression, compare the when values to "true". */ \ + *reinterpret_cast(state->case_val) = BooleanVal(true); \ + } \ + if (state->case_val->is_null) { \ + if (has_else_expr()) { \ + /* Return else value. */ \ + return _children[num_children - 1]->get_##TYPE_NAME(ctx, row); \ + } else { \ + return THEN_TYPE::null(); \ + } \ + } \ + int loop_start = has_case_expr() ? 1 : 0; \ + int loop_end = (has_else_expr()) ? 
num_children - 1 : num_children; \ + for (int i = loop_start; i < loop_end; i += 2) { \ + get_child_val(i, ctx, row, state->when_val); \ + if (state->when_val->is_null) continue; \ + if (any_val_eq(_children[0]->type(), state->case_val, state->when_val)) { \ + /* Return then value. */ \ + return _children[i + 1]->get_##TYPE_NAME(ctx, row); \ + } \ + } \ + if (has_else_expr()) { \ + /* Return else value. */ \ + return _children[num_children - 1]->get_##TYPE_NAME(ctx, row); \ + } \ + return THEN_TYPE::null(); \ } -#define CASE_COMPUTE_FN_WRAPPER(TYPE, TYPE_NAME) \ - CASE_COMPUTE_FN(TYPE, TYPE_NAME) +#define CASE_COMPUTE_FN_WRAPPER(TYPE, TYPE_NAME) CASE_COMPUTE_FN(TYPE, TYPE_NAME) CASE_COMPUTE_FN_WRAPPER(BooleanVal, boolean_val) CASE_COMPUTE_FN_WRAPPER(TinyIntVal, tiny_int_val) @@ -235,5 +228,4 @@ CASE_COMPUTE_FN_WRAPPER(DateTimeVal, datetime_val) CASE_COMPUTE_FN_WRAPPER(DecimalVal, decimal_val) CASE_COMPUTE_FN_WRAPPER(DecimalV2Val, decimalv2_val) - -} +} // namespace doris diff --git a/be/src/exprs/case_expr.h b/be/src/exprs/case_expr.h index f78aa2813c011e..33816bed4354a7 100644 --- a/be/src/exprs/case_expr.h +++ b/be/src/exprs/case_expr.h @@ -19,19 +19,18 @@ #define DORIS_BE_SRC_QUERY_EXPRS_CASE_EXPR_H #include -#include "expr.h" + #include "common/object_pool.h" +#include "expr.h" namespace doris { class TExprNode; -class CaseExpr: public Expr { +class CaseExpr : public Expr { public: virtual ~CaseExpr(); - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new CaseExpr(*this)); - } + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new CaseExpr(*this)); } virtual BooleanVal get_boolean_val(ExprContext* ctx, TupleRow* row); virtual TinyIntVal get_tiny_int_val(ExprContext* ctx, TupleRow* row); virtual SmallIntVal get_small_int_val(ExprContext* ctx, TupleRow* row); @@ -52,22 +51,18 @@ class CaseExpr: public Expr { friend class DecimalV2Operators; CaseExpr(const TExprNode& node); - virtual Status prepare( - RuntimeState* 
state, const RowDescriptor& row_desc, ExprContext* context); - virtual Status open( - RuntimeState* state, ExprContext* context, FunctionContext::FunctionStateScope scope); - virtual void close( - RuntimeState* state, ExprContext* context, FunctionContext::FunctionStateScope scope); + virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc, + ExprContext* context); + virtual Status open(RuntimeState* state, ExprContext* context, + FunctionContext::FunctionStateScope scope); + virtual void close(RuntimeState* state, ExprContext* context, + FunctionContext::FunctionStateScope scope); virtual std::string debug_string() const; - bool has_case_expr() { - return _has_case_expr; - } + bool has_case_expr() { return _has_case_expr; } - bool has_else_expr() { - return _has_else_expr; - } + bool has_else_expr() { return _has_else_expr; } private: const bool _has_case_expr; @@ -81,6 +76,6 @@ class CaseExpr: public Expr { bool any_val_eq(const TypeDescriptor& type, const AnyVal* v1, const AnyVal* v2); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/cast_expr.cpp b/be/src/exprs/cast_expr.cpp index f33ae0d2e6d160..3e907bddbf6c3b 100644 --- a/be/src/exprs/cast_expr.cpp +++ b/be/src/exprs/cast_expr.cpp @@ -46,18 +46,16 @@ Expr* CastExpr::from_thrift(const TExprNode& node) { } #define CAST_SAME(CLASS, TYPE, FN) \ - TYPE CLASS::FN(ExprContext* context, TupleRow* row) { \ - return _children[0]->FN(context, row); \ - } + TYPE CLASS::FN(ExprContext* context, TupleRow* row) { return _children[0]->FN(context, row); } #define CAST_FUNCTION(CLASS, TO_TYPE, TO_FN, FROM_TYPE, FROM_FN) \ - TO_TYPE CLASS::TO_FN(ExprContext* context, TupleRow* row) { \ - FROM_TYPE v = _children[0]->FROM_FN(context, row); \ - if (v.is_null) { \ - return TO_TYPE::null(); \ - } \ - return TO_TYPE(v.val); \ - } + TO_TYPE CLASS::TO_FN(ExprContext* context, TupleRow* row) { \ + FROM_TYPE v = _children[0]->FROM_FN(context, row); \ + if (v.is_null) { \ + return TO_TYPE::null(); \ + } \ + 
return TO_TYPE(v.val); \ + } #define CAST_FROM_BOOLEAN(TO_TYPE, TO_FN) \ CAST_FUNCTION(CastBooleanExpr, TO_TYPE, TO_FN, BooleanVal, get_boolean_val) @@ -154,4 +152,4 @@ CAST_FROM_DOUBLE(IntVal, get_int_val) CAST_FROM_DOUBLE(BigIntVal, get_big_int_val) CAST_FROM_DOUBLE(LargeIntVal, get_large_int_val) CAST_FROM_DOUBLE(FloatVal, get_float_val) -} +} // namespace doris diff --git a/be/src/exprs/cast_expr.h b/be/src/exprs/cast_expr.h index e4b16a3579cb6f..6c9df4a61e8587 100644 --- a/be/src/exprs/cast_expr.h +++ b/be/src/exprs/cast_expr.h @@ -25,27 +25,27 @@ namespace doris { class CastExpr : public Expr { public: - CastExpr(const TExprNode& node) : Expr(node) { } - virtual ~CastExpr() { } + CastExpr(const TExprNode& node) : Expr(node) {} + virtual ~CastExpr() {} static Expr* from_thrift(const TExprNode& node); }; -#define CAST_EXPR_DEFINE(CLASS) \ - class CLASS : public CastExpr { \ - public: \ - CLASS(const TExprNode& node) : CastExpr(node) { } \ - virtual ~CLASS() { } \ - virtual Expr* clone(ObjectPool* pool) const override { \ - return pool->add(new CLASS(*this)); \ - } \ - virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow*); \ - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*); \ +#define CAST_EXPR_DEFINE(CLASS) \ + class CLASS : public CastExpr { \ + public: \ + CLASS(const TExprNode& node) : CastExpr(node) {} \ + virtual ~CLASS() {} \ + virtual Expr* clone(ObjectPool* pool) const override { \ + return pool->add(new CLASS(*this)); \ + } \ + virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow*); \ + virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*); \ virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*); \ - virtual IntVal get_int_val(ExprContext* context, TupleRow*); \ - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow*); \ + virtual IntVal get_int_val(ExprContext* context, TupleRow*); \ + virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow*); \ 
virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow*); \ - virtual FloatVal get_float_val(ExprContext* context, TupleRow*); \ - virtual DoubleVal get_double_val(ExprContext* context, TupleRow*); \ + virtual FloatVal get_float_val(ExprContext* context, TupleRow*); \ + virtual DoubleVal get_double_val(ExprContext* context, TupleRow*); \ }; CAST_EXPR_DEFINE(CastBooleanExpr); @@ -57,6 +57,6 @@ CAST_EXPR_DEFINE(CastLargeIntExpr); CAST_EXPR_DEFINE(CastFloatExpr); CAST_EXPR_DEFINE(CastDoubleExpr); -} +} // namespace doris #endif diff --git a/be/src/exprs/cast_functions.cpp b/be/src/exprs/cast_functions.cpp index e63eeaf4038f33..351596f787c2ef 100644 --- a/be/src/exprs/cast_functions.cpp +++ b/be/src/exprs/cast_functions.cpp @@ -22,24 +22,23 @@ #include "exprs/anyval_util.h" #include "runtime/datetime_value.h" #include "runtime/string_value.h" -#include "util/string_parser.hpp" #include "string_functions.h" #include "util/mysql_global.h" +#include "util/string_parser.hpp" namespace doris { -void CastFunctions::init() { -} +void CastFunctions::init() {} // The maximum number of characters need to represent a floating-point number (float or // double) as a string. 
24 = 17 (maximum significant digits) + 1 (decimal point) + 1 ('E') // + 3 (exponent digits) + 2 (negative signs) (see http://stackoverflow.com/a/1701085) const int MAX_FLOAT_CHARS = 24; -#define CAST_FUNCTION(from_type, to_type, type_name) \ +#define CAST_FUNCTION(from_type, to_type, type_name) \ to_type CastFunctions::cast_to_##type_name(FunctionContext* ctx, const from_type& val) { \ - if (val.is_null) return to_type::null(); \ - return to_type(val.val); \ + if (val.is_null) return to_type::null(); \ + return to_type(val.val); \ } CAST_FUNCTION(TinyIntVal, BooleanVal, boolean_val) @@ -106,31 +105,31 @@ CAST_FUNCTION(BigIntVal, DoubleVal, double_val) CAST_FUNCTION(LargeIntVal, DoubleVal, double_val) CAST_FUNCTION(FloatVal, DoubleVal, double_val) -#define CAST_FROM_STRING(num_type, type_name, native_type, string_parser_fn) \ - num_type CastFunctions::cast_to_##type_name(FunctionContext* ctx, const StringVal& val) { \ - if (val.is_null) return num_type::null(); \ - StringParser::ParseResult result; \ - num_type ret; \ - ret.val = StringParser::string_parser_fn( \ - reinterpret_cast(val.ptr), val.len, &result); \ - if (UNLIKELY(result != StringParser::PARSE_SUCCESS || std::isnan(ret.val) || std::isinf(ret.val))) { \ - return num_type::null(); \ - } \ - return ret; \ +#define CAST_FROM_STRING(num_type, type_name, native_type, string_parser_fn) \ + num_type CastFunctions::cast_to_##type_name(FunctionContext* ctx, const StringVal& val) { \ + if (val.is_null) return num_type::null(); \ + StringParser::ParseResult result; \ + num_type ret; \ + ret.val = StringParser::string_parser_fn(reinterpret_cast(val.ptr), \ + val.len, &result); \ + if (UNLIKELY(result != StringParser::PARSE_SUCCESS || std::isnan(ret.val) || \ + std::isinf(ret.val))) { \ + return num_type::null(); \ + } \ + return ret; \ } -#define CAST_FROM_STRINGS() \ - CAST_FROM_STRING(TinyIntVal, tiny_int_val, int8_t, string_to_int);\ - CAST_FROM_STRING(SmallIntVal, small_int_val, int16_t, string_to_int);\ - 
CAST_FROM_STRING(IntVal, int_val, int32_t, string_to_int);\ - CAST_FROM_STRING(BigIntVal, big_int_val, int64_t, string_to_int);\ - CAST_FROM_STRING(LargeIntVal, large_int_val, __int128, string_to_int);\ - CAST_FROM_STRING(FloatVal, float_val, float, string_to_float);\ +#define CAST_FROM_STRINGS() \ + CAST_FROM_STRING(TinyIntVal, tiny_int_val, int8_t, string_to_int); \ + CAST_FROM_STRING(SmallIntVal, small_int_val, int16_t, string_to_int); \ + CAST_FROM_STRING(IntVal, int_val, int32_t, string_to_int); \ + CAST_FROM_STRING(BigIntVal, big_int_val, int64_t, string_to_int); \ + CAST_FROM_STRING(LargeIntVal, large_int_val, __int128, string_to_int); \ + CAST_FROM_STRING(FloatVal, float_val, float, string_to_float); \ CAST_FROM_STRING(DoubleVal, double_val, double, string_to_float); CAST_FROM_STRINGS(); - // Special-case tinyint because boost thinks it's a char and handles it differently. // e.g. '0' is written as an empty string. StringVal CastFunctions::cast_to_string_val(FunctionContext* ctx, const TinyIntVal& val) { @@ -141,11 +140,11 @@ StringVal CastFunctions::cast_to_string_val(FunctionContext* ctx, const TinyIntV return AnyValUtil::from_string_temp(ctx, std::to_string(tmp_val)); } -#define CAST_TO_STRING(num_type) \ - StringVal CastFunctions::cast_to_string_val(FunctionContext* ctx, const num_type& val) { \ - if (val.is_null) return StringVal::null(); \ - return AnyValUtil::from_string_temp(ctx, std::to_string(val.val)); \ - } +#define CAST_TO_STRING(num_type) \ + StringVal CastFunctions::cast_to_string_val(FunctionContext* ctx, const num_type& val) { \ + if (val.is_null) return StringVal::null(); \ + return AnyValUtil::from_string_temp(ctx, std::to_string(val.val)); \ + } CAST_TO_STRING(BooleanVal); CAST_TO_STRING(SmallIntVal); @@ -162,44 +161,44 @@ StringVal CastFunctions::cast_to_string_val(FunctionContext* ctx, const LargeInt return AnyValUtil::from_buffer_temp(ctx, d, len); } -template +template int float_to_string(T value, char* buf); -template<> +template <> 
int float_to_string(float value, char* buf) { return FloatToBuffer(value, MAX_FLOAT_STR_LENGTH + 2, buf); } -template<> +template <> int float_to_string(double value, char* buf) { return DoubleToBuffer(value, MAX_DOUBLE_STR_LENGTH + 2, buf); } -#define CAST_FLOAT_TO_STRING(float_type, format) \ - StringVal CastFunctions::cast_to_string_val(FunctionContext* ctx, const float_type& val) { \ - if (val.is_null) return StringVal::null(); \ - /* val.val could be -nan, return "nan" instead */ \ - if (std::isnan(val.val)) return StringVal("nan"); \ - /* Add 1 to MAX_FLOAT_CHARS since snprintf adds a trailing '\0' */ \ - StringVal sv(ctx, MAX_DOUBLE_STR_LENGTH + 2); \ - if (UNLIKELY(sv.is_null)) { \ - return sv; \ - } \ - const FunctionContext::TypeDesc& returnType = ctx->get_return_type(); \ - if (returnType.len > 0) { \ - sv.len = snprintf(reinterpret_cast(sv.ptr), sv.len, format, val.val); \ - DCHECK_GT(sv.len, 0); \ - DCHECK_LE(sv.len, MAX_FLOAT_CHARS); \ - AnyValUtil::TruncateIfNecessary(returnType, &sv); \ - } else if (returnType.len == -1) { \ - char buf[MAX_DOUBLE_STR_LENGTH + 2]; \ - sv.len = float_to_string(val.val, buf); \ - memcpy(sv.ptr, buf, sv.len); \ - } else { \ - DCHECK(false); \ - } \ - return sv; \ - } +#define CAST_FLOAT_TO_STRING(float_type, format) \ + StringVal CastFunctions::cast_to_string_val(FunctionContext* ctx, const float_type& val) { \ + if (val.is_null) return StringVal::null(); \ + /* val.val could be -nan, return "nan" instead */ \ + if (std::isnan(val.val)) return StringVal("nan"); \ + /* Add 1 to MAX_FLOAT_CHARS since snprintf adds a trailing '\0' */ \ + StringVal sv(ctx, MAX_DOUBLE_STR_LENGTH + 2); \ + if (UNLIKELY(sv.is_null)) { \ + return sv; \ + } \ + const FunctionContext::TypeDesc& returnType = ctx->get_return_type(); \ + if (returnType.len > 0) { \ + sv.len = snprintf(reinterpret_cast(sv.ptr), sv.len, format, val.val); \ + DCHECK_GT(sv.len, 0); \ + DCHECK_LE(sv.len, MAX_FLOAT_CHARS); \ + AnyValUtil::TruncateIfNecessary(returnType, 
&sv); \ + } else if (returnType.len == -1) { \ + char buf[MAX_DOUBLE_STR_LENGTH + 2]; \ + sv.len = float_to_string(val.val, buf); \ + memcpy(sv.ptr, buf, sv.len); \ + } else { \ + DCHECK(false); \ + } \ + return sv; \ + } // Floats have up to 9 significant digits, doubles up to 17 // (see http://en.wikipedia.org/wiki/Single-precision_floating-point_format @@ -218,21 +217,21 @@ StringVal CastFunctions::cast_to_string_val(FunctionContext* ctx, const DateTime } StringVal CastFunctions::cast_to_string_val(FunctionContext* ctx, const StringVal& val) { - if (val.is_null) return StringVal::null(); - StringVal sv; - sv.ptr = val.ptr; - sv.len = val.len; - - const FunctionContext::TypeDesc& result_type = ctx->get_return_type(); - if (result_type.len > 0) { - AnyValUtil::TruncateIfNecessary(result_type, &sv); - } - return sv; + if (val.is_null) return StringVal::null(); + StringVal sv; + sv.ptr = val.ptr; + sv.len = val.len; + + const FunctionContext::TypeDesc& result_type = ctx->get_return_type(); + if (result_type.len > 0) { + AnyValUtil::TruncateIfNecessary(result_type, &sv); + } + return sv; } BooleanVal CastFunctions::cast_to_boolean_val(FunctionContext* ctx, const StringVal& val) { if (val.is_null) { - return BooleanVal::null(); + return BooleanVal::null(); } StringParser::ParseResult result; BooleanVal ret; @@ -243,7 +242,7 @@ BooleanVal CastFunctions::cast_to_boolean_val(FunctionContext* ctx, const String ret.val = true; } else { ret.val = StringParser::string_to_bool(reinterpret_cast(val.ptr), val.len, &result); - if (UNLIKELY(result != StringParser::PARSE_SUCCESS)) { + if (UNLIKELY(result != StringParser::PARSE_SUCCESS)) { return BooleanVal::null(); } } @@ -272,12 +271,11 @@ StringVal CastFunctions::CastToChar(FunctionContext* ctx, const StringVal& val) } #endif -#define CAST_FROM_DATETIME(to_type, type_name) \ - to_type CastFunctions::cast_to_##type_name( \ - FunctionContext* ctx, const DateTimeVal& val) { \ - if (val.is_null) return to_type::null(); \ - 
DateTimeValue tv = DateTimeValue::from_datetime_val(val); \ - return to_type(tv.to_int64()); \ +#define CAST_FROM_DATETIME(to_type, type_name) \ + to_type CastFunctions::cast_to_##type_name(FunctionContext* ctx, const DateTimeVal& val) { \ + if (val.is_null) return to_type::null(); \ + DateTimeValue tv = DateTimeValue::from_datetime_val(val); \ + return to_type(tv.to_int64()); \ } CAST_FROM_DATETIME(BooleanVal, boolean_val); @@ -289,54 +287,51 @@ CAST_FROM_DATETIME(LargeIntVal, large_int_val); CAST_FROM_DATETIME(FloatVal, float_val); CAST_FROM_DATETIME(DoubleVal, double_val); -#define CAST_TO_DATETIME(from_type) \ - DateTimeVal CastFunctions::cast_to_datetime_val(FunctionContext* ctx, \ - const from_type& val) { \ - if (val.is_null) return DateTimeVal::null(); \ - DateTimeValue date_value; \ - if (!date_value.from_date_int64(val.val)) return DateTimeVal::null(); \ - date_value.to_datetime(); \ - DateTimeVal result; \ - date_value.to_datetime_val(&result); \ - return result; \ +#define CAST_TO_DATETIME(from_type) \ + DateTimeVal CastFunctions::cast_to_datetime_val(FunctionContext* ctx, const from_type& val) { \ + if (val.is_null) return DateTimeVal::null(); \ + DateTimeValue date_value; \ + if (!date_value.from_date_int64(val.val)) return DateTimeVal::null(); \ + date_value.to_datetime(); \ + DateTimeVal result; \ + date_value.to_datetime_val(&result); \ + return result; \ } -#define CAST_TO_DATETIMES() \ - CAST_TO_DATETIME(TinyIntVal);\ - CAST_TO_DATETIME(SmallIntVal);\ - CAST_TO_DATETIME(IntVal);\ - CAST_TO_DATETIME(BigIntVal);\ - CAST_TO_DATETIME(LargeIntVal);\ - CAST_TO_DATETIME(FloatVal);\ +#define CAST_TO_DATETIMES() \ + CAST_TO_DATETIME(TinyIntVal); \ + CAST_TO_DATETIME(SmallIntVal); \ + CAST_TO_DATETIME(IntVal); \ + CAST_TO_DATETIME(BigIntVal); \ + CAST_TO_DATETIME(LargeIntVal); \ + CAST_TO_DATETIME(FloatVal); \ CAST_TO_DATETIME(DoubleVal); CAST_TO_DATETIMES(); -#define CAST_TO_DATE(from_type) \ - DateTimeVal CastFunctions::cast_to_date_val(FunctionContext* 
ctx, \ - const from_type& val) { \ - if (val.is_null) return DateTimeVal::null(); \ - DateTimeValue date_value; \ - if (!date_value.from_date_int64(val.val)) return DateTimeVal::null(); \ - date_value.cast_to_date(); \ - DateTimeVal result; \ - date_value.to_datetime_val(&result); \ - return result; \ +#define CAST_TO_DATE(from_type) \ + DateTimeVal CastFunctions::cast_to_date_val(FunctionContext* ctx, const from_type& val) { \ + if (val.is_null) return DateTimeVal::null(); \ + DateTimeValue date_value; \ + if (!date_value.from_date_int64(val.val)) return DateTimeVal::null(); \ + date_value.cast_to_date(); \ + DateTimeVal result; \ + date_value.to_datetime_val(&result); \ + return result; \ } -#define CAST_TO_DATES() \ - CAST_TO_DATE(TinyIntVal);\ - CAST_TO_DATE(SmallIntVal);\ - CAST_TO_DATE(IntVal);\ - CAST_TO_DATE(BigIntVal);\ - CAST_TO_DATE(LargeIntVal);\ - CAST_TO_DATE(FloatVal);\ +#define CAST_TO_DATES() \ + CAST_TO_DATE(TinyIntVal); \ + CAST_TO_DATE(SmallIntVal); \ + CAST_TO_DATE(IntVal); \ + CAST_TO_DATE(BigIntVal); \ + CAST_TO_DATE(LargeIntVal); \ + CAST_TO_DATE(FloatVal); \ CAST_TO_DATE(DoubleVal); CAST_TO_DATES(); -DateTimeVal CastFunctions::cast_to_datetime_val( - FunctionContext* ctx, const DateTimeVal& val) { +DateTimeVal CastFunctions::cast_to_datetime_val(FunctionContext* ctx, const DateTimeVal& val) { if (val.is_null) { return DateTimeVal::null(); } @@ -348,8 +343,7 @@ DateTimeVal CastFunctions::cast_to_datetime_val( return result; } -DateTimeVal CastFunctions::cast_to_datetime_val( - FunctionContext* ctx, const StringVal& val) { +DateTimeVal CastFunctions::cast_to_datetime_val(FunctionContext* ctx, const StringVal& val) { if (val.is_null) { return DateTimeVal::null(); } @@ -364,8 +358,7 @@ DateTimeVal CastFunctions::cast_to_datetime_val( return result; } -DateTimeVal CastFunctions::cast_to_date_val( - FunctionContext* ctx, const DateTimeVal& val) { +DateTimeVal CastFunctions::cast_to_date_val(FunctionContext* ctx, const DateTimeVal& val) { if 
(val.is_null) { return DateTimeVal::null(); } @@ -377,8 +370,7 @@ DateTimeVal CastFunctions::cast_to_date_val( return result; } -DateTimeVal CastFunctions::cast_to_date_val( - FunctionContext* ctx, const StringVal& val) { +DateTimeVal CastFunctions::cast_to_date_val(FunctionContext* ctx, const StringVal& val) { if (val.is_null) { return DateTimeVal::null(); } @@ -393,4 +385,4 @@ DateTimeVal CastFunctions::cast_to_date_val( return result; } -} +} // namespace doris diff --git a/be/src/exprs/cast_functions.h b/be/src/exprs/cast_functions.h index 6b567930e24f27..b92b45fac8ab9c 100644 --- a/be/src/exprs/cast_functions.h +++ b/be/src/exprs/cast_functions.h @@ -141,6 +141,6 @@ class CastFunctions { static DateTimeVal cast_to_date_val(FunctionContext* context, const StringVal& val); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/compound_predicate.cpp b/be/src/exprs/compound_predicate.cpp index ee75564a9ac5c8..88af421902916d 100644 --- a/be/src/exprs/compound_predicate.cpp +++ b/be/src/exprs/compound_predicate.cpp @@ -19,14 +19,12 @@ #include -#include "util/debug_util.h" #include "runtime/runtime_state.h" +#include "util/debug_util.h" namespace doris { -CompoundPredicate::CompoundPredicate(const TExprNode& node) : - Predicate(node) { -} +CompoundPredicate::CompoundPredicate(const TExprNode& node) : Predicate(node) {} #if 0 Status CompoundPredicate::prepare(RuntimeState* state, const RowDescriptor& desc) { DCHECK_LE(_children.size(), 2); @@ -34,8 +32,7 @@ Status CompoundPredicate::prepare(RuntimeState* state, const RowDescriptor& desc } #endif -void CompoundPredicate::init() { -} +void CompoundPredicate::init() {} BooleanVal CompoundPredicate::compound_not(FunctionContext* context, const BooleanVal& v) { if (v.is_null) { @@ -90,4 +87,4 @@ std::string CompoundPredicate::debug_string() const { return out.str(); } -} +} // namespace doris diff --git a/be/src/exprs/compound_predicate.h b/be/src/exprs/compound_predicate.h index dc384db203a32b..25d0858770fb16 
100644 --- a/be/src/exprs/compound_predicate.h +++ b/be/src/exprs/compound_predicate.h @@ -26,7 +26,7 @@ namespace doris { -class CompoundPredicate: public Predicate { +class CompoundPredicate : public Predicate { public: static void init(); static BooleanVal compound_not(FunctionContext* context, const BooleanVal&); @@ -39,25 +39,23 @@ class CompoundPredicate: public Predicate { // virtual Status prepare(RuntimeState* state, const RowDescriptor& desc); virtual std::string debug_string() const; - virtual bool is_vectorized() const { - return false; - } + virtual bool is_vectorized() const { return false; } private: friend class OpcodeRegistry; }; /// Expr for evaluating and (&&) operators -class AndPredicate: public CompoundPredicate { +class AndPredicate : public CompoundPredicate { public: - virtual Expr* clone(ObjectPool* pool) const override { + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new AndPredicate(*this)); } virtual doris_udf::BooleanVal get_boolean_val(ExprContext* context, TupleRow*); protected: friend class Expr; - AndPredicate(const TExprNode& node) : CompoundPredicate(node) { } + AndPredicate(const TExprNode& node) : CompoundPredicate(node) {} virtual std::string debug_string() const { std::stringstream out; @@ -70,16 +68,16 @@ class AndPredicate: public CompoundPredicate { }; /// Expr for evaluating or (||) operators -class OrPredicate: public CompoundPredicate { +class OrPredicate : public CompoundPredicate { public: - virtual Expr* clone(ObjectPool* pool) const override { + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new OrPredicate(*this)); } virtual doris_udf::BooleanVal get_boolean_val(ExprContext* context, TupleRow*); protected: friend class Expr; - OrPredicate(const TExprNode& node) : CompoundPredicate(node) { } + OrPredicate(const TExprNode& node) : CompoundPredicate(node) {} virtual std::string debug_string() const { std::stringstream out; @@ -92,16 +90,16 @@ class OrPredicate: 
public CompoundPredicate { }; /// Expr for evaluating or (||) operators -class NotPredicate: public CompoundPredicate { +class NotPredicate : public CompoundPredicate { public: - virtual Expr* clone(ObjectPool* pool) const override { + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new NotPredicate(*this)); } virtual doris_udf::BooleanVal get_boolean_val(ExprContext* context, TupleRow*); protected: friend class Expr; - NotPredicate(const TExprNode& node) : CompoundPredicate(node) { } + NotPredicate(const TExprNode& node) : CompoundPredicate(node) {} virtual std::string debug_string() const { std::stringstream out; @@ -112,6 +110,6 @@ class NotPredicate: public CompoundPredicate { private: friend class OpcodeRegistry; }; -} +} // namespace doris #endif diff --git a/be/src/exprs/conditional_functions.cpp b/be/src/exprs/conditional_functions.cpp index 5d40bb3d319144..ce1a08cc22985f 100644 --- a/be/src/exprs/conditional_functions.cpp +++ b/be/src/exprs/conditional_functions.cpp @@ -17,23 +17,21 @@ #include "exprs/conditional_functions.h" -#include "exprs/expr.h" #include "exprs/anyval_util.h" #include "exprs/case_expr.h" +#include "exprs/expr.h" #include "runtime/tuple_row.h" #include "udf/udf.h" namespace doris { -#define CTOR_DCTOR_FUN(expr_class) \ - expr_class::expr_class(const TExprNode& node) : Expr(node) { \ - };\ - \ - expr_class::~expr_class() { \ - };\ +#define CTOR_DCTOR_FUN(expr_class) \ + expr_class::expr_class(const TExprNode& node) : Expr(node){}; \ + \ + expr_class::~expr_class(){}; CTOR_DCTOR_FUN(IfNullExpr); CTOR_DCTOR_FUN(NullIfExpr); CTOR_DCTOR_FUN(IfExpr); CTOR_DCTOR_FUN(CoalesceExpr); -} +} // namespace doris diff --git a/be/src/exprs/conditional_functions.h b/be/src/exprs/conditional_functions.h index 28aaf42b57e7a2..44f9e1d725170c 100644 --- a/be/src/exprs/conditional_functions.h +++ b/be/src/exprs/conditional_functions.h @@ -19,6 +19,7 @@ #define DORIS_BE_SRC_QUERY_EXPRS_CONDITIONAL_FUNCTIONS_H #include + #include 
"common/object_pool.h" #include "exprs/expr.h" #include "udf/udf.h" @@ -37,7 +38,7 @@ class ConditionalFunctions { class IfNullExpr : public Expr { public: virtual ~IfNullExpr(); - virtual Expr* clone(ObjectPool* pool) const override { + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new IfNullExpr(*this)); } virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow* row); @@ -53,9 +54,7 @@ class IfNullExpr : public Expr { virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow* row); virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow* row); - virtual std::string debug_string() const { - return Expr::debug_string("IfNullExpr"); - } + virtual std::string debug_string() const { return Expr::debug_string("IfNullExpr"); } protected: friend class Expr; @@ -65,7 +64,7 @@ class IfNullExpr : public Expr { class NullIfExpr : public Expr { public: virtual ~NullIfExpr(); - virtual Expr* clone(ObjectPool* pool) const override { + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new NullIfExpr(*this)); } virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow* row); @@ -80,9 +79,7 @@ class NullIfExpr : public Expr { // virtual DecimalVal get_decimal_val(ExprContext* context, TupleRow* row); virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow* row); - virtual std::string debug_string() const { - return Expr::debug_string("NullIfExpr"); - } + virtual std::string debug_string() const { return Expr::debug_string("NullIfExpr"); } protected: friend class Expr; @@ -92,9 +89,7 @@ class NullIfExpr : public Expr { class IfExpr : public Expr { public: virtual ~IfExpr(); - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new IfExpr(*this)); - } + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new IfExpr(*this)); } virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow* row); virtual TinyIntVal 
get_tiny_int_val(ExprContext* context, TupleRow* row); virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow* row); @@ -108,9 +103,7 @@ class IfExpr : public Expr { virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow* row); virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow* row); - virtual std::string debug_string() const { - return Expr::debug_string("IfExpr"); - } + virtual std::string debug_string() const { return Expr::debug_string("IfExpr"); } protected: friend class Expr; @@ -121,7 +114,7 @@ class IfExpr : public Expr { class CoalesceExpr : public Expr { public: virtual ~CoalesceExpr(); - virtual Expr* clone(ObjectPool* pool) const override { + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new CoalesceExpr(*this)); } virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow* row); @@ -144,6 +137,6 @@ class CoalesceExpr : public Expr { CoalesceExpr(const TExprNode& node); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/conditional_functions_ir.cpp b/be/src/exprs/conditional_functions_ir.cpp index 1dbb718b13aeb5..e446242d6ef64c 100644 --- a/be/src/exprs/conditional_functions_ir.cpp +++ b/be/src/exprs/conditional_functions_ir.cpp @@ -21,13 +21,13 @@ namespace doris { -#define IF_NULL_COMPUTE_FUNCTION(type, type_name) \ +#define IF_NULL_COMPUTE_FUNCTION(type, type_name) \ type IfNullExpr::get_##type_name(ExprContext* context, TupleRow* row) { \ - DCHECK_EQ(_children.size(), 2); \ - type val = _children[0]->get_##type_name(context, row); \ - if (!val.is_null) return val; /* short-circuit */ \ - return _children[1]->get_##type_name(context, row); \ - }\ + DCHECK_EQ(_children.size(), 2); \ + type val = _children[0]->get_##type_name(context, row); \ + if (!val.is_null) return val; /* short-circuit */ \ + return _children[1]->get_##type_name(context, row); \ + } IF_NULL_COMPUTE_FUNCTION(BooleanVal, boolean_val); IF_NULL_COMPUTE_FUNCTION(TinyIntVal, tiny_int_val); @@ 
-42,23 +42,22 @@ IF_NULL_COMPUTE_FUNCTION(DecimalVal, decimal_val); IF_NULL_COMPUTE_FUNCTION(DecimalV2Val, decimalv2_val); IF_NULL_COMPUTE_FUNCTION(LargeIntVal, large_int_val); -#define NULL_IF_COMPUTE_FUNCTION(TYPE, type_name) \ - TYPE NullIfExpr::get_##type_name(ExprContext* ctx, TupleRow* row) { \ - DCHECK_EQ(_children.size(), 2); \ - TYPE lhs_val = _children[0]->get_##type_name(ctx, row); \ - /* Short-circuit in case lhs_val is NULL. Can never be equal to RHS. */ \ - if (lhs_val.is_null) return TYPE::null(); \ - /* Get rhs and return NULL if lhs == rhs, lhs otherwise */ \ - TYPE rhs_val = _children[1]->get_##type_name(ctx, row); \ +#define NULL_IF_COMPUTE_FUNCTION(TYPE, type_name) \ + TYPE NullIfExpr::get_##type_name(ExprContext* ctx, TupleRow* row) { \ + DCHECK_EQ(_children.size(), 2); \ + TYPE lhs_val = _children[0]->get_##type_name(ctx, row); \ + /* Short-circuit in case lhs_val is NULL. Can never be equal to RHS. */ \ + if (lhs_val.is_null) return TYPE::null(); \ + /* Get rhs and return NULL if lhs == rhs, lhs otherwise */ \ + TYPE rhs_val = _children[1]->get_##type_name(ctx, row); \ if (!rhs_val.is_null && AnyValUtil::equals(_children[0]->type(), lhs_val, rhs_val)) { \ - return TYPE::null(); \ - } \ - return lhs_val; \ + return TYPE::null(); \ + } \ + return lhs_val; \ } // Just for code check..... 
-#define NULL_IF_COMPUTE_FUNCTION_WRAPPER(TYPE, type_name) \ - NULL_IF_COMPUTE_FUNCTION(TYPE, type_name) +#define NULL_IF_COMPUTE_FUNCTION_WRAPPER(TYPE, type_name) NULL_IF_COMPUTE_FUNCTION(TYPE, type_name) NULL_IF_COMPUTE_FUNCTION_WRAPPER(BooleanVal, boolean_val); NULL_IF_COMPUTE_FUNCTION_WRAPPER(TinyIntVal, tiny_int_val); @@ -72,14 +71,14 @@ NULL_IF_COMPUTE_FUNCTION_WRAPPER(DateTimeVal, datetime_val); // NULL_IF_COMPUTE_FUNCTION(DecimalVal, decimal_val); NULL_IF_COMPUTE_FUNCTION_WRAPPER(LargeIntVal, large_int_val); -#define IF_COMPUTE_FUNCTION(type, type_name) \ +#define IF_COMPUTE_FUNCTION(type, type_name) \ type IfExpr::get_##type_name(ExprContext* context, TupleRow* row) { \ - DCHECK_EQ(_children.size(), 3); \ - BooleanVal cond = _children[0]->get_boolean_val(context, row); \ - if (cond.is_null || !cond.val) { \ - return _children[2]->get_##type_name(context, row); \ - } \ - return _children[1]->get_##type_name(context, row); \ + DCHECK_EQ(_children.size(), 3); \ + BooleanVal cond = _children[0]->get_boolean_val(context, row); \ + if (cond.is_null || !cond.val) { \ + return _children[2]->get_##type_name(context, row); \ + } \ + return _children[1]->get_##type_name(context, row); \ } IF_COMPUTE_FUNCTION(BooleanVal, boolean_val); @@ -95,14 +94,14 @@ IF_COMPUTE_FUNCTION(DecimalVal, decimal_val); IF_COMPUTE_FUNCTION(DecimalV2Val, decimalv2_val); IF_COMPUTE_FUNCTION(LargeIntVal, large_int_val); -#define COALESCE_COMPUTE_FUNCTION(type, type_name) \ +#define COALESCE_COMPUTE_FUNCTION(type, type_name) \ type CoalesceExpr::get_##type_name(ExprContext* context, TupleRow* row) { \ - DCHECK_GE(_children.size(), 1); \ - for (int i = 0; i < _children.size(); ++i) { \ - type val = _children[i]->get_##type_name(context, row); \ - if (!val.is_null) return val; \ - } \ - return type::null(); \ + DCHECK_GE(_children.size(), 1); \ + for (int i = 0; i < _children.size(); ++i) { \ + type val = _children[i]->get_##type_name(context, row); \ + if (!val.is_null) return val; \ + } \ + 
return type::null(); \ } COALESCE_COMPUTE_FUNCTION(BooleanVal, boolean_val); @@ -118,4 +117,4 @@ COALESCE_COMPUTE_FUNCTION(DecimalVal, decimal_val); COALESCE_COMPUTE_FUNCTION(DecimalV2Val, decimalv2_val); COALESCE_COMPUTE_FUNCTION(LargeIntVal, large_int_val); -} +} // namespace doris diff --git a/be/src/exprs/decimal_operators.cpp b/be/src/exprs/decimal_operators.cpp index 8c19b39735c432..48e1b9ac5c8cf8 100644 --- a/be/src/exprs/decimal_operators.cpp +++ b/be/src/exprs/decimal_operators.cpp @@ -17,9 +17,10 @@ #include "exprs/decimal_operators.h" +#include + #include #include -#include #include "exprs/anyval_util.h" #include "exprs/case_expr.h" @@ -30,30 +31,28 @@ namespace doris { -void DecimalOperators::init() { -} +void DecimalOperators::init() {} -#define CAST_INT_TO_DECIMAL(from_type) \ - DecimalVal DecimalOperators::cast_to_decimal_val( \ - FunctionContext* context, const from_type& val) { \ - if (val.is_null) return DecimalVal::null(); \ - DecimalValue dv = val.val;\ - DecimalVal result;\ - dv.to_decimal_val(&result);\ - return result;\ +#define CAST_INT_TO_DECIMAL(from_type) \ + DecimalVal DecimalOperators::cast_to_decimal_val(FunctionContext* context, \ + const from_type& val) { \ + if (val.is_null) return DecimalVal::null(); \ + DecimalValue dv = val.val; \ + DecimalVal result; \ + dv.to_decimal_val(&result); \ + return result; \ } -#define CAST_INT_TO_DECIMALS() \ - CAST_INT_TO_DECIMAL(TinyIntVal);\ - CAST_INT_TO_DECIMAL(SmallIntVal);\ - CAST_INT_TO_DECIMAL(IntVal);\ - CAST_INT_TO_DECIMAL(BigIntVal);\ - CAST_INT_TO_DECIMAL(LargeIntVal);\ +#define CAST_INT_TO_DECIMALS() \ + CAST_INT_TO_DECIMAL(TinyIntVal); \ + CAST_INT_TO_DECIMAL(SmallIntVal); \ + CAST_INT_TO_DECIMAL(IntVal); \ + CAST_INT_TO_DECIMAL(BigIntVal); \ + CAST_INT_TO_DECIMAL(LargeIntVal); CAST_INT_TO_DECIMALS(); -DecimalVal DecimalOperators::cast_to_decimal_val( - FunctionContext* context, const FloatVal& val) { +DecimalVal DecimalOperators::cast_to_decimal_val(FunctionContext* context, const 
FloatVal& val) { if (val.is_null) { return DecimalVal::null(); } @@ -64,8 +63,7 @@ DecimalVal DecimalOperators::cast_to_decimal_val( return result; } -DecimalVal DecimalOperators::cast_to_decimal_val( - FunctionContext* context, const DoubleVal& val) { +DecimalVal DecimalOperators::cast_to_decimal_val(FunctionContext* context, const DoubleVal& val) { if (val.is_null) { return DecimalVal::null(); } @@ -76,22 +74,20 @@ DecimalVal DecimalOperators::cast_to_decimal_val( return result; } -DecimalVal DecimalOperators::cast_to_decimal_val( - FunctionContext* context, const DateTimeVal& val) { +DecimalVal DecimalOperators::cast_to_decimal_val(FunctionContext* context, const DateTimeVal& val) { if (val.is_null) { return DecimalVal::null(); } DateTimeValue dt_value = DateTimeValue::from_datetime_val(val); - + DecimalValue dv = dt_value.to_int64(); DecimalVal result; dv.to_decimal_val(&result); return result; } -DecimalVal DecimalOperators::cast_to_decimal_val( - FunctionContext* context, const StringVal& val) { +DecimalVal DecimalOperators::cast_to_decimal_val(FunctionContext* context, const StringVal& val) { if (val.is_null) { return DecimalVal::null(); } @@ -104,28 +100,27 @@ DecimalVal DecimalOperators::cast_to_decimal_val( return result; } -#define CAST_DECIMAL_TO_INT(to_type, type_name) \ - to_type DecimalOperators::cast_to_##type_name( \ - FunctionContext* context, const DecimalVal& val) { \ - if (val.is_null) return to_type::null(); \ - DecimalValue dv = DecimalValue::from_decimal_val(val); \ - return to_type(dv);\ +#define CAST_DECIMAL_TO_INT(to_type, type_name) \ + to_type DecimalOperators::cast_to_##type_name(FunctionContext* context, \ + const DecimalVal& val) { \ + if (val.is_null) return to_type::null(); \ + DecimalValue dv = DecimalValue::from_decimal_val(val); \ + return to_type(dv); \ } -#define CAST_FROM_DECIMAL() \ - CAST_DECIMAL_TO_INT(BooleanVal, boolean_val);\ - CAST_DECIMAL_TO_INT(TinyIntVal, tiny_int_val);\ - CAST_DECIMAL_TO_INT(SmallIntVal, 
small_int_val);\ - CAST_DECIMAL_TO_INT(IntVal, int_val);\ - CAST_DECIMAL_TO_INT(BigIntVal, big_int_val);\ - CAST_DECIMAL_TO_INT(LargeIntVal, large_int_val);\ - CAST_DECIMAL_TO_INT(FloatVal, float_val);\ +#define CAST_FROM_DECIMAL() \ + CAST_DECIMAL_TO_INT(BooleanVal, boolean_val); \ + CAST_DECIMAL_TO_INT(TinyIntVal, tiny_int_val); \ + CAST_DECIMAL_TO_INT(SmallIntVal, small_int_val); \ + CAST_DECIMAL_TO_INT(IntVal, int_val); \ + CAST_DECIMAL_TO_INT(BigIntVal, big_int_val); \ + CAST_DECIMAL_TO_INT(LargeIntVal, large_int_val); \ + CAST_DECIMAL_TO_INT(FloatVal, float_val); \ CAST_DECIMAL_TO_INT(DoubleVal, double_val); CAST_FROM_DECIMAL(); -StringVal DecimalOperators::cast_to_string_val( - FunctionContext* ctx, const DecimalVal& val) { +StringVal DecimalOperators::cast_to_string_val(FunctionContext* ctx, const DecimalVal& val) { if (val.is_null) { return StringVal::null(); } @@ -133,8 +128,8 @@ StringVal DecimalOperators::cast_to_string_val( return AnyValUtil::from_string_temp(ctx, dv.to_string()); } -DateTimeVal DecimalOperators::cast_to_datetime_val( - FunctionContext* context, const DecimalVal& val) { +DateTimeVal DecimalOperators::cast_to_datetime_val(FunctionContext* context, + const DecimalVal& val) { if (val.is_null) { return DateTimeVal::null(); } @@ -148,45 +143,44 @@ DateTimeVal DecimalOperators::cast_to_datetime_val( return result; } -#define DECIMAL_ARITHMETIC_OP(FN_NAME, OP) \ - DecimalVal DecimalOperators::FN_NAME##_decimal_val_decimal_val( \ +#define DECIMAL_ARITHMETIC_OP(FN_NAME, OP) \ + DecimalVal DecimalOperators::FN_NAME##_decimal_val_decimal_val( \ FunctionContext* context, const DecimalVal& v1, const DecimalVal& v2) { \ - if (v1.is_null || v2.is_null) return DecimalVal::null(); \ - DecimalValue iv1 = DecimalValue::from_decimal_val(v1); \ - DecimalValue iv2 = DecimalValue::from_decimal_val(v2); \ - DecimalValue ir = iv1 OP iv2; \ - DecimalVal result;\ - ir.to_decimal_val(&result); \ - return result; \ + if (v1.is_null || v2.is_null) return 
DecimalVal::null(); \ + DecimalValue iv1 = DecimalValue::from_decimal_val(v1); \ + DecimalValue iv2 = DecimalValue::from_decimal_val(v2); \ + DecimalValue ir = iv1 OP iv2; \ + DecimalVal result; \ + ir.to_decimal_val(&result); \ + return result; \ } -#define DECIMAL_ARITHMETIC_OPS() \ - DECIMAL_ARITHMETIC_OP(add, +);\ - DECIMAL_ARITHMETIC_OP(subtract, -);\ - DECIMAL_ARITHMETIC_OP(multiply, *);\ - DECIMAL_ARITHMETIC_OP(divide, /);\ - DECIMAL_ARITHMETIC_OP(mod, %);\ +#define DECIMAL_ARITHMETIC_OPS() \ + DECIMAL_ARITHMETIC_OP(add, +); \ + DECIMAL_ARITHMETIC_OP(subtract, -); \ + DECIMAL_ARITHMETIC_OP(multiply, *); \ + DECIMAL_ARITHMETIC_OP(divide, /); \ + DECIMAL_ARITHMETIC_OP(mod, %); DECIMAL_ARITHMETIC_OPS(); -#define DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(NAME, OP) \ - BooleanVal DecimalOperators::NAME##_decimal_val_decimal_val(\ - FunctionContext* c, const DecimalVal& v1, const DecimalVal& v2) {\ - if (v1.is_null || v2.is_null) return BooleanVal::null();\ - DecimalValue iv1 = DecimalValue::from_decimal_val(v1);\ - DecimalValue iv2 = DecimalValue::from_decimal_val(v2);\ - return BooleanVal(iv1 OP iv2);\ +#define DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(NAME, OP) \ + BooleanVal DecimalOperators::NAME##_decimal_val_decimal_val( \ + FunctionContext* c, const DecimalVal& v1, const DecimalVal& v2) { \ + if (v1.is_null || v2.is_null) return BooleanVal::null(); \ + DecimalValue iv1 = DecimalValue::from_decimal_val(v1); \ + DecimalValue iv2 = DecimalValue::from_decimal_val(v2); \ + return BooleanVal(iv1 OP iv2); \ } -#define BINARY_PREDICATE_NONNUMERIC_FNS() \ +#define BINARY_PREDICATE_NONNUMERIC_FNS() \ DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(eq, ==); \ DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(ne, !=); \ - DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(gt, >); \ - DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(lt, <); \ + DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(gt, >); \ + DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(lt, <); \ DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(ge, >=); \ 
DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(le, <=); BINARY_PREDICATE_NONNUMERIC_FNS(); -} - +} // namespace doris diff --git a/be/src/exprs/decimal_operators.h b/be/src/exprs/decimal_operators.h index a46532ca5d3d6e..3710149217b092 100644 --- a/be/src/exprs/decimal_operators.h +++ b/be/src/exprs/decimal_operators.h @@ -19,6 +19,7 @@ #define DORIS_BE_SRC_QUERY_EXPRS_DECIMAL_OPERATORS_H #include + #include "runtime/decimal_value.h" #include "udf/udf.h" @@ -33,7 +34,7 @@ class TupleRow; class DecimalOperators { public: static void init(); - + static DecimalVal cast_to_decimal_val(FunctionContext*, const TinyIntVal&); static DecimalVal cast_to_decimal_val(FunctionContext*, const SmallIntVal&); static DecimalVal cast_to_decimal_val(FunctionContext*, const IntVal&); @@ -55,31 +56,31 @@ class DecimalOperators { static StringVal cast_to_string_val(FunctionContext*, const DecimalVal&); static DateTimeVal cast_to_datetime_val(FunctionContext*, const DecimalVal&); - static DecimalVal add_decimal_val_decimal_val( - FunctionContext*, const DecimalVal&, const DecimalVal&); - static DecimalVal subtract_decimal_val_decimal_val( - FunctionContext*, const DecimalVal&, const DecimalVal&); - static DecimalVal multiply_decimal_val_decimal_val( - FunctionContext*, const DecimalVal&, const DecimalVal&); - static DecimalVal divide_decimal_val_decimal_val( - FunctionContext*, const DecimalVal&, const DecimalVal&); - static DecimalVal mod_decimal_val_decimal_val( - FunctionContext*, const DecimalVal&, const DecimalVal&); + static DecimalVal add_decimal_val_decimal_val(FunctionContext*, const DecimalVal&, + const DecimalVal&); + static DecimalVal subtract_decimal_val_decimal_val(FunctionContext*, const DecimalVal&, + const DecimalVal&); + static DecimalVal multiply_decimal_val_decimal_val(FunctionContext*, const DecimalVal&, + const DecimalVal&); + static DecimalVal divide_decimal_val_decimal_val(FunctionContext*, const DecimalVal&, + const DecimalVal&); + static DecimalVal 
mod_decimal_val_decimal_val(FunctionContext*, const DecimalVal&, + const DecimalVal&); - static BooleanVal eq_decimal_val_decimal_val( - FunctionContext*, const DecimalVal&, const DecimalVal&); - static BooleanVal ne_decimal_val_decimal_val( - FunctionContext*, const DecimalVal&, const DecimalVal&); - static BooleanVal gt_decimal_val_decimal_val( - FunctionContext*, const DecimalVal&, const DecimalVal&); - static BooleanVal lt_decimal_val_decimal_val( - FunctionContext*, const DecimalVal&, const DecimalVal&); - static BooleanVal ge_decimal_val_decimal_val( - FunctionContext*, const DecimalVal&, const DecimalVal&); - static BooleanVal le_decimal_val_decimal_val( - FunctionContext*, const DecimalVal&, const DecimalVal&); + static BooleanVal eq_decimal_val_decimal_val(FunctionContext*, const DecimalVal&, + const DecimalVal&); + static BooleanVal ne_decimal_val_decimal_val(FunctionContext*, const DecimalVal&, + const DecimalVal&); + static BooleanVal gt_decimal_val_decimal_val(FunctionContext*, const DecimalVal&, + const DecimalVal&); + static BooleanVal lt_decimal_val_decimal_val(FunctionContext*, const DecimalVal&, + const DecimalVal&); + static BooleanVal ge_decimal_val_decimal_val(FunctionContext*, const DecimalVal&, + const DecimalVal&); + static BooleanVal le_decimal_val_decimal_val(FunctionContext*, const DecimalVal&, + const DecimalVal&); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/decimalv2_operators.cpp b/be/src/exprs/decimalv2_operators.cpp index 730171562862cb..d641092dc26d57 100644 --- a/be/src/exprs/decimalv2_operators.cpp +++ b/be/src/exprs/decimalv2_operators.cpp @@ -17,9 +17,10 @@ #include "exprs/decimalv2_operators.h" +#include + #include #include -#include #include "exprs/anyval_util.h" #include "exprs/case_expr.h" @@ -30,30 +31,29 @@ namespace doris { -void DecimalV2Operators::init() { -} +void DecimalV2Operators::init() {} -#define CAST_INT_TO_DECIMAL(from_type) \ - DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val( \ - 
FunctionContext* context, const from_type& val) { \ - if (val.is_null) return DecimalV2Val::null(); \ - DecimalV2Value dv(val.val, 0);\ - DecimalV2Val result;\ - dv.to_decimal_val(&result);\ - return result;\ +#define CAST_INT_TO_DECIMAL(from_type) \ + DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context, \ + const from_type& val) { \ + if (val.is_null) return DecimalV2Val::null(); \ + DecimalV2Value dv(val.val, 0); \ + DecimalV2Val result; \ + dv.to_decimal_val(&result); \ + return result; \ } -#define CAST_INT_TO_DECIMALS() \ - CAST_INT_TO_DECIMAL(TinyIntVal);\ - CAST_INT_TO_DECIMAL(SmallIntVal);\ - CAST_INT_TO_DECIMAL(IntVal);\ - CAST_INT_TO_DECIMAL(BigIntVal);\ - CAST_INT_TO_DECIMAL(LargeIntVal);\ +#define CAST_INT_TO_DECIMALS() \ + CAST_INT_TO_DECIMAL(TinyIntVal); \ + CAST_INT_TO_DECIMAL(SmallIntVal); \ + CAST_INT_TO_DECIMAL(IntVal); \ + CAST_INT_TO_DECIMAL(BigIntVal); \ + CAST_INT_TO_DECIMAL(LargeIntVal); CAST_INT_TO_DECIMALS(); -DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val( - FunctionContext* context, const FloatVal& val) { +DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context, + const FloatVal& val) { if (val.is_null) { return DecimalV2Val::null(); } @@ -64,8 +64,8 @@ DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val( return result; } -DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val( - FunctionContext* context, const DoubleVal& val) { +DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context, + const DoubleVal& val) { if (val.is_null) { return DecimalV2Val::null(); } @@ -76,8 +76,8 @@ DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val( return result; } -DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val( - FunctionContext* context, const DateTimeVal& val) { +DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context, + const DateTimeVal& val) { if (val.is_null) { return DecimalV2Val::null(); } @@ -89,8 +89,8 @@ DecimalV2Val 
DecimalV2Operators::cast_to_decimalv2_val( return result; } -DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val( - FunctionContext* context, const StringVal& val) { +DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context, + const StringVal& val) { if (val.is_null) { return DecimalV2Val::null(); } @@ -103,28 +103,27 @@ DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val( return result; } -#define CAST_DECIMAL_TO_INT(to_type, type_name) \ - to_type DecimalV2Operators::cast_to_##type_name( \ - FunctionContext* context, const DecimalV2Val& val) { \ - if (val.is_null) return to_type::null(); \ - DecimalV2Value dv = DecimalV2Value::from_decimal_val(val); \ - return to_type(dv);\ +#define CAST_DECIMAL_TO_INT(to_type, type_name) \ + to_type DecimalV2Operators::cast_to_##type_name(FunctionContext* context, \ + const DecimalV2Val& val) { \ + if (val.is_null) return to_type::null(); \ + DecimalV2Value dv = DecimalV2Value::from_decimal_val(val); \ + return to_type(dv); \ } -#define CAST_FROM_DECIMAL() \ - CAST_DECIMAL_TO_INT(BooleanVal, boolean_val);\ - CAST_DECIMAL_TO_INT(TinyIntVal, tiny_int_val);\ - CAST_DECIMAL_TO_INT(SmallIntVal, small_int_val);\ - CAST_DECIMAL_TO_INT(IntVal, int_val);\ - CAST_DECIMAL_TO_INT(BigIntVal, big_int_val);\ - CAST_DECIMAL_TO_INT(LargeIntVal, large_int_val);\ - CAST_DECIMAL_TO_INT(FloatVal, float_val);\ +#define CAST_FROM_DECIMAL() \ + CAST_DECIMAL_TO_INT(BooleanVal, boolean_val); \ + CAST_DECIMAL_TO_INT(TinyIntVal, tiny_int_val); \ + CAST_DECIMAL_TO_INT(SmallIntVal, small_int_val); \ + CAST_DECIMAL_TO_INT(IntVal, int_val); \ + CAST_DECIMAL_TO_INT(BigIntVal, big_int_val); \ + CAST_DECIMAL_TO_INT(LargeIntVal, large_int_val); \ + CAST_DECIMAL_TO_INT(FloatVal, float_val); \ CAST_DECIMAL_TO_INT(DoubleVal, double_val); CAST_FROM_DECIMAL(); -StringVal DecimalV2Operators::cast_to_string_val( - FunctionContext* ctx, const DecimalV2Val& val) { +StringVal DecimalV2Operators::cast_to_string_val(FunctionContext* ctx, const 
DecimalV2Val& val) { if (val.is_null) { return StringVal::null(); } @@ -132,8 +131,8 @@ StringVal DecimalV2Operators::cast_to_string_val( return AnyValUtil::from_string_temp(ctx, dv.to_string()); } -DateTimeVal DecimalV2Operators::cast_to_datetime_val( - FunctionContext* context, const DecimalV2Val& val) { +DateTimeVal DecimalV2Operators::cast_to_datetime_val(FunctionContext* context, + const DecimalV2Val& val) { if (val.is_null) { return DateTimeVal::null(); } @@ -147,8 +146,8 @@ DateTimeVal DecimalV2Operators::cast_to_datetime_val( return result; } -DateTimeVal DecimalV2Operators::cast_to_date_val( - FunctionContext* context, const DecimalV2Val& val) { +DateTimeVal DecimalV2Operators::cast_to_date_val(FunctionContext* context, + const DecimalV2Val& val) { if (val.is_null) { return DateTimeVal::null(); } @@ -165,8 +164,8 @@ DateTimeVal DecimalV2Operators::cast_to_date_val( return result; } -DecimalVal DecimalV2Operators::cast_to_decimal_val( - FunctionContext* context, const DecimalV2Val& val) { +DecimalVal DecimalV2Operators::cast_to_decimal_val(FunctionContext* context, + const DecimalV2Val& val) { if (val.is_null) return DecimalVal::null(); DecimalV2Value v2(val.val); DecimalValue dv(v2.int_value(), v2.frac_value()); @@ -175,45 +174,44 @@ DecimalVal DecimalV2Operators::cast_to_decimal_val( return result; } -#define DECIMAL_ARITHMETIC_OP(FN_NAME, OP) \ - DecimalV2Val DecimalV2Operators::FN_NAME##_decimalv2_val_decimalv2_val( \ +#define DECIMAL_ARITHMETIC_OP(FN_NAME, OP) \ + DecimalV2Val DecimalV2Operators::FN_NAME##_decimalv2_val_decimalv2_val( \ FunctionContext* context, const DecimalV2Val& v1, const DecimalV2Val& v2) { \ - if (v1.is_null || v2.is_null) return DecimalV2Val::null(); \ - DecimalV2Value iv1 = DecimalV2Value::from_decimal_val(v1); \ - DecimalV2Value iv2 = DecimalV2Value::from_decimal_val(v2); \ - DecimalV2Value ir = iv1 OP iv2; \ - DecimalV2Val result;\ - ir.to_decimal_val(&result); \ - return result; \ + if (v1.is_null || v2.is_null) return 
DecimalV2Val::null(); \ + DecimalV2Value iv1 = DecimalV2Value::from_decimal_val(v1); \ + DecimalV2Value iv2 = DecimalV2Value::from_decimal_val(v2); \ + DecimalV2Value ir = iv1 OP iv2; \ + DecimalV2Val result; \ + ir.to_decimal_val(&result); \ + return result; \ } -#define DECIMAL_ARITHMETIC_OPS() \ - DECIMAL_ARITHMETIC_OP(add, +);\ - DECIMAL_ARITHMETIC_OP(subtract, -);\ - DECIMAL_ARITHMETIC_OP(multiply, *);\ - DECIMAL_ARITHMETIC_OP(divide, /);\ - DECIMAL_ARITHMETIC_OP(mod, %);\ +#define DECIMAL_ARITHMETIC_OPS() \ + DECIMAL_ARITHMETIC_OP(add, +); \ + DECIMAL_ARITHMETIC_OP(subtract, -); \ + DECIMAL_ARITHMETIC_OP(multiply, *); \ + DECIMAL_ARITHMETIC_OP(divide, /); \ + DECIMAL_ARITHMETIC_OP(mod, %); DECIMAL_ARITHMETIC_OPS(); -#define DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(NAME, OP) \ - BooleanVal DecimalV2Operators::NAME##_decimalv2_val_decimalv2_val(\ - FunctionContext* c, const DecimalV2Val& v1, const DecimalV2Val& v2) {\ - if (v1.is_null || v2.is_null) return BooleanVal::null();\ - DecimalV2Value iv1 = DecimalV2Value::from_decimal_val(v1);\ - DecimalV2Value iv2 = DecimalV2Value::from_decimal_val(v2);\ - return BooleanVal(iv1 OP iv2);\ +#define DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(NAME, OP) \ + BooleanVal DecimalV2Operators::NAME##_decimalv2_val_decimalv2_val( \ + FunctionContext* c, const DecimalV2Val& v1, const DecimalV2Val& v2) { \ + if (v1.is_null || v2.is_null) return BooleanVal::null(); \ + DecimalV2Value iv1 = DecimalV2Value::from_decimal_val(v1); \ + DecimalV2Value iv2 = DecimalV2Value::from_decimal_val(v2); \ + return BooleanVal(iv1 OP iv2); \ } -#define BINARY_PREDICATE_NONNUMERIC_FNS() \ +#define BINARY_PREDICATE_NONNUMERIC_FNS() \ DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(eq, ==); \ DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(ne, !=); \ - DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(gt, >); \ - DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(lt, <); \ + DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(gt, >); \ + DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(lt, <); \ 
DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(ge, >=); \ DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(le, <=); BINARY_PREDICATE_NONNUMERIC_FNS(); -} - +} // namespace doris diff --git a/be/src/exprs/decimalv2_operators.h b/be/src/exprs/decimalv2_operators.h index 06d8feed3231d1..deb3bab0559890 100644 --- a/be/src/exprs/decimalv2_operators.h +++ b/be/src/exprs/decimalv2_operators.h @@ -19,6 +19,7 @@ #define DORIS_BE_SRC_EXPRS_DECIMAL_OPERATORS_H #include + #include "runtime/decimalv2_value.h" #include "udf/udf.h" @@ -33,7 +34,7 @@ class TupleRow; class DecimalV2Operators { public: static void init(); - + static DecimalV2Val cast_to_decimalv2_val(FunctionContext*, const TinyIntVal&); static DecimalV2Val cast_to_decimalv2_val(FunctionContext*, const SmallIntVal&); static DecimalV2Val cast_to_decimalv2_val(FunctionContext*, const IntVal&); @@ -57,31 +58,31 @@ class DecimalV2Operators { static DateTimeVal cast_to_date_val(FunctionContext*, const DecimalV2Val&); static DecimalVal cast_to_decimal_val(FunctionContext*, const DecimalV2Val&); - static DecimalV2Val add_decimalv2_val_decimalv2_val( - FunctionContext*, const DecimalV2Val&, const DecimalV2Val&); - static DecimalV2Val subtract_decimalv2_val_decimalv2_val( - FunctionContext*, const DecimalV2Val&, const DecimalV2Val&); - static DecimalV2Val multiply_decimalv2_val_decimalv2_val( - FunctionContext*, const DecimalV2Val&, const DecimalV2Val&); - static DecimalV2Val divide_decimalv2_val_decimalv2_val( - FunctionContext*, const DecimalV2Val&, const DecimalV2Val&); - static DecimalV2Val mod_decimalv2_val_decimalv2_val( - FunctionContext*, const DecimalV2Val&, const DecimalV2Val&); + static DecimalV2Val add_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, + const DecimalV2Val&); + static DecimalV2Val subtract_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, + const DecimalV2Val&); + static DecimalV2Val multiply_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, + const DecimalV2Val&); 
+ static DecimalV2Val divide_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, + const DecimalV2Val&); + static DecimalV2Val mod_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, + const DecimalV2Val&); - static BooleanVal eq_decimalv2_val_decimalv2_val( - FunctionContext*, const DecimalV2Val&, const DecimalV2Val&); - static BooleanVal ne_decimalv2_val_decimalv2_val( - FunctionContext*, const DecimalV2Val&, const DecimalV2Val&); - static BooleanVal gt_decimalv2_val_decimalv2_val( - FunctionContext*, const DecimalV2Val&, const DecimalV2Val&); - static BooleanVal lt_decimalv2_val_decimalv2_val( - FunctionContext*, const DecimalV2Val&, const DecimalV2Val&); - static BooleanVal ge_decimalv2_val_decimalv2_val( - FunctionContext*, const DecimalV2Val&, const DecimalV2Val&); - static BooleanVal le_decimalv2_val_decimalv2_val( - FunctionContext*, const DecimalV2Val&, const DecimalV2Val&); + static BooleanVal eq_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, + const DecimalV2Val&); + static BooleanVal ne_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, + const DecimalV2Val&); + static BooleanVal gt_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, + const DecimalV2Val&); + static BooleanVal lt_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, + const DecimalV2Val&); + static BooleanVal ge_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, + const DecimalV2Val&); + static BooleanVal le_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, + const DecimalV2Val&); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/encryption_functions.cpp b/be/src/exprs/encryption_functions.cpp index d3ba87fdbb4473..29c14d7d0406a4 100644 --- a/be/src/exprs/encryption_functions.cpp +++ b/be/src/exprs/encryption_functions.cpp @@ -17,22 +17,22 @@ #include "exprs/encryption_functions.h" -#include "util/aes_util.h" -#include "util/md5.h" +#include + 
#include "exprs/anyval_util.h" #include "exprs/expr.h" -#include "util/debug_util.h" +#include "runtime/string_value.h" #include "runtime/tuple_row.h" +#include "util/aes_util.h" +#include "util/debug_util.h" +#include "util/md5.h" #include "util/url_coding.h" -#include -#include "runtime/string_value.h" namespace doris { -void EncryptionFunctions::init() { -} +void EncryptionFunctions::init() {} -StringVal EncryptionFunctions::aes_encrypt(FunctionContext* ctx, - const StringVal &src, const StringVal &key) { +StringVal EncryptionFunctions::aes_encrypt(FunctionContext* ctx, const StringVal& src, + const StringVal& key) { if (src.len == 0) { return StringVal::null(); } @@ -42,16 +42,17 @@ StringVal EncryptionFunctions::aes_encrypt(FunctionContext* ctx, boost::scoped_array p; p.reset(new char[cipher_len]); - int ret_code = AesUtil::encrypt(AES_128_ECB, (unsigned char*)src.ptr, src.len, - (unsigned char*)key.ptr, key.len, NULL, true, (unsigned char*)p.get()); + int ret_code = + AesUtil::encrypt(AES_128_ECB, (unsigned char*)src.ptr, src.len, (unsigned char*)key.ptr, + key.len, NULL, true, (unsigned char*)p.get()); if (ret_code < 0) { return StringVal::null(); } return AnyValUtil::from_buffer_temp(ctx, p.get(), ret_code); } -StringVal EncryptionFunctions::aes_decrypt(FunctionContext* ctx, - const StringVal &src, const StringVal &key) { +StringVal EncryptionFunctions::aes_decrypt(FunctionContext* ctx, const StringVal& src, + const StringVal& key) { if (src.len == 0) { return StringVal::null(); } @@ -60,15 +61,16 @@ StringVal EncryptionFunctions::aes_decrypt(FunctionContext* ctx, boost::scoped_array p; p.reset(new char[cipher_len]); - int ret_code = AesUtil::decrypt(AES_128_ECB, (unsigned char*)src.ptr, src.len, - (unsigned char*)key.ptr, key.len, NULL, true, (unsigned char*)p.get()); + int ret_code = + AesUtil::decrypt(AES_128_ECB, (unsigned char*)src.ptr, src.len, (unsigned char*)key.ptr, + key.len, NULL, true, (unsigned char*)p.get()); if (ret_code < 0) { return 
StringVal::null(); } return AnyValUtil::from_buffer_temp(ctx, p.get(), ret_code); } -StringVal EncryptionFunctions::from_base64(FunctionContext* ctx, const StringVal &src) { +StringVal EncryptionFunctions::from_base64(FunctionContext* ctx, const StringVal& src) { if (src.len == 0 || src.is_null) { return StringVal::null(); } @@ -77,31 +79,30 @@ StringVal EncryptionFunctions::from_base64(FunctionContext* ctx, const StringVal boost::scoped_array p; p.reset(new char[cipher_len]); - int ret_code = base64_decode((const char *)src.ptr, src.len, p.get()); + int ret_code = base64_decode((const char*)src.ptr, src.len, p.get()); if (ret_code < 0) { return StringVal::null(); } return AnyValUtil::from_buffer_temp(ctx, p.get(), ret_code); } -StringVal EncryptionFunctions::to_base64(FunctionContext* ctx, const StringVal &src) { +StringVal EncryptionFunctions::to_base64(FunctionContext* ctx, const StringVal& src) { if (src.len == 0 || src.is_null) { return StringVal::null(); } - int cipher_len = (size_t) (4.0 * ceil((double) src.len / 3.0)); + int cipher_len = (size_t)(4.0 * ceil((double)src.len / 3.0)); boost::scoped_array p; p.reset(new char[cipher_len]); - int ret_code = base64_encode((unsigned char *)src.ptr, src.len, (unsigned char *)p.get()); + int ret_code = base64_encode((unsigned char*)src.ptr, src.len, (unsigned char*)p.get()); if (ret_code < 0) { return StringVal::null(); } return AnyValUtil::from_buffer_temp(ctx, p.get(), ret_code); } -StringVal EncryptionFunctions::md5sum( - FunctionContext* ctx, int num_args, const StringVal* args) { +StringVal EncryptionFunctions::md5sum(FunctionContext* ctx, int num_args, const StringVal* args) { Md5Digest digest; for (int i = 0; i < num_args; ++i) { const StringVal& arg = args[i]; @@ -124,4 +125,4 @@ StringVal EncryptionFunctions::md5(FunctionContext* ctx, const StringVal& src) { return AnyValUtil::from_buffer_temp(ctx, digest.hex().c_str(), digest.hex().size()); } -} +} // namespace doris diff --git 
a/be/src/exprs/encryption_functions.h b/be/src/exprs/encryption_functions.h index 67d83c859d2660..853ec97f565add 100644 --- a/be/src/exprs/encryption_functions.h +++ b/be/src/exprs/encryption_functions.h @@ -19,6 +19,7 @@ #define DORIS_BE_SRC_QUERY_EXPRS_ENCRYPTION_FUNCTIONS_H #include + #include "udf/udf.h" #include "udf/udf_internal.h" @@ -32,19 +33,21 @@ class EncryptionFunctions { public: static void init(); static doris_udf::StringVal aes_encrypt(doris_udf::FunctionContext* context, - const doris_udf::StringVal& val1, const doris_udf::StringVal& val2); + const doris_udf::StringVal& val1, + const doris_udf::StringVal& val2); static doris_udf::StringVal aes_decrypt(doris_udf::FunctionContext* context, - const doris_udf::StringVal& val1, const doris_udf::StringVal& val2); + const doris_udf::StringVal& val1, + const doris_udf::StringVal& val2); static doris_udf::StringVal from_base64(doris_udf::FunctionContext* context, - const doris_udf::StringVal& val1); + const doris_udf::StringVal& val1); static doris_udf::StringVal to_base64(doris_udf::FunctionContext* context, - const doris_udf::StringVal& val1); - static doris_udf::StringVal md5sum(doris_udf::FunctionContext* ctx, - int num_args, const doris_udf::StringVal* args); - static doris_udf::StringVal md5(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& src); + const doris_udf::StringVal& val1); + static doris_udf::StringVal md5sum(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::StringVal* args); + static doris_udf::StringVal md5(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& src); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/es_functions.cpp b/be/src/exprs/es_functions.cpp index deda38ebb494d3..06ca7a009db0c6 100644 --- a/be/src/exprs/es_functions.cpp +++ b/be/src/exprs/es_functions.cpp @@ -17,19 +17,18 @@ #include "exprs/es_functions.h" -#include "exprs/expr.h" #include "exprs/anyval_util.h" -#include "util/debug_util.h" +#include "exprs/expr.h" 
#include "runtime/tuple_row.h" +#include "util/debug_util.h" namespace doris { -void ESFunctions::init() { -} +void ESFunctions::init() {} -BooleanVal ESFunctions::match(FunctionContext* ctx, const StringVal& col, - const StringVal& condition) { +BooleanVal ESFunctions::match(FunctionContext* ctx, const StringVal& col, + const StringVal& condition) { return BooleanVal(true); } -} // doris +} // namespace doris diff --git a/be/src/exprs/es_functions.h b/be/src/exprs/es_functions.h index a5e96ba9b94616..600e539a9bf940 100644 --- a/be/src/exprs/es_functions.h +++ b/be/src/exprs/es_functions.h @@ -31,12 +31,11 @@ class ESFunctions { static void init(); // used to push down query conditions to es. - static doris_udf::BooleanVal match( - doris_udf::FunctionContext* ctx, const doris_udf::StringVal& col, - const doris_udf::StringVal& condition); - + static doris_udf::BooleanVal match(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& col, + const doris_udf::StringVal& condition); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/expr.cpp b/be/src/exprs/expr.cpp index c7661c2b8fbe1e..3e4503ad1506f2 100644 --- a/be/src/exprs/expr.cpp +++ b/be/src/exprs/expr.cpp @@ -17,46 +17,43 @@ #include "exprs/expr.h" +#include + #include #include -#include #include "common/object_pool.h" #include "common/status.h" +#include "exprs/aggregate_functions.h" #include "exprs/anyval_util.h" -#include "exprs/literal.h" +#include "exprs/arithmetic_expr.h" #include "exprs/binary_predicate.h" #include "exprs/case_expr.h" #include "exprs/cast_expr.h" #include "exprs/compound_predicate.h" #include "exprs/conditional_functions.h" #include "exprs/in_predicate.h" -#include "exprs/arithmetic_expr.h" +#include "exprs/info_func.h" #include "exprs/is_null_predicate.h" +#include "exprs/literal.h" #include "exprs/null_literal.h" -#include "exprs/info_func.h" #include "exprs/scalar_fn_call.h" -#include "exprs/tuple_is_null_predicate.h" -#include "exprs/slot_ref.h" -#include 
"exprs/aggregate_functions.h" #include "exprs/slot_ref.h" -#include "exprs/aggregate_functions.h" -#include "gen_cpp/Exprs_types.h" +#include "exprs/tuple_is_null_predicate.h" #include "gen_cpp/Data_types.h" -#include "runtime/runtime_state.h" +#include "gen_cpp/Exprs_types.h" +#include "gen_cpp/PaloService_types.h" #include "runtime/raw_value.h" +#include "runtime/runtime_state.h" #include "runtime/user_function_cache.h" #include "util/debug_util.h" -#include "gen_cpp/Exprs_types.h" -#include "gen_cpp/PaloService_types.h" - using std::vector; namespace doris { const char* Expr::_s_get_constant_symbol_prefix = "_ZN4doris4Expr12get_constant"; -template +template bool parse_string(const std::string& str, T* val) { std::stringstream stream(str); stream >> *val; @@ -70,8 +67,8 @@ void init_builtins_dummy() { AggregateFunctions::init_null(NULL, NULL); } -FunctionContext* Expr::register_function_context( - ExprContext* ctx, RuntimeState* state, int varargs_buffer_size) { +FunctionContext* Expr::register_function_context(ExprContext* ctx, RuntimeState* state, + int varargs_buffer_size) { FunctionContext::TypeDesc return_type = AnyValUtil::column_type_to_type_desc(_type); std::vector arg_types; for (int i = 0; i < _children.size(); ++i) { @@ -82,28 +79,27 @@ FunctionContext* Expr::register_function_context( } // No children here -Expr::Expr(const Expr& expr) +Expr::Expr(const Expr& expr) : _cache_entry(expr._cache_entry), - _node_type(expr._node_type), - _opcode(expr._opcode), - _is_slotref(expr._is_slotref), - _type(expr._type), - _output_scale(expr._output_scale), - _output_column(expr._output_column), - _fn(expr._fn), - _fn_context_index(expr._fn_context_index), - _constant_val(expr._constant_val), - _vector_compute_fn(expr._vector_compute_fn) { -} - -Expr::Expr(const TypeDescriptor& type) : - _opcode(TExprOpcode::INVALID_OPCODE), - // _vector_opcode(TExprOpcode::INVALID_OPCODE), - _is_slotref(false), - _type(type), - _output_scale(-1), - _output_column(-1), - 
_fn_context_index(-1) { + _node_type(expr._node_type), + _opcode(expr._opcode), + _is_slotref(expr._is_slotref), + _type(expr._type), + _output_scale(expr._output_scale), + _output_column(expr._output_column), + _fn(expr._fn), + _fn_context_index(expr._fn_context_index), + _constant_val(expr._constant_val), + _vector_compute_fn(expr._vector_compute_fn) {} + +Expr::Expr(const TypeDescriptor& type) + : _opcode(TExprOpcode::INVALID_OPCODE), + // _vector_opcode(TExprOpcode::INVALID_OPCODE), + _is_slotref(false), + _type(type), + _output_scale(-1), + _output_column(-1), + _fn_context_index(-1) { switch (_type.type) { case TYPE_BOOLEAN: _node_type = (TExprNodeType::BOOL_LITERAL); @@ -152,14 +148,14 @@ Expr::Expr(const TypeDescriptor& type) : } } -Expr::Expr(const TypeDescriptor& type, bool is_slotref) : - _opcode(TExprOpcode::INVALID_OPCODE), - // _vector_opcode(TExprOpcode::INVALID_OPCODE), - _is_slotref(is_slotref), - _type(type), - _output_scale(-1), - _output_column(-1), - _fn_context_index(-1) { +Expr::Expr(const TypeDescriptor& type, bool is_slotref) + : _opcode(TExprOpcode::INVALID_OPCODE), + // _vector_opcode(TExprOpcode::INVALID_OPCODE), + _is_slotref(is_slotref), + _type(type), + _output_scale(-1), + _output_column(-1), + _fn_context_index(-1) { if (is_slotref) { _node_type = (TExprNodeType::SLOT_REF); } else { @@ -211,38 +207,37 @@ Expr::Expr(const TypeDescriptor& type, bool is_slotref) : } } -Expr::Expr(const TExprNode& node) : - _node_type(node.node_type), - _opcode(node.__isset.opcode ? node.opcode : TExprOpcode::INVALID_OPCODE), - // _vector_opcode( - // node.__isset.vector_opcode ? node.vector_opcode : TExprOpcode::INVALID_OPCODE), - _is_slotref(false), - _type(TypeDescriptor::from_thrift(node.type)), - _output_scale(node.output_scale), - _output_column(node.__isset.output_column ? node.output_column : -1), - _fn_context_index(-1) { +Expr::Expr(const TExprNode& node) + : _node_type(node.node_type), + _opcode(node.__isset.opcode ? 
node.opcode : TExprOpcode::INVALID_OPCODE), + // _vector_opcode( + // node.__isset.vector_opcode ? node.vector_opcode : TExprOpcode::INVALID_OPCODE), + _is_slotref(false), + _type(TypeDescriptor::from_thrift(node.type)), + _output_scale(node.output_scale), + _output_column(node.__isset.output_column ? node.output_column : -1), + _fn_context_index(-1) { if (node.__isset.fn) { _fn = node.fn; } } -Expr::Expr(const TExprNode& node, bool is_slotref) : - _node_type(node.node_type), - _opcode(node.__isset.opcode ? node.opcode : TExprOpcode::INVALID_OPCODE), - // _vector_opcode( - // node.__isset.vector_opcode ? node.vector_opcode : TExprOpcode::INVALID_OPCODE), - _is_slotref(is_slotref), - _type(TypeDescriptor::from_thrift(node.type)), - _output_scale(node.output_scale), - _output_column(node.__isset.output_column ? node.output_column : -1), - _fn_context_index(-1) { +Expr::Expr(const TExprNode& node, bool is_slotref) + : _node_type(node.node_type), + _opcode(node.__isset.opcode ? node.opcode : TExprOpcode::INVALID_OPCODE), + // _vector_opcode( + // node.__isset.vector_opcode ? node.vector_opcode : TExprOpcode::INVALID_OPCODE), + _is_slotref(is_slotref), + _type(TypeDescriptor::from_thrift(node.type)), + _output_scale(node.output_scale), + _output_column(node.__isset.output_column ? node.output_column : -1), + _fn_context_index(-1) { if (node.__isset.fn) { _fn = node.fn; } } -Expr::~Expr() { -} +Expr::~Expr() {} Status Expr::create_expr_tree(ObjectPool* pool, const TExpr& texpr, ExprContext** ctx) { // input is empty @@ -255,19 +250,18 @@ Status Expr::create_expr_tree(ObjectPool* pool, const TExpr& texpr, ExprContext* Status status = create_tree_from_thrift(pool, texpr.nodes, NULL, &node_idx, &e, ctx); if (status.ok() && node_idx + 1 != texpr.nodes.size()) { status = Status::InternalError( - "Expression tree only partially reconstructed. Not all thrift nodes were used."); + "Expression tree only partially reconstructed. 
Not all thrift nodes were used."); } if (!status.ok()) { - LOG(ERROR) << "Could not construct expr tree.\n" << status.get_error_msg() << "\n" - << apache::thrift::ThriftDebugString(texpr); + LOG(ERROR) << "Could not construct expr tree.\n" + << status.get_error_msg() << "\n" + << apache::thrift::ThriftDebugString(texpr); } return status; } -Status Expr::create_expr_trees( - ObjectPool* pool, - const std::vector& texprs, - std::vector* ctxs) { +Status Expr::create_expr_trees(ObjectPool* pool, const std::vector& texprs, + std::vector* ctxs) { ctxs->clear(); for (int i = 0; i < texprs.size(); ++i) { ExprContext* ctx = nullptr; @@ -277,13 +271,9 @@ Status Expr::create_expr_trees( return Status::OK(); } -Status Expr::create_tree_from_thrift( - ObjectPool* pool, - const std::vector& nodes, - Expr* parent, - int* node_idx, - Expr** root_expr, - ExprContext** ctx) { +Status Expr::create_tree_from_thrift(ObjectPool* pool, const std::vector& nodes, + Expr* parent, int* node_idx, Expr** root_expr, + ExprContext** ctx) { // propagate error case if (*node_idx >= nodes.size()) { return Status::InternalError("Failed to reconstruct expression tree from thrift."); @@ -451,10 +441,8 @@ struct MemLayoutData { } }; -int Expr::compute_results_layout( - const std::vector& exprs, - std::vector* offsets, - int* var_result_begin) { +int Expr::compute_results_layout(const std::vector& exprs, std::vector* offsets, + int* var_result_begin) { if (exprs.size() == 0) { *var_result_begin = -1; return 0; @@ -467,16 +455,15 @@ int Expr::compute_results_layout( for (int i = 0; i < exprs.size(); ++i) { data[i].expr_idx = i; - if (exprs[i]->type().type == TYPE_CHAR - || exprs[i]->type().type == TYPE_VARCHAR) { + if (exprs[i]->type().type == TYPE_CHAR || exprs[i]->type().type == TYPE_VARCHAR) { data[i].byte_size = 16; data[i].variable_length = true; } else if (exprs[i]->type().type == TYPE_DECIMAL) { data[i].byte_size = get_byte_size(exprs[i]->type().type); - // Although the current decimal has a 
fix-length, for the + // Although the current decimal has a fix-length, for the // same value, it will work out different hash value due to the - // different memory represent if the variable_length here is set + // different memory represent if the variable_length here is set // to false, so we have to keep it. data[i].variable_length = true; } else { @@ -524,10 +511,8 @@ int Expr::compute_results_layout( return byte_offset; } -int Expr::compute_results_layout( - const std::vector& ctxs, - std::vector* offsets, - int* var_result_begin) { +int Expr::compute_results_layout(const std::vector& ctxs, std::vector* offsets, + int* var_result_begin) { std::vector exprs; for (int i = 0; i < ctxs.size(); ++i) { exprs.push_back(ctxs[i]->root()); @@ -535,19 +520,15 @@ int Expr::compute_results_layout( return compute_results_layout(exprs, offsets, var_result_begin); } -Status Expr::prepare( - const std::vector& ctxs, - RuntimeState* state, - const RowDescriptor& row_desc, - const std::shared_ptr& tracker) { +Status Expr::prepare(const std::vector& ctxs, RuntimeState* state, + const RowDescriptor& row_desc, const std::shared_ptr& tracker) { for (int i = 0; i < ctxs.size(); ++i) { RETURN_IF_ERROR(ctxs[i]->prepare(state, row_desc, tracker)); } return Status::OK(); } -Status Expr::prepare(RuntimeState* state, const RowDescriptor& row_desc, - ExprContext* context) { +Status Expr::prepare(RuntimeState* state, const RowDescriptor& row_desc, ExprContext* context) { DCHECK(_type.type != INVALID_TYPE); for (int i = 0; i < _children.size(); ++i) { RETURN_IF_ERROR(_children[i]->prepare(state, row_desc, context)); @@ -562,10 +543,8 @@ Status Expr::open(const std::vector& ctxs, RuntimeState* state) { return Status::OK(); } -Status Expr::open( - RuntimeState* state, - ExprContext* context, - FunctionContext::FunctionStateScope scope) { +Status Expr::open(RuntimeState* state, ExprContext* context, + FunctionContext::FunctionStateScope scope) { DCHECK(_type.type != INVALID_TYPE); for (int i = 
0; i < _children.size(); ++i) { RETURN_IF_ERROR(_children[i]->open(state, context, scope)); @@ -579,10 +558,8 @@ void Expr::close(const std::vector& ctxs, RuntimeState* state) { } } -void Expr::close( - RuntimeState* state, - ExprContext* context, - FunctionContext::FunctionStateScope scope) { +void Expr::close(RuntimeState* state, ExprContext* context, + FunctionContext::FunctionStateScope scope) { for (int i = 0; i < _children.size(); ++i) { _children[i]->close(state, context, scope); } @@ -598,10 +575,8 @@ void Expr::close( #endif } -Status Expr::clone_if_not_exists( - const std::vector& ctxs, - RuntimeState* state, - std::vector* new_ctxs) { +Status Expr::clone_if_not_exists(const std::vector& ctxs, RuntimeState* state, + std::vector* new_ctxs) { DCHECK(new_ctxs != NULL); if (!new_ctxs->empty()) { // 'ctxs' was already cloned into '*new_ctxs', nothing to do. @@ -627,7 +602,8 @@ std::string Expr::debug_string() const { out << " opcode=" << _opcode; } - out << " codegen=" << "false"; + out << " codegen=" + << "false"; if (!_children.empty()) { out << " children=" << debug_string(_children); @@ -859,13 +835,13 @@ Expr* Expr::copy(ObjectPool* pool, Expr* old_expr) { } void Expr::assign_fn_ctx_idx(int* next_fn_ctx_idx) { - _fn_ctx_idx_start = *next_fn_ctx_idx; - if (has_fn_ctx()) { - _fn_ctx_idx = *next_fn_ctx_idx; - ++(*next_fn_ctx_idx); - } - for (Expr* child : children()) child->assign_fn_ctx_idx(next_fn_ctx_idx); - _fn_ctx_idx_end = *next_fn_ctx_idx; + _fn_ctx_idx_start = *next_fn_ctx_idx; + if (has_fn_ctx()) { + _fn_ctx_idx = *next_fn_ctx_idx; + ++(*next_fn_ctx_idx); + } + for (Expr* child : children()) child->assign_fn_ctx_idx(next_fn_ctx_idx); + _fn_ctx_idx_end = *next_fn_ctx_idx; } Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeState* state, @@ -874,89 +850,92 @@ Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeSt *scalar_expr = nullptr; Expr* root; RETURN_IF_ERROR(create_expr(pool, texpr.nodes[0], 
&root)); - RETURN_IF_ERROR(create_tree(texpr, pool, root)); - // TODO pengyubing replace by Init() - ExprContext* ctx = pool->add(new ExprContext(root)); - // TODO chenhao check node type in ScalarExpr Init() - Status status = Status::OK(); - if (texpr.nodes[0].node_type != TExprNodeType::CASE_EXPR) { - status = root->prepare(state, row_desc, ctx); - } - if (UNLIKELY(!status.ok())) { - root->close(); - return status; - } - int fn_ctx_idx = 0; - root->assign_fn_ctx_idx(&fn_ctx_idx); - *scalar_expr = root; - return Status::OK(); + RETURN_IF_ERROR(create_tree(texpr, pool, root)); + // TODO pengyubing replace by Init() + ExprContext* ctx = pool->add(new ExprContext(root)); + // TODO chenhao check node type in ScalarExpr Init() + Status status = Status::OK(); + if (texpr.nodes[0].node_type != TExprNodeType::CASE_EXPR) { + status = root->prepare(state, row_desc, ctx); + } + if (UNLIKELY(!status.ok())) { + root->close(); + return status; + } + int fn_ctx_idx = 0; + root->assign_fn_ctx_idx(&fn_ctx_idx); + *scalar_expr = root; + return Status::OK(); } -Status Expr::create(const std::vector& texprs, const RowDescriptor& row_desc, RuntimeState* state, - ObjectPool* pool, std::vector* exprs, +Status Expr::create(const std::vector& texprs, const RowDescriptor& row_desc, + RuntimeState* state, ObjectPool* pool, std::vector* exprs, const std::shared_ptr& tracker) { exprs->clear(); - for (const TExpr& texpr: texprs) { - Expr* expr; - RETURN_IF_ERROR(create(texpr, row_desc, state, pool, &expr, tracker)); - DCHECK(expr != nullptr); - exprs->push_back(expr); - } - return Status::OK(); + for (const TExpr& texpr : texprs) { + Expr* expr; + RETURN_IF_ERROR(create(texpr, row_desc, state, pool, &expr, tracker)); + DCHECK(expr != nullptr); + exprs->push_back(expr); + } + return Status::OK(); } -Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc, - RuntimeState* state, Expr** scalar_expr, const std::shared_ptr& tracker) { - return Expr::create(texpr, row_desc, state, 
state->obj_pool(), scalar_expr, tracker); +Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeState* state, + Expr** scalar_expr, const std::shared_ptr& tracker) { + return Expr::create(texpr, row_desc, state, state->obj_pool(), scalar_expr, tracker); } Status Expr::create(const std::vector& texprs, const RowDescriptor& row_desc, - RuntimeState* state, std::vector* exprs, const std::shared_ptr& tracker) { - return Expr::create(texprs, row_desc, state, state->obj_pool(), exprs, tracker); + RuntimeState* state, std::vector* exprs, + const std::shared_ptr& tracker) { + return Expr::create(texprs, row_desc, state, state->obj_pool(), exprs, tracker); } Status Expr::create_tree(const TExpr& texpr, ObjectPool* pool, Expr* root) { - DCHECK(!texpr.nodes.empty()); - DCHECK(root != nullptr); - // The root of the tree at nodes[0] is already created and stored in 'root'. - int child_node_idx = 0; - int num_children = texpr.nodes[0].num_children; - for (int i = 0; i < num_children; ++i) { - ++child_node_idx; - Status status = create_tree_internal(texpr.nodes, pool, root, &child_node_idx); - if (UNLIKELY(!status.ok())) { - LOG(ERROR) << "Could not construct expr tree.\n" << status.get_error_msg() << "\n" - << apache::thrift::ThriftDebugString(texpr); - return status; - } - } - if (UNLIKELY(child_node_idx + 1 != texpr.nodes.size())) { - return Status::InternalError("Expression tree only partially reconstructed. 
Not all thrift " \ - "nodes were used."); - } - return Status::OK(); -} - -Status Expr::create_tree_internal(const std::vector& nodes, ObjectPool* pool, - Expr* root, int* child_node_idx) { - // propagate error case - if (*child_node_idx >= nodes.size()) { - return Status::InternalError("Failed to reconstruct expression tree from thrift."); - } - - const TExprNode& texpr_node = nodes[*child_node_idx]; - DCHECK_NE(texpr_node.node_type, TExprNodeType::AGG_EXPR); - Expr* child_expr; - RETURN_IF_ERROR(create_expr(pool, texpr_node, &child_expr)); - root->_children.push_back(child_expr); - - int num_children = nodes[*child_node_idx].num_children; - for (int i = 0; i < num_children; ++i) { - *child_node_idx += 1; - RETURN_IF_ERROR(create_tree_internal(nodes, pool, child_expr, child_node_idx)); - DCHECK(child_expr->get_child(i) != nullptr); - } - return Status::OK(); + DCHECK(!texpr.nodes.empty()); + DCHECK(root != nullptr); + // The root of the tree at nodes[0] is already created and stored in 'root'. + int child_node_idx = 0; + int num_children = texpr.nodes[0].num_children; + for (int i = 0; i < num_children; ++i) { + ++child_node_idx; + Status status = create_tree_internal(texpr.nodes, pool, root, &child_node_idx); + if (UNLIKELY(!status.ok())) { + LOG(ERROR) << "Could not construct expr tree.\n" + << status.get_error_msg() << "\n" + << apache::thrift::ThriftDebugString(texpr); + return status; + } + } + if (UNLIKELY(child_node_idx + 1 != texpr.nodes.size())) { + return Status::InternalError( + "Expression tree only partially reconstructed. 
Not all thrift " + "nodes were used."); + } + return Status::OK(); +} + +Status Expr::create_tree_internal(const std::vector& nodes, ObjectPool* pool, Expr* root, + int* child_node_idx) { + // propagate error case + if (*child_node_idx >= nodes.size()) { + return Status::InternalError("Failed to reconstruct expression tree from thrift."); + } + + const TExprNode& texpr_node = nodes[*child_node_idx]; + DCHECK_NE(texpr_node.node_type, TExprNodeType::AGG_EXPR); + Expr* child_expr; + RETURN_IF_ERROR(create_expr(pool, texpr_node, &child_expr)); + root->_children.push_back(child_expr); + + int num_children = nodes[*child_node_idx].num_children; + for (int i = 0; i < num_children; ++i) { + *child_node_idx += 1; + RETURN_IF_ERROR(create_tree_internal(nodes, pool, child_expr, child_node_idx)); + DCHECK(child_expr->get_child(i) != nullptr); + } + return Status::OK(); } // TODO chenhao @@ -973,7 +952,7 @@ void Expr::close() { } void Expr::close(const std::vector& exprs) { - for (Expr* expr : exprs) expr->close(); + for (Expr* expr : exprs) expr->close(); } -} +} // namespace doris diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h index 292626492ab10e..54f557e0747ca0 100644 --- a/be/src/exprs/expr.h +++ b/be/src/exprs/expr.h @@ -18,24 +18,24 @@ #ifndef DORIS_BE_SRC_QUERY_EXPRS_EXPR_H #define DORIS_BE_SRC_QUERY_EXPRS_EXPR_H +#include #include #include -#include #include "common/status.h" #include "exprs/expr_context.h" #include "exprs/expr_value.h" #include "gen_cpp/Opcodes_types.h" -#include "runtime/descriptors.h" -#include "runtime/tuple.h" -#include "runtime/tuple_row.h" -#include "runtime/string_value.h" -#include "runtime/string_value.hpp" #include "runtime/datetime_value.h" #include "runtime/decimal_value.h" #include "runtime/decimalv2_value.h" -#include "udf/udf.h" +#include "runtime/descriptors.h" +#include "runtime/string_value.h" +#include "runtime/string_value.hpp" +#include "runtime/tuple.h" +#include "runtime/tuple_row.h" #include "runtime/types.h" +#include 
"udf/udf.h" //#include // #undef USING_DORIS_UDF @@ -78,20 +78,18 @@ class Expr { // evaluate expr and return pointer to result. The result is // valid as long as 'row' doesn't change. // TODO: stop having the result cached in this Expr object - void* get_value(TupleRow* row) { - return NULL; - } + void* get_value(TupleRow* row) { return NULL; } // Vectorize Evalute expr and return result column index. // Result cached in batch and valid as long as batch. bool evaluate(VectorizedRowBatch* batch); - bool is_null_scalar_function(std::string &str) { + bool is_null_scalar_function(std::string& str) { // name and function_name both are required if (_fn.name.function_name.compare("is_null_pred") == 0) { str.assign("null"); return true; - } else if (_fn.name.function_name.compare("is_not_null_pred") == 0) { + } else if (_fn.name.function_name.compare("is_not_null_pred") == 0) { str.assign("not null"); return true; } else { @@ -121,51 +119,27 @@ class Expr { // value. Returns -1 if no scale has been specified (currently the scale is only set for // doubles set by RoundUpTo). get_value() must have already been called. 
// TODO: this will be unnecessary once we support the DECIMAL(precision, scale) type - int output_scale() const { - return _output_scale; - } - int output_column() const { - return _output_column; - } + int output_scale() const { return _output_scale; } + int output_column() const { return _output_column; } - void add_child(Expr* expr) { - _children.push_back(expr); - } - Expr* get_child(int i) const { - return _children[i]; - } - int get_num_children() const { - return _children.size(); - } + void add_child(Expr* expr) { _children.push_back(expr); } + Expr* get_child(int i) const { return _children[i]; } + int get_num_children() const { return _children.size(); } - const TypeDescriptor& type() const { - return _type; - } - const std::vector& children() const { - return _children; - } + const TypeDescriptor& type() const { return _type; } + const std::vector& children() const { return _children; } - TExprOpcode::type op() const { - return _opcode; - } + TExprOpcode::type op() const { return _opcode; } - TExprNodeType::type node_type() const { - return _node_type; - } + TExprNodeType::type node_type() const { return _node_type; } - const TFunction& fn() const { - return _fn; - } + const TFunction& fn() const { return _fn; } - bool is_slotref() const { - return _is_slotref; - } + bool is_slotref() const { return _is_slotref; } /// Returns true if this expr uses a FunctionContext to track its runtime state. /// Overridden by exprs which use FunctionContext. - virtual bool has_fn_ctx() const { - return false; - } + virtual bool has_fn_ctx() const { return false; } /// Returns an error status if the function context associated with the /// expr has an error set. @@ -238,10 +212,8 @@ class Expr { /// Idempotent: if '*new_ctxs' is empty, a clone of each context in 'ctxs' will be added /// to it, and if non-empty, it is assumed CloneIfNotExists() was already called and the /// call is a no-op. The new ExprContexts are created in state->obj_pool(). 
- static Status clone_if_not_exists( - const std::vector& ctxs, - RuntimeState* state, - std::vector* new_ctxs); + static Status clone_if_not_exists(const std::vector& ctxs, RuntimeState* state, + std::vector* new_ctxs); /// Convenience function for closing multiple expr trees. static void close(const std::vector& ctxs, RuntimeState* state); @@ -255,10 +227,10 @@ class Expr { // Variable length types are guaranteed to be at the end and 'var_result_begin' // will be set the beginning byte offset where variable length results begin. // 'var_result_begin' will be set to -1 if there are no variable len types. - static int compute_results_layout(const std::vector& exprs, - std::vector* offsets, int* var_result_begin); + static int compute_results_layout(const std::vector& exprs, std::vector* offsets, + int* var_result_begin); static int compute_results_layout(const std::vector& ctxs, - std::vector* offsets, int* var_result_begin); + std::vector* offsets, int* var_result_begin); /// If this expr is constant, evaluates the expr with no input row argument and returns /// the output. Returns NULL if the argument is not constant. The returned AnyVal* is @@ -289,7 +261,7 @@ class Expr { // GetIrConstant(). enum ExprConstant { RETURN_TYPE_SIZE, // int - ARG_TYPE_SIZE // int[] + ARG_TYPE_SIZE // int[] }; static Expr* copy(ObjectPool* pool, Expr* old_expr); @@ -340,8 +312,7 @@ class Expr { /// /// Subclasses overriding this function should call Expr::Prepare() to recursively call /// Prepare() on the expr tree. - virtual Status prepare(RuntimeState* state, - const RowDescriptor& row_desc, + virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc, ExprContext* context); /// Initializes 'context' for execution. If scope if FRAGMENT_LOCAL, both fragment- and @@ -350,39 +321,32 @@ class Expr { // /// Subclasses overriding this function should call Expr::Open() to recursively call /// Open() on the expr tree. 
- Status open(RuntimeState* state, - ExprContext* context) { + Status open(RuntimeState* state, ExprContext* context) { return open(state, context, FunctionContext::FRAGMENT_LOCAL); } - virtual Status open( - RuntimeState* state, - ExprContext* context, - FunctionContext::FunctionStateScope scope); + virtual Status open(RuntimeState* state, ExprContext* context, + FunctionContext::FunctionStateScope scope); /// Subclasses overriding this function should call Expr::Close(). // /// If scope if FRAGMENT_LOCAL, both fragment- and thread-local state should be torn /// down. Otherwise, if scope is THREAD_LOCAL, only thread-local state should be torn /// down. - void close( - RuntimeState* state, - ExprContext* context) { + void close(RuntimeState* state, ExprContext* context) { close(state, context, FunctionContext::FRAGMENT_LOCAL); } - virtual void close( - RuntimeState* state, - ExprContext* context, - FunctionContext::FunctionStateScope scope); + virtual void close(RuntimeState* state, ExprContext* context, + FunctionContext::FunctionStateScope scope); /// Releases cache entries to LibCache in all nodes of the Expr tree. virtual void close(); /// Helper function that calls ctx->Register(), sets fn_context_index_, and returns the /// registered FunctionContext. - FunctionContext* register_function_context( - ExprContext* ctx, RuntimeState* state, int varargs_buffer_size); + FunctionContext* register_function_context(ExprContext* ctx, RuntimeState* state, + int varargs_buffer_size); /// Cache entry for the library implementing this function. 
UserFunctionCacheEntry* _cache_entry = nullptr; @@ -447,13 +411,9 @@ class Expr { /// return /// status.ok() if successful /// !status.ok() if tree is inconsistent or corrupt - static Status create_tree_from_thrift( - ObjectPool* pool, - const std::vector& nodes, - Expr* parent, - int* node_idx, - Expr** root_expr, - ExprContext** ctx); + static Status create_tree_from_thrift(ObjectPool* pool, const std::vector& nodes, + Expr* parent, int* node_idx, Expr** root_expr, + ExprContext** ctx); /// Static wrappers around the virtual Get*Val() functions. Calls the appropriate /// Get*Val() function on expr, passing it the context and row arguments. @@ -488,8 +448,8 @@ class Expr { /// return /// status.ok() if successful /// !status.ok() if tree is inconsistent or corrupt - static Status create_tree_internal(const std::vector& nodes, - ObjectPool* pool, Expr* parent, int* child_node_idx); + static Status create_tree_internal(const std::vector& nodes, ObjectPool* pool, + Expr* parent, int* child_node_idx); /// 'fn_ctx_idx_' is the index into the FunctionContext vector in ScalarExprEvaluator /// for storing FunctionContext needed to evaluate this ScalarExprNode. It's -1 if this @@ -501,7 +461,6 @@ class Expr { /// in ScalarExpeEvaluator for the expression subtree rooted at this ScalarExpr node. 
int _fn_ctx_idx_start = 0; int _fn_ctx_idx_end = 0; - }; inline bool Expr::evaluate(VectorizedRowBatch* batch) { @@ -515,6 +474,6 @@ inline bool Expr::evaluate(VectorizedRowBatch* batch) { } } -} +} // namespace doris #endif diff --git a/be/src/exprs/expr_context.cpp b/be/src/exprs/expr_context.cpp index 68ae418bf3f8cb..1eafabe3b10a18 100644 --- a/be/src/exprs/expr_context.cpp +++ b/be/src/exprs/expr_context.cpp @@ -17,30 +17,30 @@ #include "exprs/expr_context.h" -#include #include +#include + +#include "exprs/anyval_util.h" #include "exprs/expr.h" #include "exprs/slot_ref.h" #include "runtime/mem_pool.h" #include "runtime/mem_tracker.h" -#include "runtime/runtime_state.h" #include "runtime/raw_value.h" +#include "runtime/runtime_state.h" #include "udf/udf_internal.h" #include "util/debug_util.h" #include "util/stack_util.h" -#include "exprs/anyval_util.h" namespace doris { -ExprContext::ExprContext(Expr* root) : - _fn_contexts_ptr(NULL), - _root(root), - _is_clone(false), - _prepared(false), - _opened(false), - _closed(false) { -} +ExprContext::ExprContext(Expr* root) + : _fn_contexts_ptr(NULL), + _root(root), + _is_clone(false), + _prepared(false), + _opened(false), + _closed(false) {} ExprContext::~ExprContext() { DCHECK(!_prepared || _closed); @@ -70,11 +70,11 @@ Status ExprContext::open(RuntimeState* state) { // Fragment-local state is only initialized for original contexts. Clones inherit the // original's fragment state and only need to have thread-local state initialized. FunctionContext::FunctionStateScope scope = - _is_clone? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL; + _is_clone ? 
FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL; return _root->open(state, this, scope); } -// TODO chenhao , replace ExprContext with ScalarExprEvaluator +// TODO chenhao , replace ExprContext with ScalarExprEvaluator Status ExprContext::open(std::vector evals, RuntimeState* state) { for (int i = 0; i < evals.size(); ++i) { RETURN_IF_ERROR(evals[i]->open(state)); @@ -85,7 +85,7 @@ Status ExprContext::open(std::vector evals, RuntimeState* state) { void ExprContext::close(RuntimeState* state) { DCHECK(!_closed); FunctionContext::FunctionStateScope scope = - _is_clone? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL; + _is_clone ? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL; _root->close(state, this, scope); for (int i = 0; i < _fn_contexts.size(); ++i) { @@ -98,11 +98,10 @@ void ExprContext::close(RuntimeState* state) { _closed = true; } -int ExprContext::register_func( - RuntimeState* state, - const doris_udf::FunctionContext::TypeDesc& return_type, - const std::vector& arg_types, - int varargs_buffer_size) { +int ExprContext::register_func(RuntimeState* state, + const doris_udf::FunctionContext::TypeDesc& return_type, + const std::vector& arg_types, + int varargs_buffer_size) { _fn_contexts.push_back(FunctionContextImpl::create_context( state, _pool.get(), return_type, arg_types, varargs_buffer_size, false)); _fn_contexts_ptr = &_fn_contexts[0]; @@ -117,8 +116,7 @@ Status ExprContext::clone(RuntimeState* state, ExprContext** new_ctx) { *new_ctx = state->obj_pool()->add(new ExprContext(_root)); (*new_ctx)->_pool.reset(new MemPool(_pool->mem_tracker())); for (int i = 0; i < _fn_contexts.size(); ++i) { - (*new_ctx)->_fn_contexts.push_back( - _fn_contexts[i]->impl()->clone((*new_ctx)->_pool.get())); + (*new_ctx)->_fn_contexts.push_back(_fn_contexts[i]->impl()->clone((*new_ctx)->_pool.get())); } (*new_ctx)->_fn_contexts_ptr = &((*new_ctx)->_fn_contexts[0]); @@ -137,8 +135,7 @@ Status ExprContext::clone(RuntimeState* 
state, ExprContext** new_ctx, Expr* root *new_ctx = state->obj_pool()->add(new ExprContext(root)); (*new_ctx)->_pool.reset(new MemPool(_pool->mem_tracker())); for (int i = 0; i < _fn_contexts.size(); ++i) { - (*new_ctx)->_fn_contexts.push_back( - _fn_contexts[i]->impl()->clone((*new_ctx)->_pool.get())); + (*new_ctx)->_fn_contexts.push_back(_fn_contexts[i]->impl()->clone((*new_ctx)->_pool.get())); } (*new_ctx)->_fn_contexts_ptr = &((*new_ctx)->_fn_contexts[0]); @@ -464,58 +461,56 @@ DecimalV2Val ExprContext::get_decimalv2_val(TupleRow* row) { return _root->get_decimalv2_val(this, row); } -Status ExprContext::get_const_value(RuntimeState* state, Expr& expr, - AnyVal** const_val) { - DCHECK(_opened); - if (!expr.is_constant()) { - *const_val = nullptr; - return Status::OK(); - } - - // A constant expression shouldn't have any SlotRefs expr in it. - DCHECK_EQ(expr.get_slot_ids(nullptr), 0); - DCHECK(_pool != nullptr); - const TypeDescriptor& result_type = expr.type(); - ObjectPool* obj_pool = state->obj_pool(); - *const_val = create_any_val(obj_pool, result_type); - if (*const_val == NULL) { - return Status::InternalError("Could not create any val"); - } - - const void* result = ExprContext::get_value(&expr, nullptr); - AnyValUtil::set_any_val(result, result_type, *const_val); - if (result_type.is_string_type()) { - StringVal* sv = reinterpret_cast(*const_val); - if (!sv->is_null && sv->len > 0) { - // Make sure the memory is owned by this evaluator. 
- char* ptr_copy = reinterpret_cast(_pool->try_allocate(sv->len)); - if (ptr_copy == nullptr) { - return _pool->mem_tracker()->MemLimitExceeded( - state, "Could not allocate constant string value", sv->len); - } - memcpy(ptr_copy, sv->ptr, sv->len); - sv->ptr = reinterpret_cast(ptr_copy); - } - } - return get_error(expr._fn_ctx_idx_start, expr._fn_ctx_idx_end); -} +Status ExprContext::get_const_value(RuntimeState* state, Expr& expr, AnyVal** const_val) { + DCHECK(_opened); + if (!expr.is_constant()) { + *const_val = nullptr; + return Status::OK(); + } + // A constant expression shouldn't have any SlotRefs expr in it. + DCHECK_EQ(expr.get_slot_ids(nullptr), 0); + DCHECK(_pool != nullptr); + const TypeDescriptor& result_type = expr.type(); + ObjectPool* obj_pool = state->obj_pool(); + *const_val = create_any_val(obj_pool, result_type); + if (*const_val == NULL) { + return Status::InternalError("Could not create any val"); + } + + const void* result = ExprContext::get_value(&expr, nullptr); + AnyValUtil::set_any_val(result, result_type, *const_val); + if (result_type.is_string_type()) { + StringVal* sv = reinterpret_cast(*const_val); + if (!sv->is_null && sv->len > 0) { + // Make sure the memory is owned by this evaluator. + char* ptr_copy = reinterpret_cast(_pool->try_allocate(sv->len)); + if (ptr_copy == nullptr) { + return _pool->mem_tracker()->MemLimitExceeded( + state, "Could not allocate constant string value", sv->len); + } + memcpy(ptr_copy, sv->ptr, sv->len); + sv->ptr = reinterpret_cast(ptr_copy); + } + } + return get_error(expr._fn_ctx_idx_start, expr._fn_ctx_idx_end); +} Status ExprContext::get_error(int start_idx, int end_idx) const { - DCHECK(_opened); - end_idx = end_idx == -1 ? 
_fn_contexts.size() : end_idx; - DCHECK_GE(start_idx, 0); - DCHECK_LE(end_idx, _fn_contexts.size()); - for (int idx = start_idx; idx < end_idx; ++idx) { - DCHECK_LT(idx, _fn_contexts.size()); - FunctionContext* fn_ctx = _fn_contexts[idx]; - if (fn_ctx->has_error()) return Status::InternalError(fn_ctx->error_msg()); - } - return Status::OK(); + DCHECK(_opened); + end_idx = end_idx == -1 ? _fn_contexts.size() : end_idx; + DCHECK_GE(start_idx, 0); + DCHECK_LE(end_idx, _fn_contexts.size()); + for (int idx = start_idx; idx < end_idx; ++idx) { + DCHECK_LT(idx, _fn_contexts.size()); + FunctionContext* fn_ctx = _fn_contexts[idx]; + if (fn_ctx->has_error()) return Status::InternalError(fn_ctx->error_msg()); + } + return Status::OK(); } std::string ExprContext::get_error_msg() const { - for (auto fn_ctx: _fn_contexts) { + for (auto fn_ctx : _fn_contexts) { if (fn_ctx->has_error()) { return std::string(fn_ctx->error_msg()); } @@ -524,9 +519,9 @@ std::string ExprContext::get_error_msg() const { } void ExprContext::clear_error_msg() { - for (auto fn_ctx: _fn_contexts) { + for (auto fn_ctx : _fn_contexts) { fn_ctx->clear_error_msg(); } } -} +} // namespace doris diff --git a/be/src/exprs/expr_context.h b/be/src/exprs/expr_context.h index 7fe294cb2d65e9..70cbb7f55f2ef8 100644 --- a/be/src/exprs/expr_context.h +++ b/be/src/exprs/expr_context.h @@ -106,10 +106,9 @@ class ExprContext { /// retrieve the created context. Exprs that need a FunctionContext should call this in /// Prepare() and save the returned index. 'varargs_buffer_size', if specified, is the /// size of the varargs buffer in the created FunctionContext (see udf-internal.h). - int register_func(RuntimeState* state, - const FunctionContext::TypeDesc& return_type, - const std::vector& arg_types, - int varargs_buffer_size); + int register_func(RuntimeState* state, const FunctionContext::TypeDesc& return_type, + const std::vector& arg_types, + int varargs_buffer_size); /// Retrieves a registered FunctionContext. 
'i' is the index returned by the call to /// register_func(). This should only be called by Exprs. @@ -119,13 +118,9 @@ class ExprContext { return _fn_contexts[i]; } - Expr* root() { - return _root; - } + Expr* root() { return _root; } - bool closed() { - return _closed; - } + bool closed() { return _closed; } bool is_nullable(); @@ -138,7 +133,7 @@ class ExprContext { FloatVal get_float_val(TupleRow* row); DoubleVal get_double_val(TupleRow* row); StringVal get_string_val(TupleRow* row); - // TODO(zc): + // TODO(zc): // ArrayVal GetArrayVal(TupleRow* row); DateTimeVal get_datetime_val(TupleRow* row); DecimalVal get_decimal_val(TupleRow* row); @@ -150,9 +145,7 @@ class ExprContext { static void free_local_allocations(const std::vector& ctxs); static void free_local_allocations(const std::vector& ctxs); - bool opened() { - return _opened; - } + bool opened() { return _opened; } /// If 'expr' is constant, evaluates it with no input row argument and returns the /// result in 'const_val'. Sets 'const_val' to NULL if the argument is not constant. @@ -172,6 +165,7 @@ class ExprContext { // when you reused this expr context, you maybe need clear the error status and message. void clear_error_msg(); + private: friend class Expr; friend class ScalarFnCall; @@ -212,6 +206,6 @@ class ExprContext { void* get_value(Expr* e, TupleRow* row); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/expr_ir.cpp b/be/src/exprs/expr_ir.cpp index b29b0fc3b2a054..ad0c0a07a1ea17 100644 --- a/be/src/exprs/expr_ir.cpp +++ b/be/src/exprs/expr_ir.cpp @@ -27,9 +27,9 @@ // The arguments are pointers to prevent Clang from lowering the struct types // (e.g. IntVal={bool, i32} can be coerced to i64). 
void dummy(doris_udf::FunctionContext*, doris_udf::BooleanVal*, doris_udf::TinyIntVal*, - doris_udf::SmallIntVal*, doris_udf::IntVal*, doris_udf::BigIntVal*, - doris_udf::FloatVal*, doris_udf::DoubleVal*, doris_udf::StringVal*, - doris_udf::DateTimeVal*, doris_udf::DecimalVal*, doris::ExprContext*) { } + doris_udf::SmallIntVal*, doris_udf::IntVal*, doris_udf::BigIntVal*, doris_udf::FloatVal*, + doris_udf::DoubleVal*, doris_udf::StringVal*, doris_udf::DateTimeVal*, + doris_udf::DecimalVal*, doris::ExprContext*) {} #endif // The following are compute functions that are cross-compiled to both native and IR @@ -77,4 +77,4 @@ DecimalVal Expr::get_decimal_val(Expr* expr, ExprContext* context, TupleRow* row DecimalV2Val Expr::get_decimalv2_val(Expr* expr, ExprContext* context, TupleRow* row) { return expr->get_decimalv2_val(context, row); } -} +} // namespace doris diff --git a/be/src/exprs/expr_value.h b/be/src/exprs/expr_value.h index daa434ac9a0620..849df3e2e904d9 100644 --- a/be/src/exprs/expr_value.h +++ b/be/src/exprs/expr_value.h @@ -18,11 +18,11 @@ #ifndef DORIS_BE_SRC_QUERY_EXPRS_EXPR_VALUE_H #define DORIS_BE_SRC_QUERY_EXPRS_EXPR_VALUE_H -#include "runtime/string_value.h" -#include "runtime/string_value.hpp" #include "runtime/datetime_value.h" #include "runtime/decimal_value.h" #include "runtime/decimalv2_value.h" +#include "runtime/string_value.h" +#include "runtime/string_value.hpp" #include "runtime/types.h" namespace doris { @@ -47,37 +47,35 @@ struct ExprValue { DecimalValue decimal_val; DecimalV2Value decimalv2_val; - ExprValue() : - bool_val(false), - tinyint_val(0), - smallint_val(0), - int_val(0), - bigint_val(0), - large_int_val(0), - float_val(0.0), - double_val(0.0), - string_data(), - string_val(NULL, 0), - datetime_val(), - decimal_val(), - decimalv2_val() { - } - - ExprValue(bool v): bool_val(v) {} - ExprValue(int8_t v): tinyint_val(v) {} - ExprValue(int16_t v): smallint_val(v) {} - ExprValue(int32_t v): int_val(v) {} - ExprValue(int64_t v): 
bigint_val(v) {} + ExprValue() + : bool_val(false), + tinyint_val(0), + smallint_val(0), + int_val(0), + bigint_val(0), + large_int_val(0), + float_val(0.0), + double_val(0.0), + string_data(), + string_val(NULL, 0), + datetime_val(), + decimal_val(), + decimalv2_val() {} + + ExprValue(bool v) : bool_val(v) {} + ExprValue(int8_t v) : tinyint_val(v) {} + ExprValue(int16_t v) : smallint_val(v) {} + ExprValue(int32_t v) : int_val(v) {} + ExprValue(int64_t v) : bigint_val(v) {} ExprValue(__int128 value) : large_int_val(value) {} - ExprValue(float v): float_val(v) {} - ExprValue(double v): double_val(v) {} + ExprValue(float v) : float_val(v) {} + ExprValue(double v) : double_val(v) {} ExprValue(int64_t i, int32_t f) : decimal_val(i, f), decimalv2_val(i, f) {} // c'tor for string values - ExprValue(const std::string& str) : - string_data(str), - string_val(const_cast(string_data.data()), string_data.size()) { - } + ExprValue(const std::string& str) + : string_data(str), + string_val(const_cast(string_data.data()), string_data.size()) {} // Set string value to copy of str void set_string_val(const StringValue& str) { @@ -255,6 +253,6 @@ struct ExprValue { } }; -} +} // namespace doris #endif diff --git a/be/src/exprs/grouping_sets_functions.cpp b/be/src/exprs/grouping_sets_functions.cpp index cabb9a7c7df003..e182ddf0cd63b8 100644 --- a/be/src/exprs/grouping_sets_functions.cpp +++ b/be/src/exprs/grouping_sets_functions.cpp @@ -19,18 +19,16 @@ namespace doris { -void GroupingSetsFunctions::init() { -} +void GroupingSetsFunctions::init() {} -doris_udf::BigIntVal GroupingSetsFunctions::grouping_id( - doris_udf::FunctionContext* ctx, const doris_udf::BigIntVal& grouping_id) { +doris_udf::BigIntVal GroupingSetsFunctions::grouping_id(doris_udf::FunctionContext* ctx, + const doris_udf::BigIntVal& grouping_id) { return grouping_id; } -BigIntVal GroupingSetsFunctions::grouping( - doris_udf::FunctionContext* ctx, const doris_udf::BigIntVal& grouping) { +BigIntVal 
GroupingSetsFunctions::grouping(doris_udf::FunctionContext* ctx, + const doris_udf::BigIntVal& grouping) { return grouping; } -} // doris - +} // namespace doris diff --git a/be/src/exprs/grouping_sets_functions.h b/be/src/exprs/grouping_sets_functions.h index b6267eb33afad6..05bb807657a773 100644 --- a/be/src/exprs/grouping_sets_functions.h +++ b/be/src/exprs/grouping_sets_functions.h @@ -27,11 +27,10 @@ class GroupingSetsFunctions { public: static void init(); - static doris_udf::BigIntVal grouping_id( - doris_udf::FunctionContext* ctx, const doris_udf::BigIntVal& grouping_id); - static doris_udf::BigIntVal grouping( - doris_udf::FunctionContext* ctx, const doris_udf::BigIntVal& grouping); + static doris_udf::BigIntVal grouping_id(doris_udf::FunctionContext* ctx, + const doris_udf::BigIntVal& grouping_id); + static doris_udf::BigIntVal grouping(doris_udf::FunctionContext* ctx, + const doris_udf::BigIntVal& grouping); }; -} - +} // namespace doris diff --git a/be/src/exprs/hash_functions.cpp b/be/src/exprs/hash_functions.cpp index 79f3e36e0b619b..0407e62381b6db 100644 --- a/be/src/exprs/hash_functions.cpp +++ b/be/src/exprs/hash_functions.cpp @@ -26,10 +26,10 @@ using doris_udf::FunctionContext; using doris_udf::IntVal; using doris_udf::StringVal; -void HashFunctions::init() { } +void HashFunctions::init() {} -IntVal HashFunctions::murmur_hash3_32( - FunctionContext* ctx, int num_children, const StringVal* inputs) { +IntVal HashFunctions::murmur_hash3_32(FunctionContext* ctx, int num_children, + const StringVal* inputs) { uint32_t seed = HashUtil::MURMUR3_32_SEED; for (int i = 0; i < num_children; ++i) { if (inputs[i].is_null) { @@ -40,4 +40,4 @@ IntVal HashFunctions::murmur_hash3_32( return seed; } -} +} // namespace doris diff --git a/be/src/exprs/hash_functions.h b/be/src/exprs/hash_functions.h index 1a0c77f0992136..4b2318e9dce1ba 100644 --- a/be/src/exprs/hash_functions.h +++ b/be/src/exprs/hash_functions.h @@ -21,16 +21,15 @@ namespace doris_udf { class 
FunctionContext; class IntVal; class StringVal; -} +} // namespace doris_udf namespace doris { class HashFunctions { public: static void init(); - static doris_udf::IntVal murmur_hash3_32( - doris_udf::FunctionContext* ctx, int num_children, - const doris_udf::StringVal* inputs); + static doris_udf::IntVal murmur_hash3_32(doris_udf::FunctionContext* ctx, int num_children, + const doris_udf::StringVal* inputs); }; -} +} // namespace doris diff --git a/be/src/exprs/hll_function.cpp b/be/src/exprs/hll_function.cpp index 849cf691e23c66..4771f132f912e3 100644 --- a/be/src/exprs/hll_function.cpp +++ b/be/src/exprs/hll_function.cpp @@ -26,8 +26,7 @@ namespace doris { using doris_udf::BigIntVal; using doris_udf::StringVal; -void HllFunctions::init() { -} +void HllFunctions::init() {} StringVal HllFunctions::hll_hash(FunctionContext* ctx, const StringVal& input) { HyperLogLog hll; @@ -41,7 +40,7 @@ StringVal HllFunctions::hll_hash(FunctionContext* ctx, const StringVal& input) { return AnyValUtil::from_string_temp(ctx, buf); } -void HllFunctions::hll_init(FunctionContext *, StringVal* dst) { +void HllFunctions::hll_init(FunctionContext*, StringVal* dst) { dst->is_null = false; dst->len = sizeof(HyperLogLog); dst->ptr = (uint8_t*)new HyperLogLog(); @@ -51,7 +50,7 @@ StringVal HllFunctions::hll_empty(FunctionContext* ctx) { } template -void HllFunctions::hll_update(FunctionContext *, const T &src, StringVal* dst) { +void HllFunctions::hll_update(FunctionContext*, const T& src, StringVal* dst) { if (src.is_null) { return; } @@ -76,14 +75,14 @@ void HllFunctions::hll_merge(FunctionContext*, const StringVal& src, StringVal* } } -BigIntVal HllFunctions::hll_finalize(FunctionContext*, const StringVal &src) { +BigIntVal HllFunctions::hll_finalize(FunctionContext*, const StringVal& src) { auto* src_hll = reinterpret_cast(src.ptr); BigIntVal result(src_hll->estimate_cardinality()); delete src_hll; return result; } -BigIntVal HllFunctions::hll_get_value(FunctionContext*, const 
StringVal &src) { +BigIntVal HllFunctions::hll_get_value(FunctionContext*, const StringVal& src) { if (src.is_null) { return BigIntVal::null(); } @@ -102,7 +101,7 @@ BigIntVal HllFunctions::hll_cardinality(FunctionContext* ctx, const StringVal& i return hll_finalize(ctx, dst); } -StringVal HllFunctions::hll_serialize(FunctionContext *ctx, const StringVal &src) { +StringVal HllFunctions::hll_serialize(FunctionContext* ctx, const StringVal& src) { auto* src_hll = reinterpret_cast(src.ptr); StringVal result(ctx, src_hll->max_serialized_size()); int size = src_hll->serialize((uint8_t*)result.ptr); @@ -123,4 +122,4 @@ template void HllFunctions::hll_update(FunctionContext*, const DateTimeVal&, Str template void HllFunctions::hll_update(FunctionContext*, const LargeIntVal&, StringVal*); template void HllFunctions::hll_update(FunctionContext*, const DecimalVal&, StringVal*); template void HllFunctions::hll_update(FunctionContext*, const DecimalV2Val&, StringVal*); -} +} // namespace doris diff --git a/be/src/exprs/hll_function.h b/be/src/exprs/hll_function.h index 22b60108853fa0..b1d8e2cf381151 100644 --- a/be/src/exprs/hll_function.h +++ b/be/src/exprs/hll_function.h @@ -32,7 +32,7 @@ class HllFunctions { template static void hll_update(FunctionContext*, const T& src, StringVal* dst); - static void hll_merge(FunctionContext*,const StringVal& src, StringVal* dst); + static void hll_merge(FunctionContext*, const StringVal& src, StringVal* dst); static BigIntVal hll_finalize(FunctionContext*, const StringVal& src); @@ -44,6 +44,6 @@ class HllFunctions { static BigIntVal hll_cardinality(FunctionContext* ctx, const StringVal& src); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/hll_hash_function.cpp b/be/src/exprs/hll_hash_function.cpp index b16a19746f497a..79f699e82b4871 100644 --- a/be/src/exprs/hll_hash_function.cpp +++ b/be/src/exprs/hll_hash_function.cpp @@ -15,16 +15,16 @@ // specific language governing permissions and limitations // under the License. 
-#include "exprs/aggregate_functions.h" #include "exprs/hll_hash_function.h" +#include "exprs/aggregate_functions.h" + namespace doris { using doris_udf::BigIntVal; using doris_udf::StringVal; -void HllHashFunctions::init() { -} +void HllHashFunctions::init() {} StringVal HllHashFunctions::hll_hash(FunctionContext* ctx, const StringVal& input) { HyperLogLog hll; @@ -48,4 +48,4 @@ BigIntVal HllHashFunctions::hll_cardinality(FunctionContext* ctx, const HllVal& return AggregateFunctions::hll_union_agg_finalize(ctx, dst); } -} +} // namespace doris diff --git a/be/src/exprs/hll_hash_function.h b/be/src/exprs/hll_hash_function.h index af47dc0c216c2c..5ff590e7244333 100644 --- a/be/src/exprs/hll_hash_function.h +++ b/be/src/exprs/hll_hash_function.h @@ -18,9 +18,9 @@ #ifndef DORIS_BE_SRC_QUERY_EXPRS_HLL_HASH_FUNCTION_H #define DORIS_BE_SRC_QUERY_EXPRS_HLL_HASH_FUNCTION_H +#include "exprs/anyval_util.h" #include "udf/udf.h" #include "util/hash_util.hpp" -#include "exprs/anyval_util.h" namespace doris { @@ -35,6 +35,6 @@ class HllHashFunctions { static StringVal hll_hash(FunctionContext* ctx, const StringVal& dest_base); static BigIntVal hll_cardinality(FunctionContext* ctx, const HllVal& dest_base); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/hybrid_map.h b/be/src/exprs/hybrid_map.h index 433ba17e3db66e..0e0d1396e5ab78 100644 --- a/be/src/exprs/hybrid_map.h +++ b/be/src/exprs/hybrid_map.h @@ -19,22 +19,21 @@ #define DORIS_BE_SRC_QUERY_EXPRS_HYBRID_MAP_H #include + +#include "common/object_pool.h" #include "common/status.h" +#include "exprs/hybrid_set.h" +#include "runtime/datetime_value.h" #include "runtime/primitive_type.h" #include "runtime/string_value.h" -#include "runtime/datetime_value.h" -#include "common/object_pool.h" -#include "exprs/hybrid_set.h" namespace doris { class HybridMap { public: - HybridMap(PrimitiveType type) : _type(type) { - } + HybridMap(PrimitiveType type) : _type(type) {} - virtual ~HybridMap() { - } + virtual ~HybridMap() {} 
virtual HybridSetBase* find_or_insert_set(uint64_t dst, bool* is_add_buckets) { HybridSetBase* _set_ptr; @@ -58,6 +57,6 @@ class HybridMap { PrimitiveType _type; ObjectPool _pool; }; -} +} // namespace doris -#endif // DORIS_BE_SRC_QUERY_EXPRS_HYBRID_MAP_H +#endif // DORIS_BE_SRC_QUERY_EXPRS_HYBRID_MAP_H diff --git a/be/src/exprs/hybrid_set.cpp b/be/src/exprs/hybrid_set.cpp index 562ed250080aac..a79732373d1fb4 100644 --- a/be/src/exprs/hybrid_set.cpp +++ b/be/src/exprs/hybrid_set.cpp @@ -22,42 +22,42 @@ namespace doris { HybridSetBase* HybridSetBase::create_set(PrimitiveType type) { switch (type) { case TYPE_BOOLEAN: - return new(std::nothrow) HybridSet(); + return new (std::nothrow) HybridSet(); case TYPE_TINYINT: - return new(std::nothrow) HybridSet(); + return new (std::nothrow) HybridSet(); case TYPE_SMALLINT: - return new(std::nothrow) HybridSet(); + return new (std::nothrow) HybridSet(); case TYPE_INT: - return new(std::nothrow) HybridSet(); + return new (std::nothrow) HybridSet(); case TYPE_BIGINT: - return new(std::nothrow) HybridSet(); + return new (std::nothrow) HybridSet(); case TYPE_FLOAT: - return new(std::nothrow) HybridSet(); + return new (std::nothrow) HybridSet(); case TYPE_DOUBLE: - return new(std::nothrow) HybridSet(); + return new (std::nothrow) HybridSet(); case TYPE_DATE: case TYPE_DATETIME: - return new(std::nothrow) HybridSet(); + return new (std::nothrow) HybridSet(); case TYPE_DECIMAL: - return new(std::nothrow) HybridSet(); + return new (std::nothrow) HybridSet(); case TYPE_DECIMALV2: - return new(std::nothrow) HybridSet(); + return new (std::nothrow) HybridSet(); case TYPE_LARGEINT: - return new(std::nothrow) HybridSet<__int128>(); + return new (std::nothrow) HybridSet<__int128>(); case TYPE_CHAR: case TYPE_VARCHAR: - return new(std::nothrow) StringValueSet(); + return new (std::nothrow) StringValueSet(); default: return NULL; @@ -66,6 +66,6 @@ HybridSetBase* HybridSetBase::create_set(PrimitiveType type) { return NULL; } -} +} // 
namespace doris /* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/be/src/exprs/hybrid_set.h b/be/src/exprs/hybrid_set.h index 5ed8ab9029688d..527b65b97a49d7 100644 --- a/be/src/exprs/hybrid_set.h +++ b/be/src/exprs/hybrid_set.h @@ -20,22 +20,21 @@ #include #include -#include "common/status.h" + #include "common/object_pool.h" -#include "runtime/primitive_type.h" -#include "runtime/string_value.h" +#include "common/status.h" #include "runtime/datetime_value.h" #include "runtime/decimal_value.h" #include "runtime/decimalv2_value.h" +#include "runtime/primitive_type.h" +#include "runtime/string_value.h" namespace doris { class HybridSetBase { public: - HybridSetBase() { - } - virtual ~HybridSetBase() { - } + HybridSetBase() {} + virtual ~HybridSetBase() {} virtual void insert(void* data) = 0; virtual void insert(HybridSetBase* set) = 0; @@ -46,10 +45,8 @@ class HybridSetBase { static HybridSetBase* create_set(PrimitiveType type); class IteratorBase { public: - IteratorBase() { - } - virtual ~IteratorBase() { - } + IteratorBase() {} + virtual ~IteratorBase() {} virtual const void* get_value() = 0; virtual bool has_next() const = 0; virtual void next() = 0; @@ -58,20 +55,18 @@ class HybridSetBase { virtual IteratorBase* begin() = 0; }; -template +template class HybridSet : public HybridSetBase { public: - HybridSet() { - } + HybridSet() {} - virtual ~HybridSet() { - } + virtual ~HybridSet() {} virtual void insert(void* data) { if (sizeof(T) >= 16) { // for largeint, it will core dump with no memcpy T value; - memcpy(&value, data, sizeof(T)); + memcpy(&value, data, sizeof(T)); _set.insert(value); } else { _set.insert(*reinterpret_cast(data)); @@ -83,12 +78,9 @@ class HybridSet : public HybridSetBase { _set.insert(hybrid_set->_set.begin(), hybrid_set->_set.end()); } - virtual int size() { - return _set.size(); - } + virtual int size() { return _set.size(); } virtual bool find(void* data) { - typename std::unordered_set::const_iterator it - = 
_set.find(*reinterpret_cast(data)); + typename std::unordered_set::const_iterator it = _set.find(*reinterpret_cast(data)); if (it == _set.end()) { return false; @@ -104,42 +96,31 @@ class HybridSet : public HybridSetBase { public: Iterator(typename std::unordered_set<_iT>::iterator begin, typename std::unordered_set<_iT>::iterator end) - : _begin(begin), - _end(end) { - } - virtual ~Iterator() { - } - virtual bool has_next() const { - return !(_begin == _end); - } - virtual const void* get_value() { - return _begin.operator->(); - } - virtual void next() { - ++_begin; - } + : _begin(begin), _end(end) {} + virtual ~Iterator() {} + virtual bool has_next() const { return !(_begin == _end); } + virtual const void* get_value() { return _begin.operator->(); } + virtual void next() { ++_begin; } + private: typename std::unordered_set<_iT>::iterator _begin; typename std::unordered_set<_iT>::iterator _end; }; IteratorBase* begin() { - return _pool.add(new(std::nothrow) Iterator(_set.begin(), _set.end())); + return _pool.add(new (std::nothrow) Iterator(_set.begin(), _set.end())); } private: - std::unordered_set _set; ObjectPool _pool; }; class StringValueSet : public HybridSetBase { public: - StringValueSet() { - } + StringValueSet() {} - virtual ~StringValueSet() { - } + virtual ~StringValueSet() {} virtual void insert(void* data) { StringValue* value = reinterpret_cast(data); @@ -148,13 +129,11 @@ class StringValueSet : public HybridSetBase { } void insert(HybridSetBase* set) { - StringValueSet* string_set = reinterpret_cast(set); + StringValueSet* string_set = reinterpret_cast(set); _set.insert(string_set->_set.begin(), string_set->_set.end()); } - virtual int size() { - return _set.size(); - } + virtual int size() { return _set.size(); } virtual bool find(void* data) { StringValue* value = reinterpret_cast(data); std::string str_value(value->ptr, value->len); @@ -173,22 +152,16 @@ class StringValueSet : public HybridSetBase { public: Iterator(std::unordered_set::iterator 
begin, std::unordered_set::iterator end) - : _begin(begin), - _end(end) { - } - virtual ~Iterator() { - } - virtual bool has_next() const { - return !(_begin == _end); - } + : _begin(begin), _end(end) {} + virtual ~Iterator() {} + virtual bool has_next() const { return !(_begin == _end); } virtual const void* get_value() { _value.ptr = const_cast(_begin->data()); _value.len = _begin->length(); return &_value; } - virtual void next() { - ++_begin; - } + virtual void next() { ++_begin; } + private: typename std::unordered_set::iterator _begin; typename std::unordered_set::iterator _end; @@ -196,15 +169,14 @@ class StringValueSet : public HybridSetBase { }; IteratorBase* begin() { - return _pool.add(new(std::nothrow) Iterator(_set.begin(), _set.end())); + return _pool.add(new (std::nothrow) Iterator(_set.begin(), _set.end())); } private: - std::unordered_set _set; ObjectPool _pool; }; -} +} // namespace doris -#endif // DORIS_BE_SRC_QUERY_EXPRS_HYBRID_SET_H +#endif // DORIS_BE_SRC_QUERY_EXPRS_HYBRID_SET_H diff --git a/be/src/exprs/in_predicate.cpp b/be/src/exprs/in_predicate.cpp index 1265f14d25b262..2811c14c2f745c 100644 --- a/be/src/exprs/in_predicate.cpp +++ b/be/src/exprs/in_predicate.cpp @@ -19,24 +19,21 @@ #include -#include "exprs/anyval_util.h" #include "exprs/anyval_util.h" #include "runtime/raw_value.h" -#include "runtime/string_value.hpp" #include "runtime/runtime_state.h" +#include "runtime/string_value.hpp" namespace doris { -InPredicate::InPredicate(const TExprNode& node) : - Predicate(node), - _is_not_in(node.in_predicate.is_not_in), - _is_prepare(false), - _null_in_set(false), - _hybrid_set() { -} +InPredicate::InPredicate(const TExprNode& node) + : Predicate(node), + _is_not_in(node.in_predicate.is_not_in), + _is_prepare(false), + _null_in_set(false), + _hybrid_set() {} -InPredicate::~InPredicate() { -} +InPredicate::~InPredicate() {} Status InPredicate::prepare(RuntimeState* state, const TypeDescriptor& type) { if (_is_prepare) { @@ -51,10 +48,8 @@ 
Status InPredicate::prepare(RuntimeState* state, const TypeDescriptor& type) { return Status::OK(); } -Status InPredicate::open( - RuntimeState* state, - ExprContext* context, - FunctionContext::FunctionStateScope scope) { +Status InPredicate::open(RuntimeState* state, ExprContext* context, + FunctionContext::FunctionStateScope scope) { Expr::open(state, context, scope); for (int i = 1; i < _children.size(); ++i) { @@ -78,8 +73,8 @@ Status InPredicate::open( return Status::OK(); } -Status InPredicate::prepare( - RuntimeState* state, const RowDescriptor& row_desc, ExprContext* context) { +Status InPredicate::prepare(RuntimeState* state, const RowDescriptor& row_desc, + ExprContext* context) { for (int i = 0; i < _children.size(); ++i) { RETURN_IF_ERROR(_children[i]->prepare(state, row_desc, context)); } @@ -138,4 +133,4 @@ BooleanVal InPredicate::get_boolean_val(ExprContext* ctx, TupleRow* row) { return BooleanVal(_is_not_in); } -} +} // namespace doris diff --git a/be/src/exprs/in_predicate.h b/be/src/exprs/in_predicate.h index ef826153ffe489..55fa7433df1992 100644 --- a/be/src/exprs/in_predicate.h +++ b/be/src/exprs/in_predicate.h @@ -18,12 +18,13 @@ #ifndef DORIS_BE_SRC_QUERY_EXPRS_IN_PREDICATE_H #define DORIS_BE_SRC_QUERY_EXPRS_IN_PREDICATE_H -#include #include #include +#include + +#include "exprs/hybrid_set.h" #include "exprs/predicate.h" #include "runtime/raw_value.h" -#include "exprs/hybrid_set.h" namespace doris { @@ -33,17 +34,15 @@ namespace doris { class InPredicate : public Predicate { public: virtual ~InPredicate(); - virtual Expr* clone(ObjectPool* pool) const override { + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new InPredicate(*this)); } Status prepare(RuntimeState* state, const TypeDescriptor&); - Status open( - RuntimeState* state, - ExprContext* context, - FunctionContext::FunctionStateScope scope); - virtual Status prepare( - RuntimeState* state, const RowDescriptor& row_desc, ExprContext* context); + Status 
open(RuntimeState* state, ExprContext* context, + FunctionContext::FunctionStateScope scope); + virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc, + ExprContext* context); virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow* row); @@ -51,13 +50,9 @@ class InPredicate : public Predicate { // if add to children, when List is long, copy is a expensive op. void insert(void* value); - HybridSetBase* hybrid_set() const { - return _hybrid_set.get(); - } + HybridSetBase* hybrid_set() const { return _hybrid_set.get(); } - bool is_not_in() const { - return _is_not_in; - } + bool is_not_in() const { return _is_not_in; } protected: friend class Expr; @@ -73,9 +68,8 @@ class InPredicate : public Predicate { bool _is_prepare; bool _null_in_set; boost::shared_ptr _hybrid_set; - }; -} +} // namespace doris #endif diff --git a/be/src/exprs/info_func.cpp b/be/src/exprs/info_func.cpp index a69cd3858469f1..2aab808e17c984 100644 --- a/be/src/exprs/info_func.cpp +++ b/be/src/exprs/info_func.cpp @@ -23,11 +23,8 @@ namespace doris { -InfoFunc::InfoFunc(const TExprNode& node) : - Expr(node), - _int_value(node.info_func.int_value), - _str_value(node.info_func.str_value) { -} +InfoFunc::InfoFunc(const TExprNode& node) + : Expr(node), _int_value(node.info_func.int_value), _str_value(node.info_func.str_value) {} StringVal InfoFunc::get_string_val(ExprContext* context, TupleRow*) { StringVal val; @@ -43,8 +40,8 @@ BigIntVal InfoFunc::get_big_int_val(ExprContext* context, TupleRow*) { std::string InfoFunc::debug_string() const { std::stringstream out; - out << "InfoFunc(" << Expr::debug_string() - << " int_value: " << _int_value << "; str_value: " << _str_value << ")"; + out << "InfoFunc(" << Expr::debug_string() << " int_value: " << _int_value + << "; str_value: " << _str_value << ")"; return out.str(); } @@ -62,4 +59,4 @@ void* InfoFunc::compute_fn(Expr* e, TupleRow* row) { return NULL; } -} +} // namespace doris diff --git a/be/src/exprs/info_func.h 
b/be/src/exprs/info_func.h index 9a145b62e2168f..c8082c3a1bd051 100644 --- a/be/src/exprs/info_func.h +++ b/be/src/exprs/info_func.h @@ -18,8 +18,9 @@ #ifndef DORIS_BE_SRC_QUERY_EXPRS_INFO_FUNC_H #define DORIS_BE_SRC_QUERY_EXPRS_INFO_FUNC_H -#include #include +#include + #include "common/object_pool.h" #include "exprs/expr.h" #include "gen_cpp/Exprs_types.h" @@ -28,11 +29,9 @@ namespace doris { class InfoFunc : public Expr { public: - virtual ~InfoFunc() { } + virtual ~InfoFunc() {} - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new InfoFunc(*this)); - } + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new InfoFunc(*this)); } protected: friend class Expr; @@ -43,12 +42,13 @@ class InfoFunc : public Expr { virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow*); virtual std::string debug_string() const; + private: static void* compute_fn(Expr* e, TupleRow* row); int64_t _int_value; std::string _str_value; }; -} +} // namespace doris #endif diff --git a/be/src/exprs/is_null_predicate.cpp b/be/src/exprs/is_null_predicate.cpp index efc3a05165709d..78ea27694fd00c 100644 --- a/be/src/exprs/is_null_predicate.cpp +++ b/be/src/exprs/is_null_predicate.cpp @@ -16,19 +16,19 @@ // under the License. 
#include "exprs/is_null_predicate.h" + #include "udf/udf.h" namespace doris { -void IsNullPredicate::init() { -} +void IsNullPredicate::init() {} -template +template BooleanVal IsNullPredicate::is_null(FunctionContext* ctx, const T& val) { return val.is_null; } -template +template BooleanVal IsNullPredicate::is_not_null(FunctionContext* ctx, const T& val) { return !val.is_null; } @@ -61,4 +61,4 @@ template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const DateTim template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const DecimalVal&); template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const DecimalV2Val&); -} +} // namespace doris diff --git a/be/src/exprs/is_null_predicate.h b/be/src/exprs/is_null_predicate.h index 0fe75a8d9189d9..468615c6312e46 100644 --- a/be/src/exprs/is_null_predicate.h +++ b/be/src/exprs/is_null_predicate.h @@ -19,6 +19,7 @@ #define DORIS_BE_SRC_QUERY_EXPRS_IS_NULL_PREDICATE_H #include + #include "exprs/predicate.h" namespace doris { @@ -27,12 +28,12 @@ class IsNullPredicate { public: static void init(); - template + template static BooleanVal is_null(FunctionContext* ctx, const T& val); - template + template static BooleanVal is_not_null(FunctionContext* ctx, const T& val); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp index 86d1f1215c62eb..70f6cc34dc1003 100644 --- a/be/src/exprs/json_functions.cpp +++ b/be/src/exprs/json_functions.cpp @@ -17,27 +17,26 @@ #include "exprs/json_functions.h" +#include +#include +#include +#include #include #include +#include +#include #include #include #include -#include -#include -#include -#include -#include -#include - -#include "exprs/expr.h" -#include "exprs/anyval_util.h" #include "common/logging.h" +#include "exprs/anyval_util.h" +#include "exprs/expr.h" #include "olap/olap_define.h" +#include "rapidjson/error/en.h" #include "runtime/string_value.h" #include "runtime/tuple_row.h" -#include 
"rapidjson/error/en.h" namespace doris { @@ -45,11 +44,10 @@ namespace doris { // json path cannot contains: ", [, ] static const re2::RE2 JSON_PATTERN("^([^\\\"\\[\\]]*)(?:\\[([0-9]+|\\*)\\])?"); -void JsonFunctions::init() { -} +void JsonFunctions::init() {} -IntVal JsonFunctions::get_json_int( - FunctionContext* context, const StringVal& json_str, const StringVal& path) { +IntVal JsonFunctions::get_json_int(FunctionContext* context, const StringVal& json_str, + const StringVal& path) { if (json_str.is_null || path.is_null) { return IntVal::null(); } @@ -57,7 +55,7 @@ IntVal JsonFunctions::get_json_int( std::string path_string((char*)path.ptr, path.len); rapidjson::Document document; rapidjson::Value* root = - get_json_object(context, json_string, path_string, JSON_FUN_INT, &document); + get_json_object(context, json_string, path_string, JSON_FUN_INT, &document); if (root != nullptr && root->IsInt()) { return IntVal(root->GetInt()); } else { @@ -65,8 +63,8 @@ IntVal JsonFunctions::get_json_int( } } -StringVal JsonFunctions::get_json_string( - FunctionContext* context, const StringVal& json_str, const StringVal& path) { +StringVal JsonFunctions::get_json_string(FunctionContext* context, const StringVal& json_str, + const StringVal& path) { if (json_str.is_null || path.is_null) { return StringVal::null(); } @@ -75,7 +73,7 @@ StringVal JsonFunctions::get_json_string( std::string path_string((char*)path.ptr, path.len); rapidjson::Document document; rapidjson::Value* root = - get_json_object(context, json_string, path_string, JSON_FUN_STRING, &document); + get_json_object(context, json_string, path_string, JSON_FUN_STRING, &document); if (root == nullptr || root->IsNull()) { return StringVal::null(); } else if (root->IsString()) { @@ -88,8 +86,8 @@ StringVal JsonFunctions::get_json_string( } } -DoubleVal JsonFunctions::get_json_double( - FunctionContext* context, const StringVal& json_str, const StringVal& path) { +DoubleVal 
JsonFunctions::get_json_double(FunctionContext* context, const StringVal& json_str, + const StringVal& path) { if (json_str.is_null || path.is_null) { return DoubleVal::null(); } @@ -97,7 +95,7 @@ DoubleVal JsonFunctions::get_json_double( std::string path_string((char*)path.ptr, path.len); rapidjson::Document document; rapidjson::Value* root = - get_json_object(context, json_string, path_string, JSON_FUN_DOUBLE, &document); + get_json_object(context, json_string, path_string, JSON_FUN_DOUBLE, &document); if (root == nullptr || root->IsNull()) { return DoubleVal::null(); } else if (root->IsInt()) { @@ -109,12 +107,10 @@ DoubleVal JsonFunctions::get_json_double( } } - -rapidjson::Value* JsonFunctions::match_value( - const std::vector& parsed_paths, - rapidjson::Value* document, - rapidjson::Document::AllocatorType& mem_allocator, - bool is_insert_null) { +rapidjson::Value* JsonFunctions::match_value(const std::vector& parsed_paths, + rapidjson::Value* document, + rapidjson::Document::AllocatorType& mem_allocator, + bool is_insert_null) { rapidjson::Value* root = document; rapidjson::Value* array_obj = nullptr; for (int i = 1; i < parsed_paths.size(); i++) { @@ -169,7 +165,7 @@ rapidjson::Value* JsonFunctions::match_value( } root = is_null ? 
&(array_obj->SetNull()) : array_obj; - } else if (root->IsObject()){ + } else if (root->IsObject()) { if (!root->HasMember(col.c_str())) { return nullptr; } else { @@ -212,13 +208,11 @@ rapidjson::Value* JsonFunctions::match_value( return root; } -rapidjson::Value* JsonFunctions::get_json_object( - FunctionContext* context, - const std::string& json_string, - const std::string& path_string, - const JsonFunctionType& fntype, - rapidjson::Document* document) { - +rapidjson::Value* JsonFunctions::get_json_object(FunctionContext* context, + const std::string& json_string, + const std::string& path_string, + const JsonFunctionType& fntype, + rapidjson::Document* document) { // split path by ".", and escape quota by "\" // eg: // '$.text#abc.xyz' -> [$, text#abc, xyz] @@ -227,15 +221,18 @@ rapidjson::Value* JsonFunctions::get_json_object( std::vector* parsed_paths; std::vector tmp_parsed_paths; #ifndef BE_TEST - parsed_paths = reinterpret_cast*>(context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + parsed_paths = reinterpret_cast*>( + context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); if (parsed_paths == nullptr) { - boost::tokenizer> tok(path_string, boost::escaped_list_separator("\\", ".", "\"")); + boost::tokenizer> tok( + path_string, boost::escaped_list_separator("\\", ".", "\"")); std::vector paths(tok.begin(), tok.end()); get_parsed_paths(paths, &tmp_parsed_paths); parsed_paths = &tmp_parsed_paths; } #else - boost::tokenizer> tok(path_string, boost::escaped_list_separator("\\", ".", "\"")); + boost::tokenizer> tok( + path_string, boost::escaped_list_separator("\\", ".", "\"")); std::vector paths(tok.begin(), tok.end()); get_parsed_paths(paths, &tmp_parsed_paths); parsed_paths = &tmp_parsed_paths; @@ -258,8 +255,8 @@ rapidjson::Value* JsonFunctions::get_json_object( //rapidjson::Document document; document->Parse(json_string.c_str()); if (UNLIKELY(document->HasParseError())) { - VLOG(1) << "Error at offset " << document->GetErrorOffset() - << 
": " << GetParseError_En(document->GetParseError()); + VLOG(1) << "Error at offset " << document->GetErrorOffset() << ": " + << GetParseError_En(document->GetParseError()); document->SetNull(); return document; } @@ -267,20 +264,16 @@ rapidjson::Value* JsonFunctions::get_json_object( } rapidjson::Value* JsonFunctions::get_json_array_from_parsed_json( - const std::string& json_path, - rapidjson::Value* document, + const std::string& json_path, rapidjson::Value* document, rapidjson::Document::AllocatorType& mem_allocator) { - std::vector vec; parse_json_paths(json_path, &vec); return get_json_array_from_parsed_json(vec, document, mem_allocator); } rapidjson::Value* JsonFunctions::get_json_array_from_parsed_json( - const std::vector& parsed_paths, - rapidjson::Value* document, + const std::vector& parsed_paths, rapidjson::Value* document, rapidjson::Document::AllocatorType& mem_allocator) { - if (!parsed_paths[0].is_valid) { return nullptr; } @@ -290,8 +283,7 @@ rapidjson::Value* JsonFunctions::get_json_array_from_parsed_json( return nullptr; } else if (!root->IsArray()) { rapidjson::Value* array_obj = nullptr; - array_obj = static_cast( - mem_allocator.Malloc(sizeof(rapidjson::Value))); + array_obj = static_cast(mem_allocator.Malloc(sizeof(rapidjson::Value))); array_obj->SetArray(); array_obj->PushBack(*root, mem_allocator); return array_obj; @@ -299,12 +291,9 @@ rapidjson::Value* JsonFunctions::get_json_array_from_parsed_json( return root; } - rapidjson::Value* JsonFunctions::get_json_object_from_parsed_json( - const std::vector& parsed_paths, - rapidjson::Value* document, + const std::vector& parsed_paths, rapidjson::Value* document, rapidjson::Document::AllocatorType& mem_allocator) { - if (!parsed_paths[0].is_valid) { return nullptr; } @@ -316,9 +305,8 @@ rapidjson::Value* JsonFunctions::get_json_object_from_parsed_json( return root; } -void JsonFunctions::json_path_prepare( - doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope 
scope) { +void JsonFunctions::json_path_prepare(doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope) { if (scope != FunctionContext::FRAGMENT_LOCAL) { return; } @@ -332,8 +320,8 @@ void JsonFunctions::json_path_prepare( } std::string path_str(reinterpret_cast(path->ptr), path->len); - boost::tokenizer> tok(path_str, - boost::escaped_list_separator("\\", ".", "\"")); + boost::tokenizer> tok( + path_str, boost::escaped_list_separator("\\", ".", "\"")); std::vector path_exprs(tok.begin(), tok.end()); std::vector* parsed_paths = new std::vector(); get_parsed_paths(path_exprs, parsed_paths); @@ -342,36 +330,35 @@ void JsonFunctions::json_path_prepare( VLOG(10) << "prepare json path. size: " << parsed_paths->size(); } -void JsonFunctions::json_path_close( - doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope scope) { +void JsonFunctions::json_path_close(doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope) { if (scope != FunctionContext::FRAGMENT_LOCAL) { return; } - std::vector* parsed_paths = reinterpret_cast*>(context->get_function_state(scope)); + std::vector* parsed_paths = + reinterpret_cast*>(context->get_function_state(scope)); if (parsed_paths != nullptr) { delete parsed_paths; VLOG(10) << "close json path"; } } -void JsonFunctions::parse_json_paths( - const std::string& path_string, - std::vector* parsed_paths) { +void JsonFunctions::parse_json_paths(const std::string& path_string, + std::vector* parsed_paths) { // split path by ".", and escape quota by "\" // eg: // '$.text#abc.xyz' -> [$, text#abc, xyz] // '$."text.abc".xyz' -> [$, text.abc, xyz] // '$."text.abc"[1].xyz' -> [$, text.abc[1], xyz] - boost::tokenizer> tok(path_string, boost::escaped_list_separator("\\", ".", "\"")); + boost::tokenizer> tok( + path_string, boost::escaped_list_separator("\\", ".", "\"")); std::vector paths(tok.begin(), tok.end()); get_parsed_paths(paths, parsed_paths); 
} -void JsonFunctions::get_parsed_paths( - const std::vector& path_exprs, - std::vector* parsed_paths) { - if(path_exprs.empty()){ +void JsonFunctions::get_parsed_paths(const std::vector& path_exprs, + std::vector* parsed_paths) { + if (path_exprs.empty()) { return; } @@ -400,4 +387,4 @@ void JsonFunctions::get_parsed_paths( } } -} +} // namespace doris diff --git a/be/src/exprs/json_functions.h b/be/src/exprs/json_functions.h index b0b86d15bb943e..6f8a085aa38089 100644 --- a/be/src/exprs/json_functions.h +++ b/be/src/exprs/json_functions.h @@ -19,6 +19,7 @@ #define DORIS_BE_SRC_QUERY_EXPRS_JSON_FUNCTIONS_H #include + #include "runtime/string_value.h" namespace doris { @@ -37,20 +38,25 @@ class TupleRow; struct JsonPath { std::string key; // key of a json object - int idx; // array index of a json array, -1 means not set, -2 means * - bool is_valid; // true if the path is successfully parsed + int idx; // array index of a json array, -1 means not set, -2 means * + bool is_valid; // true if the path is successfully parsed - JsonPath(const std::string& key_, int idx_, bool is_valid_): - key(key_), - idx(idx_), - is_valid(is_valid_) {} + JsonPath(const std::string& key_, int idx_, bool is_valid_) + : key(key_), idx(idx_), is_valid(is_valid_) {} std::string to_string() const { std::stringstream ss; - if (!is_valid) { return "INVALID"; } - if (!key.empty()) { ss << key; } - if (idx == -2) { ss << "[*]"; } - else if (idx > -1) { ss << "[" << idx << "]"; } + if (!is_valid) { + return "INVALID"; + } + if (!key.empty()) { + ss << key; + } + if (idx == -2) { + ss << "[*]"; + } else if (idx > -1) { + ss << "[" << idx << "]"; + } return ss.str(); } @@ -64,61 +70,56 @@ struct JsonPath { class JsonFunctions { public: static void init(); - static doris_udf::IntVal get_json_int( - doris_udf::FunctionContext* context, const doris_udf::StringVal& json_str, - const doris_udf::StringVal& path); - static doris_udf::StringVal get_json_string( - doris_udf::FunctionContext* context, const 
doris_udf::StringVal& json_str, - const doris_udf::StringVal& path); - static doris_udf::DoubleVal get_json_double( - doris_udf::FunctionContext* context, const doris_udf::StringVal& json_str, - const doris_udf::StringVal& path); - - static rapidjson::Value* get_json_object( - FunctionContext* context, - const std::string& json_string, const std::string& path_string, - const JsonFunctionType& fntype, rapidjson::Document* document); + static doris_udf::IntVal get_json_int(doris_udf::FunctionContext* context, + const doris_udf::StringVal& json_str, + const doris_udf::StringVal& path); + static doris_udf::StringVal get_json_string(doris_udf::FunctionContext* context, + const doris_udf::StringVal& json_str, + const doris_udf::StringVal& path); + static doris_udf::DoubleVal get_json_double(doris_udf::FunctionContext* context, + const doris_udf::StringVal& json_str, + const doris_udf::StringVal& path); + + static rapidjson::Value* get_json_object(FunctionContext* context, + const std::string& json_string, + const std::string& path_string, + const JsonFunctionType& fntype, + rapidjson::Document* document); /** * The `document` parameter must be has parsed. * return Value Is Array object */ static rapidjson::Value* get_json_array_from_parsed_json( - const std::vector& parsed_paths, - rapidjson::Value* document, + const std::vector& parsed_paths, rapidjson::Value* document, rapidjson::Document::AllocatorType& mem_allocator); // this is only for test, it will parse the json path inside, // so that we can easily pass a json path as string. 
static rapidjson::Value* get_json_array_from_parsed_json( - const std::string& jsonpath, - rapidjson::Value* document, + const std::string& jsonpath, rapidjson::Value* document, rapidjson::Document::AllocatorType& mem_allocator); static rapidjson::Value* get_json_object_from_parsed_json( - const std::vector& parsed_paths, - rapidjson::Value* document, + const std::vector& parsed_paths, rapidjson::Value* document, rapidjson::Document::AllocatorType& mem_allocator); - static void json_path_prepare( - doris_udf::FunctionContext*, - doris_udf::FunctionContext::FunctionStateScope); + static void json_path_prepare(doris_udf::FunctionContext*, + doris_udf::FunctionContext::FunctionStateScope); - static void json_path_close( - doris_udf::FunctionContext*, - doris_udf::FunctionContext::FunctionStateScope); + static void json_path_close(doris_udf::FunctionContext*, + doris_udf::FunctionContext::FunctionStateScope); - static void parse_json_paths( - const std::string& path_strings, - std::vector* parsed_paths); + static void parse_json_paths(const std::string& path_strings, + std::vector* parsed_paths); private: static rapidjson::Value* match_value(const std::vector& parsed_paths, - rapidjson::Value* document, rapidjson::Document::AllocatorType& mem_allocator, - bool is_insert_null = false); - static void get_parsed_paths( - const std::vector& path_exprs, - std::vector* parsed_paths); + rapidjson::Value* document, + rapidjson::Document::AllocatorType& mem_allocator, + bool is_insert_null = false); + static void get_parsed_paths(const std::vector& path_exprs, + std::vector* parsed_paths); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/like_predicate.cpp b/be/src/exprs/like_predicate.cpp index ccdae6a8b1d9dc..933d9e02858d53 100644 --- a/be/src/exprs/like_predicate.cpp +++ b/be/src/exprs/like_predicate.cpp @@ -17,9 +17,10 @@ #include "exprs/like_predicate.h" -#include #include +#include + #include "exprs/string_functions.h" #include "runtime/string_value.hpp" @@ 
-27,17 +28,15 @@ namespace doris { // A regex to match any regex pattern is equivalent to a substring search. static const RE2 SUBSTRING_RE( - "(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*"); + "(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*"); // A regex to match any regex pattern which is equivalent to matching a constant string // at the end of the string values. -static const RE2 ENDS_WITH_RE( - "(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$"); +static const RE2 ENDS_WITH_RE("(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$"); // A regex to match any regex pattern which is equivalent to matching a constant string // at the end of the string values. -static const RE2 STARTS_WITH_RE( - "\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*"); +static const RE2 STARTS_WITH_RE("\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*"); // A regex to match any regex pattern which is equivalent to a constant string match. 
static const RE2 EQUALS_RE("\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$"); @@ -47,11 +46,10 @@ static const re2::RE2 LIKE_ENDS_WITH_RE("(?:%+)(((\\\\%)|(\\\\_)|([^%_]))+)"); static const re2::RE2 LIKE_STARTS_WITH_RE("(((\\\\%)|(\\\\_)|([^%_]))+)(?:%+)"); static const re2::RE2 LIKE_EQUALS_RE("(((\\\\%)|(\\\\_)|([^%_]))+)"); -void LikePredicate::init() { -} +void LikePredicate::init() {} -void LikePredicate::like_prepare( - FunctionContext* context, FunctionContext::FunctionStateScope scope) { +void LikePredicate::like_prepare(FunctionContext* context, + FunctionContext::FunctionStateScope scope) { if (scope != FunctionContext::THREAD_LOCAL) { return; } @@ -84,10 +82,9 @@ void LikePredicate::like_prepare( state->function = constant_starts_with_fn; } else { std::string re_pattern; - convert_like_pattern( - context, - *reinterpret_cast(context->get_constant_arg(1)), - &re_pattern); + convert_like_pattern(context, + *reinterpret_cast(context->get_constant_arg(1)), + &re_pattern); RE2::Options opts; opts.set_never_nl(false); opts.set_dot_nl(true); @@ -99,28 +96,24 @@ void LikePredicate::like_prepare( } } -BooleanVal LikePredicate::like( - FunctionContext* context, - const StringVal& val, - const StringVal& pattern) { +BooleanVal LikePredicate::like(FunctionContext* context, const StringVal& val, + const StringVal& pattern) { LikePredicateState* state = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); + context->get_function_state(FunctionContext::THREAD_LOCAL)); return (state->function)(context, val, pattern); } -void LikePredicate::like_close( - FunctionContext* context, - FunctionContext::FunctionStateScope scope) { +void LikePredicate::like_close(FunctionContext* context, + FunctionContext::FunctionStateScope scope) { if (scope == FunctionContext::THREAD_LOCAL) { LikePredicateState* state = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); + 
context->get_function_state(FunctionContext::THREAD_LOCAL)); delete state; } } -void LikePredicate::regex_prepare( - FunctionContext* context, - FunctionContext::FunctionStateScope scope) { +void LikePredicate::regex_prepare(FunctionContext* context, + FunctionContext::FunctionStateScope scope) { if (scope != FunctionContext::THREAD_LOCAL) { return; } @@ -167,17 +160,17 @@ void LikePredicate::regex_prepare( } } -BooleanVal LikePredicate::regex( - FunctionContext* context, const StringVal& val, const StringVal& pattern) { +BooleanVal LikePredicate::regex(FunctionContext* context, const StringVal& val, + const StringVal& pattern) { LikePredicateState* state = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); + context->get_function_state(FunctionContext::THREAD_LOCAL)); return (state->function)(context, val, pattern); } // This prepare function is used only when 3 parameters are passed to the regexp_like() // function. For the 2 parameter version, the RegexPrepare() function is used to prepare. -void LikePredicate::regexp_like_prepare( - FunctionContext* context, FunctionContext::FunctionStateScope scope) { +void LikePredicate::regexp_like_prepare(FunctionContext* context, + FunctionContext::FunctionStateScope scope) { if (scope != FunctionContext::THREAD_LOCAL) { return; } @@ -217,11 +210,8 @@ void LikePredicate::regexp_like_prepare( // This is used only for the 3 parameter version of regexp_like(). The 2 parameter // version calls Regex() directly. 
-BooleanVal LikePredicate::regexp_like( - FunctionContext* context, - const StringVal& val, - const StringVal& pattern, - const StringVal& match_parameter) { +BooleanVal LikePredicate::regexp_like(FunctionContext* context, const StringVal& val, + const StringVal& pattern, const StringVal& match_parameter) { if (val.is_null || pattern.is_null) { return BooleanVal::null(); } @@ -240,8 +230,8 @@ BooleanVal LikePredicate::regexp_like( std::string re_pattern(reinterpret_cast(pattern.ptr), pattern.len); re2::RE2 re(re_pattern, opts); if (re.ok()) { - return RE2::PartialMatch(re2::StringPiece( - reinterpret_cast(val.ptr), val.len), re); + return RE2::PartialMatch( + re2::StringPiece(reinterpret_cast(val.ptr), val.len), re); } else { context->set_error("Invalid regex: $0"); return BooleanVal(false); @@ -250,32 +240,32 @@ BooleanVal LikePredicate::regexp_like( return constant_regex_fn_partial(context, val, pattern); } -void LikePredicate::regex_close( - FunctionContext* context, FunctionContext::FunctionStateScope scope) { +void LikePredicate::regex_close(FunctionContext* context, + FunctionContext::FunctionStateScope scope) { if (scope == FunctionContext::THREAD_LOCAL) { LikePredicateState* state = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); + context->get_function_state(FunctionContext::THREAD_LOCAL)); delete state; } } -BooleanVal LikePredicate::regex_fn( - FunctionContext* context, const StringVal& val, const StringVal& pattern) { +BooleanVal LikePredicate::regex_fn(FunctionContext* context, const StringVal& val, + const StringVal& pattern) { return regex_match(context, val, pattern, false); } -BooleanVal LikePredicate::like_fn( - FunctionContext* context, const StringVal& val, const StringVal& pattern) { +BooleanVal LikePredicate::like_fn(FunctionContext* context, const StringVal& val, + const StringVal& pattern) { return regex_match(context, val, pattern, true); } -BooleanVal LikePredicate::constant_substring_fn( - FunctionContext* 
context, const StringVal& val, const StringVal& pattern) { +BooleanVal LikePredicate::constant_substring_fn(FunctionContext* context, const StringVal& val, + const StringVal& pattern) { if (val.is_null) { return BooleanVal::null(); } LikePredicateState* state = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); + context->get_function_state(FunctionContext::THREAD_LOCAL)); if (state->search_string_sv.len == 0) { return BooleanVal(true); } @@ -283,89 +273,87 @@ BooleanVal LikePredicate::constant_substring_fn( return BooleanVal(state->substring_pattern.search(&pattern_value) != -1); } -BooleanVal LikePredicate::constant_starts_with_fn( - FunctionContext* context, const StringVal& val, const StringVal& pattern) { +BooleanVal LikePredicate::constant_starts_with_fn(FunctionContext* context, const StringVal& val, + const StringVal& pattern) { if (val.is_null) { return BooleanVal::null(); } LikePredicateState* state = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); + context->get_function_state(FunctionContext::THREAD_LOCAL)); if (val.len < state->search_string_sv.len) { return BooleanVal(false); } else { - StringValue v = - StringValue(reinterpret_cast(val.ptr), state->search_string_sv.len); + StringValue v = StringValue(reinterpret_cast(val.ptr), state->search_string_sv.len); return BooleanVal(state->search_string_sv.eq((v))); } } -BooleanVal LikePredicate::constant_ends_with_fn( - FunctionContext* context, const StringVal& val, const StringVal& pattern) { +BooleanVal LikePredicate::constant_ends_with_fn(FunctionContext* context, const StringVal& val, + const StringVal& pattern) { if (val.is_null) { return BooleanVal::null(); } LikePredicateState* state = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); + context->get_function_state(FunctionContext::THREAD_LOCAL)); if (val.len < state->search_string_sv.len) { return BooleanVal(false); } else { - char* ptr = - 
reinterpret_cast(val.ptr) + val.len - state->search_string_sv.len; + char* ptr = reinterpret_cast(val.ptr) + val.len - state->search_string_sv.len; int len = state->search_string_sv.len; StringValue v = StringValue(ptr, len); return BooleanVal(state->search_string_sv.eq(v)); } } -BooleanVal LikePredicate::constant_equals_fn( - FunctionContext* context, const StringVal& val, const StringVal& pattern) { +BooleanVal LikePredicate::constant_equals_fn(FunctionContext* context, const StringVal& val, + const StringVal& pattern) { if (val.is_null) { return BooleanVal::null(); } LikePredicateState* state = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); + context->get_function_state(FunctionContext::THREAD_LOCAL)); return BooleanVal(state->search_string_sv.eq(StringValue::from_string_val(val))); } -BooleanVal LikePredicate::constant_regex_fn_partial( - FunctionContext* context, const StringVal& val, const StringVal& pattern) { +BooleanVal LikePredicate::constant_regex_fn_partial(FunctionContext* context, const StringVal& val, + const StringVal& pattern) { if (val.is_null) { return BooleanVal::null(); } LikePredicateState* state = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); + context->get_function_state(FunctionContext::THREAD_LOCAL)); re2::StringPiece operand_sp(reinterpret_cast(val.ptr), val.len); return RE2::PartialMatch(operand_sp, *state->regex); } -BooleanVal LikePredicate::constant_regex_fn( - FunctionContext* context, const StringVal& val, const StringVal& pattern) { +BooleanVal LikePredicate::constant_regex_fn(FunctionContext* context, const StringVal& val, + const StringVal& pattern) { if (val.is_null) { return BooleanVal::null(); } LikePredicateState* state = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); + context->get_function_state(FunctionContext::THREAD_LOCAL)); re2::StringPiece operand_sp(reinterpret_cast(val.ptr), val.len); return 
RE2::FullMatch(operand_sp, *state->regex); } -BooleanVal LikePredicate::regex_match( - FunctionContext* context, - const StringVal& operand_value, - const StringVal& pattern_value, - bool is_like_pattern) { +BooleanVal LikePredicate::regex_match(FunctionContext* context, const StringVal& operand_value, + const StringVal& pattern_value, bool is_like_pattern) { if (operand_value.is_null || pattern_value.is_null) { return BooleanVal::null(); } if (context->is_arg_constant(1)) { LikePredicateState* state = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); + context->get_function_state(FunctionContext::THREAD_LOCAL)); if (is_like_pattern) { - return RE2::FullMatch(re2::StringPiece(reinterpret_cast( - operand_value.ptr), operand_value.len), *state->regex.get()); + return RE2::FullMatch(re2::StringPiece(reinterpret_cast(operand_value.ptr), + operand_value.len), + *state->regex.get()); } else { - return RE2::PartialMatch(re2::StringPiece(reinterpret_cast( - operand_value.ptr), operand_value.len), *state->regex.get()); + return RE2::PartialMatch( + re2::StringPiece(reinterpret_cast(operand_value.ptr), + operand_value.len), + *state->regex.get()); } } else { std::string re_pattern; @@ -375,17 +363,21 @@ BooleanVal LikePredicate::regex_match( if (is_like_pattern) { convert_like_pattern(context, pattern_value, &re_pattern); } else { - re_pattern = - std::string(reinterpret_cast(pattern_value.ptr), pattern_value.len); + re_pattern = std::string(reinterpret_cast(pattern_value.ptr), + pattern_value.len); } re2::RE2 re(re_pattern, opts); if (re.ok()) { if (is_like_pattern) { - return RE2::FullMatch(re2::StringPiece( - reinterpret_cast(operand_value.ptr), operand_value.len), re); + return RE2::FullMatch( + re2::StringPiece(reinterpret_cast(operand_value.ptr), + operand_value.len), + re); } else { - return RE2::PartialMatch(re2::StringPiece( - reinterpret_cast(operand_value.ptr), operand_value.len), re); + return RE2::PartialMatch( + 
re2::StringPiece(reinterpret_cast(operand_value.ptr), + operand_value.len), + re); } } else { context->set_error("Invalid regex: $0"); @@ -394,13 +386,11 @@ BooleanVal LikePredicate::regex_match( } } -void LikePredicate::convert_like_pattern( - FunctionContext* context, - const StringVal& pattern, - std::string* re_pattern) { +void LikePredicate::convert_like_pattern(FunctionContext* context, const StringVal& pattern, + std::string* re_pattern) { re_pattern->clear(); LikePredicateState* state = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); + context->get_function_state(FunctionContext::THREAD_LOCAL)); bool is_escaped = false; for (int i = 0; i < pattern.len; ++i) { if (!is_escaped && pattern.ptr[i] == '%') { @@ -410,22 +400,11 @@ void LikePredicate::convert_like_pattern( // check for escape char before checking for regex special chars, they might overlap } else if (!is_escaped && pattern.ptr[i] == state->escape_char) { is_escaped = true; - } else if ( - pattern.ptr[i] == '.' - || pattern.ptr[i] == '[' - || pattern.ptr[i] == ']' - || pattern.ptr[i] == '{' - || pattern.ptr[i] == '}' - || pattern.ptr[i] == '(' - || pattern.ptr[i] == ')' - || pattern.ptr[i] == '\\' - || pattern.ptr[i] == '*' - || pattern.ptr[i] == '+' - || pattern.ptr[i] == '?' - || pattern.ptr[i] == '|' - || pattern.ptr[i] == '^' - || pattern.ptr[i] == '$' - ) { + } else if (pattern.ptr[i] == '.' || pattern.ptr[i] == '[' || pattern.ptr[i] == ']' || + pattern.ptr[i] == '{' || pattern.ptr[i] == '}' || pattern.ptr[i] == '(' || + pattern.ptr[i] == ')' || pattern.ptr[i] == '\\' || pattern.ptr[i] == '*' || + pattern.ptr[i] == '+' || pattern.ptr[i] == '?' 
|| pattern.ptr[i] == '|' || + pattern.ptr[i] == '^' || pattern.ptr[i] == '$') { // escape all regex special characters; see list at re_pattern->append("\\"); re_pattern->append(1, pattern.ptr[i]); @@ -443,10 +422,9 @@ void LikePredicate::remove_escape_character(std::string* search_string) { tmp_search_string.swap(*search_string); int len = tmp_search_string.length(); for (int i = 0; i < len;) { - if (tmp_search_string[i] == '\\' - && i + 1 < len - && (tmp_search_string[i+1] == '%' || tmp_search_string[i+1] == '_')) { - search_string->append(1, tmp_search_string[i+1]); + if (tmp_search_string[i] == '\\' && i + 1 < len && + (tmp_search_string[i + 1] == '%' || tmp_search_string[i + 1] == '_')) { + search_string->append(1, tmp_search_string[i + 1]); i += 2; } else { search_string->append(1, tmp_search_string[i]); @@ -455,4 +433,4 @@ void LikePredicate::remove_escape_character(std::string* search_string) { } } -} +} // namespace doris diff --git a/be/src/exprs/like_predicate.h b/be/src/exprs/like_predicate.h index 5ff519253949f5..f231f26f92f4ca 100644 --- a/be/src/exprs/like_predicate.h +++ b/be/src/exprs/like_predicate.h @@ -18,10 +18,11 @@ #ifndef DORIS_BE_SRC_QUERY_EXPRS_LIKE_PREDICATE_H #define DORIS_BE_SRC_QUERY_EXPRS_LIKE_PREDICATE_H -#include -#include #include +#include +#include + #include "exprs/predicate.h" #include "gen_cpp/Exprs_types.h" #include "runtime/string_search.hpp" @@ -33,8 +34,9 @@ class LikePredicate { static void init(); private: - typedef doris_udf::BooleanVal (*LikePredicateFunction) ( - doris_udf::FunctionContext*, const doris_udf::StringVal&, const doris_udf::StringVal&); + typedef doris_udf::BooleanVal (*LikePredicateFunction)(doris_udf::FunctionContext*, + const doris_udf::StringVal&, + const doris_udf::StringVal&); struct LikePredicateState { char escape_char; @@ -66,8 +68,7 @@ class LikePredicate { /// Used for RLIKE and REGEXP predicates if the pattern is a constant argument. 
std::unique_ptr regex; - LikePredicateState() : escape_char('\\') { - } + LikePredicateState() : escape_char('\\') {} void set_search_string(const std::string& search_string_arg) { search_string = search_string_arg; @@ -78,101 +79,85 @@ class LikePredicate { friend class OpcodeRegistry; - static void like_prepare( - doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope scope); + static void like_prepare(doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope); - static doris_udf::BooleanVal like( - doris_udf::FunctionContext* context, - const doris_udf::StringVal& val, - const doris_udf::StringVal& pattern); + static doris_udf::BooleanVal like(doris_udf::FunctionContext* context, + const doris_udf::StringVal& val, + const doris_udf::StringVal& pattern); - static void like_close( - doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope scope); + static void like_close(doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope); - static void regex_prepare( - doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope scope); + static void regex_prepare(doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope); - static doris_udf::BooleanVal regex( - doris_udf::FunctionContext* context, - const doris_udf::StringVal& val, - const doris_udf::StringVal& pattern); + static doris_udf::BooleanVal regex(doris_udf::FunctionContext* context, + const doris_udf::StringVal& val, + const doris_udf::StringVal& pattern); /// Prepare function for regexp_like() when a third optional parameter is used - static void regexp_like_prepare( - doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope scope); + static void regexp_like_prepare(doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope); /// Handles regexp_like() when 3 
parameters are passed to it - static doris_udf::BooleanVal regexp_like( - doris_udf::FunctionContext* context, - const doris_udf::StringVal& val, - const doris_udf::StringVal& pattern, - const doris_udf::StringVal& match_parameter); - - static void regex_close( - doris_udf::FunctionContext*, - doris_udf::FunctionContext::FunctionStateScope scope); - - static doris_udf::BooleanVal regex_fn( - doris_udf::FunctionContext* context, - const doris_udf::StringVal& val, - const doris_udf::StringVal& pattern); - - static doris_udf::BooleanVal like_fn( - doris_udf::FunctionContext* context, - const doris_udf::StringVal& val, - const doris_udf::StringVal& pattern); + static doris_udf::BooleanVal regexp_like(doris_udf::FunctionContext* context, + const doris_udf::StringVal& val, + const doris_udf::StringVal& pattern, + const doris_udf::StringVal& match_parameter); + + static void regex_close(doris_udf::FunctionContext*, + doris_udf::FunctionContext::FunctionStateScope scope); + + static doris_udf::BooleanVal regex_fn(doris_udf::FunctionContext* context, + const doris_udf::StringVal& val, + const doris_udf::StringVal& pattern); + + static doris_udf::BooleanVal like_fn(doris_udf::FunctionContext* context, + const doris_udf::StringVal& val, + const doris_udf::StringVal& pattern); /// Handling of like predicates that map to strstr - static doris_udf::BooleanVal constant_substring_fn( - doris_udf::FunctionContext* context, - const doris_udf::StringVal& val, - const doris_udf::StringVal& pattern); + static doris_udf::BooleanVal constant_substring_fn(doris_udf::FunctionContext* context, + const doris_udf::StringVal& val, + const doris_udf::StringVal& pattern); /// Handling of like predicates that can be implemented using strncmp - static doris_udf::BooleanVal constant_starts_with_fn( - doris_udf::FunctionContext* context, - const doris_udf::StringVal& val, - const doris_udf::StringVal& pattern); + static doris_udf::BooleanVal constant_starts_with_fn(doris_udf::FunctionContext* 
context, + const doris_udf::StringVal& val, + const doris_udf::StringVal& pattern); /// Handling of like predicates that can be implemented using strncmp - static doris_udf::BooleanVal constant_ends_with_fn( - doris_udf::FunctionContext* context, - const doris_udf::StringVal& val, - const doris_udf::StringVal& pattern); + static doris_udf::BooleanVal constant_ends_with_fn(doris_udf::FunctionContext* context, + const doris_udf::StringVal& val, + const doris_udf::StringVal& pattern); /// Handling of like predicates that can be implemented using strcmp - static doris_udf::BooleanVal constant_equals_fn( - doris_udf::FunctionContext* context, - const doris_udf::StringVal& val, - const doris_udf::StringVal& pattern); + static doris_udf::BooleanVal constant_equals_fn(doris_udf::FunctionContext* context, + const doris_udf::StringVal& val, + const doris_udf::StringVal& pattern); - static doris_udf::BooleanVal constant_regex_fn_partial( - doris_udf::FunctionContext* context, const doris_udf::StringVal& val, - const doris_udf::StringVal& pattern); + static doris_udf::BooleanVal constant_regex_fn_partial(doris_udf::FunctionContext* context, + const doris_udf::StringVal& val, + const doris_udf::StringVal& pattern); - static doris_udf::BooleanVal constant_regex_fn( - doris_udf::FunctionContext* context, - const doris_udf::StringVal& val, - const doris_udf::StringVal& pattern); + static doris_udf::BooleanVal constant_regex_fn(doris_udf::FunctionContext* context, + const doris_udf::StringVal& val, + const doris_udf::StringVal& pattern); - static doris_udf::BooleanVal regex_match( - doris_udf::FunctionContext* context, const doris_udf::StringVal& val, - const doris_udf::StringVal& pattern, bool is_like_pattern); + static doris_udf::BooleanVal regex_match(doris_udf::FunctionContext* context, + const doris_udf::StringVal& val, + const doris_udf::StringVal& pattern, + bool is_like_pattern); /// Convert a LIKE pattern (with embedded % and _) into the corresponding /// regular 
expression pattern. Escaped chars are copied verbatim. - static void convert_like_pattern( - doris_udf::FunctionContext* context, - const doris_udf::StringVal& pattern, - std::string* re_pattern); + static void convert_like_pattern(doris_udf::FunctionContext* context, + const doris_udf::StringVal& pattern, std::string* re_pattern); static void remove_escape_character(std::string* search_string); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/literal.cpp b/be/src/exprs/literal.cpp index 398b5c32a00469..89269ef0b5bd90 100644 --- a/be/src/exprs/literal.cpp +++ b/be/src/exprs/literal.cpp @@ -20,13 +20,12 @@ #include #include "gen_cpp/Exprs_types.h" -#include "util/string_parser.hpp" #include "runtime/runtime_state.h" +#include "util/string_parser.hpp" namespace doris { -Literal::Literal(const TExprNode& node) : - Expr(node) { +Literal::Literal(const TExprNode& node) : Expr(node) { switch (_type.type) { case TYPE_BOOLEAN: DCHECK_EQ(node.node_type, TExprNodeType::BOOL_LITERAL); @@ -56,12 +55,11 @@ Literal::Literal(const TExprNode& node) : case TYPE_LARGEINT: { StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; DCHECK_EQ(node.node_type, TExprNodeType::LARGE_INT_LITERAL); - _value.large_int_val = - StringParser::string_to_int<__int128>(node.large_int_literal.value.c_str(), - node.large_int_literal.value.size(), - &parse_result); + _value.large_int_val = StringParser::string_to_int<__int128>( + node.large_int_literal.value.c_str(), node.large_int_literal.value.size(), + &parse_result); if (parse_result != StringParser::PARSE_SUCCESS) { - _value.large_int_val = MAX_INT128; + _value.large_int_val = MAX_INT128; } break; } @@ -78,8 +76,8 @@ Literal::Literal(const TExprNode& node) : break; case TYPE_DATE: case TYPE_DATETIME: - _value.datetime_val.from_date_str( - node.date_literal.value.c_str(), node.date_literal.value.size()); + _value.datetime_val.from_date_str(node.date_literal.value.c_str(), + node.date_literal.value.size()); break; case 
TYPE_CHAR: case TYPE_VARCHAR: @@ -99,14 +97,13 @@ Literal::Literal(const TExprNode& node) : _value.decimalv2_val = DecimalV2Value(node.decimal_literal.value); break; } - default: + default: break; // DCHECK(false) << "Invalid type: " << TypeToString(_type.type); } } -Literal::~Literal() { -} +Literal::~Literal() {} BooleanVal Literal::get_boolean_val(ExprContext* context, TupleRow* row) { DCHECK_EQ(_type.type, TYPE_BOOLEAN) << _type; @@ -175,4 +172,4 @@ StringVal Literal::get_string_val(ExprContext* context, TupleRow* row) { return str_val; } -} +} // namespace doris diff --git a/be/src/exprs/literal.h b/be/src/exprs/literal.h index dc12f908e6d099..ec9616c54aa7cc 100644 --- a/be/src/exprs/literal.h +++ b/be/src/exprs/literal.h @@ -29,9 +29,7 @@ class Literal : public Expr { public: virtual ~Literal(); - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new Literal(*this)); - } + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new Literal(*this)); } virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow*); virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*); @@ -54,6 +52,6 @@ class Literal : public Expr { ExprValue _value; }; -} +} // namespace doris #endif diff --git a/be/src/exprs/math_functions.cpp b/be/src/exprs/math_functions.cpp index 89c10ea0d74ebb..612f2e3ba84715 100644 --- a/be/src/exprs/math_functions.cpp +++ b/be/src/exprs/math_functions.cpp @@ -17,18 +17,19 @@ #include "exprs/math_functions.h" -#include -#include +#include + #include +#include #include -#include +#include #include "common/compiler_util.h" #include "exprs/anyval_util.h" #include "exprs/expr.h" -#include "runtime/tuple_row.h" #include "runtime/decimal_value.h" #include "runtime/decimalv2_value.h" +#include "runtime/tuple_row.h" #include "util/string_parser.hpp" namespace doris { @@ -36,40 +37,32 @@ namespace doris { const char* MathFunctions::_s_alphanumeric_chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; const 
double log_10[] = { - 1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009, - 1e010, 1e011, 1e012, 1e013, 1e014, 1e015, 1e016, 1e017, 1e018, 1e019, - 1e020, 1e021, 1e022, 1e023, 1e024, 1e025, 1e026, 1e027, 1e028, 1e029, - 1e030, 1e031, 1e032, 1e033, 1e034, 1e035, 1e036, 1e037, 1e038, 1e039, - 1e040, 1e041, 1e042, 1e043, 1e044, 1e045, 1e046, 1e047, 1e048, 1e049, - 1e050, 1e051, 1e052, 1e053, 1e054, 1e055, 1e056, 1e057, 1e058, 1e059, - 1e060, 1e061, 1e062, 1e063, 1e064, 1e065, 1e066, 1e067, 1e068, 1e069, - 1e070, 1e071, 1e072, 1e073, 1e074, 1e075, 1e076, 1e077, 1e078, 1e079, - 1e080, 1e081, 1e082, 1e083, 1e084, 1e085, 1e086, 1e087, 1e088, 1e089, - 1e090, 1e091, 1e092, 1e093, 1e094, 1e095, 1e096, 1e097, 1e098, 1e099, - 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109, - 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119, - 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, - 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, - 1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, - 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, - 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, 1e169, - 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179, - 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189, - 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, 1e196, 1e197, 1e198, 1e199, - 1e200, 1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207, 1e208, 1e209, - 1e210, 1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, - 1e220, 1e221, 1e222, 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229, - 1e230, 1e231, 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, - 1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, - 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259, - 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269, - 1e270, 1e271, 1e272, 1e273, 
1e274, 1e275, 1e276, 1e277, 1e278, 1e279, - 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, 1e286, 1e287, 1e288, 1e289, - 1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, 1e299, - 1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308 -}; - -#define ARRAY_ELEMENTS(A) ((uint64_t) (sizeof(A)/sizeof(A[0]))) + 1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009, 1e010, 1e011, 1e012, + 1e013, 1e014, 1e015, 1e016, 1e017, 1e018, 1e019, 1e020, 1e021, 1e022, 1e023, 1e024, 1e025, + 1e026, 1e027, 1e028, 1e029, 1e030, 1e031, 1e032, 1e033, 1e034, 1e035, 1e036, 1e037, 1e038, + 1e039, 1e040, 1e041, 1e042, 1e043, 1e044, 1e045, 1e046, 1e047, 1e048, 1e049, 1e050, 1e051, + 1e052, 1e053, 1e054, 1e055, 1e056, 1e057, 1e058, 1e059, 1e060, 1e061, 1e062, 1e063, 1e064, + 1e065, 1e066, 1e067, 1e068, 1e069, 1e070, 1e071, 1e072, 1e073, 1e074, 1e075, 1e076, 1e077, + 1e078, 1e079, 1e080, 1e081, 1e082, 1e083, 1e084, 1e085, 1e086, 1e087, 1e088, 1e089, 1e090, + 1e091, 1e092, 1e093, 1e094, 1e095, 1e096, 1e097, 1e098, 1e099, 1e100, 1e101, 1e102, 1e103, + 1e104, 1e105, 1e106, 1e107, 1e108, 1e109, 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, + 1e117, 1e118, 1e119, 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, + 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, 1e140, 1e141, 1e142, + 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, + 1e156, 1e157, 1e158, 1e159, 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, + 1e169, 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179, 1e180, 1e181, + 1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189, 1e190, 1e191, 1e192, 1e193, 1e194, + 1e195, 1e196, 1e197, 1e198, 1e199, 1e200, 1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207, + 1e208, 1e209, 1e210, 1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, 1e220, + 1e221, 1e222, 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229, 1e230, 1e231, 1e232, 1e233, + 
1e234, 1e235, 1e236, 1e237, 1e238, 1e239, 1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, + 1e247, 1e248, 1e249, 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259, + 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269, 1e270, 1e271, 1e272, + 1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279, 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, + 1e286, 1e287, 1e288, 1e289, 1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, + 1e299, 1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308}; + +#define ARRAY_ELEMENTS(A) ((uint64_t)(sizeof(A) / sizeof(A[0]))) double MathFunctions::my_double_round(double value, int64_t dec, bool dec_unsigned, bool truncate) { bool dec_negative = (dec < 0) && !dec_unsigned; @@ -81,8 +74,8 @@ double MathFunctions::my_double_round(double value, int64_t dec, bool dec_unsign */ volatile double tmp2 = 0.0; - double tmp = (abs_dec < ARRAY_ELEMENTS(log_10) ? - log_10[abs_dec] : std::pow(10.0, (double)abs_dec)); + double tmp = + (abs_dec < ARRAY_ELEMENTS(log_10) ? log_10[abs_dec] : std::pow(10.0, (double)abs_dec)); // Pre-compute these, to avoid optimizing away e.g. 'floor(v/tmp) * tmp'. volatile double value_div_tmp = value / tmp; @@ -105,8 +98,7 @@ double MathFunctions::my_double_round(double value, int64_t dec, bool dec_unsign return tmp2; } -void MathFunctions::init() { -} +void MathFunctions::init() {} DoubleVal MathFunctions::pi(FunctionContext* ctx) { return DoubleVal(M_PI); @@ -177,10 +169,10 @@ SmallIntVal MathFunctions::abs(FunctionContext* ctx, const doris_udf::TinyIntVal } // Generates a UDF that always calls FN() on the input val and returns it. 
-#define ONE_ARG_MATH_FN(NAME, RET_TYPE, INPUT_TYPE, FN) \ +#define ONE_ARG_MATH_FN(NAME, RET_TYPE, INPUT_TYPE, FN) \ RET_TYPE MathFunctions::NAME(FunctionContext* ctx, const INPUT_TYPE& v) { \ - if (v.is_null) return RET_TYPE::null(); \ - return RET_TYPE(FN(v.val)); \ + if (v.is_null) return RET_TYPE::null(); \ + return RET_TYPE(FN(v.val)); \ } ONE_ARG_MATH_FN(abs, DoubleVal, DoubleVal, std::fabs); @@ -198,56 +190,50 @@ ONE_ARG_MATH_FN(ln, DoubleVal, DoubleVal, std::log); ONE_ARG_MATH_FN(log10, DoubleVal, DoubleVal, std::log10); ONE_ARG_MATH_FN(exp, DoubleVal, DoubleVal, std::exp); -FloatVal MathFunctions::sign( - FunctionContext* ctx, const DoubleVal& v) { +FloatVal MathFunctions::sign(FunctionContext* ctx, const DoubleVal& v) { if (v.is_null) { return FloatVal::null(); } return FloatVal((v.val > 0) ? 1.0f : ((v.val < 0) ? -1.0f : 0.0f)); } -DoubleVal MathFunctions::radians( - FunctionContext* ctx, const DoubleVal& v) { +DoubleVal MathFunctions::radians(FunctionContext* ctx, const DoubleVal& v) { if (v.is_null) { return v; } return DoubleVal(v.val * M_PI / 180.0); } -DoubleVal MathFunctions::degrees( - FunctionContext* ctx, const DoubleVal& v) { +DoubleVal MathFunctions::degrees(FunctionContext* ctx, const DoubleVal& v) { if (v.is_null) { return v; } return DoubleVal(v.val * 180.0 / M_PI); } -BigIntVal MathFunctions::round( - FunctionContext* ctx, const DoubleVal& v) { +BigIntVal MathFunctions::round(FunctionContext* ctx, const DoubleVal& v) { if (v.is_null) { return BigIntVal::null(); } return BigIntVal(static_cast(v.val + ((v.val < 0) ? 
-0.5 : 0.5))); } -DoubleVal MathFunctions::round_up_to( - FunctionContext* ctx, const DoubleVal& v, const IntVal& scale) { +DoubleVal MathFunctions::round_up_to(FunctionContext* ctx, const DoubleVal& v, + const IntVal& scale) { if (v.is_null || scale.is_null) { return DoubleVal::null(); } return DoubleVal(my_double_round(v.val, scale.val, false, false)); } -DoubleVal MathFunctions::truncate( - FunctionContext* ctx, const DoubleVal& v, const IntVal& scale) { +DoubleVal MathFunctions::truncate(FunctionContext* ctx, const DoubleVal& v, const IntVal& scale) { if (v.is_null || scale.is_null) { return DoubleVal::null(); } return DoubleVal(my_double_round(v.val, scale.val, false, true)); } -DoubleVal MathFunctions::log2( - FunctionContext* ctx, const DoubleVal& v) { +DoubleVal MathFunctions::log2(FunctionContext* ctx, const DoubleVal& v) { if (v.is_null) { return DoubleVal::null(); } @@ -255,30 +241,26 @@ DoubleVal MathFunctions::log2( } const double EPSILON = 1e-9; -DoubleVal MathFunctions::log( - FunctionContext* ctx, const DoubleVal& base, const DoubleVal& v) { +DoubleVal MathFunctions::log(FunctionContext* ctx, const DoubleVal& base, const DoubleVal& v) { if (base.is_null || v.is_null) { return DoubleVal::null(); } - if (base.val <= 0 || std::fabs(base.val - 1.0) < EPSILON || v.val <= 0.0) { + if (base.val <= 0 || std::fabs(base.val - 1.0) < EPSILON || v.val <= 0.0) { return DoubleVal::null(); } return DoubleVal(std::log(v.val) / std::log(base.val)); } -DoubleVal MathFunctions::pow( - FunctionContext* ctx, const DoubleVal& base, const DoubleVal& exp) { +DoubleVal MathFunctions::pow(FunctionContext* ctx, const DoubleVal& base, const DoubleVal& exp) { if (base.is_null || exp.is_null) { return DoubleVal::null(); } return DoubleVal(std::pow(base.val, exp.val)); } -void MathFunctions::rand_prepare( - FunctionContext* ctx, FunctionContext::FunctionStateScope scope) { - std::mt19937* generator = reinterpret_cast( - ctx->allocate(sizeof(std::mt19937))); +void 
MathFunctions::rand_prepare(FunctionContext* ctx, FunctionContext::FunctionStateScope scope) { + std::mt19937* generator = reinterpret_cast(ctx->allocate(sizeof(std::mt19937))); if (UNLIKELY(generator == NULL)) { LOG(ERROR) << "allocate random seed generator failed."; return; @@ -306,30 +288,29 @@ void MathFunctions::rand_prepare( } DoubleVal MathFunctions::rand(FunctionContext* ctx) { - std::mt19937* generator = reinterpret_cast( - ctx->get_function_state(FunctionContext::THREAD_LOCAL)); - DCHECK(generator != nullptr); - static const double min = 0.0; - static const double max = 1.0; - std::uniform_real_distribution distribution(min, max); - return DoubleVal(distribution(*generator)); + std::mt19937* generator = + reinterpret_cast(ctx->get_function_state(FunctionContext::THREAD_LOCAL)); + DCHECK(generator != nullptr); + static const double min = 0.0; + static const double max = 1.0; + std::uniform_real_distribution distribution(min, max); + return DoubleVal(distribution(*generator)); } DoubleVal MathFunctions::rand_seed(FunctionContext* ctx, const BigIntVal& seed) { - if (seed.is_null) { - return DoubleVal::null(); - } - return rand(ctx); + if (seed.is_null) { + return DoubleVal::null(); + } + return rand(ctx); } -void MathFunctions::rand_close(FunctionContext* ctx, - FunctionContext::FunctionStateScope scope) { - if (scope == FunctionContext::THREAD_LOCAL) { - uint8_t* generator = reinterpret_cast( - ctx->get_function_state(FunctionContext::THREAD_LOCAL)); - ctx->free(generator); - ctx->set_function_state(FunctionContext::THREAD_LOCAL, nullptr); - } +void MathFunctions::rand_close(FunctionContext* ctx, FunctionContext::FunctionStateScope scope) { + if (scope == FunctionContext::THREAD_LOCAL) { + uint8_t* generator = + reinterpret_cast(ctx->get_function_state(FunctionContext::THREAD_LOCAL)); + ctx->free(generator); + ctx->set_function_state(FunctionContext::THREAD_LOCAL, nullptr); + } } StringVal MathFunctions::bin(FunctionContext* ctx, const BigIntVal& v) { @@ 
-431,17 +412,16 @@ StringVal MathFunctions::unhex(FunctionContext* ctx, const StringVal& s) { return AnyValUtil::from_buffer_temp(ctx, result, result_len); } -StringVal MathFunctions::conv_int( - FunctionContext* ctx, const BigIntVal& num, - const TinyIntVal& src_base, const TinyIntVal& dest_base) { +StringVal MathFunctions::conv_int(FunctionContext* ctx, const BigIntVal& num, + const TinyIntVal& src_base, const TinyIntVal& dest_base) { if (num.is_null || src_base.is_null || dest_base.is_null) { return StringVal::null(); } // As in MySQL and Hive, min base is 2 and max base is 36. // (36 is max base representable by alphanumeric chars) // If a negative target base is given, num should be interpreted in 2's complement. - if (std::abs(src_base.val) < MIN_BASE || std::abs(src_base.val) > MAX_BASE - || std::abs(dest_base.val) < MIN_BASE || std::abs(dest_base.val) > MAX_BASE) { + if (std::abs(src_base.val) < MIN_BASE || std::abs(src_base.val) > MAX_BASE || + std::abs(dest_base.val) < MIN_BASE || std::abs(dest_base.val) > MAX_BASE) { // Return NULL like Hive does. return StringVal::null(); } @@ -461,24 +441,23 @@ StringVal MathFunctions::conv_int( return decimal_to_base(ctx, decimal_num, dest_base.val); } -StringVal MathFunctions::conv_string( - FunctionContext* ctx, const StringVal& num_str, - const TinyIntVal& src_base, const TinyIntVal& dest_base) { +StringVal MathFunctions::conv_string(FunctionContext* ctx, const StringVal& num_str, + const TinyIntVal& src_base, const TinyIntVal& dest_base) { if (num_str.is_null || src_base.is_null || dest_base.is_null) { return StringVal::null(); } // As in MySQL and Hive, min base is 2 and max base is 36. // (36 is max base representable by alphanumeric chars) // If a negative target base is given, num should be interpreted in 2's complement. 
- if (std::abs(src_base.val) < MIN_BASE || std::abs(src_base.val) > MAX_BASE - || std::abs(dest_base.val) < MIN_BASE || std::abs(dest_base.val) > MAX_BASE) { + if (std::abs(src_base.val) < MIN_BASE || std::abs(src_base.val) > MAX_BASE || + std::abs(dest_base.val) < MIN_BASE || std::abs(dest_base.val) > MAX_BASE) { // Return NULL like Hive does. return StringVal::null(); } // Convert digits in num_str in src_base to decimal. StringParser::ParseResult parse_res; int64_t decimal_num = StringParser::string_to_int( - reinterpret_cast(num_str.ptr), num_str.len, src_base.val, &parse_res); + reinterpret_cast(num_str.ptr), num_str.len, src_base.val, &parse_res); if (src_base.val < 0 && decimal_num >= 0) { // Invalid input. return StringVal::null(); @@ -490,8 +469,7 @@ StringVal MathFunctions::conv_string( return decimal_to_base(ctx, decimal_num, dest_base.val); } -StringVal MathFunctions::decimal_to_base( - FunctionContext* ctx, int64_t src_num, int8_t dest_base) { +StringVal MathFunctions::decimal_to_base(FunctionContext* ctx, int64_t src_num, int8_t dest_base) { // Max number of digits of any base (base 2 gives max digits), plus sign. 
const size_t max_digits = sizeof(uint64_t) * 8 + 1; char buf[max_digits]; @@ -521,8 +499,7 @@ StringVal MathFunctions::decimal_to_base( return AnyValUtil::from_buffer_temp(ctx, buf + max_digits - result_len, result_len); } -bool MathFunctions::decimal_in_base_to_decimal( - int64_t src_num, int8_t src_base, int64_t* result) { +bool MathFunctions::decimal_in_base_to_decimal(int64_t src_num, int8_t src_base, int64_t* result) { uint64_t temp_num = std::abs(src_num); int32_t place = 1; *result = 0; @@ -546,8 +523,8 @@ bool MathFunctions::decimal_in_base_to_decimal( return true; } -bool MathFunctions::handle_parse_result( - int8_t dest_base, int64_t* num, StringParser::ParseResult parse_res) { +bool MathFunctions::handle_parse_result(int8_t dest_base, int64_t* num, + StringParser::ParseResult parse_res) { // On overflow set special value depending on dest_base. // This is consistent with Hive and MySQL's behavior. if (parse_res == StringParser::PARSE_OVERFLOW) { @@ -563,76 +540,65 @@ bool MathFunctions::handle_parse_result( return true; } -BigIntVal MathFunctions::pmod_bigint( - FunctionContext* ctx, const BigIntVal& a, const BigIntVal& b) { +BigIntVal MathFunctions::pmod_bigint(FunctionContext* ctx, const BigIntVal& a, const BigIntVal& b) { if (a.is_null || b.is_null) { return BigIntVal::null(); } return BigIntVal(((a.val % b.val) + b.val) % b.val); } -DoubleVal MathFunctions::pmod_double( - FunctionContext* ctx, const DoubleVal& a, const DoubleVal& b) { +DoubleVal MathFunctions::pmod_double(FunctionContext* ctx, const DoubleVal& a, const DoubleVal& b) { if (a.is_null || b.is_null) { return DoubleVal::null(); } return DoubleVal(fmod(fmod(a.val, b.val) + b.val, b.val)); } -FloatVal MathFunctions::fmod_float( - FunctionContext* ctx, const FloatVal& a, const FloatVal& b) { +FloatVal MathFunctions::fmod_float(FunctionContext* ctx, const FloatVal& a, const FloatVal& b) { if (a.is_null || b.is_null || b.val == 0) { return FloatVal::null(); } return FloatVal(fmodf(a.val, 
b.val)); } -DoubleVal MathFunctions::fmod_double( - FunctionContext* ctx, const DoubleVal& a, const DoubleVal& b) { +DoubleVal MathFunctions::fmod_double(FunctionContext* ctx, const DoubleVal& a, const DoubleVal& b) { if (a.is_null || b.is_null || b.val == 0) { return DoubleVal::null(); } return DoubleVal(fmod(a.val, b.val)); } -BigIntVal MathFunctions::positive_bigint( - FunctionContext* ctx, const BigIntVal& val) { +BigIntVal MathFunctions::positive_bigint(FunctionContext* ctx, const BigIntVal& val) { return val; } -DoubleVal MathFunctions::positive_double( - FunctionContext* ctx, const DoubleVal& val) { +DoubleVal MathFunctions::positive_double(FunctionContext* ctx, const DoubleVal& val) { return val; } -DecimalVal MathFunctions::positive_decimal( - FunctionContext* ctx, const DecimalVal& val) { +DecimalVal MathFunctions::positive_decimal(FunctionContext* ctx, const DecimalVal& val) { return val; } -DecimalV2Val MathFunctions::positive_decimal( - FunctionContext* ctx, const DecimalV2Val& val) { +DecimalV2Val MathFunctions::positive_decimal(FunctionContext* ctx, const DecimalV2Val& val) { return val; } -BigIntVal MathFunctions::negative_bigint( - FunctionContext* ctx, const BigIntVal& val) { +BigIntVal MathFunctions::negative_bigint(FunctionContext* ctx, const BigIntVal& val) { if (val.is_null) { return val; } return BigIntVal(-val.val); } -DoubleVal MathFunctions::negative_double( - FunctionContext* ctx, const DoubleVal& val) { +DoubleVal MathFunctions::negative_double(FunctionContext* ctx, const DoubleVal& val) { if (val.is_null) { return val; } return DoubleVal(-val.val); } -DecimalVal MathFunctions::negative_decimal( - FunctionContext* ctx, const DecimalVal& val) { +DecimalVal MathFunctions::negative_decimal(FunctionContext* ctx, const DecimalVal& val) { if (val.is_null) { return val; } @@ -644,8 +610,7 @@ DecimalVal MathFunctions::negative_decimal( return result; } -DecimalV2Val MathFunctions::negative_decimal( - FunctionContext* ctx, const DecimalV2Val& 
val) { +DecimalV2Val MathFunctions::negative_decimal(FunctionContext* ctx, const DecimalV2Val& val) { if (val.is_null) { return val; } @@ -655,95 +620,91 @@ DecimalV2Val MathFunctions::negative_decimal( return result; } -#define LEAST_FN(TYPE) \ - TYPE MathFunctions::least(\ - FunctionContext* ctx, int num_args, const TYPE* args) { \ - if (args[0].is_null) return TYPE::null(); \ - int result_idx = 0; \ - for (int i = 1; i < num_args; ++i) { \ - if (args[i].is_null) return TYPE::null(); \ - if (args[i].val < args[result_idx].val) result_idx = i; \ - } \ - return TYPE(args[result_idx].val); \ +#define LEAST_FN(TYPE) \ + TYPE MathFunctions::least(FunctionContext* ctx, int num_args, const TYPE* args) { \ + if (args[0].is_null) return TYPE::null(); \ + int result_idx = 0; \ + for (int i = 1; i < num_args; ++i) { \ + if (args[i].is_null) return TYPE::null(); \ + if (args[i].val < args[result_idx].val) result_idx = i; \ + } \ + return TYPE(args[result_idx].val); \ } -#define LEAST_FNS() \ - LEAST_FN(TinyIntVal); \ +#define LEAST_FNS() \ + LEAST_FN(TinyIntVal); \ LEAST_FN(SmallIntVal); \ - LEAST_FN(IntVal); \ - LEAST_FN(BigIntVal); \ + LEAST_FN(IntVal); \ + LEAST_FN(BigIntVal); \ LEAST_FN(LargeIntVal); \ - LEAST_FN(FloatVal); \ + LEAST_FN(FloatVal); \ LEAST_FN(DoubleVal); LEAST_FNS(); -#define LEAST_NONNUMERIC_FN(TYPE_NAME, TYPE, DORIS_TYPE) \ - TYPE MathFunctions::least(\ - FunctionContext* ctx, int num_args, const TYPE* args) { \ - if (args[0].is_null) return TYPE::null(); \ - DORIS_TYPE result_val = DORIS_TYPE::from_##TYPE_NAME(args[0]); \ - for (int i = 1; i < num_args; ++i) { \ - if (args[i].is_null) return TYPE::null(); \ - DORIS_TYPE val = DORIS_TYPE::from_##TYPE_NAME(args[i]); \ - if (val < result_val) result_val = val; \ - } \ - TYPE result; \ - result_val.to_##TYPE_NAME(&result); \ - return result; \ - } - -#define LEAST_NONNUMERIC_FNS() \ - LEAST_NONNUMERIC_FN(string_val, StringVal, StringValue); \ +#define LEAST_NONNUMERIC_FN(TYPE_NAME, TYPE, DORIS_TYPE) \ + 
TYPE MathFunctions::least(FunctionContext* ctx, int num_args, const TYPE* args) { \ + if (args[0].is_null) return TYPE::null(); \ + DORIS_TYPE result_val = DORIS_TYPE::from_##TYPE_NAME(args[0]); \ + for (int i = 1; i < num_args; ++i) { \ + if (args[i].is_null) return TYPE::null(); \ + DORIS_TYPE val = DORIS_TYPE::from_##TYPE_NAME(args[i]); \ + if (val < result_val) result_val = val; \ + } \ + TYPE result; \ + result_val.to_##TYPE_NAME(&result); \ + return result; \ + } + +#define LEAST_NONNUMERIC_FNS() \ + LEAST_NONNUMERIC_FN(string_val, StringVal, StringValue); \ LEAST_NONNUMERIC_FN(datetime_val, DateTimeVal, DateTimeValue); \ - LEAST_NONNUMERIC_FN(decimal_val, DecimalVal, DecimalValue); \ - LEAST_NONNUMERIC_FN(decimal_val, DecimalV2Val, DecimalV2Value); \ + LEAST_NONNUMERIC_FN(decimal_val, DecimalVal, DecimalValue); \ + LEAST_NONNUMERIC_FN(decimal_val, DecimalV2Val, DecimalV2Value); LEAST_NONNUMERIC_FNS(); -#define GREATEST_FN(TYPE) \ - TYPE MathFunctions::greatest(\ - FunctionContext* ctx, int num_args, const TYPE* args) { \ - if (args[0].is_null) return TYPE::null(); \ - int result_idx = 0; \ - for (int i = 1; i < num_args; ++i) { \ - if (args[i].is_null) return TYPE::null(); \ - if (args[i].val > args[result_idx].val) result_idx = i; \ - } \ - return TYPE(args[result_idx].val); \ +#define GREATEST_FN(TYPE) \ + TYPE MathFunctions::greatest(FunctionContext* ctx, int num_args, const TYPE* args) { \ + if (args[0].is_null) return TYPE::null(); \ + int result_idx = 0; \ + for (int i = 1; i < num_args; ++i) { \ + if (args[i].is_null) return TYPE::null(); \ + if (args[i].val > args[result_idx].val) result_idx = i; \ + } \ + return TYPE(args[result_idx].val); \ } -#define GREATEST_FNS() \ - GREATEST_FN(TinyIntVal); \ +#define GREATEST_FNS() \ + GREATEST_FN(TinyIntVal); \ GREATEST_FN(SmallIntVal); \ - GREATEST_FN(IntVal); \ - GREATEST_FN(BigIntVal); \ + GREATEST_FN(IntVal); \ + GREATEST_FN(BigIntVal); \ GREATEST_FN(LargeIntVal); \ - GREATEST_FN(FloatVal); \ + 
GREATEST_FN(FloatVal); \ GREATEST_FN(DoubleVal); GREATEST_FNS(); -#define GREATEST_NONNUMERIC_FN(TYPE_NAME, TYPE, DORIS_TYPE) \ - TYPE MathFunctions::greatest(\ - FunctionContext* ctx, int num_args, const TYPE* args) { \ - if (args[0].is_null) return TYPE::null(); \ - DORIS_TYPE result_val = DORIS_TYPE::from_##TYPE_NAME(args[0]); \ - for (int i = 1; i < num_args; ++i) { \ - if (args[i].is_null) return TYPE::null(); \ - DORIS_TYPE val = DORIS_TYPE::from_##TYPE_NAME(args[i]); \ - if (val > result_val) result_val = val; \ - } \ - TYPE result; \ - result_val.to_##TYPE_NAME(&result); \ - return result; \ - } - -#define GREATEST_NONNUMERIC_FNS() \ - GREATEST_NONNUMERIC_FN(string_val, StringVal, StringValue); \ +#define GREATEST_NONNUMERIC_FN(TYPE_NAME, TYPE, DORIS_TYPE) \ + TYPE MathFunctions::greatest(FunctionContext* ctx, int num_args, const TYPE* args) { \ + if (args[0].is_null) return TYPE::null(); \ + DORIS_TYPE result_val = DORIS_TYPE::from_##TYPE_NAME(args[0]); \ + for (int i = 1; i < num_args; ++i) { \ + if (args[i].is_null) return TYPE::null(); \ + DORIS_TYPE val = DORIS_TYPE::from_##TYPE_NAME(args[i]); \ + if (val > result_val) result_val = val; \ + } \ + TYPE result; \ + result_val.to_##TYPE_NAME(&result); \ + return result; \ + } + +#define GREATEST_NONNUMERIC_FNS() \ + GREATEST_NONNUMERIC_FN(string_val, StringVal, StringValue); \ GREATEST_NONNUMERIC_FN(datetime_val, DateTimeVal, DateTimeValue); \ - GREATEST_NONNUMERIC_FN(decimal_val, DecimalVal, DecimalValue); \ - GREATEST_NONNUMERIC_FN(decimal_val, DecimalV2Val, DecimalV2Value); \ + GREATEST_NONNUMERIC_FN(decimal_val, DecimalVal, DecimalValue); \ + GREATEST_NONNUMERIC_FN(decimal_val, DecimalV2Val, DecimalV2Value); GREATEST_NONNUMERIC_FNS(); @@ -943,5 +904,4 @@ void* MathFunctions::least_timestamp(Expr* e, TupleRow* row) { } #endif -} - +} // namespace doris diff --git a/be/src/exprs/math_functions.h b/be/src/exprs/math_functions.h index 6d729b364632eb..bfef5957c56931 100644 --- 
a/be/src/exprs/math_functions.h +++ b/be/src/exprs/math_functions.h @@ -19,6 +19,7 @@ #define DORIS_BE_SRC_QUERY_EXPRS_MATH_FUNCTIONS_H #include + #include "util/string_parser.hpp" namespace doris { @@ -37,22 +38,20 @@ class MathFunctions { static doris_udf::DoubleVal abs(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); static doris_udf::FloatVal abs(doris_udf::FunctionContext*, const doris_udf::FloatVal&); static doris_udf::DecimalVal abs(doris_udf::FunctionContext*, const doris_udf::DecimalVal&); - static doris_udf::DecimalV2Val abs( - doris_udf::FunctionContext*, const doris_udf::DecimalV2Val&); + static doris_udf::DecimalV2Val abs(doris_udf::FunctionContext*, const doris_udf::DecimalV2Val&); // For integer math, we have to promote ABS() to the next highest integer type because // in two's complement arithmetic, the largest negative value for any bit width is not // representable as a positive value within the same width. For the largest width, we // simply overflow. In the unlikely event a workaround is needed, one can simply cast - // to a higher precision decimal type. + // to a higher precision decimal type. 
static doris_udf::LargeIntVal abs(doris_udf::FunctionContext*, const doris_udf::LargeIntVal&); static doris_udf::LargeIntVal abs(doris_udf::FunctionContext*, const doris_udf::BigIntVal&); static doris_udf::BigIntVal abs(doris_udf::FunctionContext*, const doris_udf::IntVal&); static doris_udf::IntVal abs(doris_udf::FunctionContext*, const doris_udf::SmallIntVal&); static doris_udf::SmallIntVal abs(doris_udf::FunctionContext*, const doris_udf::TinyIntVal&); - static doris_udf::FloatVal sign( - doris_udf::FunctionContext* ctx, const doris_udf::DoubleVal& v); + static doris_udf::FloatVal sign(doris_udf::FunctionContext* ctx, const doris_udf::DoubleVal& v); static doris_udf::DoubleVal sin(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); static doris_udf::DoubleVal asin(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); @@ -63,133 +62,132 @@ class MathFunctions { static doris_udf::BigIntVal ceil(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); static doris_udf::BigIntVal floor(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::BigIntVal round( - doris_udf::FunctionContext* ctx, const doris_udf::DoubleVal& v); - static doris_udf::DoubleVal round_up_to( - doris_udf::FunctionContext* ctx, const doris_udf::DoubleVal& v, - const doris_udf::IntVal& scale); - static doris_udf::DoubleVal truncate( - doris_udf::FunctionContext* ctx, const doris_udf::DoubleVal& v, - const doris_udf::IntVal& scale); + static doris_udf::BigIntVal round(doris_udf::FunctionContext* ctx, + const doris_udf::DoubleVal& v); + static doris_udf::DoubleVal round_up_to(doris_udf::FunctionContext* ctx, + const doris_udf::DoubleVal& v, + const doris_udf::IntVal& scale); + static doris_udf::DoubleVal truncate(doris_udf::FunctionContext* ctx, + const doris_udf::DoubleVal& v, + const doris_udf::IntVal& scale); static doris_udf::DoubleVal ln(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::DoubleVal log( - 
doris_udf::FunctionContext* ctx, const doris_udf::DoubleVal& base, - const doris_udf::DoubleVal& v); - static doris_udf::DoubleVal log2( - doris_udf::FunctionContext* ctx, const doris_udf::DoubleVal& v); + static doris_udf::DoubleVal log(doris_udf::FunctionContext* ctx, + const doris_udf::DoubleVal& base, + const doris_udf::DoubleVal& v); + static doris_udf::DoubleVal log2(doris_udf::FunctionContext* ctx, + const doris_udf::DoubleVal& v); static doris_udf::DoubleVal log10(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); static doris_udf::DoubleVal exp(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::DoubleVal radians( - doris_udf::FunctionContext* ctx, const doris_udf::DoubleVal& v); - static doris_udf::DoubleVal degrees( - doris_udf::FunctionContext* ctx, const doris_udf::DoubleVal& v); + static doris_udf::DoubleVal radians(doris_udf::FunctionContext* ctx, + const doris_udf::DoubleVal& v); + static doris_udf::DoubleVal degrees(doris_udf::FunctionContext* ctx, + const doris_udf::DoubleVal& v); static doris_udf::DoubleVal sqrt(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::DoubleVal pow( - doris_udf::FunctionContext* ctx, const doris_udf::DoubleVal& base, - const doris_udf::DoubleVal& exp); + static doris_udf::DoubleVal pow(doris_udf::FunctionContext* ctx, + const doris_udf::DoubleVal& base, + const doris_udf::DoubleVal& exp); /// Used for both rand() and rand_seed() - static void rand_prepare( - doris_udf::FunctionContext*, doris_udf::FunctionContext::FunctionStateScope); + static void rand_prepare(doris_udf::FunctionContext*, + doris_udf::FunctionContext::FunctionStateScope); static doris_udf::DoubleVal rand(doris_udf::FunctionContext*); - static doris_udf::DoubleVal rand_seed( - doris_udf::FunctionContext*, const doris_udf::BigIntVal& seed); - static void rand_close( - FunctionContext* ctx, FunctionContext::FunctionStateScope scope); - - static doris_udf::StringVal bin( - 
doris_udf::FunctionContext* ctx, const doris_udf::BigIntVal& v); - static doris_udf::StringVal hex_int( - doris_udf::FunctionContext* ctx, const doris_udf::BigIntVal& v); - static doris_udf::StringVal hex_string( - doris_udf::FunctionContext* ctx, const doris_udf::StringVal& s); - static doris_udf::StringVal unhex( - doris_udf::FunctionContext* ctx, const doris_udf::StringVal& s); - - static doris_udf::StringVal conv_int( - doris_udf::FunctionContext* ctx, const doris_udf::BigIntVal& num, - const doris_udf::TinyIntVal& src_base, const doris_udf::TinyIntVal& dest_base); - static doris_udf::StringVal conv_string( - doris_udf::FunctionContext* ctx, const doris_udf::StringVal& num_str, - const doris_udf::TinyIntVal& src_base, const doris_udf::TinyIntVal& dest_base); - - static doris_udf::BigIntVal pmod_bigint( - doris_udf::FunctionContext* ctx, const doris_udf::BigIntVal& a, - const doris_udf::BigIntVal& b); - static doris_udf::DoubleVal pmod_double( - doris_udf::FunctionContext* ctx, const doris_udf::DoubleVal& a, - const doris_udf::DoubleVal& b); - static doris_udf::FloatVal fmod_float( - doris_udf::FunctionContext*, const doris_udf::FloatVal&, - const doris_udf::FloatVal&); - static doris_udf::DoubleVal fmod_double( - doris_udf::FunctionContext*, const doris_udf::DoubleVal&, - const doris_udf::DoubleVal&); - - static doris_udf::BigIntVal positive_bigint( - doris_udf::FunctionContext* ctx, const doris_udf::BigIntVal& val); - static doris_udf::DoubleVal positive_double( - doris_udf::FunctionContext* ctx, const doris_udf::DoubleVal& val); - static doris_udf::DecimalVal positive_decimal( - doris_udf::FunctionContext* ctx, const doris_udf::DecimalVal& val); - static doris_udf::DecimalV2Val positive_decimal( - doris_udf::FunctionContext* ctx, const doris_udf::DecimalV2Val& val); - static doris_udf::BigIntVal negative_bigint( - doris_udf::FunctionContext* ctx, const doris_udf::BigIntVal& val); - static doris_udf::DoubleVal negative_double( - doris_udf::FunctionContext* 
ctx, const doris_udf::DoubleVal& val); - static doris_udf::DecimalVal negative_decimal( - doris_udf::FunctionContext* ctx, const doris_udf::DecimalVal& val); - static doris_udf::DecimalV2Val negative_decimal( - doris_udf::FunctionContext* ctx, const doris_udf::DecimalV2Val& val); - - static doris_udf::TinyIntVal least( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::TinyIntVal* args); - static doris_udf::TinyIntVal greatest( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::TinyIntVal* args); - static doris_udf::SmallIntVal least( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::SmallIntVal* val); - static doris_udf::SmallIntVal greatest( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::SmallIntVal* val); - static doris_udf::IntVal least( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::IntVal* val); - static doris_udf::IntVal greatest( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::IntVal* val); - static doris_udf::BigIntVal least( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::BigIntVal* val); - static doris_udf::BigIntVal greatest( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::BigIntVal* val); - static doris_udf::LargeIntVal least( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::LargeIntVal* val); - static doris_udf::LargeIntVal greatest( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::LargeIntVal* val); - static doris_udf::FloatVal least( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::FloatVal* val); - static doris_udf::FloatVal greatest( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::FloatVal* val); - static doris_udf::DoubleVal least( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::DoubleVal* val); - static doris_udf::DoubleVal greatest( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::DoubleVal* val); - static 
doris_udf::StringVal least( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::StringVal* val); - static doris_udf::StringVal greatest( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::StringVal* val); - static doris_udf::DateTimeVal least( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::DateTimeVal* val); - static doris_udf::DateTimeVal greatest( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::DateTimeVal* val); - static doris_udf::DecimalVal least( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::DecimalVal* val); - static doris_udf::DecimalVal greatest( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::DecimalVal* val); - static doris_udf::DecimalV2Val least( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::DecimalV2Val* val); - static doris_udf::DecimalV2Val greatest( - doris_udf::FunctionContext* ctx, int num_args, const doris_udf::DecimalV2Val* val); + static doris_udf::DoubleVal rand_seed(doris_udf::FunctionContext*, + const doris_udf::BigIntVal& seed); + static void rand_close(FunctionContext* ctx, FunctionContext::FunctionStateScope scope); + + static doris_udf::StringVal bin(doris_udf::FunctionContext* ctx, const doris_udf::BigIntVal& v); + static doris_udf::StringVal hex_int(doris_udf::FunctionContext* ctx, + const doris_udf::BigIntVal& v); + static doris_udf::StringVal hex_string(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& s); + static doris_udf::StringVal unhex(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& s); + + static doris_udf::StringVal conv_int(doris_udf::FunctionContext* ctx, + const doris_udf::BigIntVal& num, + const doris_udf::TinyIntVal& src_base, + const doris_udf::TinyIntVal& dest_base); + static doris_udf::StringVal conv_string(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& num_str, + const doris_udf::TinyIntVal& src_base, + const doris_udf::TinyIntVal& dest_base); + + 
static doris_udf::BigIntVal pmod_bigint(doris_udf::FunctionContext* ctx, + const doris_udf::BigIntVal& a, + const doris_udf::BigIntVal& b); + static doris_udf::DoubleVal pmod_double(doris_udf::FunctionContext* ctx, + const doris_udf::DoubleVal& a, + const doris_udf::DoubleVal& b); + static doris_udf::FloatVal fmod_float(doris_udf::FunctionContext*, const doris_udf::FloatVal&, + const doris_udf::FloatVal&); + static doris_udf::DoubleVal fmod_double(doris_udf::FunctionContext*, + const doris_udf::DoubleVal&, + const doris_udf::DoubleVal&); + + static doris_udf::BigIntVal positive_bigint(doris_udf::FunctionContext* ctx, + const doris_udf::BigIntVal& val); + static doris_udf::DoubleVal positive_double(doris_udf::FunctionContext* ctx, + const doris_udf::DoubleVal& val); + static doris_udf::DecimalVal positive_decimal(doris_udf::FunctionContext* ctx, + const doris_udf::DecimalVal& val); + static doris_udf::DecimalV2Val positive_decimal(doris_udf::FunctionContext* ctx, + const doris_udf::DecimalV2Val& val); + static doris_udf::BigIntVal negative_bigint(doris_udf::FunctionContext* ctx, + const doris_udf::BigIntVal& val); + static doris_udf::DoubleVal negative_double(doris_udf::FunctionContext* ctx, + const doris_udf::DoubleVal& val); + static doris_udf::DecimalVal negative_decimal(doris_udf::FunctionContext* ctx, + const doris_udf::DecimalVal& val); + static doris_udf::DecimalV2Val negative_decimal(doris_udf::FunctionContext* ctx, + const doris_udf::DecimalV2Val& val); + + static doris_udf::TinyIntVal least(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::TinyIntVal* args); + static doris_udf::TinyIntVal greatest(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::TinyIntVal* args); + static doris_udf::SmallIntVal least(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::SmallIntVal* val); + static doris_udf::SmallIntVal greatest(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::SmallIntVal* val); + static 
doris_udf::IntVal least(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::IntVal* val); + static doris_udf::IntVal greatest(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::IntVal* val); + static doris_udf::BigIntVal least(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::BigIntVal* val); + static doris_udf::BigIntVal greatest(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::BigIntVal* val); + static doris_udf::LargeIntVal least(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::LargeIntVal* val); + static doris_udf::LargeIntVal greatest(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::LargeIntVal* val); + static doris_udf::FloatVal least(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::FloatVal* val); + static doris_udf::FloatVal greatest(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::FloatVal* val); + static doris_udf::DoubleVal least(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::DoubleVal* val); + static doris_udf::DoubleVal greatest(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::DoubleVal* val); + static doris_udf::StringVal least(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::StringVal* val); + static doris_udf::StringVal greatest(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::StringVal* val); + static doris_udf::DateTimeVal least(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::DateTimeVal* val); + static doris_udf::DateTimeVal greatest(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::DateTimeVal* val); + static doris_udf::DecimalVal least(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::DecimalVal* val); + static doris_udf::DecimalVal greatest(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::DecimalVal* val); + static doris_udf::DecimalV2Val least(doris_udf::FunctionContext* ctx, int 
num_args, + const doris_udf::DecimalV2Val* val); + static doris_udf::DecimalV2Val greatest(doris_udf::FunctionContext* ctx, int num_args, + const doris_udf::DecimalV2Val* val); static double my_double_round(double value, int64_t dec, bool dec_unsigned, bool truncate); @@ -200,8 +198,8 @@ class MathFunctions { // Converts src_num in decimal to dest_base, // and fills expr_val.string_val with the result. - static doris_udf::StringVal decimal_to_base( - doris_udf::FunctionContext* ctx, int64_t src_num, int8_t dest_base); + static doris_udf::StringVal decimal_to_base(doris_udf::FunctionContext* ctx, int64_t src_num, + int8_t dest_base); // Converts src_num representing a number in src_base but encoded in decimal // into its actual decimal number. @@ -216,13 +214,10 @@ class MathFunctions { // is positive, otherwise to -1. // Returns true if no parse_res == PARSE_SUCCESS || parse_res == PARSE_OVERFLOW. // Returns false otherwise, indicating some other error condition. - static bool handle_parse_result( - int8_t dest_base, - int64_t* num, - StringParser::ParseResult parse_res); - + static bool handle_parse_result(int8_t dest_base, int64_t* num, + StringParser::ParseResult parse_res); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/new_agg_fn_evaluator.cc b/be/src/exprs/new_agg_fn_evaluator.cc index ca90390a94bc60..952f9fe9b56ae0 100644 --- a/be/src/exprs/new_agg_fn_evaluator.cc +++ b/be/src/exprs/new_agg_fn_evaluator.cc @@ -17,14 +17,16 @@ #include "exprs/new_agg_fn_evaluator.h" +#include + #include #include "common/logging.h" -#include "exprs/aggregate_functions.h" #include "exprs/agg_fn.h" +#include "exprs/aggregate_functions.h" #include "exprs/anyval_util.h" -#include "exprs/expr_context.h" #include "exprs/expr.h" +#include "exprs/expr_context.h" #include "exprs/scalar_fn_call.h" #include "gutil/strings/substitute.h" #include "runtime/mem_tracker.h" @@ -34,10 +36,6 @@ #include "udf/udf_internal.h" #include "util/debug_util.h" -#include - - - using 
namespace doris; using namespace doris_udf; using std::move; @@ -51,37 +49,40 @@ typedef void (*InitFn)(FunctionContext*, AnyVal*); typedef void (*UpdateFn0)(FunctionContext*, AnyVal*); typedef void (*UpdateFn1)(FunctionContext*, const AnyVal&, AnyVal*); typedef void (*UpdateFn2)(FunctionContext*, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn3)(FunctionContext*, const AnyVal&, const AnyVal&, - const AnyVal&, AnyVal*); -typedef void (*UpdateFn4)(FunctionContext*, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn5)(FunctionContext*, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn6)(FunctionContext*, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn7)(FunctionContext*, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn8)(FunctionContext*, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, AnyVal*); +typedef void (*UpdateFn3)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); +typedef void (*UpdateFn4)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, + const AnyVal&, AnyVal*); +typedef void (*UpdateFn5)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, + const AnyVal&, const AnyVal&, AnyVal*); +typedef void (*UpdateFn6)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, + const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); +typedef void (*UpdateFn7)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, + const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); +typedef void (*UpdateFn8)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, + const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const 
AnyVal&, + AnyVal*); typedef void (*VarargUpdateFn0)(FunctionContext*, int num_varargs, const AnyVal*, AnyVal*); -typedef void (*VarargUpdateFn1)(FunctionContext*, const AnyVal&, int num_varargs, const AnyVal*, AnyVal*); +typedef void (*VarargUpdateFn1)(FunctionContext*, const AnyVal&, int num_varargs, const AnyVal*, + AnyVal*); typedef void (*VarargUpdateFn2)(FunctionContext*, const AnyVal&, const AnyVal&, int num_varargs, - const AnyVal*, AnyVal*); + const AnyVal*, AnyVal*); typedef void (*VarargUpdateFn3)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - int num_varargs, const AnyVal*, AnyVal*); + int num_varargs, const AnyVal*, AnyVal*); typedef void (*VarargUpdateFn4)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, int num_varargs, const AnyVal*, AnyVal*); + const AnyVal&, int num_varargs, const AnyVal*, AnyVal*); typedef void (*VarargUpdateFn5)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, int num_varargs, const AnyVal*, AnyVal*); + const AnyVal&, const AnyVal&, int num_varargs, const AnyVal*, + AnyVal*); typedef void (*VarargUpdateFn6)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, int num_varargs, const AnyVal*, AnyVal*); + const AnyVal&, const AnyVal&, const AnyVal&, int num_varargs, + const AnyVal*, AnyVal*); typedef void (*VarargUpdateFn7)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, int num_varargs, const AnyVal*, AnyVal*); + const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, + int num_varargs, const AnyVal*, AnyVal*); typedef void (*VarargUpdateFn8)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, int num_varargs, - const AnyVal*, AnyVal*); + const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, + const AnyVal&, 
int num_varargs, const AnyVal*, AnyVal*); typedef StringVal (*SerializeFn)(FunctionContext*, const StringVal&); typedef AnyVal (*GetValueFn)(FunctionContext*, const AnyVal&); @@ -99,18 +100,18 @@ NewAggFnEvaluator::NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool, _mem_tracker(tracker) {} NewAggFnEvaluator::~NewAggFnEvaluator() { - if (UNLIKELY(_total_mem_consumption > 0)) { - _mem_tracker->Release(_total_mem_consumption); - } - DCHECK(closed_); + if (UNLIKELY(_total_mem_consumption > 0)) { + _mem_tracker->Release(_total_mem_consumption); + } + DCHECK(closed_); } const SlotDescriptor& NewAggFnEvaluator::intermediate_slot_desc() const { - return agg_fn_.intermediate_slot_desc(); + return agg_fn_.intermediate_slot_desc(); } const TypeDescriptor& NewAggFnEvaluator::intermediate_type() const { - return agg_fn_.intermediate_type(); + return agg_fn_.intermediate_type(); } Status NewAggFnEvaluator::Create(const AggFn& agg_fn, RuntimeState* state, ObjectPool* pool, @@ -120,52 +121,53 @@ Status NewAggFnEvaluator::Create(const AggFn& agg_fn, RuntimeState* state, Objec *result = nullptr; // Create a new AggFn evaluator. - NewAggFnEvaluator* agg_fn_eval = pool->add(new NewAggFnEvaluator(agg_fn, mem_pool, tracker, false)); - - agg_fn_eval->agg_fn_ctx_.reset(FunctionContextImpl::create_context(state, mem_pool, - agg_fn.GetIntermediateTypeDesc(), agg_fn.GetOutputTypeDesc(), - agg_fn.arg_type_descs(), 0, false)); - - Status status; - // Create the evaluators for the input expressions. 
- for (Expr* input_expr : agg_fn.children()) { - // TODO chenhao replace ExprContext with ScalarFnEvaluator - ExprContext* input_eval = pool->add(new ExprContext(input_expr)); - if (input_eval == nullptr) goto cleanup; - input_eval->prepare(state, row_desc, tracker); - agg_fn_eval->input_evals_.push_back(input_eval); - Expr* root = input_eval->root(); - DCHECK(root == input_expr); - AnyVal* staging_input_val; - status = allocate_any_val(state, mem_pool, input_expr->type(), - "Could not allocate aggregate expression input value", &staging_input_val); - agg_fn_eval->staging_input_vals_.push_back(staging_input_val); + NewAggFnEvaluator* agg_fn_eval = + pool->add(new NewAggFnEvaluator(agg_fn, mem_pool, tracker, false)); + + agg_fn_eval->agg_fn_ctx_.reset(FunctionContextImpl::create_context( + state, mem_pool, agg_fn.GetIntermediateTypeDesc(), agg_fn.GetOutputTypeDesc(), + agg_fn.arg_type_descs(), 0, false)); + + Status status; + // Create the evaluators for the input expressions. + for (Expr* input_expr : agg_fn.children()) { + // TODO chenhao replace ExprContext with ScalarFnEvaluator + ExprContext* input_eval = pool->add(new ExprContext(input_expr)); + if (input_eval == nullptr) goto cleanup; + input_eval->prepare(state, row_desc, tracker); + agg_fn_eval->input_evals_.push_back(input_eval); + Expr* root = input_eval->root(); + DCHECK(root == input_expr); + AnyVal* staging_input_val; + status = allocate_any_val(state, mem_pool, input_expr->type(), + "Could not allocate aggregate expression input value", + &staging_input_val); + agg_fn_eval->staging_input_vals_.push_back(staging_input_val); + if (UNLIKELY(!status.ok())) goto cleanup; + } + DCHECK_EQ(agg_fn.get_num_children(), agg_fn_eval->input_evals_.size()); + DCHECK_EQ(agg_fn_eval->staging_input_vals_.size(), agg_fn_eval->input_evals_.size()); + + status = allocate_any_val(state, mem_pool, agg_fn.intermediate_type(), + "Could not allocate aggregate expression intermediate value", + 
&(agg_fn_eval->staging_intermediate_val_)); if (UNLIKELY(!status.ok())) goto cleanup; - } - DCHECK_EQ(agg_fn.get_num_children(), agg_fn_eval->input_evals_.size()); - DCHECK_EQ(agg_fn_eval->staging_input_vals_.size(), agg_fn_eval->input_evals_.size()); - - status = allocate_any_val(state, mem_pool, agg_fn.intermediate_type(), - "Could not allocate aggregate expression intermediate value", - &(agg_fn_eval->staging_intermediate_val_)); - if (UNLIKELY(!status.ok())) goto cleanup; - status = allocate_any_val(state, mem_pool, agg_fn.intermediate_type(), - "Could not allocate aggregate expression merge input value", - &(agg_fn_eval->staging_merge_input_val_)); - if (UNLIKELY(!status.ok())) goto cleanup; - - if (agg_fn.is_merge()) { - DCHECK_EQ(agg_fn_eval->staging_input_vals_.size(), 1) - << "Merge should only have 1 input."; - } - - *result = agg_fn_eval; - return Status::OK(); + status = allocate_any_val(state, mem_pool, agg_fn.intermediate_type(), + "Could not allocate aggregate expression merge input value", + &(agg_fn_eval->staging_merge_input_val_)); + if (UNLIKELY(!status.ok())) goto cleanup; + + if (agg_fn.is_merge()) { + DCHECK_EQ(agg_fn_eval->staging_input_vals_.size(), 1) << "Merge should only have 1 input."; + } + + *result = agg_fn_eval; + return Status::OK(); cleanup: - DCHECK(!status.ok()); - agg_fn_eval->Close(state); - return status; + DCHECK(!status.ok()); + agg_fn_eval->Close(state); + return status; } Status NewAggFnEvaluator::Create(const vector& agg_fns, RuntimeState* state, @@ -175,160 +177,152 @@ Status NewAggFnEvaluator::Create(const vector& agg_fns, RuntimeState* st const RowDescriptor& row_desc) { for (const AggFn* agg_fn : agg_fns) { NewAggFnEvaluator* agg_fn_eval; - RETURN_IF_ERROR(NewAggFnEvaluator::Create(*agg_fn, state, pool, mem_pool, - &agg_fn_eval, tracker, row_desc)); - evals->push_back(agg_fn_eval); - } - return Status::OK(); + RETURN_IF_ERROR(NewAggFnEvaluator::Create(*agg_fn, state, pool, mem_pool, &agg_fn_eval, + tracker, 
row_desc)); + evals->push_back(agg_fn_eval); + } + return Status::OK(); } Status NewAggFnEvaluator::Open(RuntimeState* state) { - if (opened_) return Status::OK(); - opened_ = true; - // TODO chenhao, ScalarFnEvaluator different from ExprContext - RETURN_IF_ERROR(ExprContext::open(input_evals_, state)); - // Now that we have opened all our input exprs, it is safe to evaluate any constant - // values for the UDA's FunctionContext (we cannot evaluate exprs before calling Open() - // on them). - vector constant_args(input_evals_.size(), nullptr); - for (int i = 0; i < input_evals_.size(); ++i) { - ExprContext* eval = input_evals_[i]; - RETURN_IF_ERROR(eval->get_const_value(state, *(agg_fn_.get_child(i)), - &constant_args[i])); - } - agg_fn_ctx_->impl()->set_constant_args(move(constant_args)); - return Status::OK(); + if (opened_) return Status::OK(); + opened_ = true; + // TODO chenhao, ScalarFnEvaluator different from ExprContext + RETURN_IF_ERROR(ExprContext::open(input_evals_, state)); + // Now that we have opened all our input exprs, it is safe to evaluate any constant + // values for the UDA's FunctionContext (we cannot evaluate exprs before calling Open() + // on them). 
+ vector constant_args(input_evals_.size(), nullptr); + for (int i = 0; i < input_evals_.size(); ++i) { + ExprContext* eval = input_evals_[i]; + RETURN_IF_ERROR(eval->get_const_value(state, *(agg_fn_.get_child(i)), &constant_args[i])); + } + agg_fn_ctx_->impl()->set_constant_args(move(constant_args)); + return Status::OK(); } -Status NewAggFnEvaluator::Open( - const vector& evals, RuntimeState* state) { - for (NewAggFnEvaluator* eval : evals) RETURN_IF_ERROR(eval->Open(state)); - return Status::OK(); +Status NewAggFnEvaluator::Open(const vector& evals, RuntimeState* state) { + for (NewAggFnEvaluator* eval : evals) RETURN_IF_ERROR(eval->Open(state)); + return Status::OK(); } void NewAggFnEvaluator::Close(RuntimeState* state) { - if (closed_) return; - closed_ = true; - if (!is_clone_) Expr::close(input_evals_, state); - // TODO chenhao - //FreeLocalAllocations(); - agg_fn_ctx_->impl()->close(); - agg_fn_ctx_.reset(); - - //TODO chenhao release ExprContext - //for (int i = 0; i < input_evals_.size(); i++) { - // ExprContext* context = input_evals_[i]; - // delete context; - //} - input_evals_.clear(); + if (closed_) return; + closed_ = true; + if (!is_clone_) Expr::close(input_evals_, state); + // TODO chenhao + //FreeLocalAllocations(); + agg_fn_ctx_->impl()->close(); + agg_fn_ctx_.reset(); + + //TODO chenhao release ExprContext + //for (int i = 0; i < input_evals_.size(); i++) { + // ExprContext* context = input_evals_[i]; + // delete context; + //} + input_evals_.clear(); } - -void NewAggFnEvaluator::Close( - const vector& evals, RuntimeState* state) { - for (NewAggFnEvaluator* eval : evals) eval->Close(state); +void NewAggFnEvaluator::Close(const vector& evals, RuntimeState* state) { + for (NewAggFnEvaluator* eval : evals) eval->Close(state); } void NewAggFnEvaluator::SetDstSlot(const AnyVal* src, const SlotDescriptor& dst_slot_desc, - Tuple* dst) { - if (src->is_null) { - dst->set_null(dst_slot_desc.null_indicator_offset()); - return; - } - - 
dst->set_not_null(dst_slot_desc.null_indicator_offset()); - void* slot = dst->get_slot(dst_slot_desc.tuple_offset()); - switch (dst_slot_desc.type().type) { + Tuple* dst) { + if (src->is_null) { + dst->set_null(dst_slot_desc.null_indicator_offset()); + return; + } + + dst->set_not_null(dst_slot_desc.null_indicator_offset()); + void* slot = dst->get_slot(dst_slot_desc.tuple_offset()); + switch (dst_slot_desc.type().type) { case TYPE_NULL: - return; + return; case TYPE_BOOLEAN: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; + *reinterpret_cast(slot) = reinterpret_cast(src)->val; + return; case TYPE_TINYINT: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; + *reinterpret_cast(slot) = reinterpret_cast(src)->val; + return; case TYPE_SMALLINT: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; + *reinterpret_cast(slot) = reinterpret_cast(src)->val; + return; case TYPE_INT: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; + *reinterpret_cast(slot) = reinterpret_cast(src)->val; + return; case TYPE_BIGINT: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; + *reinterpret_cast(slot) = reinterpret_cast(src)->val; + return; case TYPE_LARGEINT: - memcpy(slot, &reinterpret_cast(src)->val, sizeof(__int128)); - return; + memcpy(slot, &reinterpret_cast(src)->val, sizeof(__int128)); + return; case TYPE_FLOAT: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; + *reinterpret_cast(slot) = reinterpret_cast(src)->val; + return; case TYPE_DOUBLE: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; + *reinterpret_cast(slot) = reinterpret_cast(src)->val; + return; case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: - *reinterpret_cast(slot) = - StringValue::from_string_val(*reinterpret_cast(src)); - return; + *reinterpret_cast(slot) = + StringValue::from_string_val(*reinterpret_cast(src)); + return; case TYPE_DATE: case TYPE_DATETIME: - *reinterpret_cast(slot) = 
DateTimeValue::from_datetime_val( - *reinterpret_cast(src)); + *reinterpret_cast(slot) = + DateTimeValue::from_datetime_val(*reinterpret_cast(src)); return; case TYPE_DECIMAL: - *reinterpret_cast(slot) = DecimalValue::from_decimal_val( - *reinterpret_cast(src)); + *reinterpret_cast(slot) = + DecimalValue::from_decimal_val(*reinterpret_cast(src)); return; case TYPE_DECIMALV2: - *reinterpret_cast(slot) = - reinterpret_cast(src)->val; + *reinterpret_cast(slot) = reinterpret_cast(src)->val; return; default: - DCHECK(false) << "NYI: " << dst_slot_desc.type(); - } + DCHECK(false) << "NYI: " << dst_slot_desc.type(); + } } // This function would be replaced in codegen. void NewAggFnEvaluator::Init(Tuple* dst) { - DCHECK(opened_); - DCHECK(agg_fn_.init_fn_ != nullptr); - for (ExprContext* input_eval : input_evals_) { - DCHECK(input_eval->opened()); - } - - const TypeDescriptor& type = intermediate_type(); - const SlotDescriptor& slot_desc = intermediate_slot_desc(); - if (type.type == TYPE_CHAR) { - // The intermediate value is represented as a fixed-length buffer inline in the tuple. - // The aggregate function writes to this buffer directly. staging_intermediate_val_ - // is a StringVal with a pointer to the slot and the length of the slot. 
- void* slot = dst->get_slot(slot_desc.tuple_offset()); - StringVal* sv = reinterpret_cast(staging_intermediate_val_); - sv->is_null = dst->is_null(slot_desc.null_indicator_offset()); - sv->ptr = reinterpret_cast(slot); - sv->len = type.len; - } - reinterpret_cast(agg_fn_.init_fn_)( - agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(staging_intermediate_val_, slot_desc, dst); - agg_fn_ctx_->impl()->set_num_updates(0); - agg_fn_ctx_->impl()->set_num_removes(0); + DCHECK(opened_); + DCHECK(agg_fn_.init_fn_ != nullptr); + for (ExprContext* input_eval : input_evals_) { + DCHECK(input_eval->opened()); + } + + const TypeDescriptor& type = intermediate_type(); + const SlotDescriptor& slot_desc = intermediate_slot_desc(); + if (type.type == TYPE_CHAR) { + // The intermediate value is represented as a fixed-length buffer inline in the tuple. + // The aggregate function writes to this buffer directly. staging_intermediate_val_ + // is a StringVal with a pointer to the slot and the length of the slot. 
+ void* slot = dst->get_slot(slot_desc.tuple_offset()); + StringVal* sv = reinterpret_cast(staging_intermediate_val_); + sv->is_null = dst->is_null(slot_desc.null_indicator_offset()); + sv->ptr = reinterpret_cast(slot); + sv->len = type.len; + } + reinterpret_cast(agg_fn_.init_fn_)(agg_fn_ctx_.get(), staging_intermediate_val_); + SetDstSlot(staging_intermediate_val_, slot_desc, dst); + agg_fn_ctx_->impl()->set_num_updates(0); + agg_fn_ctx_->impl()->set_num_removes(0); } static void SetAnyVal(const SlotDescriptor& desc, Tuple* tuple, AnyVal* dst) { - bool is_null = tuple->is_null(desc.null_indicator_offset()); - void* slot = nullptr; - if (!is_null) slot = tuple->get_slot(desc.tuple_offset()); - AnyValUtil::set_any_val(slot, desc.type(), dst); + bool is_null = tuple->is_null(desc.null_indicator_offset()); + void* slot = nullptr; + if (!is_null) slot = tuple->get_slot(desc.tuple_offset()); + AnyValUtil::set_any_val(slot, desc.type(), dst); } - // Utility to put val into an AnyVal struct -inline void NewAggFnEvaluator::set_any_val( - const void* slot, - const TypeDescriptor& type, AnyVal* dst) { +inline void NewAggFnEvaluator::set_any_val(const void* slot, const TypeDescriptor& type, + AnyVal* dst) { if (slot == NULL) { dst->is_null = true; return; @@ -389,7 +383,7 @@ inline void NewAggFnEvaluator::set_any_val( case TYPE_DECIMALV2: reinterpret_cast(dst)->val = - reinterpret_cast(slot)->value; + reinterpret_cast(slot)->value; return; case TYPE_LARGEINT: @@ -402,304 +396,272 @@ inline void NewAggFnEvaluator::set_any_val( } void NewAggFnEvaluator::Update(const TupleRow* row, Tuple* dst, void* fn) { - if (fn == nullptr) return; - - const SlotDescriptor& slot_desc = intermediate_slot_desc(); - SetAnyVal(slot_desc, dst, staging_intermediate_val_); - for (int i = 0; i < input_evals_.size(); ++i) { - void* src_slot = input_evals_[i]->get_value(const_cast(row)); - DCHECK(input_evals_[i]->root() == agg_fn_.get_child(i)); - AnyValUtil::set_any_val(src_slot, 
agg_fn_.get_child(i)->type(), staging_input_vals_[i]); - } - if (agg_fn_.is_merge()) { - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], staging_intermediate_val_); - SetDstSlot(staging_intermediate_val_, slot_desc, dst); - return; - } - - // TODO: this part is not so good and not scalable. It can be replaced with - // codegen but we can also consider leaving it for the first few cases for - // debugging. - if (agg_fn_.get_vararg_start_idx() == -1) { - switch (input_evals_.size()) { - case 0: - reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); - break; - case 1: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], staging_intermediate_val_); - break; - case 2: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - staging_intermediate_val_); - break; - case 3: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], staging_intermediate_val_); - break; - case 4: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], - staging_intermediate_val_); - break; - case 5: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], - *staging_input_vals_[4], staging_intermediate_val_); - break; - case 6: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], - *staging_input_vals_[4], *staging_input_vals_[5], - staging_intermediate_val_); - break; - case 7: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], - *staging_input_vals_[4], *staging_input_vals_[5], - *staging_input_vals_[6], staging_intermediate_val_); - break; - case 8: - 
reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], - *staging_input_vals_[4], *staging_input_vals_[5], - *staging_input_vals_[6], *staging_input_vals_[7], - staging_intermediate_val_); - break; - default: - DCHECK(false) << "NYI"; - } - } else { - int num_varargs = input_evals_.size() - agg_fn_.get_vararg_start_idx(); - const AnyVal* varargs = *(staging_input_vals_.data() + agg_fn_.get_vararg_start_idx()); - switch (agg_fn_.get_vararg_start_idx()) { - case 0: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - num_varargs, varargs, - staging_intermediate_val_); - break; - case 1: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], - num_varargs, varargs, - staging_intermediate_val_); - break; - case 2: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - num_varargs, varargs, - staging_intermediate_val_); - break; - case 3: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], - num_varargs, varargs, - staging_intermediate_val_); - break; - case 4: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], - num_varargs, varargs, - staging_intermediate_val_); - break; - case 5: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], - *staging_input_vals_[4], - num_varargs, varargs, - staging_intermediate_val_); - break; - case 6: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], - *staging_input_vals_[4], *staging_input_vals_[5], - num_varargs, varargs, - staging_intermediate_val_); - break; - case 7: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], 
*staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], - *staging_input_vals_[4], *staging_input_vals_[5], - *staging_input_vals_[6], - num_varargs, varargs, - staging_intermediate_val_); - break; - case 8: - reinterpret_cast(fn)(agg_fn_ctx_.get(), - *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], - *staging_input_vals_[4], *staging_input_vals_[5], - *staging_input_vals_[6], *staging_input_vals_[7], - num_varargs, varargs, - staging_intermediate_val_); - break; - default: - DCHECK(false) << "NYI"; - } - } - SetDstSlot(staging_intermediate_val_, slot_desc, dst); + if (fn == nullptr) return; + + const SlotDescriptor& slot_desc = intermediate_slot_desc(); + SetAnyVal(slot_desc, dst, staging_intermediate_val_); + for (int i = 0; i < input_evals_.size(); ++i) { + void* src_slot = input_evals_[i]->get_value(const_cast(row)); + DCHECK(input_evals_[i]->root() == agg_fn_.get_child(i)); + AnyValUtil::set_any_val(src_slot, agg_fn_.get_child(i)->type(), staging_input_vals_[i]); + } + if (agg_fn_.is_merge()) { + reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], + staging_intermediate_val_); + SetDstSlot(staging_intermediate_val_, slot_desc, dst); + return; + } + + // TODO: this part is not so good and not scalable. It can be replaced with + // codegen but we can also consider leaving it for the first few cases for + // debugging. 
+ if (agg_fn_.get_vararg_start_idx() == -1) { + switch (input_evals_.size()) { + case 0: + reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); + break; + case 1: + reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], + staging_intermediate_val_); + break; + case 2: + reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], + *staging_input_vals_[1], staging_intermediate_val_); + break; + case 3: + reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], + *staging_input_vals_[1], *staging_input_vals_[2], + staging_intermediate_val_); + break; + case 4: + reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], + *staging_input_vals_[1], *staging_input_vals_[2], + *staging_input_vals_[3], staging_intermediate_val_); + break; + case 5: + reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], + *staging_input_vals_[1], *staging_input_vals_[2], + *staging_input_vals_[3], *staging_input_vals_[4], + staging_intermediate_val_); + break; + case 6: + reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], + *staging_input_vals_[1], *staging_input_vals_[2], + *staging_input_vals_[3], *staging_input_vals_[4], + *staging_input_vals_[5], staging_intermediate_val_); + break; + case 7: + reinterpret_cast(fn)( + agg_fn_ctx_.get(), *staging_input_vals_[0], *staging_input_vals_[1], + *staging_input_vals_[2], *staging_input_vals_[3], *staging_input_vals_[4], + *staging_input_vals_[5], *staging_input_vals_[6], staging_intermediate_val_); + break; + case 8: + reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], + *staging_input_vals_[1], *staging_input_vals_[2], + *staging_input_vals_[3], *staging_input_vals_[4], + *staging_input_vals_[5], *staging_input_vals_[6], + *staging_input_vals_[7], staging_intermediate_val_); + break; + default: + DCHECK(false) << "NYI"; + } + } else { + int num_varargs = input_evals_.size() - agg_fn_.get_vararg_start_idx(); + const AnyVal* varargs = *(staging_input_vals_.data() 
+ agg_fn_.get_vararg_start_idx()); + switch (agg_fn_.get_vararg_start_idx()) { + case 0: + reinterpret_cast(fn)(agg_fn_ctx_.get(), num_varargs, varargs, + staging_intermediate_val_); + break; + case 1: + reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], + num_varargs, varargs, staging_intermediate_val_); + break; + case 2: + reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], + *staging_input_vals_[1], num_varargs, varargs, + staging_intermediate_val_); + break; + case 3: + reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], + *staging_input_vals_[1], *staging_input_vals_[2], + num_varargs, varargs, staging_intermediate_val_); + break; + case 4: + reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], + *staging_input_vals_[1], *staging_input_vals_[2], + *staging_input_vals_[3], num_varargs, varargs, + staging_intermediate_val_); + break; + case 5: + reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], + *staging_input_vals_[1], *staging_input_vals_[2], + *staging_input_vals_[3], *staging_input_vals_[4], + num_varargs, varargs, staging_intermediate_val_); + break; + case 6: + reinterpret_cast(fn)( + agg_fn_ctx_.get(), *staging_input_vals_[0], *staging_input_vals_[1], + *staging_input_vals_[2], *staging_input_vals_[3], *staging_input_vals_[4], + *staging_input_vals_[5], num_varargs, varargs, staging_intermediate_val_); + break; + case 7: + reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], + *staging_input_vals_[1], *staging_input_vals_[2], + *staging_input_vals_[3], *staging_input_vals_[4], + *staging_input_vals_[5], *staging_input_vals_[6], + num_varargs, varargs, staging_intermediate_val_); + break; + case 8: + reinterpret_cast(fn)( + agg_fn_ctx_.get(), *staging_input_vals_[0], *staging_input_vals_[1], + *staging_input_vals_[2], *staging_input_vals_[3], *staging_input_vals_[4], + *staging_input_vals_[5], *staging_input_vals_[6], *staging_input_vals_[7], + num_varargs, varargs, 
staging_intermediate_val_); + break; + default: + DCHECK(false) << "NYI"; + } + } + SetDstSlot(staging_intermediate_val_, slot_desc, dst); } void NewAggFnEvaluator::Merge(Tuple* src, Tuple* dst) { - DCHECK(agg_fn_.merge_fn_ != nullptr); - const SlotDescriptor& slot_desc = intermediate_slot_desc(); - SetAnyVal(slot_desc, dst, staging_intermediate_val_); - SetAnyVal(slot_desc, src, staging_merge_input_val_); - // The merge fn always takes one input argument. - reinterpret_cast(agg_fn_.merge_fn_)(agg_fn_ctx_.get(), - *staging_merge_input_val_, staging_intermediate_val_); + DCHECK(agg_fn_.merge_fn_ != nullptr); + const SlotDescriptor& slot_desc = intermediate_slot_desc(); + SetAnyVal(slot_desc, dst, staging_intermediate_val_); + SetAnyVal(slot_desc, src, staging_merge_input_val_); + // The merge fn always takes one input argument. + reinterpret_cast(agg_fn_.merge_fn_)(agg_fn_ctx_.get(), *staging_merge_input_val_, + staging_intermediate_val_); SetDstSlot(staging_intermediate_val_, slot_desc, dst); } -void NewAggFnEvaluator::SerializeOrFinalize(Tuple* src, - const SlotDescriptor& dst_slot_desc, Tuple* dst, void* fn) { - // No fn was given and the src and dst are identical. Nothing to be done. - if (fn == nullptr && src == dst) return; - // src != dst means we are performing a Finalize(), so even if fn == null we - // still must copy the value of the src slot into dst. - - const SlotDescriptor& slot_desc = intermediate_slot_desc(); - bool src_slot_null = src->is_null(slot_desc.null_indicator_offset()); - void* src_slot = nullptr; - if (!src_slot_null) src_slot = src->get_slot(slot_desc.tuple_offset()); - - // No fn was given but the src and dst tuples are different (doing a Finalize()). - // Just copy the src slot into the dst tuple. 
- if (fn == nullptr) { - DCHECK_EQ(intermediate_type(), dst_slot_desc.type()); - RawValue::write(src_slot, dst, &dst_slot_desc, nullptr); - return; - } - - AnyValUtil::set_any_val(src_slot, intermediate_type(), staging_intermediate_val_); - switch (dst_slot_desc.type().type) { +void NewAggFnEvaluator::SerializeOrFinalize(Tuple* src, const SlotDescriptor& dst_slot_desc, + Tuple* dst, void* fn) { + // No fn was given and the src and dst are identical. Nothing to be done. + if (fn == nullptr && src == dst) return; + // src != dst means we are performing a Finalize(), so even if fn == null we + // still must copy the value of the src slot into dst. + + const SlotDescriptor& slot_desc = intermediate_slot_desc(); + bool src_slot_null = src->is_null(slot_desc.null_indicator_offset()); + void* src_slot = nullptr; + if (!src_slot_null) src_slot = src->get_slot(slot_desc.tuple_offset()); + + // No fn was given but the src and dst tuples are different (doing a Finalize()). + // Just copy the src slot into the dst tuple. 
+ if (fn == nullptr) { + DCHECK_EQ(intermediate_type(), dst_slot_desc.type()); + RawValue::write(src_slot, dst, &dst_slot_desc, nullptr); + return; + } + + AnyValUtil::set_any_val(src_slot, intermediate_type(), staging_intermediate_val_); + switch (dst_slot_desc.type().type) { case TYPE_BOOLEAN: { - typedef BooleanVal(*Fn)(FunctionContext*, AnyVal*); - BooleanVal v = reinterpret_cast(fn)( - agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; + typedef BooleanVal (*Fn)(FunctionContext*, AnyVal*); + BooleanVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); + SetDstSlot(&v, dst_slot_desc, dst); + break; } case TYPE_TINYINT: { - typedef TinyIntVal(*Fn)(FunctionContext*, AnyVal*); - TinyIntVal v = reinterpret_cast(fn)( - agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; + typedef TinyIntVal (*Fn)(FunctionContext*, AnyVal*); + TinyIntVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); + SetDstSlot(&v, dst_slot_desc, dst); + break; } case TYPE_SMALLINT: { - typedef SmallIntVal(*Fn)(FunctionContext*, AnyVal*); - SmallIntVal v = reinterpret_cast(fn)( - agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; + typedef SmallIntVal (*Fn)(FunctionContext*, AnyVal*); + SmallIntVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); + SetDstSlot(&v, dst_slot_desc, dst); + break; } case TYPE_INT: { - typedef IntVal(*Fn)(FunctionContext*, AnyVal*); - IntVal v = reinterpret_cast(fn)( - agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; + typedef IntVal (*Fn)(FunctionContext*, AnyVal*); + IntVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); + SetDstSlot(&v, dst_slot_desc, dst); + break; } case TYPE_BIGINT: { - typedef BigIntVal(*Fn)(FunctionContext*, AnyVal*); - BigIntVal v = reinterpret_cast(fn)( - agg_fn_ctx_.get(), 
staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; + typedef BigIntVal (*Fn)(FunctionContext*, AnyVal*); + BigIntVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); + SetDstSlot(&v, dst_slot_desc, dst); + break; } case TYPE_LARGEINT: { - typedef LargeIntVal(*Fn)(FunctionContext*, AnyVal*); - LargeIntVal v = reinterpret_cast(fn)( - agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; + typedef LargeIntVal (*Fn)(FunctionContext*, AnyVal*); + LargeIntVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); + SetDstSlot(&v, dst_slot_desc, dst); + break; } case TYPE_FLOAT: { - typedef FloatVal(*Fn)(FunctionContext*, AnyVal*); - FloatVal v = reinterpret_cast(fn)( - agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; + typedef FloatVal (*Fn)(FunctionContext*, AnyVal*); + FloatVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); + SetDstSlot(&v, dst_slot_desc, dst); + break; } case TYPE_DOUBLE: { - typedef DoubleVal(*Fn)(FunctionContext*, AnyVal*); - DoubleVal v = reinterpret_cast(fn)( - agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; + typedef DoubleVal (*Fn)(FunctionContext*, AnyVal*); + DoubleVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); + SetDstSlot(&v, dst_slot_desc, dst); + break; } case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: { - typedef StringVal(*Fn)(FunctionContext*, AnyVal*); - StringVal v = reinterpret_cast(fn)( - agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; + typedef StringVal (*Fn)(FunctionContext*, AnyVal*); + StringVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); + SetDstSlot(&v, dst_slot_desc, dst); + break; } case TYPE_DECIMAL: { - typedef DecimalVal(*Fn)(FunctionContext*, AnyVal*); - DecimalVal v = 
reinterpret_cast(fn)( - agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; + typedef DecimalVal (*Fn)(FunctionContext*, AnyVal*); + DecimalVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); + SetDstSlot(&v, dst_slot_desc, dst); + break; } case TYPE_DECIMALV2: { - typedef DecimalV2Val(*Fn)(FunctionContext*, AnyVal*); - DecimalV2Val v = reinterpret_cast(fn)( - agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; + typedef DecimalV2Val (*Fn)(FunctionContext*, AnyVal*); + DecimalV2Val v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); + SetDstSlot(&v, dst_slot_desc, dst); + break; } case TYPE_DATE: case TYPE_DATETIME: { - typedef DateTimeVal(*Fn)(FunctionContext*, AnyVal*); - DateTimeVal v = reinterpret_cast(fn)( - agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; + typedef DateTimeVal (*Fn)(FunctionContext*, AnyVal*); + DateTimeVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); + SetDstSlot(&v, dst_slot_desc, dst); + break; } default: - DCHECK(false) << "NYI"; - } + DCHECK(false) << "NYI"; + } } void NewAggFnEvaluator::ShallowClone(ObjectPool* pool, MemPool* mem_pool, - NewAggFnEvaluator** cloned_eval) const { - DCHECK(opened_); - *cloned_eval = pool->add(new NewAggFnEvaluator(agg_fn_, mem_pool, _mem_tracker, true)); - (*cloned_eval)->agg_fn_ctx_.reset(agg_fn_ctx_->impl()->clone(mem_pool)); - DCHECK_EQ((*cloned_eval)->input_evals_.size(), 0); - (*cloned_eval)->input_evals_ = input_evals_; - (*cloned_eval)->staging_input_vals_ = staging_input_vals_; - (*cloned_eval)->staging_intermediate_val_ = staging_intermediate_val_; - (*cloned_eval)->staging_merge_input_val_ = staging_merge_input_val_; - (*cloned_eval)->opened_ = true; + NewAggFnEvaluator** cloned_eval) const { + DCHECK(opened_); + *cloned_eval = pool->add(new NewAggFnEvaluator(agg_fn_, mem_pool, _mem_tracker, true)); 
+ (*cloned_eval)->agg_fn_ctx_.reset(agg_fn_ctx_->impl()->clone(mem_pool)); + DCHECK_EQ((*cloned_eval)->input_evals_.size(), 0); + (*cloned_eval)->input_evals_ = input_evals_; + (*cloned_eval)->staging_input_vals_ = staging_input_vals_; + (*cloned_eval)->staging_intermediate_val_ = staging_intermediate_val_; + (*cloned_eval)->staging_merge_input_val_ = staging_merge_input_val_; + (*cloned_eval)->opened_ = true; } void NewAggFnEvaluator::ShallowClone(ObjectPool* pool, MemPool* mem_pool, - const vector& evals, - vector* cloned_evals) { - for (const NewAggFnEvaluator* eval : evals) { - NewAggFnEvaluator* cloned_eval; - eval->ShallowClone(pool, mem_pool, &cloned_eval); - cloned_evals->push_back(cloned_eval); - } + const vector& evals, + vector* cloned_evals) { + for (const NewAggFnEvaluator* eval : evals) { + NewAggFnEvaluator* cloned_eval; + eval->ShallowClone(pool, mem_pool, &cloned_eval); + cloned_evals->push_back(cloned_eval); + } } // @@ -711,4 +673,3 @@ void NewAggFnEvaluator::ShallowClone(ObjectPool* pool, MemPool* mem_pool, //void NewAggFnEvaluator::FreeLocalAllocations(const vector& evals) { // for (NewAggFnEvaluator* eval : evals) eval->FreeLocalAllocations(); //} - diff --git a/be/src/exprs/new_agg_fn_evaluator.h b/be/src/exprs/new_agg_fn_evaluator.h index 203701dfb1f0ed..ca13c962c69cc1 100644 --- a/be/src/exprs/new_agg_fn_evaluator.h +++ b/be/src/exprs/new_agg_fn_evaluator.h @@ -18,25 +18,24 @@ #ifndef IMPALA_EXPRS_AGG_FN_EVALUATOR_H #define IMPALA_EXPRS_AGG_FN_EVALUATOR_H -#include - #include #include +#include + #include "codegen/doris_ir.h" #include "common/compiler_util.h" #include "common/status.h" #include "exprs/agg_fn.h" #include "exprs/hybrid_map.h" +#include "gen_cpp/Exprs_types.h" +#include "gen_cpp/PlanNodes_types.h" +#include "gen_cpp/Types_types.h" #include "runtime/descriptors.h" #include "runtime/tuple_row.h" #include "runtime/types.h" #include "udf/udf.h" #include "udf/udf_internal.h" -#include "gen_cpp/Exprs_types.h" -#include 
"gen_cpp/PlanNodes_types.h" -#include "gen_cpp/Types_types.h" - namespace doris { class MemPool; @@ -62,279 +61,267 @@ class ExprContext; /// consumption per partition in an aggregation node. /// class NewAggFnEvaluator { - public: - /// Creates an NewAggFnEvaluator object from the aggregate expression 'agg_fn'. - /// The evaluator is added to 'pool' and returned in 'eval'. This will also - /// create a single evaluator for each input expression. All allocations will come - /// from 'mem_pool'. Note that it's the responsibility to call Close() all evaluators - /// even if this function returns error status on initialization failure. - static Status Create(const AggFn& agg_fn, RuntimeState* state, ObjectPool* pool, - MemPool* mem_pool, NewAggFnEvaluator** eval, const std::shared_ptr& tracker, - const RowDescriptor& row_desc) WARN_UNUSED_RESULT; - - /// Convenience functions for creating evaluators for multiple aggregate functions. - static Status Create(const std::vector& agg_fns, RuntimeState* state, - ObjectPool* pool, MemPool* mem_pool, std::vector* evals, - const std::shared_ptr& tracker, const RowDescriptor& row_desc) WARN_UNUSED_RESULT; - - ~NewAggFnEvaluator(); - - /// Initializes the evaluator by calling Open() on all the input expressions' evaluators - /// and caches all constant input arguments. - /// TODO: Move the evaluation of constant input arguments to AggFn setup. - Status Open(RuntimeState* state) WARN_UNUSED_RESULT; - - /// Convenience functions for opening multiple NewAggFnEvaluators. - static Status Open(const std::vector& evals, - RuntimeState* state) WARN_UNUSED_RESULT; - - /// Used by PartitionedAggregation node to initialize one evaluator per partition. - /// Avoid the overhead of re-initializing an evaluator (e.g. calling GetConstVal() - /// on the input expressions). Cannot be called until after Open() has been called. 
- /// 'cloned_eval' is a shallow copy of this evaluator: all input values, staging - /// intermediate values and merge values are shared with the original evaluator. Only - /// the FunctionContext 'agg_fn_ctx' is cloned for resource isolation per partition. - /// So, it's not safe to use cloned evaluators concurrently. - void ShallowClone( - ObjectPool* pool, MemPool* mem_pool, NewAggFnEvaluator** cloned_eval) const; - - /// Convenience function for cloning multiple evaluators. The newly cloned evaluators - /// are appended to 'cloned_evals'. - static void ShallowClone(ObjectPool* pool, MemPool* mem_pool, - const std::vector& evals, - std::vector* cloned_evals); - - /// Free resources owned by the evaluator. - void Close(RuntimeState* state); - static void Close(const std::vector& evals, RuntimeState* state); - - const AggFn& agg_fn() const { return agg_fn_; } - - FunctionContext* IR_ALWAYS_INLINE agg_fn_ctx() const; - - ExprContext* const* IR_ALWAYS_INLINE input_evals() const; - - /// Call the initialization function of the AggFn. May update 'dst'. - void Init(Tuple* dst); - - /// Updates the intermediate state dst based on adding the input src row. This can be - /// called either to drive the UDA's Update() or Merge() function, depending on whether - /// the AggFn is a merging aggregation. - void Add(const TupleRow* src, Tuple* dst); - - /// Updates the intermediate state dst to remove the input src row, i.e. undo - /// Add(src, dst). Only used internally for analytic fn builtins. - void Remove(const TupleRow* src, Tuple* dst); - - /// Explicitly does a merge, even if this evaluator is not marked as merging. - /// This is used by the partitioned agg node when it needs to merge spill results. - /// In the non-spilling case, this node would normally not merge. - void Merge(Tuple* src, Tuple* dst); - - /// Flattens any intermediate values containing pointers, and frees any memory - /// allocated during the init, update and merge phases. 
- void Serialize(Tuple* dst); - - /// Does one final transformation of the aggregated value in 'agg_val' and stores the - /// result in 'output_val'. Also frees the resources allocated during init, update and - /// merge phases. - void Finalize(Tuple* agg_val, Tuple* output_val); - - /// Puts the finalized value from Tuple* src in Tuple* dst just as Finalize() does. - /// However, unlike Finalize(), GetValue() does not clean up state in src. - /// GetValue() can be called repeatedly with the same src. Only used internally for - /// analytic fn builtins. Note that StringVal result is from local allocation (which - /// will be freed in the next QueryMaintenance()) so it needs to be copied out if it - /// needs to survive beyond QueryMaintenance() (e.g. if 'dst' lives in a row batch). - void GetValue(Tuple* src, Tuple* dst); - - // TODO: implement codegen path. These functions would return IR functions with - // the same signature as the interpreted ones above. - // Function* GetIrInitFn(); - // Function* GetIrUpdateFn(); - // Function* GetIrMergeFn(); - // Function* GetIrSerializeFn(); - // Function* GetIrFinalizeFn(); - static const size_t TINYINT_SIZE = sizeof(int8_t); - static const size_t SMALLINT_SIZE = sizeof(int16_t); - static const size_t INT_SIZE = sizeof(int32_t); - static const size_t BIGINT_SIZE = sizeof(int64_t); - static const size_t FLOAT_SIZE = sizeof(float); - static const size_t DOUBLE_SIZE = sizeof(double); - static const size_t DECIMAL_SIZE = sizeof(DecimalValue); - static const size_t DECIMALV2_SIZE = sizeof(DecimalV2Value); - static const size_t TIME_DURATION_SIZE = sizeof(boost::posix_time::time_duration); - static const size_t DATE_SIZE = sizeof(boost::gregorian::date); - static const size_t LARGEINT_SIZE = sizeof(__int128); - - // DATETIME VAL has two part: packet_time is 8 byte, and type is 4 byte - // MySQL packet time : int64_t packed_time; - // Indicate which type of this value : int type; - static const size_t DATETIME_SIZE = 16; - - 
bool is_multi_distinct() { - return _is_multi_distinct; - } - - const std::vector& input_expr_ctxs() const { - return input_evals_; - } - - /// Helper functions for calling the above functions on many evaluators. - static void Init(const std::vector& evals, Tuple* dst); - static void Add(const std::vector& evals, const TupleRow* src, - Tuple* dst); - static void Remove(const std::vector& evals, - const TupleRow* src, Tuple* dst); - static void Serialize(const std::vector& evals, - Tuple* dst); - static void GetValue(const std::vector& evals, Tuple* src, - Tuple* dst); - static void Finalize(const std::vector& evals, Tuple* src, - Tuple* dst); - - /// Free local allocations made in UDA functions and input arguments' evals. - //void FreeLocalAllocations(); - //static void FreeLocalAllocations(const std::vector& evals); - - std::string DebugString() const; - static std::string DebugString(const std::vector& evals); - - private: - - uint64_t _total_mem_consumption; - uint64_t _accumulated_mem_consumption; - - // index if has multi count distinct - bool _is_multi_distinct; - - /// True if the evaluator has been initialized. - bool opened_ = false; - - /// True if the evaluator has been closed. - bool closed_ = false; - - /// True if this evaluator is created from a ShallowClone() call. - const bool is_clone_; - - const AggFn& agg_fn_; - - /// Pointer to the MemPool which all allocations come from. - /// Owned by the exec node which owns this evaluator. - MemPool* mem_pool_ = nullptr; - - std::shared_ptr _mem_tracker; // saved c'tor param - - /// This contains runtime state such as constant input arguments to the aggregate - /// functions and a FreePool from which the intermediate values are allocated. - /// Owned by this evaluator. - boost::scoped_ptr agg_fn_ctx_; - - /// Evaluators for input expressions for this aggregate function. - /// Empty if there is no input expression (e.g. count(*)). 
- std::vector input_evals_; - - /// Staging input values used by the interpreted Update() / Merge() paths. - /// It stores the evaluation results of input expressions to be passed to the - /// Update() / Merge() function. - std::vector staging_input_vals_; - - /// Staging intermediate and merged values used in the interpreted - /// Update() / Merge() paths. - doris_udf::AnyVal* staging_intermediate_val_ = nullptr; - doris_udf::AnyVal* staging_merge_input_val_ = nullptr; - - /// Use Create() instead. - NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool, const std::shared_ptr& tracker, bool is_clone); - - /// Return the intermediate type of the aggregate function. - inline const SlotDescriptor& intermediate_slot_desc() const; - inline const TypeDescriptor& intermediate_type() const; - - /// The interpreted path for the UDA's Update() function. It sets up the arguments to - /// call 'fn' is either the 'update_fn_' or 'merge_fn_' of agg_fn_, depending on whether - /// agg_fn_ is a merging aggregation. This converts from the agg-expr signature, taking - /// TupleRow to the UDA signature taking AnyVals by evaluating any input expressions - /// and populating the staging input values. - /// - /// Note that this function may be superseded by the codegend Update() IR function - /// generated by AggFn::CodegenUpdateOrMergeFunction() when codegen is enabled. - void Update(const TupleRow* row, Tuple* dst, void* fn); - - /// Sets up the arguments to call 'fn'. This converts from the agg-expr signature, - /// taking TupleRow to the UDA signature taking AnyVals. Writes the serialize/finalize - /// result to the given destination slot/tuple. 'fn' can be NULL to indicate the src - /// value should simply be written into the destination. Note that StringVal result is - /// from local allocation (which will be freed in the next QueryMaintenance()) so it - /// needs to be copied out if it needs to survive beyond QueryMaintenance() (e.g. if - /// 'dst' lives in a row batch). 
- void SerializeOrFinalize(Tuple* src, const SlotDescriptor& dst_slot_desc, - Tuple* dst, void* fn); - - /// Writes the result in src into dst pointed to by dst_slot_desc - inline void SetDstSlot( - const doris_udf::AnyVal* src, const SlotDescriptor& dst_slot_desc, Tuple* dst); - - // Sets 'dst' to the value from 'slot'. - void set_any_val(const void* slot, const TypeDescriptor& type, doris_udf::AnyVal* dst); +public: + /// Creates an NewAggFnEvaluator object from the aggregate expression 'agg_fn'. + /// The evaluator is added to 'pool' and returned in 'eval'. This will also + /// create a single evaluator for each input expression. All allocations will come + /// from 'mem_pool'. Note that it's the responsibility to call Close() all evaluators + /// even if this function returns error status on initialization failure. + static Status Create(const AggFn& agg_fn, RuntimeState* state, ObjectPool* pool, + MemPool* mem_pool, NewAggFnEvaluator** eval, + const std::shared_ptr& tracker, + const RowDescriptor& row_desc) WARN_UNUSED_RESULT; + + /// Convenience functions for creating evaluators for multiple aggregate functions. + static Status Create(const std::vector& agg_fns, RuntimeState* state, ObjectPool* pool, + MemPool* mem_pool, std::vector* evals, + const std::shared_ptr& tracker, + const RowDescriptor& row_desc) WARN_UNUSED_RESULT; + + ~NewAggFnEvaluator(); + + /// Initializes the evaluator by calling Open() on all the input expressions' evaluators + /// and caches all constant input arguments. + /// TODO: Move the evaluation of constant input arguments to AggFn setup. + Status Open(RuntimeState* state) WARN_UNUSED_RESULT; + + /// Convenience functions for opening multiple NewAggFnEvaluators. + static Status Open(const std::vector& evals, + RuntimeState* state) WARN_UNUSED_RESULT; + + /// Used by PartitionedAggregation node to initialize one evaluator per partition. + /// Avoid the overhead of re-initializing an evaluator (e.g. 
calling GetConstVal() + /// on the input expressions). Cannot be called until after Open() has been called. + /// 'cloned_eval' is a shallow copy of this evaluator: all input values, staging + /// intermediate values and merge values are shared with the original evaluator. Only + /// the FunctionContext 'agg_fn_ctx' is cloned for resource isolation per partition. + /// So, it's not safe to use cloned evaluators concurrently. + void ShallowClone(ObjectPool* pool, MemPool* mem_pool, NewAggFnEvaluator** cloned_eval) const; + + /// Convenience function for cloning multiple evaluators. The newly cloned evaluators + /// are appended to 'cloned_evals'. + static void ShallowClone(ObjectPool* pool, MemPool* mem_pool, + const std::vector& evals, + std::vector* cloned_evals); + + /// Free resources owned by the evaluator. + void Close(RuntimeState* state); + static void Close(const std::vector& evals, RuntimeState* state); + + const AggFn& agg_fn() const { return agg_fn_; } + + FunctionContext* IR_ALWAYS_INLINE agg_fn_ctx() const; + + ExprContext* const* IR_ALWAYS_INLINE input_evals() const; + + /// Call the initialization function of the AggFn. May update 'dst'. + void Init(Tuple* dst); + + /// Updates the intermediate state dst based on adding the input src row. This can be + /// called either to drive the UDA's Update() or Merge() function, depending on whether + /// the AggFn is a merging aggregation. + void Add(const TupleRow* src, Tuple* dst); + + /// Updates the intermediate state dst to remove the input src row, i.e. undo + /// Add(src, dst). Only used internally for analytic fn builtins. + void Remove(const TupleRow* src, Tuple* dst); + + /// Explicitly does a merge, even if this evaluator is not marked as merging. + /// This is used by the partitioned agg node when it needs to merge spill results. + /// In the non-spilling case, this node would normally not merge. 
+ void Merge(Tuple* src, Tuple* dst); + + /// Flattens any intermediate values containing pointers, and frees any memory + /// allocated during the init, update and merge phases. + void Serialize(Tuple* dst); + + /// Does one final transformation of the aggregated value in 'agg_val' and stores the + /// result in 'output_val'. Also frees the resources allocated during init, update and + /// merge phases. + void Finalize(Tuple* agg_val, Tuple* output_val); + + /// Puts the finalized value from Tuple* src in Tuple* dst just as Finalize() does. + /// However, unlike Finalize(), GetValue() does not clean up state in src. + /// GetValue() can be called repeatedly with the same src. Only used internally for + /// analytic fn builtins. Note that StringVal result is from local allocation (which + /// will be freed in the next QueryMaintenance()) so it needs to be copied out if it + /// needs to survive beyond QueryMaintenance() (e.g. if 'dst' lives in a row batch). + void GetValue(Tuple* src, Tuple* dst); + + // TODO: implement codegen path. These functions would return IR functions with + // the same signature as the interpreted ones above. 
+ // Function* GetIrInitFn(); + // Function* GetIrUpdateFn(); + // Function* GetIrMergeFn(); + // Function* GetIrSerializeFn(); + // Function* GetIrFinalizeFn(); + static const size_t TINYINT_SIZE = sizeof(int8_t); + static const size_t SMALLINT_SIZE = sizeof(int16_t); + static const size_t INT_SIZE = sizeof(int32_t); + static const size_t BIGINT_SIZE = sizeof(int64_t); + static const size_t FLOAT_SIZE = sizeof(float); + static const size_t DOUBLE_SIZE = sizeof(double); + static const size_t DECIMAL_SIZE = sizeof(DecimalValue); + static const size_t DECIMALV2_SIZE = sizeof(DecimalV2Value); + static const size_t TIME_DURATION_SIZE = sizeof(boost::posix_time::time_duration); + static const size_t DATE_SIZE = sizeof(boost::gregorian::date); + static const size_t LARGEINT_SIZE = sizeof(__int128); + + // DATETIME VAL has two part: packet_time is 8 byte, and type is 4 byte + // MySQL packet time : int64_t packed_time; + // Indicate which type of this value : int type; + static const size_t DATETIME_SIZE = 16; + + bool is_multi_distinct() { return _is_multi_distinct; } + + const std::vector& input_expr_ctxs() const { return input_evals_; } + + /// Helper functions for calling the above functions on many evaluators. + static void Init(const std::vector& evals, Tuple* dst); + static void Add(const std::vector& evals, const TupleRow* src, Tuple* dst); + static void Remove(const std::vector& evals, const TupleRow* src, + Tuple* dst); + static void Serialize(const std::vector& evals, Tuple* dst); + static void GetValue(const std::vector& evals, Tuple* src, Tuple* dst); + static void Finalize(const std::vector& evals, Tuple* src, Tuple* dst); + + /// Free local allocations made in UDA functions and input arguments' evals. 
+ //void FreeLocalAllocations(); + //static void FreeLocalAllocations(const std::vector& evals); + + std::string DebugString() const; + static std::string DebugString(const std::vector& evals); + +private: + uint64_t _total_mem_consumption; + uint64_t _accumulated_mem_consumption; + + // index if has multi count distinct + bool _is_multi_distinct; + + /// True if the evaluator has been initialized. + bool opened_ = false; + + /// True if the evaluator has been closed. + bool closed_ = false; + + /// True if this evaluator is created from a ShallowClone() call. + const bool is_clone_; + + const AggFn& agg_fn_; + + /// Pointer to the MemPool which all allocations come from. + /// Owned by the exec node which owns this evaluator. + MemPool* mem_pool_ = nullptr; + + std::shared_ptr _mem_tracker; // saved c'tor param + + /// This contains runtime state such as constant input arguments to the aggregate + /// functions and a FreePool from which the intermediate values are allocated. + /// Owned by this evaluator. + boost::scoped_ptr agg_fn_ctx_; + + /// Evaluators for input expressions for this aggregate function. + /// Empty if there is no input expression (e.g. count(*)). + std::vector input_evals_; + + /// Staging input values used by the interpreted Update() / Merge() paths. + /// It stores the evaluation results of input expressions to be passed to the + /// Update() / Merge() function. + std::vector staging_input_vals_; + + /// Staging intermediate and merged values used in the interpreted + /// Update() / Merge() paths. + doris_udf::AnyVal* staging_intermediate_val_ = nullptr; + doris_udf::AnyVal* staging_merge_input_val_ = nullptr; + + /// Use Create() instead. + NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool, + const std::shared_ptr& tracker, bool is_clone); + + /// Return the intermediate type of the aggregate function. 
+ inline const SlotDescriptor& intermediate_slot_desc() const; + inline const TypeDescriptor& intermediate_type() const; + + /// The interpreted path for the UDA's Update() function. It sets up the arguments to + /// call 'fn' is either the 'update_fn_' or 'merge_fn_' of agg_fn_, depending on whether + /// agg_fn_ is a merging aggregation. This converts from the agg-expr signature, taking + /// TupleRow to the UDA signature taking AnyVals by evaluating any input expressions + /// and populating the staging input values. + /// + /// Note that this function may be superseded by the codegend Update() IR function + /// generated by AggFn::CodegenUpdateOrMergeFunction() when codegen is enabled. + void Update(const TupleRow* row, Tuple* dst, void* fn); + + /// Sets up the arguments to call 'fn'. This converts from the agg-expr signature, + /// taking TupleRow to the UDA signature taking AnyVals. Writes the serialize/finalize + /// result to the given destination slot/tuple. 'fn' can be NULL to indicate the src + /// value should simply be written into the destination. Note that StringVal result is + /// from local allocation (which will be freed in the next QueryMaintenance()) so it + /// needs to be copied out if it needs to survive beyond QueryMaintenance() (e.g. if + /// 'dst' lives in a row batch). + void SerializeOrFinalize(Tuple* src, const SlotDescriptor& dst_slot_desc, Tuple* dst, void* fn); + + /// Writes the result in src into dst pointed to by dst_slot_desc + inline void SetDstSlot(const doris_udf::AnyVal* src, const SlotDescriptor& dst_slot_desc, + Tuple* dst); + + // Sets 'dst' to the value from 'slot'. 
+ void set_any_val(const void* slot, const TypeDescriptor& type, doris_udf::AnyVal* dst); }; inline void NewAggFnEvaluator::Add(const TupleRow* row, Tuple* dst) { - agg_fn_ctx_->impl()->increment_num_updates(); - Update(row, dst, agg_fn_.merge_or_update_fn()); + agg_fn_ctx_->impl()->increment_num_updates(); + Update(row, dst, agg_fn_.merge_or_update_fn()); } inline void NewAggFnEvaluator::Remove(const TupleRow* row, Tuple* dst) { - agg_fn_ctx_->impl()->increment_num_removes(); - Update(row, dst, agg_fn_.remove_fn()); + agg_fn_ctx_->impl()->increment_num_removes(); + Update(row, dst, agg_fn_.remove_fn()); } inline void NewAggFnEvaluator::Serialize(Tuple* tuple) { - SerializeOrFinalize(tuple, agg_fn_.intermediate_slot_desc(), tuple, - agg_fn_.serialize_fn()); + SerializeOrFinalize(tuple, agg_fn_.intermediate_slot_desc(), tuple, agg_fn_.serialize_fn()); } inline void NewAggFnEvaluator::Finalize(Tuple* agg_val, Tuple* output_val) { - SerializeOrFinalize(agg_val, agg_fn_.output_slot_desc(), output_val, - agg_fn_.finalize_fn()); + SerializeOrFinalize(agg_val, agg_fn_.output_slot_desc(), output_val, agg_fn_.finalize_fn()); } inline void NewAggFnEvaluator::GetValue(Tuple* src, Tuple* dst) { - SerializeOrFinalize(src, agg_fn_.output_slot_desc(), dst, - agg_fn_.get_value_fn()); + SerializeOrFinalize(src, agg_fn_.output_slot_desc(), dst, agg_fn_.get_value_fn()); } inline void NewAggFnEvaluator::Init(const std::vector& evals, Tuple* dst) { - for (int i = 0; i < evals.size(); ++i) evals[i]->Init(dst); + for (int i = 0; i < evals.size(); ++i) evals[i]->Init(dst); } inline void NewAggFnEvaluator::Add(const std::vector& evals, - const TupleRow* src, Tuple* dst) { - for (int i = 0; i < evals.size(); ++i) evals[i]->Add(src, dst); + const TupleRow* src, Tuple* dst) { + for (int i = 0; i < evals.size(); ++i) evals[i]->Add(src, dst); } inline void NewAggFnEvaluator::Remove(const std::vector& evals, - const TupleRow* src, Tuple* dst) { - for (int i = 0; i < evals.size(); ++i) 
evals[i]->Remove(src, dst); + const TupleRow* src, Tuple* dst) { + for (int i = 0; i < evals.size(); ++i) evals[i]->Remove(src, dst); } -inline void NewAggFnEvaluator::Serialize(const std::vector& evals, - Tuple* dst) { - for (int i = 0; i < evals.size(); ++i) evals[i]->Serialize(dst); +inline void NewAggFnEvaluator::Serialize(const std::vector& evals, Tuple* dst) { + for (int i = 0; i < evals.size(); ++i) evals[i]->Serialize(dst); } -inline void NewAggFnEvaluator::GetValue(const std::vector& evals, - Tuple* src, Tuple* dst) { - for (int i = 0; i < evals.size(); ++i) evals[i]->GetValue(src, dst); +inline void NewAggFnEvaluator::GetValue(const std::vector& evals, Tuple* src, + Tuple* dst) { + for (int i = 0; i < evals.size(); ++i) evals[i]->GetValue(src, dst); } inline void NewAggFnEvaluator::Finalize(const std::vector& evals, - Tuple* agg_val, Tuple* output_val) { - for (int i = 0; i < evals.size(); ++i) { - evals[i]->Finalize(agg_val, output_val); - } + Tuple* agg_val, Tuple* output_val) { + for (int i = 0; i < evals.size(); ++i) { + evals[i]->Finalize(agg_val, output_val); + } } -} +} // namespace doris #endif diff --git a/be/src/exprs/new_agg_fn_evaluator_ir.cc b/be/src/exprs/new_agg_fn_evaluator_ir.cc index 84bb092819d3d5..7be1291edc9c21 100644 --- a/be/src/exprs/new_agg_fn_evaluator_ir.cc +++ b/be/src/exprs/new_agg_fn_evaluator_ir.cc @@ -20,9 +20,9 @@ using namespace doris; FunctionContext* NewAggFnEvaluator::agg_fn_ctx() const { - return agg_fn_ctx_.get(); + return agg_fn_ctx_.get(); } ExprContext* const* NewAggFnEvaluator::input_evals() const { - return input_evals_.data(); + return input_evals_.data(); } diff --git a/be/src/exprs/new_in_predicate.cpp b/be/src/exprs/new_in_predicate.cpp index 026e52ab17e945..20730aad2a4a58 100644 --- a/be/src/exprs/new_in_predicate.cpp +++ b/be/src/exprs/new_in_predicate.cpp @@ -15,52 +15,48 @@ // specific language governing permissions and limitations // under the License. 
-#include - #include "exprs/new_in_predicate.h" +#include + #include "exprs/anyval_util.h" #include "runtime/string_value.hpp" namespace doris { -void InPredicate::init() { -} +void InPredicate::init() {} // Templated getter functions for extracting 'SetType' values from AnyVals -template +template SetType get_val(const FunctionContext::TypeDesc* type, const T& x) { DCHECK(!x.is_null); return x.val; } -template<> +template <> StringValue get_val(const FunctionContext::TypeDesc* type, const StringVal& x) { DCHECK(!x.is_null); return StringValue::from_string_val(x); } -template<> -DateTimeValue get_val( - const FunctionContext::TypeDesc* type, const DateTimeVal& x) { +template <> +DateTimeValue get_val(const FunctionContext::TypeDesc* type, const DateTimeVal& x) { return DateTimeValue::from_datetime_val(x); } -template<> -DecimalValue get_val( - const FunctionContext::TypeDesc* type, const DecimalVal& x) { +template <> +DecimalValue get_val(const FunctionContext::TypeDesc* type, const DecimalVal& x) { return DecimalValue::from_decimal_val(x); } -template<> -DecimalV2Value get_val( - const FunctionContext::TypeDesc* type, const DecimalV2Val& x) { +template <> +DecimalV2Value get_val(const FunctionContext::TypeDesc* type, const DecimalV2Val& x) { return DecimalV2Value::from_decimal_val(x); } -template -void InPredicate::set_lookup_prepare( - FunctionContext* ctx, FunctionContext::FunctionStateScope scope) { +template +void InPredicate::set_lookup_prepare(FunctionContext* ctx, + FunctionContext::FunctionStateScope scope) { if (scope != FunctionContext::FRAGMENT_LOCAL) { return; } @@ -80,20 +76,20 @@ void InPredicate::set_lookup_prepare( ctx->set_function_state(scope, state); } -template -void InPredicate::set_lookup_close( - FunctionContext* ctx, FunctionContext::FunctionStateScope scope) { +template +void InPredicate::set_lookup_close(FunctionContext* ctx, + FunctionContext::FunctionStateScope scope) { if (scope != FunctionContext::FRAGMENT_LOCAL) { return; } 
SetLookupState* state = - reinterpret_cast*>(ctx->get_function_state(scope)); + reinterpret_cast*>(ctx->get_function_state(scope)); delete state; } -template -BooleanVal InPredicate::templated_in( - FunctionContext* ctx, const T& val, int num_args, const T* args) { +template +BooleanVal InPredicate::templated_in(FunctionContext* ctx, const T& val, int num_args, + const T* args) { if (val.is_null) { return BooleanVal::null(); } @@ -101,7 +97,7 @@ BooleanVal InPredicate::templated_in( BooleanVal found; if (strategy == SET_LOOKUP) { SetLookupState* state = reinterpret_cast*>( - ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); DCHECK(state != NULL); found = set_lookup(state, val); } else { @@ -114,9 +110,8 @@ BooleanVal InPredicate::templated_in( return BooleanVal(found.val ^ not_in); } -template -BooleanVal InPredicate::set_lookup( - SetLookupState* state, const T& v) { +template +BooleanVal InPredicate::set_lookup(SetLookupState* state, const T& v) { DCHECK(state != NULL); SetType val = get_val(state->type, v); bool found = state->val_set.find(val) != state->val_set.end(); @@ -129,9 +124,9 @@ BooleanVal InPredicate::set_lookup( return BooleanVal(false); } -template -BooleanVal InPredicate::iterate( - const FunctionContext::TypeDesc* type, const T& val, int num_args, const T* args) { +template +BooleanVal InPredicate::iterate(const FunctionContext::TypeDesc* type, const T& val, int num_args, + const T* args) { bool found_null = false; for (int i = 0; i < num_args; ++i) { if (args[i].is_null) { @@ -146,44 +141,36 @@ BooleanVal InPredicate::iterate( return BooleanVal(false); } -#define IN_FUNCTIONS(AnyValType, SetType, type_name) \ - BooleanVal InPredicate::in_set_lookup( \ - FunctionContext* context, const AnyValType& val, int num_args, \ - const AnyValType* args) { \ - return templated_in( \ - context, val, num_args, args); \ - } \ -\ - BooleanVal InPredicate::not_in_set_lookup( \ - 
FunctionContext* context, const AnyValType& val, int num_args, \ - const AnyValType* args) { \ - return templated_in( \ - context, val, num_args, args); \ - } \ -\ - BooleanVal InPredicate::in_iterate( \ - FunctionContext* context, const AnyValType& val, int num_args, \ - const AnyValType* args) { \ - return templated_in( \ - context, val, num_args, args); \ - } \ -\ - BooleanVal InPredicate::not_in_iterate( \ - FunctionContext* context, const AnyValType& val, int num_args, \ - const AnyValType* args) { \ - return templated_in( \ - context, val, num_args, args); \ - } \ -\ - void InPredicate::set_lookup_prepare_##type_name( \ - FunctionContext* ctx, FunctionContext::FunctionStateScope scope) { \ - set_lookup_prepare(ctx, scope); \ - } \ -\ - void InPredicate::set_lookup_close_##type_name( \ - FunctionContext* ctx, FunctionContext::FunctionStateScope scope) { \ - set_lookup_close(ctx, scope); \ - } +#define IN_FUNCTIONS(AnyValType, SetType, type_name) \ + BooleanVal InPredicate::in_set_lookup(FunctionContext* context, const AnyValType& val, \ + int num_args, const AnyValType* args) { \ + return templated_in(context, val, num_args, args); \ + } \ + \ + BooleanVal InPredicate::not_in_set_lookup(FunctionContext* context, const AnyValType& val, \ + int num_args, const AnyValType* args) { \ + return templated_in(context, val, num_args, args); \ + } \ + \ + BooleanVal InPredicate::in_iterate(FunctionContext* context, const AnyValType& val, \ + int num_args, const AnyValType* args) { \ + return templated_in(context, val, num_args, args); \ + } \ + \ + BooleanVal InPredicate::not_in_iterate(FunctionContext* context, const AnyValType& val, \ + int num_args, const AnyValType* args) { \ + return templated_in(context, val, num_args, args); \ + } \ + \ + void InPredicate::set_lookup_prepare_##type_name(FunctionContext* ctx, \ + FunctionContext::FunctionStateScope scope) { \ + set_lookup_prepare(ctx, scope); \ + } \ + \ + void 
InPredicate::set_lookup_close_##type_name(FunctionContext* ctx, \ + FunctionContext::FunctionStateScope scope) { \ + set_lookup_close(ctx, scope); \ + } IN_FUNCTIONS(BooleanVal, bool, boolean_val) IN_FUNCTIONS(TinyIntVal, int8_t, tiny_int_val) @@ -199,6 +186,6 @@ IN_FUNCTIONS(DecimalV2Val, DecimalV2Value, decimalv2_val) IN_FUNCTIONS(LargeIntVal, __int128, large_int_val) // Needed for in-predicate-benchmark to build -template BooleanVal InPredicate::iterate( - const FunctionContext::TypeDesc*, const IntVal&, int, const IntVal*); -} +template BooleanVal InPredicate::iterate(const FunctionContext::TypeDesc*, const IntVal&, + int, const IntVal*); +} // namespace doris diff --git a/be/src/exprs/new_in_predicate.h b/be/src/exprs/new_in_predicate.h index 0ae413079bab96..111b30200d4774 100644 --- a/be/src/exprs/new_in_predicate.h +++ b/be/src/exprs/new_in_predicate.h @@ -19,32 +19,35 @@ #define DORIS_BE_SRC_QUERY_EXPRS_NEW_IN_PREDICATE_H #include + #include "exprs/predicate.h" #include "udf/udf.h" /* added by lide */ -#define IN_FUNCTIONS_STMT(AnyValType, SetType, type_name) \ - static doris_udf::BooleanVal in_set_lookup( \ - doris_udf::FunctionContext* context, const doris_udf::AnyValType& val, int num_args, \ - const doris_udf::AnyValType* args); \ -\ - static doris_udf::BooleanVal not_in_set_lookup( \ - doris_udf::FunctionContext* context, const doris_udf::AnyValType& val, int num_args, \ - const doris_udf::AnyValType* args); \ -\ - static doris_udf::BooleanVal in_iterate( \ - doris_udf::FunctionContext* context, const doris_udf::AnyValType& val, int num_args, \ - const doris_udf::AnyValType* args); \ -\ - static doris_udf::BooleanVal not_in_iterate( \ - doris_udf::FunctionContext* context, const doris_udf::AnyValType& val, int num_args, \ - const doris_udf::AnyValType* args); \ -\ - static void set_lookup_prepare_##type_name( \ - doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); \ -\ - static void set_lookup_close_##type_name( \ - 
doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); +#define IN_FUNCTIONS_STMT(AnyValType, SetType, type_name) \ + static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, \ + const doris_udf::AnyValType& val, int num_args, \ + const doris_udf::AnyValType* args); \ + \ + static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, \ + const doris_udf::AnyValType& val, int num_args, \ + const doris_udf::AnyValType* args); \ + \ + static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, \ + const doris_udf::AnyValType& val, int num_args, \ + const doris_udf::AnyValType* args); \ + \ + static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, \ + const doris_udf::AnyValType& val, int num_args, \ + const doris_udf::AnyValType* args); \ + \ + static void set_lookup_prepare_##type_name( \ + doris_udf::FunctionContext* ctx, \ + doris_udf::FunctionContext::FunctionStateScope scope); \ + \ + static void set_lookup_close_##type_name( \ + doris_udf::FunctionContext* ctx, \ + doris_udf::FunctionContext::FunctionStateScope scope); namespace doris { @@ -70,249 +73,247 @@ class InPredicate { static void init(); /// Functions for every type - static doris_udf::BooleanVal in_iterate( - doris_udf::FunctionContext* context, const doris_udf::BooleanVal& val, - int num_args, const doris_udf::BooleanVal* args); + static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, + const doris_udf::BooleanVal& val, int num_args, + const doris_udf::BooleanVal* args); - static doris_udf::BooleanVal not_in_iterate( - doris_udf::FunctionContext* context, const doris_udf::BooleanVal& val, - int num_args, const doris_udf::BooleanVal* args); + static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, + const doris_udf::BooleanVal& val, int num_args, + const doris_udf::BooleanVal* args); static void set_lookup_prepare_boolean_val( - 
doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); - static void set_lookup_close_boolean_val( - doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + static void set_lookup_close_boolean_val(doris_udf::FunctionContext* ctx, + doris_udf::FunctionContext::FunctionStateScope scope); - static doris_udf::BooleanVal in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::BooleanVal& val, - int num_args, const doris_udf::BooleanVal* args); + static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::BooleanVal& val, int num_args, + const doris_udf::BooleanVal* args); - static doris_udf::BooleanVal not_in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::BooleanVal& val, - int num_args, const doris_udf::BooleanVal* args); + static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::BooleanVal& val, int num_args, + const doris_udf::BooleanVal* args); - static doris_udf::BooleanVal in_iterate( - doris_udf::FunctionContext* context, const doris_udf::TinyIntVal& val, - int num_args, const doris_udf::TinyIntVal* args); + static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, + const doris_udf::TinyIntVal& val, int num_args, + const doris_udf::TinyIntVal* args); - static doris_udf::BooleanVal not_in_iterate( - doris_udf::FunctionContext* context, const doris_udf::TinyIntVal& val, - int num_args, const doris_udf::TinyIntVal* args); + static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, + const doris_udf::TinyIntVal& val, int num_args, + const doris_udf::TinyIntVal* args); - static void set_lookup_prepare_tiny_int_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + static void set_lookup_prepare_tiny_int_val( 
+ doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); static void set_lookup_close_tiny_int_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + doris_udf::FunctionContext::FunctionStateScope scope); - static doris_udf::BooleanVal in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::TinyIntVal& val, - int num_args, const doris_udf::TinyIntVal* args); + static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::TinyIntVal& val, int num_args, + const doris_udf::TinyIntVal* args); - static doris_udf::BooleanVal not_in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::TinyIntVal& val, - int num_args, const doris_udf::TinyIntVal* args); + static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::TinyIntVal& val, int num_args, + const doris_udf::TinyIntVal* args); - static doris_udf::BooleanVal in_iterate( - doris_udf::FunctionContext* context, const doris_udf::SmallIntVal& val, - int num_args, const doris_udf::SmallIntVal* args); + static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, + const doris_udf::SmallIntVal& val, int num_args, + const doris_udf::SmallIntVal* args); - static doris_udf::BooleanVal not_in_iterate( - doris_udf::FunctionContext* context, const doris_udf::SmallIntVal& val, - int num_args, const doris_udf::SmallIntVal* args); + static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, + const doris_udf::SmallIntVal& val, int num_args, + const doris_udf::SmallIntVal* args); - static void set_lookup_prepare_small_int_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + static void set_lookup_prepare_small_int_val( + doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); - static void 
set_lookup_close_small_int_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + static void set_lookup_close_small_int_val( + doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); - static doris_udf::BooleanVal in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::SmallIntVal& val, - int num_args, const doris_udf::SmallIntVal* args); + static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::SmallIntVal& val, int num_args, + const doris_udf::SmallIntVal* args); - static doris_udf::BooleanVal not_in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::SmallIntVal& val, - int num_args, const doris_udf::SmallIntVal* args); + static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::SmallIntVal& val, int num_args, + const doris_udf::SmallIntVal* args); - static doris_udf::BooleanVal in_iterate( - doris_udf::FunctionContext* context, const doris_udf::IntVal& val, - int num_args, const doris_udf::IntVal* args); + static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, + const doris_udf::IntVal& val, int num_args, + const doris_udf::IntVal* args); - static doris_udf::BooleanVal not_in_iterate( - doris_udf::FunctionContext* context, const doris_udf::IntVal& val, - int num_args, const doris_udf::IntVal* args); + static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, + const doris_udf::IntVal& val, int num_args, + const doris_udf::IntVal* args); static void set_lookup_prepare_int_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + doris_udf::FunctionContext::FunctionStateScope scope); static void set_lookup_close_int_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + doris_udf::FunctionContext::FunctionStateScope scope); - static 
doris_udf::BooleanVal in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::IntVal& val, - int num_args, const doris_udf::IntVal* args); + static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::IntVal& val, int num_args, + const doris_udf::IntVal* args); - static doris_udf::BooleanVal not_in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::IntVal& val, - int num_args, const doris_udf::IntVal* args); + static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::IntVal& val, int num_args, + const doris_udf::IntVal* args); - static doris_udf::BooleanVal in_iterate( - doris_udf::FunctionContext* context, const doris_udf::BigIntVal& val, - int num_args, const doris_udf::BigIntVal* args); + static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, + const doris_udf::BigIntVal& val, int num_args, + const doris_udf::BigIntVal* args); - static doris_udf::BooleanVal not_in_iterate( - doris_udf::FunctionContext* context, const doris_udf::BigIntVal& val, - int num_args, const doris_udf::BigIntVal* args); + static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, + const doris_udf::BigIntVal& val, int num_args, + const doris_udf::BigIntVal* args); - static void set_lookup_prepare_big_int_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + static void set_lookup_prepare_big_int_val( + doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); static void set_lookup_close_big_int_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + doris_udf::FunctionContext::FunctionStateScope scope); - static doris_udf::BooleanVal in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::BigIntVal& val, - int num_args, const doris_udf::BigIntVal* args); + static doris_udf::BooleanVal 
in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::BigIntVal& val, int num_args, + const doris_udf::BigIntVal* args); - static doris_udf::BooleanVal not_in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::BigIntVal& val, - int num_args, const doris_udf::BigIntVal* args); + static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::BigIntVal& val, int num_args, + const doris_udf::BigIntVal* args); - static doris_udf::BooleanVal in_iterate( - doris_udf::FunctionContext* context, const doris_udf::FloatVal& val, - int num_args, const doris_udf::FloatVal* args); + static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, + const doris_udf::FloatVal& val, int num_args, + const doris_udf::FloatVal* args); - static doris_udf::BooleanVal not_in_iterate( - doris_udf::FunctionContext* context, const doris_udf::FloatVal& val, - int num_args, const doris_udf::FloatVal* args); + static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, + const doris_udf::FloatVal& val, int num_args, + const doris_udf::FloatVal* args); static void set_lookup_prepare_float_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + doris_udf::FunctionContext::FunctionStateScope scope); static void set_lookup_close_float_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + doris_udf::FunctionContext::FunctionStateScope scope); - static doris_udf::BooleanVal in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::FloatVal& val, - int num_args, const doris_udf::FloatVal* args); + static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::FloatVal& val, int num_args, + const doris_udf::FloatVal* args); - static doris_udf::BooleanVal not_in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::FloatVal& val, - int num_args, const 
doris_udf::FloatVal* args); + static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::FloatVal& val, int num_args, + const doris_udf::FloatVal* args); - static doris_udf::BooleanVal in_iterate( - doris_udf::FunctionContext* context, const doris_udf::DoubleVal& val, - int num_args, const doris_udf::DoubleVal* args); + static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, + const doris_udf::DoubleVal& val, int num_args, + const doris_udf::DoubleVal* args); - static doris_udf::BooleanVal not_in_iterate( - doris_udf::FunctionContext* context, const doris_udf::DoubleVal& val, - int num_args, const doris_udf::DoubleVal* args); + static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, + const doris_udf::DoubleVal& val, int num_args, + const doris_udf::DoubleVal* args); static void set_lookup_prepare_double_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + doris_udf::FunctionContext::FunctionStateScope scope); static void set_lookup_close_double_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + doris_udf::FunctionContext::FunctionStateScope scope); - static doris_udf::BooleanVal in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::DoubleVal& val, - int num_args, const doris_udf::DoubleVal* args); + static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::DoubleVal& val, int num_args, + const doris_udf::DoubleVal* args); - static doris_udf::BooleanVal not_in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::DoubleVal& val, - int num_args, const doris_udf::DoubleVal* args); + static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::DoubleVal& val, int num_args, + const doris_udf::DoubleVal* args); - static doris_udf::BooleanVal in_iterate( - doris_udf::FunctionContext* 
context, const doris_udf::StringVal& val, - int num_args, const doris_udf::StringVal* args); + static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, + const doris_udf::StringVal& val, int num_args, + const doris_udf::StringVal* args); - static doris_udf::BooleanVal not_in_iterate( - doris_udf::FunctionContext* context, const doris_udf::StringVal& val, - int num_args, const doris_udf::StringVal* args); + static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, + const doris_udf::StringVal& val, int num_args, + const doris_udf::StringVal* args); static void set_lookup_prepare_string_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + doris_udf::FunctionContext::FunctionStateScope scope); static void set_lookup_close_string_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + doris_udf::FunctionContext::FunctionStateScope scope); - static doris_udf::BooleanVal in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::StringVal& val, - int num_args, const doris_udf::StringVal* args); + static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::StringVal& val, int num_args, + const doris_udf::StringVal* args); - static doris_udf::BooleanVal not_in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::StringVal& val, - int num_args, const doris_udf::StringVal* args); + static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::StringVal& val, int num_args, + const doris_udf::StringVal* args); - static doris_udf::BooleanVal in_iterate( - doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& val, - int num_args, const doris_udf::DateTimeVal* args); + static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, + const doris_udf::DateTimeVal& val, int num_args, + const doris_udf::DateTimeVal* args); - 
static doris_udf::BooleanVal not_in_iterate( - doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& val, - int num_args, const doris_udf::DateTimeVal* args); + static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, + const doris_udf::DateTimeVal& val, int num_args, + const doris_udf::DateTimeVal* args); - static void set_lookup_prepare_datetime_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + static void set_lookup_prepare_datetime_val( + doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); static void set_lookup_close_datetime_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + doris_udf::FunctionContext::FunctionStateScope scope); - static doris_udf::BooleanVal in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& val, - int num_args, const doris_udf::DateTimeVal* args); + static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::DateTimeVal& val, int num_args, + const doris_udf::DateTimeVal* args); - static doris_udf::BooleanVal not_in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& val, - int num_args, const doris_udf::DateTimeVal* args); + static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::DateTimeVal& val, int num_args, + const doris_udf::DateTimeVal* args); - static doris_udf::BooleanVal in_iterate( - doris_udf::FunctionContext* context, const doris_udf::DecimalVal& val, - int num_args, const doris_udf::DecimalVal* args); + static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, + const doris_udf::DecimalVal& val, int num_args, + const doris_udf::DecimalVal* args); - static doris_udf::BooleanVal in_iterate( - doris_udf::FunctionContext* context, const doris_udf::DecimalV2Val& val, - int num_args, const 
doris_udf::DecimalV2Val* args); + static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, + const doris_udf::DecimalV2Val& val, int num_args, + const doris_udf::DecimalV2Val* args); - static doris_udf::BooleanVal not_in_iterate( - doris_udf::FunctionContext* context, const doris_udf::DecimalVal& val, - int num_args, const doris_udf::DecimalVal* args); + static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, + const doris_udf::DecimalVal& val, int num_args, + const doris_udf::DecimalVal* args); - static doris_udf::BooleanVal not_in_iterate( - doris_udf::FunctionContext* context, const doris_udf::DecimalV2Val& val, - int num_args, const doris_udf::DecimalV2Val* args); + static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, + const doris_udf::DecimalV2Val& val, int num_args, + const doris_udf::DecimalV2Val* args); - static void set_lookup_prepare_decimal_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + static void set_lookup_prepare_decimal_val( + doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); - static void set_lookup_prepare_decimalv2_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + static void set_lookup_prepare_decimalv2_val( + doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); static void set_lookup_close_decimal_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + doris_udf::FunctionContext::FunctionStateScope scope); - static void set_lookup_close_decimalv2_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); + static void set_lookup_close_decimalv2_val( + doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); - static doris_udf::BooleanVal in_set_lookup( - doris_udf::FunctionContext* context, const 
doris_udf::DecimalVal& val, - int num_args, const doris_udf::DecimalVal* args); + static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::DecimalVal& val, int num_args, + const doris_udf::DecimalVal* args); - static doris_udf::BooleanVal in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::DecimalV2Val& val, - int num_args, const doris_udf::DecimalV2Val* args); + static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::DecimalV2Val& val, int num_args, + const doris_udf::DecimalV2Val* args); - static doris_udf::BooleanVal not_in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::DecimalVal& val, - int num_args, const doris_udf::DecimalVal* args); + static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::DecimalVal& val, int num_args, + const doris_udf::DecimalVal* args); - static doris_udf::BooleanVal not_in_set_lookup( - doris_udf::FunctionContext* context, const doris_udf::DecimalV2Val& val, - int num_args, const doris_udf::DecimalV2Val* args); + static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, + const doris_udf::DecimalV2Val& val, int num_args, + const doris_udf::DecimalV2Val* args); /* added by lide */ IN_FUNCTIONS_STMT(LargeIntVal, __int128, large_int_val) @@ -327,7 +328,7 @@ class InPredicate { ITERATE }; - template + template struct SetLookupState { /// If true, there is at least one NULL constant in the IN list. bool contains_null; @@ -343,29 +344,27 @@ class InPredicate { /// The templated function that provides the implementation for all the In() and NotIn() /// functions. 
- template - static inline doris_udf::BooleanVal templated_in( - doris_udf::FunctionContext* context, const T& val, int num_args, const T* args); + template + static inline doris_udf::BooleanVal templated_in(doris_udf::FunctionContext* context, + const T& val, int num_args, const T* args); /// Initializes an SetLookupState in ctx. - template - static void set_lookup_prepare( - FunctionContext* ctx, FunctionContext::FunctionStateScope scope); + template + static void set_lookup_prepare(FunctionContext* ctx, FunctionContext::FunctionStateScope scope); - template - static void set_lookup_close( - FunctionContext* ctx, FunctionContext::FunctionStateScope scope); + template + static void set_lookup_close(FunctionContext* ctx, FunctionContext::FunctionStateScope scope); /// Looks up v in state->val_set. - template + template static BooleanVal set_lookup(SetLookupState* state, const T& v); /// Iterates through each vararg looking for val. 'type' is the type of 'val' and 'args'. - template - static BooleanVal iterate( - const FunctionContext::TypeDesc* type, const T& val, int num_args, const T* args); + template + static BooleanVal iterate(const FunctionContext::TypeDesc* type, const T& val, int num_args, + const T* args); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/null_literal.cpp b/be/src/exprs/null_literal.cpp index c4e8e43aee77c5..cda5670b6750b7 100644 --- a/be/src/exprs/null_literal.cpp +++ b/be/src/exprs/null_literal.cpp @@ -22,9 +22,7 @@ namespace doris { -NullLiteral::NullLiteral(const TExprNode& node) : - Expr(node) { -} +NullLiteral::NullLiteral(const TExprNode& node) : Expr(node) {} // NullLiteral::NullLiteral(PrimitiveType type) : Expr(TypeDescriptor(type)) { // } @@ -73,4 +71,4 @@ DecimalV2Val NullLiteral::get_decimalv2_val(ExprContext*, TupleRow*) { return DecimalV2Val::null(); } -} +} // namespace doris diff --git a/be/src/exprs/null_literal.h b/be/src/exprs/null_literal.h index 6eeb105aca57ad..000f0809909573 100644 --- 
a/be/src/exprs/null_literal.h +++ b/be/src/exprs/null_literal.h @@ -27,8 +27,7 @@ class TExprNode; class NullLiteral : public Expr { public: - - virtual Expr* clone(ObjectPool* pool) const override { + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new NullLiteral(*this)); } // NullLiteral(PrimitiveType type); @@ -53,6 +52,6 @@ class NullLiteral : public Expr { static void* return_value(Expr* e, TupleRow* row); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/operators.cpp b/be/src/exprs/operators.cpp index 699b900c5284e9..2380bbc8e01ec6 100644 --- a/be/src/exprs/operators.cpp +++ b/be/src/exprs/operators.cpp @@ -16,100 +16,100 @@ // under the License. #include "exprs/operators.h" + +#include + #include "exprs/anyval_util.h" -#include "runtime/string_value.h" #include "runtime/datetime_value.h" +#include "runtime/string_value.h" #include "util/debug_util.h" -#include - namespace doris { -void Operators::init() { -} +void Operators::init() {} -#define BINARY_OP_FN(NAME, TYPE_NAME, TYPE, OP) \ - TYPE Operators::NAME##_##TYPE_NAME##_##TYPE_NAME(\ - FunctionContext* c, const TYPE& v1, const TYPE& v2) {\ - if (v1.is_null || v2.is_null) return TYPE::null();\ - return TYPE(v1.val OP v2.val);\ - } +#define BINARY_OP_FN(NAME, TYPE_NAME, TYPE, OP) \ + TYPE Operators::NAME##_##TYPE_NAME##_##TYPE_NAME(FunctionContext* c, const TYPE& v1, \ + const TYPE& v2) { \ + if (v1.is_null || v2.is_null) return TYPE::null(); \ + return TYPE(v1.val OP v2.val); \ + } -#define BINARY_OP_CHECK_ZERO_FN(NAME, TYPE_NAME, TYPE, OP) \ - TYPE Operators::NAME##_##TYPE_NAME##_##TYPE_NAME(\ - FunctionContext* c, const TYPE& v1, const TYPE& v2) {\ - if (v1.is_null || v2.is_null || v2.val == 0) return TYPE::null();\ - return TYPE(v1.val OP v2.val);\ - } +#define BINARY_OP_CHECK_ZERO_FN(NAME, TYPE_NAME, TYPE, OP) \ + TYPE Operators::NAME##_##TYPE_NAME##_##TYPE_NAME(FunctionContext* c, const TYPE& v1, \ + const TYPE& v2) { \ + if (v1.is_null || v2.is_null || v2.val == 
0) return TYPE::null(); \ + return TYPE(v1.val OP v2.val); \ + } -#define BITNOT_FN(TYPE, TYPE_NAME)\ - TYPE Operators::bitnot_##TYPE_NAME(FunctionContext* c, const TYPE& v) {\ - if (v.is_null) return TYPE::null();\ - return TYPE(~v.val);\ - } +#define BITNOT_FN(TYPE, TYPE_NAME) \ + TYPE Operators::bitnot_##TYPE_NAME(FunctionContext* c, const TYPE& v) { \ + if (v.is_null) return TYPE::null(); \ + return TYPE(~v.val); \ + } // Return infinity if overflow. -#define FACTORIAL_FN(TYPE)\ - BigIntVal Operators::Factorial_##TYPE(FunctionContext* c, const TYPE& v) {\ - if (v.is_null) return BigIntVal::null();\ - int64_t fact = ComputeFactorial(v.val); \ - if (fact < 0) { \ - return BigIntVal::null(); \ - } \ - return BigIntVal(fact); \ - } +#define FACTORIAL_FN(TYPE) \ + BigIntVal Operators::Factorial_##TYPE(FunctionContext* c, const TYPE& v) { \ + if (v.is_null) return BigIntVal::null(); \ + int64_t fact = ComputeFactorial(v.val); \ + if (fact < 0) { \ + return BigIntVal::null(); \ + } \ + return BigIntVal(fact); \ + } -#define BINARY_PREDICATE_NUMERIC_FN(NAME, TYPE_NAME, TYPE, OP) \ - BooleanVal Operators::NAME##_##TYPE_NAME##_##TYPE_NAME(\ - FunctionContext* c, const TYPE& v1, const TYPE& v2) {\ - if (v1.is_null || v2.is_null) return BooleanVal::null();\ - return BooleanVal(v1.val OP v2.val);\ +#define BINARY_PREDICATE_NUMERIC_FN(NAME, TYPE_NAME, TYPE, OP) \ + BooleanVal Operators::NAME##_##TYPE_NAME##_##TYPE_NAME(FunctionContext* c, const TYPE& v1, \ + const TYPE& v2) { \ + if (v1.is_null || v2.is_null) return BooleanVal::null(); \ + return BooleanVal(v1.val OP v2.val); \ } -#define BINARY_PREDICATE_NONNUMERIC_FN(NAME, TYPE_NAME, FUNC_NAME, TYPE, DORIS_TYPE, OP) \ - BooleanVal Operators::NAME##_##TYPE_NAME##_##TYPE_NAME(\ - FunctionContext* c, const TYPE& v1, const TYPE& v2) {\ - if (v1.is_null || v2.is_null) return BooleanVal::null();\ - DORIS_TYPE iv1 = DORIS_TYPE::from_##FUNC_NAME(v1);\ - DORIS_TYPE iv2 = DORIS_TYPE::from_##FUNC_NAME(v2);\ - return BooleanVal(iv1 OP 
iv2);\ +#define BINARY_PREDICATE_NONNUMERIC_FN(NAME, TYPE_NAME, FUNC_NAME, TYPE, DORIS_TYPE, OP) \ + BooleanVal Operators::NAME##_##TYPE_NAME##_##TYPE_NAME(FunctionContext* c, const TYPE& v1, \ + const TYPE& v2) { \ + if (v1.is_null || v2.is_null) return BooleanVal::null(); \ + DORIS_TYPE iv1 = DORIS_TYPE::from_##FUNC_NAME(v1); \ + DORIS_TYPE iv2 = DORIS_TYPE::from_##FUNC_NAME(v2); \ + return BooleanVal(iv1 OP iv2); \ } -#define BINARY_OP_NUMERIC_TYPES(NAME, OP) \ - BINARY_OP_FN(NAME, tiny_int_val, TinyIntVal, OP); \ - BINARY_OP_FN(NAME, small_int_val, SmallIntVal, OP);\ - BINARY_OP_FN(NAME, int_val, IntVal, OP);\ - BINARY_OP_FN(NAME, big_int_val, BigIntVal, OP);\ - BINARY_OP_FN(NAME, large_int_val, LargeIntVal, OP);\ - BINARY_OP_FN(NAME, float_val, FloatVal, OP);\ +#define BINARY_OP_NUMERIC_TYPES(NAME, OP) \ + BINARY_OP_FN(NAME, tiny_int_val, TinyIntVal, OP); \ + BINARY_OP_FN(NAME, small_int_val, SmallIntVal, OP); \ + BINARY_OP_FN(NAME, int_val, IntVal, OP); \ + BINARY_OP_FN(NAME, big_int_val, BigIntVal, OP); \ + BINARY_OP_FN(NAME, large_int_val, LargeIntVal, OP); \ + BINARY_OP_FN(NAME, float_val, FloatVal, OP); \ BINARY_OP_FN(NAME, double_val, DoubleVal, OP); -#define BINARY_OP_INT_TYPES(NAME, OP) \ - BINARY_OP_FN(NAME, tiny_int_val, TinyIntVal, OP); \ - BINARY_OP_FN(NAME, small_int_val, SmallIntVal, OP);\ - BINARY_OP_FN(NAME, int_val, IntVal, OP);\ - BINARY_OP_FN(NAME, big_int_val, BigIntVal, OP);\ - BINARY_OP_FN(NAME, large_int_val, LargeIntVal, OP);\ - -#define BINARY_OP_CHECK_ZERO_INT_TYPES(NAME, OP) \ - BINARY_OP_CHECK_ZERO_FN(NAME, tiny_int_val, TinyIntVal, OP); \ - BINARY_OP_CHECK_ZERO_FN(NAME, small_int_val, SmallIntVal, OP);\ - BINARY_OP_CHECK_ZERO_FN(NAME, int_val, IntVal, OP);\ - BINARY_OP_CHECK_ZERO_FN(NAME, big_int_val, BigIntVal, OP);\ - BINARY_OP_CHECK_ZERO_FN(NAME, large_int_val, LargeIntVal, OP);\ - -#define BINARY_PREDICATE_ALL_TYPES(NAME, OP) \ - BINARY_PREDICATE_NUMERIC_FN(NAME, boolean_val, BooleanVal, OP); \ - 
BINARY_PREDICATE_NUMERIC_FN(NAME, tiny_int_val, TinyIntVal, OP); \ - BINARY_PREDICATE_NUMERIC_FN(NAME, small_int_val, SmallIntVal, OP);\ - BINARY_PREDICATE_NUMERIC_FN(NAME, int_val, IntVal, OP);\ - BINARY_PREDICATE_NUMERIC_FN(NAME, big_int_val, BigIntVal, OP);\ - BINARY_PREDICATE_NUMERIC_FN(NAME, large_int_val, LargeIntVal, OP);\ - BINARY_PREDICATE_NUMERIC_FN(NAME, float_val, FloatVal, OP);\ - BINARY_PREDICATE_NUMERIC_FN(NAME, double_val, DoubleVal, OP);\ - BINARY_PREDICATE_NONNUMERIC_FN(NAME, string_val, string_val, StringVal, StringValue, OP);\ - BINARY_PREDICATE_NONNUMERIC_FN(\ - NAME, datetime_val, datetime_val, DateTimeVal, DateTimeValue, OP); +#define BINARY_OP_INT_TYPES(NAME, OP) \ + BINARY_OP_FN(NAME, tiny_int_val, TinyIntVal, OP); \ + BINARY_OP_FN(NAME, small_int_val, SmallIntVal, OP); \ + BINARY_OP_FN(NAME, int_val, IntVal, OP); \ + BINARY_OP_FN(NAME, big_int_val, BigIntVal, OP); \ + BINARY_OP_FN(NAME, large_int_val, LargeIntVal, OP); + +#define BINARY_OP_CHECK_ZERO_INT_TYPES(NAME, OP) \ + BINARY_OP_CHECK_ZERO_FN(NAME, tiny_int_val, TinyIntVal, OP); \ + BINARY_OP_CHECK_ZERO_FN(NAME, small_int_val, SmallIntVal, OP); \ + BINARY_OP_CHECK_ZERO_FN(NAME, int_val, IntVal, OP); \ + BINARY_OP_CHECK_ZERO_FN(NAME, big_int_val, BigIntVal, OP); \ + BINARY_OP_CHECK_ZERO_FN(NAME, large_int_val, LargeIntVal, OP); + +#define BINARY_PREDICATE_ALL_TYPES(NAME, OP) \ + BINARY_PREDICATE_NUMERIC_FN(NAME, boolean_val, BooleanVal, OP); \ + BINARY_PREDICATE_NUMERIC_FN(NAME, tiny_int_val, TinyIntVal, OP); \ + BINARY_PREDICATE_NUMERIC_FN(NAME, small_int_val, SmallIntVal, OP); \ + BINARY_PREDICATE_NUMERIC_FN(NAME, int_val, IntVal, OP); \ + BINARY_PREDICATE_NUMERIC_FN(NAME, big_int_val, BigIntVal, OP); \ + BINARY_PREDICATE_NUMERIC_FN(NAME, large_int_val, LargeIntVal, OP); \ + BINARY_PREDICATE_NUMERIC_FN(NAME, float_val, FloatVal, OP); \ + BINARY_PREDICATE_NUMERIC_FN(NAME, double_val, DoubleVal, OP); \ + BINARY_PREDICATE_NONNUMERIC_FN(NAME, string_val, string_val, StringVal, 
StringValue, OP); \ + BINARY_PREDICATE_NONNUMERIC_FN(NAME, datetime_val, datetime_val, DateTimeVal, DateTimeValue, \ + OP); BINARY_OP_NUMERIC_TYPES(add, +); BINARY_OP_NUMERIC_TYPES(subtract, -); @@ -182,4 +182,3 @@ BINARY_PREDICATE_ALL_TYPES(lt, <); BINARY_PREDICATE_ALL_TYPES(ge, >=); BINARY_PREDICATE_ALL_TYPES(le, <=); } // namespace doris - diff --git a/be/src/exprs/operators.h b/be/src/exprs/operators.h index 1fad930bc0ab2b..54acb8143c63b1 100644 --- a/be/src/exprs/operators.h +++ b/be/src/exprs/operators.h @@ -42,239 +42,217 @@ class Operators { static BigIntVal bitnot_big_int_val(FunctionContext*, const BigIntVal&); static LargeIntVal bitnot_large_int_val(FunctionContext*, const LargeIntVal&); - static TinyIntVal bitand_tiny_int_val_tiny_int_val( - FunctionContext*, const TinyIntVal&, const TinyIntVal&); - static SmallIntVal bitand_small_int_val_small_int_val( - FunctionContext*, const SmallIntVal&, const SmallIntVal&); - static IntVal bitand_int_val_int_val( - FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal bitand_big_int_val_big_int_val( - FunctionContext*, const BigIntVal&, const BigIntVal&); - static LargeIntVal bitand_large_int_val_large_int_val( - FunctionContext*, const LargeIntVal&, const LargeIntVal&); + static TinyIntVal bitand_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, + const TinyIntVal&); + static SmallIntVal bitand_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, + const SmallIntVal&); + static IntVal bitand_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); + static BigIntVal bitand_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, + const BigIntVal&); + static LargeIntVal bitand_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, + const LargeIntVal&); - static TinyIntVal bitxor_tiny_int_val_tiny_int_val( - FunctionContext*, const TinyIntVal&, const TinyIntVal&); - static SmallIntVal bitxor_small_int_val_small_int_val( - FunctionContext*, const 
SmallIntVal&, const SmallIntVal&); - static IntVal bitxor_int_val_int_val( - FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal bitxor_big_int_val_big_int_val( - FunctionContext*, const BigIntVal&, const BigIntVal&); - static LargeIntVal bitxor_large_int_val_large_int_val( - FunctionContext*, const LargeIntVal&, const LargeIntVal&); + static TinyIntVal bitxor_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, + const TinyIntVal&); + static SmallIntVal bitxor_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, + const SmallIntVal&); + static IntVal bitxor_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); + static BigIntVal bitxor_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, + const BigIntVal&); + static LargeIntVal bitxor_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, + const LargeIntVal&); - static TinyIntVal bitor_tiny_int_val_tiny_int_val( - FunctionContext*, const TinyIntVal&, const TinyIntVal&); - static SmallIntVal bitor_small_int_val_small_int_val( - FunctionContext*, const SmallIntVal&, const SmallIntVal&); - static IntVal bitor_int_val_int_val( - FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal bitor_big_int_val_big_int_val( - FunctionContext*, const BigIntVal&, const BigIntVal&); - static LargeIntVal bitor_large_int_val_large_int_val( - FunctionContext*, const LargeIntVal&, const LargeIntVal&); + static TinyIntVal bitor_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, + const TinyIntVal&); + static SmallIntVal bitor_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, + const SmallIntVal&); + static IntVal bitor_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); + static BigIntVal bitor_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, + const BigIntVal&); + static LargeIntVal bitor_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, + const LargeIntVal&); - // Arithmetic - static 
TinyIntVal add_tiny_int_val_tiny_int_val( - FunctionContext*, const TinyIntVal&, const TinyIntVal&); - static SmallIntVal add_small_int_val_small_int_val( - FunctionContext*, const SmallIntVal&, const SmallIntVal&); - static IntVal add_int_val_int_val( - FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal add_big_int_val_big_int_val( - FunctionContext*, const BigIntVal&, const BigIntVal&); - static LargeIntVal add_large_int_val_large_int_val( - FunctionContext*, const LargeIntVal&, const LargeIntVal&); - static FloatVal add_float_val_float_val( - FunctionContext*, const FloatVal&, const FloatVal&); - static DoubleVal add_double_val_double_val( - FunctionContext*, const DoubleVal&, const DoubleVal&); + // Arithmetic + static TinyIntVal add_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, + const TinyIntVal&); + static SmallIntVal add_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, + const SmallIntVal&); + static IntVal add_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); + static BigIntVal add_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, + const BigIntVal&); + static LargeIntVal add_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, + const LargeIntVal&); + static FloatVal add_float_val_float_val(FunctionContext*, const FloatVal&, const FloatVal&); + static DoubleVal add_double_val_double_val(FunctionContext*, const DoubleVal&, + const DoubleVal&); - static TinyIntVal subtract_tiny_int_val_tiny_int_val( - FunctionContext*, const TinyIntVal&, const TinyIntVal&); - static SmallIntVal subtract_small_int_val_small_int_val( - FunctionContext*, const SmallIntVal&, const SmallIntVal&); - static IntVal subtract_int_val_int_val( - FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal subtract_big_int_val_big_int_val( - FunctionContext*, const BigIntVal&, const BigIntVal&); - static LargeIntVal subtract_large_int_val_large_int_val( - FunctionContext*, const 
LargeIntVal&, const LargeIntVal&); - static FloatVal subtract_float_val_float_val( - FunctionContext*, const FloatVal&, const FloatVal&); - static DoubleVal subtract_double_val_double_val( - FunctionContext*, const DoubleVal&, const DoubleVal&); + static TinyIntVal subtract_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, + const TinyIntVal&); + static SmallIntVal subtract_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, + const SmallIntVal&); + static IntVal subtract_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); + static BigIntVal subtract_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, + const BigIntVal&); + static LargeIntVal subtract_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, + const LargeIntVal&); + static FloatVal subtract_float_val_float_val(FunctionContext*, const FloatVal&, + const FloatVal&); + static DoubleVal subtract_double_val_double_val(FunctionContext*, const DoubleVal&, + const DoubleVal&); - static TinyIntVal multiply_tiny_int_val_tiny_int_val( - FunctionContext*, const TinyIntVal&, const TinyIntVal&); - static SmallIntVal multiply_small_int_val_small_int_val( - FunctionContext*, const SmallIntVal&, const SmallIntVal&); - static IntVal multiply_int_val_int_val( - FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal multiply_big_int_val_big_int_val( - FunctionContext*, const BigIntVal&, const BigIntVal&); - static LargeIntVal multiply_large_int_val_large_int_val( - FunctionContext*, const LargeIntVal&, const LargeIntVal&); - static FloatVal multiply_float_val_float_val( - FunctionContext*, const FloatVal&, const FloatVal&); - static DoubleVal multiply_double_val_double_val( - FunctionContext*, const DoubleVal&, const DoubleVal&); + static TinyIntVal multiply_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, + const TinyIntVal&); + static SmallIntVal multiply_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, + const 
SmallIntVal&); + static IntVal multiply_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); + static BigIntVal multiply_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, + const BigIntVal&); + static LargeIntVal multiply_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, + const LargeIntVal&); + static FloatVal multiply_float_val_float_val(FunctionContext*, const FloatVal&, + const FloatVal&); + static DoubleVal multiply_double_val_double_val(FunctionContext*, const DoubleVal&, + const DoubleVal&); - static DoubleVal divide_double_val_double_val( - FunctionContext*, const DoubleVal&, const DoubleVal&); + static DoubleVal divide_double_val_double_val(FunctionContext*, const DoubleVal&, + const DoubleVal&); - static TinyIntVal int_divide_tiny_int_val_tiny_int_val( - FunctionContext*, const TinyIntVal&, const TinyIntVal&); - static SmallIntVal int_divide_small_int_val_small_int_val( - FunctionContext*, const SmallIntVal&, const SmallIntVal&); - static IntVal int_divide_int_val_int_val( - FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal int_divide_big_int_val_big_int_val( - FunctionContext*, const BigIntVal&, const BigIntVal&); - static LargeIntVal int_divide_large_int_val_large_int_val( - FunctionContext*, const LargeIntVal&, const LargeIntVal&); + static TinyIntVal int_divide_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, + const TinyIntVal&); + static SmallIntVal int_divide_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, + const SmallIntVal&); + static IntVal int_divide_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); + static BigIntVal int_divide_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, + const BigIntVal&); + static LargeIntVal int_divide_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, + const LargeIntVal&); - static TinyIntVal mod_tiny_int_val_tiny_int_val( - FunctionContext*, const TinyIntVal&, const TinyIntVal&); - 
static SmallIntVal mod_small_int_val_small_int_val( - FunctionContext*, const SmallIntVal&, const SmallIntVal&); - static IntVal mod_int_val_int_val( - FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal mod_big_int_val_big_int_val( - FunctionContext*, const BigIntVal&, const BigIntVal&); - static LargeIntVal mod_large_int_val_large_int_val( - FunctionContext*, const LargeIntVal&, const LargeIntVal&); + static TinyIntVal mod_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, + const TinyIntVal&); + static SmallIntVal mod_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, + const SmallIntVal&); + static IntVal mod_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); + static BigIntVal mod_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, + const BigIntVal&); + static LargeIntVal mod_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, + const LargeIntVal&); // Binary predicate - static BooleanVal eq_boolean_val_boolean_val( - FunctionContext*, const BooleanVal&, const BooleanVal&); - static BooleanVal eq_tiny_int_val_tiny_int_val( - FunctionContext*, const TinyIntVal&, const TinyIntVal&); - static BooleanVal eq_small_int_val_small_int_val( - FunctionContext*, const SmallIntVal&, const SmallIntVal&); - static BooleanVal eq_int_val_int_val( - FunctionContext*, const IntVal&, const IntVal&); - static BooleanVal eq_big_int_val_big_int_val( - FunctionContext*, const BigIntVal&, const BigIntVal&); - static BooleanVal eq_large_int_val_large_int_val( - FunctionContext*, const LargeIntVal&, const LargeIntVal&); - static BooleanVal eq_float_val_float_val( - FunctionContext*, const FloatVal&, const FloatVal&); - static BooleanVal eq_double_val_double_val( - FunctionContext*, const DoubleVal&, const DoubleVal&); - static BooleanVal eq_string_val_string_val( - FunctionContext*, const StringVal&, const StringVal&); - static BooleanVal eq_datetime_val_datetime_val( - FunctionContext*, const DateTimeVal&, 
const DateTimeVal&); + static BooleanVal eq_boolean_val_boolean_val(FunctionContext*, const BooleanVal&, + const BooleanVal&); + static BooleanVal eq_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, + const TinyIntVal&); + static BooleanVal eq_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, + const SmallIntVal&); + static BooleanVal eq_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); + static BooleanVal eq_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, + const BigIntVal&); + static BooleanVal eq_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, + const LargeIntVal&); + static BooleanVal eq_float_val_float_val(FunctionContext*, const FloatVal&, const FloatVal&); + static BooleanVal eq_double_val_double_val(FunctionContext*, const DoubleVal&, + const DoubleVal&); + static BooleanVal eq_string_val_string_val(FunctionContext*, const StringVal&, + const StringVal&); + static BooleanVal eq_datetime_val_datetime_val(FunctionContext*, const DateTimeVal&, + const DateTimeVal&); - static BooleanVal ne_boolean_val_boolean_val( - FunctionContext*, const BooleanVal&, const BooleanVal&); - static BooleanVal ne_tiny_int_val_tiny_int_val( - FunctionContext*, const TinyIntVal&, const TinyIntVal&); - static BooleanVal ne_small_int_val_small_int_val( - FunctionContext*, const SmallIntVal&, const SmallIntVal&); - static BooleanVal ne_int_val_int_val( - FunctionContext*, const IntVal&, const IntVal&); - static BooleanVal ne_big_int_val_big_int_val( - FunctionContext*, const BigIntVal&, const BigIntVal&); - static BooleanVal ne_large_int_val_large_int_val( - FunctionContext*, const LargeIntVal&, const LargeIntVal&); - static BooleanVal ne_float_val_float_val( - FunctionContext*, const FloatVal&, const FloatVal&); - static BooleanVal ne_double_val_double_val( - FunctionContext*, const DoubleVal&, const DoubleVal&); - static BooleanVal ne_string_val_string_val( - FunctionContext*, const StringVal&, const 
StringVal&); - static BooleanVal ne_datetime_val_datetime_val( - FunctionContext*, const DateTimeVal&, const DateTimeVal&); + static BooleanVal ne_boolean_val_boolean_val(FunctionContext*, const BooleanVal&, + const BooleanVal&); + static BooleanVal ne_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, + const TinyIntVal&); + static BooleanVal ne_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, + const SmallIntVal&); + static BooleanVal ne_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); + static BooleanVal ne_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, + const BigIntVal&); + static BooleanVal ne_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, + const LargeIntVal&); + static BooleanVal ne_float_val_float_val(FunctionContext*, const FloatVal&, const FloatVal&); + static BooleanVal ne_double_val_double_val(FunctionContext*, const DoubleVal&, + const DoubleVal&); + static BooleanVal ne_string_val_string_val(FunctionContext*, const StringVal&, + const StringVal&); + static BooleanVal ne_datetime_val_datetime_val(FunctionContext*, const DateTimeVal&, + const DateTimeVal&); - static BooleanVal gt_boolean_val_boolean_val( - FunctionContext*, const BooleanVal&, const BooleanVal&); - static BooleanVal gt_tiny_int_val_tiny_int_val( - FunctionContext*, const TinyIntVal&, const TinyIntVal&); - static BooleanVal gt_small_int_val_small_int_val( - FunctionContext*, const SmallIntVal&, const SmallIntVal&); - static BooleanVal gt_int_val_int_val( - FunctionContext*, const IntVal&, const IntVal&); - static BooleanVal gt_big_int_val_big_int_val( - FunctionContext*, const BigIntVal&, const BigIntVal&); - static BooleanVal gt_large_int_val_large_int_val( - FunctionContext*, const LargeIntVal&, const LargeIntVal&); - static BooleanVal gt_float_val_float_val( - FunctionContext*, const FloatVal&, const FloatVal&); - static BooleanVal gt_double_val_double_val( - FunctionContext*, const DoubleVal&, const 
DoubleVal&); - static BooleanVal gt_string_val_string_val( - FunctionContext*, const StringVal&, const StringVal&); - static BooleanVal gt_datetime_val_datetime_val( - FunctionContext*, const DateTimeVal&, const DateTimeVal&); + static BooleanVal gt_boolean_val_boolean_val(FunctionContext*, const BooleanVal&, + const BooleanVal&); + static BooleanVal gt_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, + const TinyIntVal&); + static BooleanVal gt_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, + const SmallIntVal&); + static BooleanVal gt_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); + static BooleanVal gt_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, + const BigIntVal&); + static BooleanVal gt_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, + const LargeIntVal&); + static BooleanVal gt_float_val_float_val(FunctionContext*, const FloatVal&, const FloatVal&); + static BooleanVal gt_double_val_double_val(FunctionContext*, const DoubleVal&, + const DoubleVal&); + static BooleanVal gt_string_val_string_val(FunctionContext*, const StringVal&, + const StringVal&); + static BooleanVal gt_datetime_val_datetime_val(FunctionContext*, const DateTimeVal&, + const DateTimeVal&); - static BooleanVal lt_boolean_val_boolean_val( - FunctionContext*, const BooleanVal&, const BooleanVal&); - static BooleanVal lt_tiny_int_val_tiny_int_val( - FunctionContext*, const TinyIntVal&, const TinyIntVal&); - static BooleanVal lt_small_int_val_small_int_val( - FunctionContext*, const SmallIntVal&, const SmallIntVal&); - static BooleanVal lt_int_val_int_val( - FunctionContext*, const IntVal&, const IntVal&); - static BooleanVal lt_big_int_val_big_int_val( - FunctionContext*, const BigIntVal&, const BigIntVal&); - static BooleanVal lt_large_int_val_large_int_val( - FunctionContext*, const LargeIntVal&, const LargeIntVal&); - static BooleanVal lt_float_val_float_val( - FunctionContext*, const FloatVal&, const 
FloatVal&); - static BooleanVal lt_double_val_double_val( - FunctionContext*, const DoubleVal&, const DoubleVal&); - static BooleanVal lt_string_val_string_val( - FunctionContext*, const StringVal&, const StringVal&); - static BooleanVal lt_datetime_val_datetime_val( - FunctionContext*, const DateTimeVal&, const DateTimeVal&); + static BooleanVal lt_boolean_val_boolean_val(FunctionContext*, const BooleanVal&, + const BooleanVal&); + static BooleanVal lt_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, + const TinyIntVal&); + static BooleanVal lt_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, + const SmallIntVal&); + static BooleanVal lt_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); + static BooleanVal lt_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, + const BigIntVal&); + static BooleanVal lt_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, + const LargeIntVal&); + static BooleanVal lt_float_val_float_val(FunctionContext*, const FloatVal&, const FloatVal&); + static BooleanVal lt_double_val_double_val(FunctionContext*, const DoubleVal&, + const DoubleVal&); + static BooleanVal lt_string_val_string_val(FunctionContext*, const StringVal&, + const StringVal&); + static BooleanVal lt_datetime_val_datetime_val(FunctionContext*, const DateTimeVal&, + const DateTimeVal&); - static BooleanVal ge_boolean_val_boolean_val( - FunctionContext*, const BooleanVal&, const BooleanVal&); - static BooleanVal ge_tiny_int_val_tiny_int_val( - FunctionContext*, const TinyIntVal&, const TinyIntVal&); - static BooleanVal ge_small_int_val_small_int_val( - FunctionContext*, const SmallIntVal&, const SmallIntVal&); - static BooleanVal ge_int_val_int_val( - FunctionContext*, const IntVal&, const IntVal&); - static BooleanVal ge_big_int_val_big_int_val( - FunctionContext*, const BigIntVal&, const BigIntVal&); - static BooleanVal ge_large_int_val_large_int_val( - FunctionContext*, const LargeIntVal&, const 
LargeIntVal&); - static BooleanVal ge_float_val_float_val( - FunctionContext*, const FloatVal&, const FloatVal&); - static BooleanVal ge_double_val_double_val( - FunctionContext*, const DoubleVal&, const DoubleVal&); - static BooleanVal ge_string_val_string_val( - FunctionContext*, const StringVal&, const StringVal&); - static BooleanVal ge_datetime_val_datetime_val( - FunctionContext*, const DateTimeVal&, const DateTimeVal&); + static BooleanVal ge_boolean_val_boolean_val(FunctionContext*, const BooleanVal&, + const BooleanVal&); + static BooleanVal ge_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, + const TinyIntVal&); + static BooleanVal ge_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, + const SmallIntVal&); + static BooleanVal ge_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); + static BooleanVal ge_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, + const BigIntVal&); + static BooleanVal ge_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, + const LargeIntVal&); + static BooleanVal ge_float_val_float_val(FunctionContext*, const FloatVal&, const FloatVal&); + static BooleanVal ge_double_val_double_val(FunctionContext*, const DoubleVal&, + const DoubleVal&); + static BooleanVal ge_string_val_string_val(FunctionContext*, const StringVal&, + const StringVal&); + static BooleanVal ge_datetime_val_datetime_val(FunctionContext*, const DateTimeVal&, + const DateTimeVal&); - static BooleanVal le_boolean_val_boolean_val( - FunctionContext*, const BooleanVal&, const BooleanVal&); - static BooleanVal le_tiny_int_val_tiny_int_val( - FunctionContext*, const TinyIntVal&, const TinyIntVal&); - static BooleanVal le_small_int_val_small_int_val( - FunctionContext*, const SmallIntVal&, const SmallIntVal&); - static BooleanVal le_int_val_int_val( - FunctionContext*, const IntVal&, const IntVal&); - static BooleanVal le_big_int_val_big_int_val( - FunctionContext*, const BigIntVal&, const BigIntVal&); - 
static BooleanVal le_large_int_val_large_int_val( - FunctionContext*, const LargeIntVal&, const LargeIntVal&); - static BooleanVal le_float_val_float_val( - FunctionContext*, const FloatVal&, const FloatVal&); - static BooleanVal le_double_val_double_val( - FunctionContext*, const DoubleVal&, const DoubleVal&); - static BooleanVal le_string_val_string_val( - FunctionContext*, const StringVal&, const StringVal&); - static BooleanVal le_datetime_val_datetime_val( - FunctionContext*, const DateTimeVal&, const DateTimeVal&); + static BooleanVal le_boolean_val_boolean_val(FunctionContext*, const BooleanVal&, + const BooleanVal&); + static BooleanVal le_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, + const TinyIntVal&); + static BooleanVal le_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, + const SmallIntVal&); + static BooleanVal le_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); + static BooleanVal le_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, + const BigIntVal&); + static BooleanVal le_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, + const LargeIntVal&); + static BooleanVal le_float_val_float_val(FunctionContext*, const FloatVal&, const FloatVal&); + static BooleanVal le_double_val_double_val(FunctionContext*, const DoubleVal&, + const DoubleVal&); + static BooleanVal le_string_val_string_val(FunctionContext*, const StringVal&, + const StringVal&); + static BooleanVal le_datetime_val_datetime_val(FunctionContext*, const DateTimeVal&, + const DateTimeVal&); }; } // namespace doris #endif - diff --git a/be/src/exprs/predicate.h b/be/src/exprs/predicate.h index 8e9a54d08b3248..43336fab70ff5e 100644 --- a/be/src/exprs/predicate.h +++ b/be/src/exprs/predicate.h @@ -24,13 +24,13 @@ namespace doris { class TExprNode; -class Predicate: public Expr { +class Predicate : public Expr { protected: friend class Expr; Predicate(const TExprNode& node) : Expr(node) {} }; -} +} // namespace doris 
#endif diff --git a/be/src/exprs/scalar_fn_call.cpp b/be/src/exprs/scalar_fn_call.cpp index 7c6b03fc8fc031..45820105b08064 100644 --- a/be/src/exprs/scalar_fn_call.cpp +++ b/be/src/exprs/scalar_fn_call.cpp @@ -21,30 +21,27 @@ #include "exprs/anyval_util.h" #include "exprs/expr_context.h" -#include "runtime/user_function_cache.h" #include "runtime/runtime_state.h" +#include "runtime/user_function_cache.h" #include "udf/udf_internal.h" #include "util/debug_util.h" #include "util/symbols_util.h" namespace doris { -ScalarFnCall::ScalarFnCall(const TExprNode& node) : - Expr(node), - _vararg_start_idx(node.__isset.vararg_start_idx ? node.vararg_start_idx : -1), - _scalar_fn_wrapper(NULL), - _prepare_fn(NULL), - _close_fn(NULL), - _scalar_fn(NULL) { +ScalarFnCall::ScalarFnCall(const TExprNode& node) + : Expr(node), + _vararg_start_idx(node.__isset.vararg_start_idx ? node.vararg_start_idx : -1), + _scalar_fn_wrapper(NULL), + _prepare_fn(NULL), + _close_fn(NULL), + _scalar_fn(NULL) { DCHECK_NE(_fn.binary_type, TFunctionBinaryType::HIVE); } -ScalarFnCall::~ScalarFnCall() { -} +ScalarFnCall::~ScalarFnCall() {} -Status ScalarFnCall::prepare( - RuntimeState* state, const RowDescriptor& desc, - ExprContext* context) { +Status ScalarFnCall::prepare(RuntimeState* state, const RowDescriptor& desc, ExprContext* context) { RETURN_IF_ERROR(Expr::prepare(state, desc, context)); if (_fn.scalar_fn.symbol.empty()) { // This path is intended to only be used during development to test FE @@ -74,14 +71,14 @@ Status ScalarFnCall::prepare( } } - _fn_context_index = context->register_func( - state, return_type, arg_types, varargs_buffer_size); + _fn_context_index = context->register_func(state, return_type, arg_types, varargs_buffer_size); // _scalar_fn = OpcodeRegistry::instance()->get_function_ptr(_opcode); Status status = Status::OK(); if (_scalar_fn == NULL) { if (SymbolsUtil::is_mangled(_fn.scalar_fn.symbol)) { status = UserFunctionCache::instance()->get_function_ptr( - _fn.id, 
_fn.scalar_fn.symbol, _fn.hdfs_location, _fn.checksum, &_scalar_fn, &_cache_entry); + _fn.id, _fn.scalar_fn.symbol, _fn.hdfs_location, _fn.checksum, &_scalar_fn, + &_cache_entry); } else { std::vector arg_types; for (auto& t_type : _fn.arg_types) { @@ -89,10 +86,10 @@ Status ScalarFnCall::prepare( } // ColumnType ret_type(INVALID_TYPE); // ret_type = ColumnType(thrift_to_type(_fn.ret_type)); - std::string symbol = SymbolsUtil::mangle_user_function( - _fn.scalar_fn.symbol, arg_types, _fn.has_var_args, NULL); + std::string symbol = SymbolsUtil::mangle_user_function(_fn.scalar_fn.symbol, arg_types, + _fn.has_var_args, NULL); status = UserFunctionCache::instance()->get_function_ptr( - _fn.id, symbol, _fn.hdfs_location, _fn.checksum, &_scalar_fn, &_cache_entry); + _fn.id, symbol, _fn.hdfs_location, _fn.checksum, &_scalar_fn, &_cache_entry); } } #if 0 @@ -148,18 +145,18 @@ Status ScalarFnCall::prepare( #endif if (_fn.scalar_fn.__isset.prepare_fn_symbol) { RETURN_IF_ERROR(get_function(state, _fn.scalar_fn.prepare_fn_symbol, - reinterpret_cast(&_prepare_fn))); + reinterpret_cast(&_prepare_fn))); } if (_fn.scalar_fn.__isset.close_fn_symbol) { RETURN_IF_ERROR(get_function(state, _fn.scalar_fn.close_fn_symbol, - reinterpret_cast(&_close_fn))); + reinterpret_cast(&_close_fn))); } return status; } -Status ScalarFnCall::open( - RuntimeState* state, ExprContext* ctx, FunctionContext::FunctionStateScope scope) { +Status ScalarFnCall::open(RuntimeState* state, ExprContext* ctx, + FunctionContext::FunctionStateScope scope) { // Opens and inits children RETURN_IF_ERROR(Expr::open(state, ctx, scope)); FunctionContext* fn_ctx = ctx->fn_context(_fn_context_index); @@ -216,8 +213,8 @@ Status ScalarFnCall::open( return Status::OK(); } -void ScalarFnCall::close( - RuntimeState* state, ExprContext* context, FunctionContext::FunctionStateScope scope) { +void ScalarFnCall::close(RuntimeState* state, ExprContext* context, + FunctionContext::FunctionStateScope scope) { if (_fn_context_index != 
-1 && _close_fn != NULL) { FunctionContext* fn_ctx = context->fn_context(_fn_context_index); _close_fn(fn_ctx, FunctionContext::THREAD_LOCAL); @@ -236,11 +233,11 @@ bool ScalarFnCall::is_constant() const { } Status ScalarFnCall::get_function(RuntimeState* state, const std::string& symbol, void** fn) { - if (_fn.binary_type == TFunctionBinaryType::NATIVE - || _fn.binary_type == TFunctionBinaryType::BUILTIN - || _fn.binary_type == TFunctionBinaryType::HIVE) { - return UserFunctionCache::instance()->get_function_ptr( - _fn.id, symbol, _fn.hdfs_location, _fn.checksum, fn, &_cache_entry); + if (_fn.binary_type == TFunctionBinaryType::NATIVE || + _fn.binary_type == TFunctionBinaryType::BUILTIN || + _fn.binary_type == TFunctionBinaryType::HIVE) { + return UserFunctionCache::instance()->get_function_ptr(_fn.id, symbol, _fn.hdfs_location, + _fn.checksum, fn, &_cache_entry); } else { #if 0 DCHECK_EQ(_fn.binary_type, TFunctionBinaryType::IR); @@ -260,8 +257,8 @@ Status ScalarFnCall::get_function(RuntimeState* state, const std::string& symbol return Status::OK(); } -void ScalarFnCall::evaluate_children( - ExprContext* context, TupleRow* row, std::vector* input_vals) { +void ScalarFnCall::evaluate_children(ExprContext* context, TupleRow* row, + std::vector* input_vals) { DCHECK_EQ(input_vals->size(), num_fixed_args()); FunctionContext* fn_ctx = context->fn_context(_fn_context_index); uint8_t* varargs_buffer = fn_ctx->impl()->varargs_buffer(); @@ -278,157 +275,137 @@ void ScalarFnCall::evaluate_children( } } -template +template RETURN_TYPE ScalarFnCall::interpret_eval(ExprContext* context, TupleRow* row) { DCHECK(_scalar_fn != NULL); FunctionContext* fn_ctx = context->fn_context(_fn_context_index); std::vector* input_vals = fn_ctx->impl()->staging_input_vals(); - + evaluate_children(context, row, input_vals); if (_vararg_start_idx == -1) { switch (_children.size()) { case 0: - typedef RETURN_TYPE(*ScalarFn0)(FunctionContext*); + typedef RETURN_TYPE 
(*ScalarFn0)(FunctionContext*); return reinterpret_cast(_scalar_fn)(fn_ctx); case 1: - typedef RETURN_TYPE(*ScalarFn1)(FunctionContext*, const AnyVal& a1); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0]); + typedef RETURN_TYPE (*ScalarFn1)(FunctionContext*, const AnyVal& a1); + return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0]); case 2: - typedef RETURN_TYPE(*ScalarFn2)( - FunctionContext*, const AnyVal& a1, const AnyVal& a2); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1]); + typedef RETURN_TYPE (*ScalarFn2)(FunctionContext*, const AnyVal& a1, const AnyVal& a2); + return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], + *(*input_vals)[1]); case 3: - typedef RETURN_TYPE(*ScalarFn3)( - FunctionContext*, const AnyVal& a1, const AnyVal& a2, const AnyVal& a3); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2]); + typedef RETURN_TYPE (*ScalarFn3)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, + const AnyVal& a3); + return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], + *(*input_vals)[1], *(*input_vals)[2]); case 4: - typedef RETURN_TYPE(*ScalarFn4)( - FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], - *(*input_vals)[2], *(*input_vals)[3]); + typedef RETURN_TYPE (*ScalarFn4)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, + const AnyVal& a3, const AnyVal& a4); + return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], + *(*input_vals)[1], *(*input_vals)[2], + *(*input_vals)[3]); case 5: - typedef RETURN_TYPE(*ScalarFn5)( - FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], - *(*input_vals)[2], *(*input_vals)[3], *(*input_vals)[4]); + 
typedef RETURN_TYPE (*ScalarFn5)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, + const AnyVal& a3, const AnyVal& a4, const AnyVal& a5); + return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], + *(*input_vals)[1], *(*input_vals)[2], + *(*input_vals)[3], *(*input_vals)[4]); case 6: - typedef RETURN_TYPE(*ScalarFn6)( - FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, - const AnyVal& a6); + typedef RETURN_TYPE (*ScalarFn6)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, + const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, + const AnyVal& a6); return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], - *(*input_vals)[2], *(*input_vals)[3], *(*input_vals)[4], - *(*input_vals)[5]); + fn_ctx, *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], + *(*input_vals)[3], *(*input_vals)[4], *(*input_vals)[5]); case 7: - typedef RETURN_TYPE(*ScalarFn7)( - FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, - const AnyVal& a6, const AnyVal& a7); + typedef RETURN_TYPE (*ScalarFn7)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, + const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, + const AnyVal& a6, const AnyVal& a7); return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], - *(*input_vals)[2], *(*input_vals)[3], *(*input_vals)[4], - *(*input_vals)[5], *(*input_vals)[6]); + fn_ctx, *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], + *(*input_vals)[3], *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6]); case 8: - typedef RETURN_TYPE(*ScalarFn8)( - FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, - const AnyVal& a6, const AnyVal& a7, const AnyVal& a8); + typedef RETURN_TYPE (*ScalarFn8)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, + const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, + const 
AnyVal& a6, const AnyVal& a7, const AnyVal& a8); return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], - *(*input_vals)[2], *(*input_vals)[3], *(*input_vals)[4], - *(*input_vals)[5], *(*input_vals)[6], *(*input_vals)[7]); + fn_ctx, *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], + *(*input_vals)[3], *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6], + *(*input_vals)[7]); default: DCHECK(false) << "Interpreted path not implemented. We should have " - << "codegen'd the wrapper"; + << "codegen'd the wrapper"; } } else { int num_varargs = _children.size() - num_fixed_args(); const AnyVal* varargs = reinterpret_cast(fn_ctx->impl()->varargs_buffer()); switch (num_fixed_args()) { case 0: - typedef RETURN_TYPE(*VarargFn0)( - FunctionContext*, int num_varargs, const AnyVal* varargs); + typedef RETURN_TYPE (*VarargFn0)(FunctionContext*, int num_varargs, + const AnyVal* varargs); return reinterpret_cast(_scalar_fn)(fn_ctx, num_varargs, varargs); case 1: - typedef RETURN_TYPE(*VarargFn1)( - FunctionContext*, const AnyVal& a1, int num_varargs, const AnyVal* varargs); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], num_varargs, varargs); + typedef RETURN_TYPE (*VarargFn1)(FunctionContext*, const AnyVal& a1, int num_varargs, + const AnyVal* varargs); + return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], num_varargs, + varargs); case 2: - typedef RETURN_TYPE(*VarargFn2)( - FunctionContext*, const AnyVal& a1, const AnyVal& a2, - int num_varargs, const AnyVal* varargs); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], - num_varargs, varargs); + typedef RETURN_TYPE (*VarargFn2)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, + int num_varargs, const AnyVal* varargs); + return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], + *(*input_vals)[1], num_varargs, varargs); case 3: - typedef RETURN_TYPE(*VarargFn3)( - FunctionContext*, const AnyVal& a1, const 
AnyVal& a2, - const AnyVal& a3, int num_varargs, const AnyVal* varargs); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], - *(*input_vals)[2], num_varargs, varargs); + typedef RETURN_TYPE (*VarargFn3)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, + const AnyVal& a3, int num_varargs, + const AnyVal* varargs); + return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], + *(*input_vals)[1], *(*input_vals)[2], + num_varargs, varargs); case 4: - typedef RETURN_TYPE(*VarargFn4)( - FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, int num_varargs, - const AnyVal* varargs); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], - *(*input_vals)[2], *(*input_vals)[3], num_varargs, - varargs); + typedef RETURN_TYPE (*VarargFn4)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, + const AnyVal& a3, const AnyVal& a4, int num_varargs, + const AnyVal* varargs); + return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], + *(*input_vals)[1], *(*input_vals)[2], + *(*input_vals)[3], num_varargs, varargs); case 5: - typedef RETURN_TYPE(*VarargFn5)( - FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, - int num_varargs, const AnyVal* varargs); + typedef RETURN_TYPE (*VarargFn5)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, + const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, + int num_varargs, const AnyVal* varargs); return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], - *(*input_vals)[2], *(*input_vals)[3], *(*input_vals)[4], - num_varargs, varargs); + fn_ctx, *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], + *(*input_vals)[3], *(*input_vals)[4], num_varargs, varargs); case 6: - typedef RETURN_TYPE(*VarargFn6)( - FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, - const AnyVal& a6, 
int num_varargs, const AnyVal* varargs); + typedef RETURN_TYPE (*VarargFn6)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, + const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, + const AnyVal& a6, int num_varargs, + const AnyVal* varargs); return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], - *(*input_vals)[2], *(*input_vals)[3], *(*input_vals)[4], - *(*input_vals)[5], num_varargs, varargs); + fn_ctx, *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], + *(*input_vals)[3], *(*input_vals)[4], *(*input_vals)[5], num_varargs, varargs); case 7: - typedef RETURN_TYPE(*VarargFn7)( - FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, - const AnyVal& a6, const AnyVal& a7, int num_varargs, - const AnyVal* varargs); + typedef RETURN_TYPE (*VarargFn7)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, + const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, + const AnyVal& a6, const AnyVal& a7, int num_varargs, + const AnyVal* varargs); return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], - *(*input_vals)[2], *(*input_vals)[3], *(*input_vals)[4], - *(*input_vals)[5], *(*input_vals)[6], num_varargs, - varargs); + fn_ctx, *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], + *(*input_vals)[3], *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6], + num_varargs, varargs); case 8: - typedef RETURN_TYPE(*VarargFn8)( - FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, - const AnyVal& a6, const AnyVal& a7, const AnyVal& a8, - int num_varargs, const AnyVal* varargs); + typedef RETURN_TYPE (*VarargFn8)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, + const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, + const AnyVal& a6, const AnyVal& a7, const AnyVal& a8, + int num_varargs, const AnyVal* varargs); return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], 
*(*input_vals)[1], - *(*input_vals)[2], *(*input_vals)[3], *(*input_vals)[4], - *(*input_vals)[5], *(*input_vals)[6], *(*input_vals)[7], - num_varargs, varargs); + fn_ctx, *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], + *(*input_vals)[3], *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6], + *(*input_vals)[7], num_varargs, varargs); default: DCHECK(false) << "Interpreted path not implemented. We should have " - << "codegen'd the wrapper"; + << "codegen'd the wrapper"; } } return RETURN_TYPE::null(); @@ -521,10 +498,10 @@ FloatVal ScalarFnCall::get_float_val(ExprContext* context, TupleRow* row) { DoubleVal ScalarFnCall::get_double_val(ExprContext* context, TupleRow* row) { DCHECK(_type.type == TYPE_DOUBLE || _type.type == TYPE_TIME); DCHECK(context != NULL); - if (_scalar_fn_wrapper == NULL) { + if (_scalar_fn_wrapper == NULL) { return interpret_eval(context, row); } - + DoubleWrapper fn = reinterpret_cast(_scalar_fn_wrapper); return fn(context, row); } @@ -571,9 +548,8 @@ DecimalV2Val ScalarFnCall::get_decimalv2_val(ExprContext* context, TupleRow* row std::string ScalarFnCall::debug_string() const { std::stringstream out; - out << "ScalarFnCall(udf_type=" << _fn.binary_type - << " location=" << _fn.hdfs_location + out << "ScalarFnCall(udf_type=" << _fn.binary_type << " location=" << _fn.hdfs_location << " symbol_name=" << _fn.scalar_fn.symbol << Expr::debug_string() << ")"; return out.str(); } -} +} // namespace doris diff --git a/be/src/exprs/scalar_fn_call.h b/be/src/exprs/scalar_fn_call.h index 58c296a2a5cfb2..8e48360b36e090 100644 --- a/be/src/exprs/scalar_fn_call.h +++ b/be/src/exprs/scalar_fn_call.h @@ -50,7 +50,7 @@ class ScalarFnCall : public Expr { public: virtual std::string debug_string() const; virtual ~ScalarFnCall(); - virtual Expr* clone(ObjectPool* pool) const override { + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new ScalarFnCall(*this)); } @@ -58,12 +58,11 @@ class ScalarFnCall : public Expr { friend 
class Expr; ScalarFnCall(const TExprNode& node); - virtual Status prepare( - RuntimeState* state, const RowDescriptor& desc, ExprContext* context); - virtual Status open( - RuntimeState* state, ExprContext* context, FunctionContext::FunctionStateScope scope); - virtual void close( - RuntimeState* state, ExprContext* context, FunctionContext::FunctionStateScope scope); + virtual Status prepare(RuntimeState* state, const RowDescriptor& desc, ExprContext* context); + virtual Status open(RuntimeState* state, ExprContext* context, + FunctionContext::FunctionStateScope scope); + virtual void close(RuntimeState* state, ExprContext* context, + FunctionContext::FunctionStateScope scope); virtual bool is_constant() const; @@ -118,14 +117,13 @@ class ScalarFnCall : public Expr { /// Evaluates the children exprs and stores the results in input_vals. Used in the /// interpreted path. void evaluate_children(ExprContext* context, TupleRow* row, - std::vector* input_vals); + std::vector* input_vals); /// Function to call _scalar_fn. Used in the interpreted path. 
- template + template RETURN_TYPE interpret_eval(ExprContext* context, TupleRow* row); }; -} +} // namespace doris #endif - diff --git a/be/src/exprs/slot_ref.cpp b/be/src/exprs/slot_ref.cpp index ccba53d5402c73..dc1c98b58c9be4 100644 --- a/be/src/exprs/slot_ref.cpp +++ b/be/src/exprs/slot_ref.cpp @@ -25,41 +25,36 @@ namespace doris { -SlotRef::SlotRef(const TExprNode& node) : - Expr(node, true), - _slot_offset(-1), // invalid - _null_indicator_offset(0, 0), - _slot_id(node.slot_ref.slot_id), - _tuple_id(node.slot_ref.tuple_id) { +SlotRef::SlotRef(const TExprNode& node) + : Expr(node, true), + _slot_offset(-1), // invalid + _null_indicator_offset(0, 0), + _slot_id(node.slot_ref.slot_id), + _tuple_id(node.slot_ref.tuple_id) { // _slot/_null_indicator_offset are set in Prepare() } -SlotRef::SlotRef(const SlotDescriptor* desc) : - Expr(desc->type(), true), - _slot_offset(-1), - _null_indicator_offset(0, 0), - _slot_id(desc->id()) { +SlotRef::SlotRef(const SlotDescriptor* desc) + : Expr(desc->type(), true), + _slot_offset(-1), + _null_indicator_offset(0, 0), + _slot_id(desc->id()) { // _slot/_null_indicator_offset are set in Prepare() } -SlotRef::SlotRef(const SlotDescriptor* desc, const TypeDescriptor& type) : - Expr(type, true), - _slot_offset(-1), - _null_indicator_offset(0, 0), - _slot_id(desc->id()) { +SlotRef::SlotRef(const SlotDescriptor* desc, const TypeDescriptor& type) + : Expr(type, true), _slot_offset(-1), _null_indicator_offset(0, 0), _slot_id(desc->id()) { // _slot/_null_indicator_offset are set in Prepare() } -SlotRef::SlotRef(const TypeDescriptor& type, int offset) : - Expr(type, true), - _tuple_idx(0), - _slot_offset(offset), - _null_indicator_offset(0, -1), - _slot_id(-1) { -} +SlotRef::SlotRef(const TypeDescriptor& type, int offset) + : Expr(type, true), + _tuple_idx(0), + _slot_offset(offset), + _null_indicator_offset(0, -1), + _slot_id(-1) {} -Status SlotRef::prepare(const SlotDescriptor* slot_desc, - const RowDescriptor& row_desc) { +Status 
SlotRef::prepare(const SlotDescriptor* slot_desc, const RowDescriptor& row_desc) { if (!slot_desc->is_materialized()) { std::stringstream error; error << "reference to non-materialized slot. slot_id: " << _slot_id; @@ -76,14 +71,13 @@ Status SlotRef::prepare(const SlotDescriptor* slot_desc, return Status::OK(); } -Status SlotRef::prepare( - RuntimeState* state, const RowDescriptor& row_desc, ExprContext* ctx) { +Status SlotRef::prepare(RuntimeState* state, const RowDescriptor& row_desc, ExprContext* ctx) { DCHECK_EQ(_children.size(), 0); if (_slot_id == -1) { return Status::OK(); } - const SlotDescriptor* slot_desc = state->desc_tbl().get_slot_descriptor(_slot_id); + const SlotDescriptor* slot_desc = state->desc_tbl().get_slot_descriptor(_slot_id); if (slot_desc == NULL) { // TODO: create macro MAKE_ERROR() that returns a stream std::stringstream error; @@ -127,10 +121,9 @@ bool SlotRef::is_bound(std::vector* tuple_ids) const { std::string SlotRef::debug_string() const { std::stringstream out; - out << "SlotRef(slot_id=" << _slot_id - << " tuple_idx=" << _tuple_idx << " slot_offset=" << _slot_offset - << " null_indicator=" << _null_indicator_offset - << " " << Expr::debug_string() << ")"; + out << "SlotRef(slot_id=" << _slot_id << " tuple_idx=" << _tuple_idx + << " slot_offset=" << _slot_offset << " null_indicator=" << _null_indicator_offset << " " + << Expr::debug_string() << ")"; return out.str(); } @@ -252,4 +245,4 @@ DecimalV2Val SlotRef::get_decimalv2_val(ExprContext* context, TupleRow* row) { return DecimalV2Val(reinterpret_cast(t->get_slot(_slot_offset))->value); } -} +} // namespace doris diff --git a/be/src/exprs/slot_ref.h b/be/src/exprs/slot_ref.h index e2c4c052d36cdb..5916a8bfdef40e 100644 --- a/be/src/exprs/slot_ref.h +++ b/be/src/exprs/slot_ref.h @@ -31,9 +31,7 @@ class SlotRef : public Expr { public: SlotRef(const TExprNode& node); SlotRef(const SlotDescriptor* desc); - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new 
SlotRef(*this)); - } + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new SlotRef(*this)); } // TODO: this is a hack to allow aggregation nodes to work around NULL slot // descriptors. Ideally the FE would dictate the type of the intermediate SlotRefs. @@ -42,11 +40,9 @@ class SlotRef : public Expr { // Used for testing. get_value will return tuple + offset interpreted as 'type' SlotRef(const TypeDescriptor& type, int offset); - Status prepare(const SlotDescriptor* slot_desc, - const RowDescriptor& row_desc); + Status prepare(const SlotDescriptor* slot_desc, const RowDescriptor& row_desc); - virtual Status prepare( - RuntimeState* state, const RowDescriptor& row_desc, ExprContext* ctx); + virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc, ExprContext* ctx); static void* get_value(Expr* expr, TupleRow* row); void* get_slot(TupleRow* row); Tuple* get_tuple(TupleRow* row); @@ -54,20 +50,12 @@ class SlotRef : public Expr { static bool vector_compute_fn(Expr* expr, VectorizedRowBatch* batch); static bool is_nullable(Expr* expr); virtual std::string debug_string() const; - virtual bool is_constant() const { - return false; - } - virtual bool is_vectorized() const { - return true; - } + virtual bool is_constant() const { return false; } + virtual bool is_vectorized() const { return true; } virtual bool is_bound(std::vector* tuple_ids) const; virtual int get_slot_ids(std::vector* slot_ids) const; - SlotId slot_id() const { - return _slot_id; - } - inline NullIndicatorOffset null_indicator_offset() const { - return _null_indicator_offset; - } + SlotId slot_id() const { return _slot_id; } + inline NullIndicatorOffset null_indicator_offset() const { return _null_indicator_offset; } virtual doris_udf::BooleanVal get_boolean_val(ExprContext* context, TupleRow*); virtual doris_udf::TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*); @@ -84,12 +72,12 @@ class SlotRef : public Expr { // virtual doris_udf::ArrayVal 
GetArrayVal(ExprContext* context, TupleRow*); private: - int _tuple_idx; // within row - int _slot_offset; // within tuple - NullIndicatorOffset _null_indicator_offset; // within tuple + int _tuple_idx; // within row + int _slot_offset; // within tuple + NullIndicatorOffset _null_indicator_offset; // within tuple const SlotId _slot_id; bool _tuple_is_nullable; // true if the tuple is nullable. - TupleId _tuple_id; // used for desc this slot from + TupleId _tuple_id; // used for desc this slot from bool _is_nullable; }; @@ -128,6 +116,6 @@ inline bool SlotRef::is_nullable(Expr* expr) { return ref->_is_nullable; } -} +} // namespace doris #endif diff --git a/be/src/exprs/string_functions.cpp b/be/src/exprs/string_functions.cpp index 4d7c8ff70790ea..a2b7123f87d477 100644 --- a/be/src/exprs/string_functions.cpp +++ b/be/src/exprs/string_functions.cpp @@ -19,19 +19,19 @@ #include +#include + #include "exprs/anyval_util.h" #include "exprs/expr.h" #include "math_functions.h" #include "runtime/string_value.hpp" #include "runtime/tuple_row.h" #include "util/url_parser.h" -#include // NOTE: be careful not to use string::append. It is not performant. namespace doris { -void StringFunctions::init() { -} +void StringFunctions::init() {} size_t get_utf8_byte_length(unsigned char byte) { size_t char_size = 0; @@ -64,9 +64,8 @@ size_t get_char_len(const StringVal& str, std::vector* str_index) { // - 1-indexed positions // - supported negative positions (count from the end of the string) // - [optional] len. 
No len indicates longest substr possible -StringVal StringFunctions::substring( - FunctionContext* context, const StringVal& str, - const IntVal& pos, const IntVal& len) { +StringVal StringFunctions::substring(FunctionContext* context, const StringVal& str, + const IntVal& pos, const IntVal& len) { if (str.is_null || pos.is_null || len.is_null || pos.val > str.len) { return StringVal::null(); } @@ -108,8 +107,8 @@ StringVal StringFunctions::substring( } } -StringVal StringFunctions::substring( - FunctionContext* context, const StringVal& str, const IntVal& pos) { +StringVal StringFunctions::substring(FunctionContext* context, const StringVal& str, + const IntVal& pos) { // StringVal.len is an int => INT32_MAX return substring(context, str, pos, IntVal(INT32_MAX)); } @@ -117,23 +116,22 @@ StringVal StringFunctions::substring( // Implementation of Left. The signature is // string left(string input, int len) // This behaves identically to the mysql implementation. -StringVal StringFunctions::left( - FunctionContext* context, const StringVal& str, const IntVal& len) { +StringVal StringFunctions::left(FunctionContext* context, const StringVal& str, const IntVal& len) { return substring(context, str, 1, len); } // Implementation of Right. The signature is // string right(string input, int len) // This behaves identically to the mysql implementation. 
-StringVal StringFunctions::right( - FunctionContext* context, const StringVal& str, const IntVal& len) { +StringVal StringFunctions::right(FunctionContext* context, const StringVal& str, + const IntVal& len) { // Don't index past the beginning of str, otherwise we'll get an empty string back int32_t pos = std::max(-len.val, static_cast(-str.len)); return substring(context, str, IntVal(pos), len); } -BooleanVal StringFunctions::starts_with( - FunctionContext* context, const StringVal& str, const StringVal& prefix) { +BooleanVal StringFunctions::starts_with(FunctionContext* context, const StringVal& str, + const StringVal& prefix) { if (str.is_null || prefix.is_null) { return BooleanVal::null(); } @@ -142,8 +140,8 @@ BooleanVal StringFunctions::starts_with( return BooleanVal(str_sp.starts_with(prefix_sp)); } -BooleanVal StringFunctions::ends_with( - FunctionContext* context, const StringVal& str, const StringVal& suffix) { +BooleanVal StringFunctions::ends_with(FunctionContext* context, const StringVal& str, + const StringVal& suffix) { if (str.is_null || suffix.is_null) { return BooleanVal::null(); } @@ -152,8 +150,7 @@ BooleanVal StringFunctions::ends_with( return BooleanVal(str_sp.ends_with(suffix_sp)); } -BooleanVal StringFunctions::null_or_empty( - FunctionContext* context, const StringVal& str) { +BooleanVal StringFunctions::null_or_empty(FunctionContext* context, const StringVal& str) { if (str.is_null || str.len == 0) { return 1; } else { @@ -162,7 +159,7 @@ BooleanVal StringFunctions::null_or_empty( } StringVal StringFunctions::space(FunctionContext* context, const IntVal& len) { - if (len.is_null){ + if (len.is_null) { return StringVal::null(); } if (len.val <= 0) { @@ -176,8 +173,7 @@ StringVal StringFunctions::space(FunctionContext* context, const IntVal& len) { return result; } -StringVal StringFunctions::repeat( - FunctionContext* context, const StringVal& str, const IntVal& n) { +StringVal StringFunctions::repeat(FunctionContext* context, const 
StringVal& str, const IntVal& n) { if (str.is_null || n.is_null) { return StringVal::null(); } @@ -199,9 +195,8 @@ StringVal StringFunctions::repeat( return result; } -StringVal StringFunctions::lpad( - FunctionContext* context, const StringVal& str, - const IntVal& len, const StringVal& pad) { +StringVal StringFunctions::lpad(FunctionContext* context, const StringVal& str, const IntVal& len, + const StringVal& pad) { if (str.is_null || len.is_null || pad.is_null || len.val < 0) { return StringVal::null(); } @@ -210,7 +205,7 @@ StringVal StringFunctions::lpad( size_t str_char_size = get_char_len(str, &str_index); std::vector pad_index; size_t pad_char_size = get_char_len(pad, &pad_index); - + // Corner cases: Shrink the original string, or leave it alone. // TODO: Hive seems to go into an infinite loop if pad.len == 0, // so we should pay attention to Hive's future solution to be compatible. @@ -251,9 +246,8 @@ StringVal StringFunctions::lpad( return result; } -StringVal StringFunctions::rpad( - FunctionContext* context, const StringVal& str, - const IntVal& len, const StringVal& pad) { +StringVal StringFunctions::rpad(FunctionContext* context, const StringVal& str, const IntVal& len, + const StringVal& pad) { if (str.is_null || len.is_null || pad.is_null || len.val < 0) { return StringVal::null(); } @@ -301,8 +295,9 @@ StringVal StringFunctions::rpad( return result; } -StringVal StringFunctions::append_trailing_char_if_absent(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str, const doris_udf::StringVal& trailing_char) { +StringVal StringFunctions::append_trailing_char_if_absent( + doris_udf::FunctionContext* context, const doris_udf::StringVal& str, + const doris_udf::StringVal& trailing_char) { if (str.is_null || trailing_char.is_null || trailing_char.len != 1) { return StringVal::null(); } @@ -449,9 +444,8 @@ IntVal StringFunctions::ascii(FunctionContext* context, const StringVal& str) { return IntVal((str.len == 0) ? 
0 : static_cast(str.ptr[0])); } -IntVal StringFunctions::instr( - FunctionContext* context, const StringVal& str, - const StringVal& substr) { +IntVal StringFunctions::instr(FunctionContext* context, const StringVal& str, + const StringVal& substr) { if (str.is_null || substr.is_null) { return IntVal::null(); } @@ -475,14 +469,13 @@ IntVal StringFunctions::instr( return IntVal(loc + 1); } -IntVal StringFunctions::locate( - FunctionContext* context, const StringVal& substr, const StringVal& str) { +IntVal StringFunctions::locate(FunctionContext* context, const StringVal& substr, + const StringVal& str) { return instr(context, str, substr); } -IntVal StringFunctions::locate_pos( - FunctionContext* context, const StringVal& substr, - const StringVal& str, const IntVal& start_pos) { +IntVal StringFunctions::locate_pos(FunctionContext* context, const StringVal& substr, + const StringVal& str, const IntVal& start_pos) { if (str.is_null || substr.is_null || start_pos.is_null) { return IntVal::null(); } @@ -503,8 +496,8 @@ IntVal StringFunctions::locate_pos( StringValue substr_sv = StringValue::from_string_val(substr); StringSearch search(&substr_sv); // Input start_pos.val starts from 1. - StringValue adjusted_str( - reinterpret_cast(str.ptr) + index[start_pos.val - 1], str.len - index[start_pos.val - 1]); + StringValue adjusted_str(reinterpret_cast(str.ptr) + index[start_pos.val - 1], + str.len - index[start_pos.val - 1]); int32_t match_pos = search.search(&adjusted_str); if (match_pos >= 0) { // Hive returns the position in the original string starting from 1. @@ -521,10 +514,8 @@ IntVal StringFunctions::locate_pos( } // This function sets options in the RE2 library before pattern matching. 
-bool StringFunctions::set_re2_options( - const StringVal& match_parameter, - std::string* error_str, - re2::RE2::Options* opts) { +bool StringFunctions::set_re2_options(const StringVal& match_parameter, std::string* error_str, + re2::RE2::Options* opts) { for (int i = 0; i < match_parameter.len; i++) { char match = match_parameter.ptr[i]; switch (match) { @@ -553,10 +544,8 @@ bool StringFunctions::set_re2_options( } // The caller owns the returned regex. Returns NULL if the pattern could not be compiled. -static re2::RE2* compile_regex( - const StringVal& pattern, - std::string* error_str, - const StringVal& match_parameter) { +static re2::RE2* compile_regex(const StringVal& pattern, std::string* error_str, + const StringVal& match_parameter) { re2::StringPiece pattern_sp(reinterpret_cast(pattern.ptr), pattern.len); re2::RE2::Options options; // Disable error logging in case e.g. every row causes an error @@ -564,15 +553,15 @@ static re2::RE2* compile_regex( // Return the leftmost longest match (rather than the first match). 
options.set_longest_match(true); options.set_dot_nl(true); - if (!match_parameter.is_null - && !StringFunctions::set_re2_options(match_parameter, error_str, &options)) { + if (!match_parameter.is_null && + !StringFunctions::set_re2_options(match_parameter, error_str, &options)) { return NULL; } re2::RE2* re = new re2::RE2(pattern_sp, options); if (!re->ok()) { std::stringstream ss; - ss << "Could not compile regexp pattern: " << AnyValUtil::to_string(pattern) - << std::endl << "Error: " << re->error(); + ss << "Could not compile regexp pattern: " << AnyValUtil::to_string(pattern) << std::endl + << "Error: " << re->error(); *error_str = ss.str(); delete re; return NULL; @@ -580,8 +569,8 @@ static re2::RE2* compile_regex( return re; } -void StringFunctions::regexp_prepare( - FunctionContext* context, FunctionContext::FunctionStateScope scope) { +void StringFunctions::regexp_prepare(FunctionContext* context, + FunctionContext::FunctionStateScope scope) { if (scope != FunctionContext::FRAGMENT_LOCAL) { return; } @@ -602,8 +591,8 @@ void StringFunctions::regexp_prepare( context->set_function_state(scope, re); } -void StringFunctions::regexp_close( - FunctionContext* context, FunctionContext::FunctionStateScope scope) { +void StringFunctions::regexp_close(FunctionContext* context, + FunctionContext::FunctionStateScope scope) { if (scope != FunctionContext::FRAGMENT_LOCAL) { return; } @@ -611,9 +600,8 @@ void StringFunctions::regexp_close( delete re; } -StringVal StringFunctions::regexp_extract( - FunctionContext* context, const StringVal& str, - const StringVal& pattern, const BigIntVal& index) { +StringVal StringFunctions::regexp_extract(FunctionContext* context, const StringVal& str, + const StringVal& pattern, const BigIntVal& index) { if (str.is_null || pattern.is_null || index.is_null) { return StringVal::null(); } @@ -622,7 +610,7 @@ StringVal StringFunctions::regexp_extract( } re2::RE2* re = reinterpret_cast( - 
context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); std::unique_ptr scoped_re; // destroys re if we have to locally compile it if (re == NULL) { DCHECK(!context->is_arg_constant(1)); @@ -643,8 +631,7 @@ StringVal StringFunctions::regexp_extract( // Use a vector because clang complains about non-POD varlen arrays // TODO: fix this std::vector matches(max_matches); - bool success = - re->Match(str_sp, 0, str.len, re2::RE2::UNANCHORED, &matches[0], max_matches); + bool success = re->Match(str_sp, 0, str.len, re2::RE2::UNANCHORED, &matches[0], max_matches); if (!success) { return StringVal(); } @@ -653,15 +640,14 @@ StringVal StringFunctions::regexp_extract( return AnyValUtil::from_buffer_temp(context, match.data(), match.size()); } -StringVal StringFunctions::regexp_replace( - FunctionContext* context, const StringVal& str, - const StringVal& pattern, const StringVal& replace) { +StringVal StringFunctions::regexp_replace(FunctionContext* context, const StringVal& str, + const StringVal& pattern, const StringVal& replace) { if (str.is_null || pattern.is_null || replace.is_null) { return StringVal::null(); } re2::RE2* re = reinterpret_cast( - context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); std::unique_ptr scoped_re; // destroys re if state->re is NULL if (re == NULL) { DCHECK(!context->is_arg_constant(1)); @@ -675,14 +661,14 @@ StringVal StringFunctions::regexp_replace( } re2::StringPiece replace_str = - re2::StringPiece(reinterpret_cast(replace.ptr), replace.len); + re2::StringPiece(reinterpret_cast(replace.ptr), replace.len); std::string result_str = AnyValUtil::to_string(str); re2::RE2::GlobalReplace(&result_str, *re, replace_str); return AnyValUtil::from_string_temp(context, result_str); } -StringVal StringFunctions::concat( - FunctionContext* context, int num_children, const StringVal* strs) { +StringVal 
StringFunctions::concat(FunctionContext* context, int num_children, + const StringVal* strs) { DCHECK_GE(num_children, 1); // Pass through if there's only one argument @@ -710,9 +696,8 @@ StringVal StringFunctions::concat( return result; } -StringVal StringFunctions::concat_ws( - FunctionContext* context, const StringVal& sep, - int num_children, const StringVal* strs) { +StringVal StringFunctions::concat_ws(FunctionContext* context, const StringVal& sep, + int num_children, const StringVal* strs) { DCHECK_GE(num_children, 1); if (sep.is_null) { return StringVal::null(); @@ -751,8 +736,8 @@ StringVal StringFunctions::concat_ws( return result; } -IntVal StringFunctions::find_in_set( - FunctionContext* context, const StringVal& str, const StringVal& str_set) { +IntVal StringFunctions::find_in_set(FunctionContext* context, const StringVal& str, + const StringVal& str_set) { if (str.is_null || str_set.is_null) { return IntVal::null(); } @@ -785,9 +770,8 @@ IntVal StringFunctions::find_in_set( return IntVal(0); } -void StringFunctions::parse_url_prepare( - FunctionContext* ctx, - FunctionContext::FunctionStateScope scope) { +void StringFunctions::parse_url_prepare(FunctionContext* ctx, + FunctionContext::FunctionStateScope scope) { if (scope != FunctionContext::FRAGMENT_LOCAL) { return; } @@ -803,20 +787,20 @@ void StringFunctions::parse_url_prepare( if (*url_part == UrlParser::INVALID) { std::stringstream ss; ss << "Invalid URL part: " << AnyValUtil::to_string(*part) << std::endl - << "(Valid URL parts are 'PROTOCOL', 'HOST', 'PATH', 'REF', 'AUTHORITY', 'FILE', " - << "'USERINFO', 'PORT' and 'QUERY')"; + << "(Valid URL parts are 'PROTOCOL', 'HOST', 'PATH', 'REF', 'AUTHORITY', 'FILE', " + << "'USERINFO', 'PORT' and 'QUERY')"; ctx->set_error(ss.str().c_str()); return; } ctx->set_function_state(scope, url_part); } -StringVal StringFunctions::parse_url( - FunctionContext* ctx, const StringVal& url, const StringVal& part) { +StringVal 
StringFunctions::parse_url(FunctionContext* ctx, const StringVal& url, + const StringVal& part) { if (url.is_null || part.is_null) { return StringVal::null(); } - std::string part_str = std::string(reinterpret_cast(part.ptr), part.len); + std::string part_str = std::string(reinterpret_cast(part.ptr), part.len); transform(part_str.begin(), part_str.end(), part_str.begin(), ::toupper); StringVal newPart = AnyValUtil::from_string_temp(ctx, part_str); void* state = ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL); @@ -847,19 +831,18 @@ StringVal StringFunctions::parse_url( return result_sv; } -void StringFunctions::parse_url_close( - FunctionContext* ctx, FunctionContext::FunctionStateScope scope) { - if (scope != FunctionContext::FRAGMENT_LOCAL) { +void StringFunctions::parse_url_close(FunctionContext* ctx, + FunctionContext::FunctionStateScope scope) { + if (scope != FunctionContext::FRAGMENT_LOCAL) { return; } UrlParser::UrlPart* url_part = - reinterpret_cast(ctx->get_function_state(scope)); + reinterpret_cast(ctx->get_function_state(scope)); delete url_part; } -StringVal StringFunctions::parse_url_key( - FunctionContext* ctx, const StringVal& url, - const StringVal& part, const StringVal& key) { +StringVal StringFunctions::parse_url_key(FunctionContext* ctx, const StringVal& url, + const StringVal& part, const StringVal& key) { if (url.is_null || part.is_null || key.is_null) { return StringVal::null(); } @@ -873,9 +856,8 @@ StringVal StringFunctions::parse_url_key( } StringValue result; - if (!UrlParser::parse_url_key( - StringValue::from_string_val(url), url_part, - StringValue::from_string_val(key), &result)) { + if (!UrlParser::parse_url_key(StringValue::from_string_val(url), url_part, + StringValue::from_string_val(key), &result)) { // url is malformed, or url_part is invalid. 
if (url_part == UrlParser::INVALID) { std::stringstream ss; @@ -898,11 +880,11 @@ StringVal StringFunctions::money_format(FunctionContext* context, const DoubleVa return StringVal::null(); } - double v_cent= MathFunctions::my_double_round(v.val, 2, false, false) * 100; + double v_cent = MathFunctions::my_double_round(v.val, 2, false, false) * 100; return do_money_format(context, std::to_string(v_cent)); } -StringVal StringFunctions::money_format(FunctionContext *context, const DecimalVal &v) { +StringVal StringFunctions::money_format(FunctionContext* context, const DecimalVal& v) { if (v.is_null) { return StringVal::null(); } @@ -914,7 +896,7 @@ StringVal StringFunctions::money_format(FunctionContext *context, const DecimalV return do_money_format(context, result.to_string()); } -StringVal StringFunctions::money_format(FunctionContext *context, const DecimalV2Val &v) { +StringVal StringFunctions::money_format(FunctionContext* context, const DecimalV2Val& v) { if (v.is_null) { return StringVal::null(); } @@ -926,8 +908,7 @@ StringVal StringFunctions::money_format(FunctionContext *context, const DecimalV return do_money_format(context, result.to_string()); } - -StringVal StringFunctions::money_format(FunctionContext *context, const BigIntVal &v) { +StringVal StringFunctions::money_format(FunctionContext* context, const BigIntVal& v) { if (v.is_null) { return StringVal::null(); } @@ -936,7 +917,7 @@ StringVal StringFunctions::money_format(FunctionContext *context, const BigIntVa return do_money_format(context, cent_money); } -StringVal StringFunctions::money_format(FunctionContext *context, const LargeIntVal &v) { +StringVal StringFunctions::money_format(FunctionContext* context, const LargeIntVal& v) { if (v.is_null) { return StringVal::null(); } @@ -947,8 +928,7 @@ StringVal StringFunctions::money_format(FunctionContext *context, const LargeInt } static int index_of(const uint8_t* source, int source_offset, int source_count, - const uint8_t* target, int 
target_offset, int target_count, - int from_index) { + const uint8_t* target, int target_offset, int target_count, int from_index) { if (from_index >= source_count) { return (target_count == 0 ? source_count : -1); } @@ -965,7 +945,8 @@ static int index_of(const uint8_t* source, int source_offset, int source_count, if (i <= max) { // Found first character, now look at the rest of v2 int j = i + 1; int end = j + target_count - 1; - for (int k = target_offset + 1; j < end && source[j] == target[k]; j++, k++); + for (int k = target_offset + 1; j < end && source[j] == target[k]; j++, k++) + ; if (j == end) { return i - source_offset; // Found whole string. } @@ -983,13 +964,15 @@ StringVal StringFunctions::split_part(FunctionContext* context, const StringVal& int from = 0; for (int i = 1; i <= field.val; i++) { // find int last_index = i - 1; - find[last_index] = index_of(content.ptr, 0, content.len, delimiter.ptr, 0, delimiter.len, from); + find[last_index] = + index_of(content.ptr, 0, content.len, delimiter.ptr, 0, delimiter.len, from); from = find[last_index] + delimiter.len; if (find[last_index] == -1) { break; } } - if ((field.val > 1 && find[field.val - 2] == -1) || (field.val == 1 && find[field.val - 1] == -1)) { + if ((field.val > 1 && find[field.val - 2] == -1) || + (field.val == 1 && find[field.val - 1] == -1)) { // field not find return null return StringVal::null(); } @@ -1003,22 +986,22 @@ StringVal StringFunctions::split_part(FunctionContext* context, const StringVal& return StringVal(content.ptr + start_pos, len); } -StringVal StringFunctions::replace(FunctionContext *context, const StringVal &origStr, const StringVal &oldStr, const StringVal &newStr) { +StringVal StringFunctions::replace(FunctionContext* context, const StringVal& origStr, + const StringVal& oldStr, const StringVal& newStr) { if (origStr.is_null || oldStr.is_null || newStr.is_null) { return StringVal::null(); } - std::string orig_str = std::string(reinterpret_cast(origStr.ptr), 
origStr.len); - std::string old_str = std::string(reinterpret_cast(oldStr.ptr), oldStr.len); - std::string new_str = std::string(reinterpret_cast(newStr.ptr), newStr.len); + std::string orig_str = std::string(reinterpret_cast(origStr.ptr), origStr.len); + std::string old_str = std::string(reinterpret_cast(oldStr.ptr), oldStr.len); + std::string new_str = std::string(reinterpret_cast(newStr.ptr), newStr.len); std::string::size_type pos = 0; std::string::size_type oldLen = old_str.size(); std::string::size_type newLen = new_str.size(); - while ((pos = orig_str.find(old_str, pos))) - { - if(pos == std::string::npos) break; + while ((pos = orig_str.find(old_str, pos))) { + if (pos == std::string::npos) break; orig_str.replace(pos, oldLen, new_str); pos += newLen; } return AnyValUtil::from_string_temp(context, orig_str); } -} +} // namespace doris diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h index 62e5377e122b18..d77524d1b97e17 100644 --- a/be/src/exprs/string_functions.h +++ b/be/src/exprs/string_functions.h @@ -20,12 +20,13 @@ #include -#include "runtime/string_value.h" -#include "runtime/string_search.hpp" -#include "anyval_util.h" +#include #include #include -#include + +#include "anyval_util.h" +#include "runtime/string_search.hpp" +#include "runtime/string_value.h" namespace doris { @@ -37,85 +38,78 @@ class StringFunctions { public: static void init(); - static doris_udf::StringVal substring( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str, - const doris_udf::IntVal& pos, const doris_udf::IntVal& len); - static doris_udf::StringVal substring( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str, - const doris_udf::IntVal& pos); - static doris_udf::StringVal left( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str, - const doris_udf::IntVal& len); - static doris_udf::StringVal right( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str, - const 
doris_udf::IntVal& len); - static doris_udf::BooleanVal starts_with( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str, - const doris_udf::StringVal& prefix); - static doris_udf::BooleanVal ends_with( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str, - const doris_udf::StringVal& suffix); - static doris_udf::BooleanVal null_or_empty( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str); - static doris_udf::StringVal space( - doris_udf::FunctionContext* context, const doris_udf::IntVal& len); - static doris_udf::StringVal repeat( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str, - const doris_udf::IntVal& n); - static doris_udf::StringVal lpad( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str, - const doris_udf::IntVal& len, const doris_udf::StringVal& pad); - static doris_udf::StringVal rpad( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str, - const doris_udf::IntVal& len, const doris_udf::StringVal& pad); + static doris_udf::StringVal substring(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str, + const doris_udf::IntVal& pos, + const doris_udf::IntVal& len); + static doris_udf::StringVal substring(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str, + const doris_udf::IntVal& pos); + static doris_udf::StringVal left(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str, const doris_udf::IntVal& len); + static doris_udf::StringVal right(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str, + const doris_udf::IntVal& len); + static doris_udf::BooleanVal starts_with(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str, + const doris_udf::StringVal& prefix); + static doris_udf::BooleanVal ends_with(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str, + const doris_udf::StringVal& suffix); + static doris_udf::BooleanVal 
null_or_empty(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str); + static doris_udf::StringVal space(doris_udf::FunctionContext* context, + const doris_udf::IntVal& len); + static doris_udf::StringVal repeat(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str, const doris_udf::IntVal& n); + static doris_udf::StringVal lpad(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str, const doris_udf::IntVal& len, + const doris_udf::StringVal& pad); + static doris_udf::StringVal rpad(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str, const doris_udf::IntVal& len, + const doris_udf::StringVal& pad); static doris_udf::StringVal append_trailing_char_if_absent( doris_udf::FunctionContext* context, const doris_udf::StringVal& str, const doris_udf::StringVal& trailing_char); - static doris_udf::IntVal length( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str); - static doris_udf::IntVal char_utf8_length( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str); - static doris_udf::StringVal lower( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str); - static doris_udf::StringVal upper( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str); - static doris_udf::StringVal reverse( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str); - static doris_udf::StringVal trim( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str); - static doris_udf::StringVal ltrim( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str); - static doris_udf::StringVal rtrim( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str); - static doris_udf::IntVal ascii( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str); - static doris_udf::IntVal instr( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str, - const doris_udf::StringVal&); - static 
doris_udf::IntVal locate( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str, - const doris_udf::StringVal&); - static doris_udf::IntVal locate_pos( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str, - const doris_udf::StringVal&, const doris_udf::IntVal&); - - static bool set_re2_options( - const doris_udf::StringVal& match_parameter, - std::string* error_str, - re2::RE2::Options* opts); - - static void regexp_prepare( - doris_udf::FunctionContext*, - doris_udf::FunctionContext::FunctionStateScope); - static StringVal regexp_extract( - doris_udf::FunctionContext*, const doris_udf::StringVal& str, - const doris_udf::StringVal& pattern, const doris_udf::BigIntVal& index); - static StringVal regexp_replace( - doris_udf::FunctionContext*, const doris_udf::StringVal& str, - const doris_udf::StringVal& pattern, const doris_udf::StringVal& replace); - static void regexp_close( - doris_udf::FunctionContext*, - doris_udf::FunctionContext::FunctionStateScope); + static doris_udf::IntVal length(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str); + static doris_udf::IntVal char_utf8_length(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str); + static doris_udf::StringVal lower(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str); + static doris_udf::StringVal upper(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str); + static doris_udf::StringVal reverse(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str); + static doris_udf::StringVal trim(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str); + static doris_udf::StringVal ltrim(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str); + static doris_udf::StringVal rtrim(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str); + static doris_udf::IntVal ascii(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str); + 
static doris_udf::IntVal instr(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str, const doris_udf::StringVal&); + static doris_udf::IntVal locate(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str, const doris_udf::StringVal&); + static doris_udf::IntVal locate_pos(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str, + const doris_udf::StringVal&, const doris_udf::IntVal&); + + static bool set_re2_options(const doris_udf::StringVal& match_parameter, std::string* error_str, + re2::RE2::Options* opts); + + static void regexp_prepare(doris_udf::FunctionContext*, + doris_udf::FunctionContext::FunctionStateScope); + static StringVal regexp_extract(doris_udf::FunctionContext*, const doris_udf::StringVal& str, + const doris_udf::StringVal& pattern, + const doris_udf::BigIntVal& index); + static StringVal regexp_replace(doris_udf::FunctionContext*, const doris_udf::StringVal& str, + const doris_udf::StringVal& pattern, + const doris_udf::StringVal& replace); + static void regexp_close(doris_udf::FunctionContext*, + doris_udf::FunctionContext::FunctionStateScope); #if 0 static void RegexpMatchCountPrepare(FunctionContext* context, FunctionContext::FunctionStateScope scope); @@ -125,51 +119,36 @@ class StringFunctions { const StringVal& pattern, const IntVal& start_pos, const StringVal& match_parameter); #endif - static StringVal concat( - doris_udf::FunctionContext*, - int num_children, - const StringVal* strs); - static StringVal concat_ws( - doris_udf::FunctionContext*, - const doris_udf::StringVal& sep, - int num_children, - const doris_udf::StringVal* strs); - static IntVal find_in_set( - doris_udf::FunctionContext*, - const doris_udf::StringVal& str, - const doris_udf::StringVal& str_set); - - static void parse_url_prepare( - doris_udf::FunctionContext*, - doris_udf::FunctionContext::FunctionStateScope); - static StringVal parse_url( - doris_udf::FunctionContext*, - const doris_udf::StringVal& url, - const 
doris_udf::StringVal& part); - static StringVal parse_url_key( - doris_udf::FunctionContext*, - const doris_udf::StringVal& url, - const doris_udf::StringVal& key, - const doris_udf::StringVal& part); - static void parse_url_close( - doris_udf::FunctionContext*, - doris_udf::FunctionContext::FunctionStateScope); - + static StringVal concat(doris_udf::FunctionContext*, int num_children, const StringVal* strs); + static StringVal concat_ws(doris_udf::FunctionContext*, const doris_udf::StringVal& sep, + int num_children, const doris_udf::StringVal* strs); + static IntVal find_in_set(doris_udf::FunctionContext*, const doris_udf::StringVal& str, + const doris_udf::StringVal& str_set); + + static void parse_url_prepare(doris_udf::FunctionContext*, + doris_udf::FunctionContext::FunctionStateScope); + static StringVal parse_url(doris_udf::FunctionContext*, const doris_udf::StringVal& url, + const doris_udf::StringVal& part); + static StringVal parse_url_key(doris_udf::FunctionContext*, const doris_udf::StringVal& url, + const doris_udf::StringVal& key, + const doris_udf::StringVal& part); + static void parse_url_close(doris_udf::FunctionContext*, + doris_udf::FunctionContext::FunctionStateScope); static doris_udf::StringVal money_format(doris_udf::FunctionContext* context, - const doris_udf::DoubleVal& v); + const doris_udf::DoubleVal& v); static doris_udf::StringVal money_format(doris_udf::FunctionContext* context, - const doris_udf::DecimalVal& v); + const doris_udf::DecimalVal& v); static doris_udf::StringVal money_format(doris_udf::FunctionContext* context, - const doris_udf::DecimalV2Val& v); + const doris_udf::DecimalV2Val& v); static doris_udf::StringVal money_format(doris_udf::FunctionContext* context, - const doris_udf::BigIntVal& v); + const doris_udf::BigIntVal& v); static doris_udf::StringVal money_format(doris_udf::FunctionContext* context, - const doris_udf::LargeIntVal& v); + const doris_udf::LargeIntVal& v); struct CommaMoneypunct : std::moneypunct { 
pattern do_pos_format() const override { return {{none, sign, none, value}}; } @@ -180,7 +159,7 @@ class StringFunctions { string_type do_negative_sign() const override { return "-"; } }; - static StringVal do_money_format(FunctionContext *context, const std::string& v) { + static StringVal do_money_format(FunctionContext* context, const std::string& v) { static std::locale comma_locale(std::locale(), new CommaMoneypunct()); static std::stringstream ss; static bool ss_init = false; @@ -196,11 +175,11 @@ class StringFunctions { }; static StringVal split_part(FunctionContext* context, const StringVal& content, - const StringVal& delimiter, const IntVal& field); + const StringVal& delimiter, const IntVal& field); - static StringVal replace(FunctionContext *context, const StringVal &origStr, - const StringVal &oldStr, const StringVal &newStr); + static StringVal replace(FunctionContext* context, const StringVal& origStr, + const StringVal& oldStr, const StringVal& newStr); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/time_operators.cpp b/be/src/exprs/time_operators.cpp index 9a4e198a3e80fd..ad22e0c39775c6 100644 --- a/be/src/exprs/time_operators.cpp +++ b/be/src/exprs/time_operators.cpp @@ -17,59 +17,55 @@ #include "exprs/time_operators.h" +#include + #include #include -#include #include "exprs/anyval_util.h" #include "exprs/case_expr.h" #include "exprs/expr.h" #include "runtime/tuple_row.h" -#include "util/string_parser.hpp" #include "util/date_func.h" +#include "util/string_parser.hpp" namespace doris { -void TimeOperators::init() { -} +void TimeOperators::init() {} -#define CAST_TIME_TO_INT(to_type, type_name) \ - to_type TimeOperators::cast_to_##type_name( \ - FunctionContext* context, const DoubleVal& val) { \ - if (val.is_null) return to_type::null(); \ - int time = (int) val.val ; \ - int second = time % 60; \ - int minute = time / 60 % 60; \ - int hour = time / 3600; \ - return to_type(hour * 10000 + minute * 100 + second); \ +#define 
CAST_TIME_TO_INT(to_type, type_name) \ + to_type TimeOperators::cast_to_##type_name(FunctionContext* context, const DoubleVal& val) { \ + if (val.is_null) return to_type::null(); \ + int time = (int)val.val; \ + int second = time % 60; \ + int minute = time / 60 % 60; \ + int hour = time / 3600; \ + return to_type(hour * 10000 + minute * 100 + second); \ } -#define CAST_FROM_TIME() \ - CAST_TIME_TO_INT(BooleanVal, boolean_val);\ - CAST_TIME_TO_INT(TinyIntVal, tiny_int_val);\ - CAST_TIME_TO_INT(SmallIntVal, small_int_val);\ - CAST_TIME_TO_INT(IntVal, int_val);\ - CAST_TIME_TO_INT(BigIntVal, big_int_val);\ - CAST_TIME_TO_INT(LargeIntVal, large_int_val);\ - CAST_TIME_TO_INT(FloatVal, float_val);\ +#define CAST_FROM_TIME() \ + CAST_TIME_TO_INT(BooleanVal, boolean_val); \ + CAST_TIME_TO_INT(TinyIntVal, tiny_int_val); \ + CAST_TIME_TO_INT(SmallIntVal, small_int_val); \ + CAST_TIME_TO_INT(IntVal, int_val); \ + CAST_TIME_TO_INT(BigIntVal, big_int_val); \ + CAST_TIME_TO_INT(LargeIntVal, large_int_val); \ + CAST_TIME_TO_INT(FloatVal, float_val); \ CAST_TIME_TO_INT(DoubleVal, double_val); CAST_FROM_TIME(); -StringVal TimeOperators::cast_to_string_val( - FunctionContext* ctx, const DoubleVal& val) { +StringVal TimeOperators::cast_to_string_val(FunctionContext* ctx, const DoubleVal& val) { if (val.is_null) { return StringVal::null(); } return AnyValUtil::from_string_temp(ctx, time_str_from_double(val.val)); } -DateTimeVal TimeOperators::cast_to_datetime_val( - FunctionContext* context, const DoubleVal& val) { +DateTimeVal TimeOperators::cast_to_datetime_val(FunctionContext* context, const DoubleVal& val) { return DateTimeVal::null(); } -DecimalVal TimeOperators::cast_to_decimal_val( - FunctionContext* context, const DoubleVal& val) { +DecimalVal TimeOperators::cast_to_decimal_val(FunctionContext* context, const DoubleVal& val) { return DecimalVal::null(); } -} +} // namespace doris diff --git a/be/src/exprs/time_operators.h b/be/src/exprs/time_operators.h index 
a5c7bc81f52f4a..a844587b9e7c98 100644 --- a/be/src/exprs/time_operators.h +++ b/be/src/exprs/time_operators.h @@ -19,6 +19,7 @@ #define DORIS_BE_SRC_EXPRS_TIME_OPERATORS_H #include + #include "udf/udf.h" namespace doris { @@ -44,5 +45,5 @@ class TimeOperators { static DateTimeVal cast_to_datetime_val(FunctionContext*, const DoubleVal&); static DecimalVal cast_to_decimal_val(FunctionContext*, const DoubleVal&); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/timestamp_functions.cpp b/be/src/exprs/timestamp_functions.cpp index 9c64e60c5c522c..57b5b808f2c9bc 100644 --- a/be/src/exprs/timestamp_functions.cpp +++ b/be/src/exprs/timestamp_functions.cpp @@ -17,20 +17,19 @@ #include "exprs/timestamp_functions.h" -#include "exprs/expr.h" #include "exprs/anyval_util.h" -#include "runtime/tuple_row.h" +#include "exprs/expr.h" #include "runtime/datetime_value.h" #include "runtime/runtime_state.h" #include "runtime/string_value.hpp" +#include "runtime/tuple_row.h" #include "util/debug_util.h" #include "util/path_builder.h" #include "util/timezone_utils.h" namespace doris { -void TimestampFunctions::init() { -} +void TimestampFunctions::init() {} // TODO: accept Java data/time format strings: // http://docs.oracle.com/javase/1.4.2/docs/api/java/text/SimpleDateFormat.html @@ -89,13 +88,12 @@ StringVal TimestampFunctions::convert_format(FunctionContext* ctx, const StringV } void TimestampFunctions::report_bad_format(const StringVal* format) { - std::string format_str((char *)format->ptr, format->len); + std::string format_str((char*)format->ptr, format->len); // LOG(WARNING) << "Bad date/time conversion format: " << format_str // << " Format must be: 'yyyy-MM-dd[ HH:mm:ss]'"; } -IntVal TimestampFunctions::year( - FunctionContext* context, const DateTimeVal& ts_val) { +IntVal TimestampFunctions::year(FunctionContext* context, const DateTimeVal& ts_val) { if (ts_val.is_null) { return IntVal::null(); } @@ -103,8 +101,7 @@ IntVal TimestampFunctions::year( return 
IntVal(ts_value.year()); } -IntVal TimestampFunctions::quarter( - FunctionContext* context, const DateTimeVal& ts_val) { +IntVal TimestampFunctions::quarter(FunctionContext* context, const DateTimeVal& ts_val) { if (ts_val.is_null) { return IntVal::null(); } @@ -112,8 +109,7 @@ IntVal TimestampFunctions::quarter( return IntVal((ts_value.month() - 1) / 3 + 1); } -IntVal TimestampFunctions::month( - FunctionContext* context, const DateTimeVal& ts_val) { +IntVal TimestampFunctions::month(FunctionContext* context, const DateTimeVal& ts_val) { if (ts_val.is_null) { return IntVal::null(); } @@ -121,20 +117,18 @@ IntVal TimestampFunctions::month( return IntVal(ts_value.month()); } -IntVal TimestampFunctions::day_of_week( - FunctionContext* context, const DateTimeVal& ts_val) { +IntVal TimestampFunctions::day_of_week(FunctionContext* context, const DateTimeVal& ts_val) { if (ts_val.is_null) { return IntVal::null(); } const DateTimeValue& ts_value = DateTimeValue::from_datetime_val(ts_val); if (ts_value.is_valid_date()) { - return IntVal((ts_value.weekday() + 1 ) % 7 + 1); + return IntVal((ts_value.weekday() + 1) % 7 + 1); } return IntVal::null(); } -IntVal TimestampFunctions::day_of_month( - FunctionContext* context, const DateTimeVal& ts_val) { +IntVal TimestampFunctions::day_of_month(FunctionContext* context, const DateTimeVal& ts_val) { if (ts_val.is_null) { return IntVal::null(); } @@ -142,8 +136,7 @@ IntVal TimestampFunctions::day_of_month( return IntVal(ts_value.day()); } -IntVal TimestampFunctions::day_of_year( - FunctionContext* context, const DateTimeVal& ts_val) { +IntVal TimestampFunctions::day_of_year(FunctionContext* context, const DateTimeVal& ts_val) { if (ts_val.is_null) { return IntVal::null(); } @@ -154,8 +147,7 @@ IntVal TimestampFunctions::day_of_year( return IntVal::null(); } -IntVal TimestampFunctions::week_of_year( - FunctionContext* context, const DateTimeVal& ts_val) { +IntVal TimestampFunctions::week_of_year(FunctionContext* context, const 
DateTimeVal& ts_val) { if (ts_val.is_null) { return IntVal::null(); } @@ -166,8 +158,7 @@ IntVal TimestampFunctions::week_of_year( return IntVal::null(); } -IntVal TimestampFunctions::hour( - FunctionContext* context, const DateTimeVal& ts_val) { +IntVal TimestampFunctions::hour(FunctionContext* context, const DateTimeVal& ts_val) { if (ts_val.is_null) { return IntVal::null(); } @@ -175,8 +166,7 @@ IntVal TimestampFunctions::hour( return IntVal(ts_value.hour()); } -IntVal TimestampFunctions::minute( - FunctionContext* context, const DateTimeVal& ts_val) { +IntVal TimestampFunctions::minute(FunctionContext* context, const DateTimeVal& ts_val) { if (ts_val.is_null) { return IntVal::null(); } @@ -184,8 +174,7 @@ IntVal TimestampFunctions::minute( return IntVal(ts_value.minute()); } -IntVal TimestampFunctions::second( - FunctionContext* context, const DateTimeVal& ts_val) { +IntVal TimestampFunctions::second(FunctionContext* context, const DateTimeVal& ts_val) { if (ts_val.is_null) { return IntVal::null(); } @@ -193,8 +182,7 @@ IntVal TimestampFunctions::second( return IntVal(ts_value.second()); } -DateTimeVal TimestampFunctions::to_date( - FunctionContext* ctx, const DateTimeVal& ts_val) { +DateTimeVal TimestampFunctions::to_date(FunctionContext* ctx, const DateTimeVal& ts_val) { if (ts_val.is_null) { return DateTimeVal::null(); } @@ -205,14 +193,14 @@ DateTimeVal TimestampFunctions::to_date( return result; } -DateTimeVal TimestampFunctions::str_to_date( - FunctionContext* ctx, const StringVal& str, const StringVal& format) { +DateTimeVal TimestampFunctions::str_to_date(FunctionContext* ctx, const StringVal& str, + const StringVal& format) { if (str.is_null || format.is_null) { return DateTimeVal::null(); } DateTimeValue ts_value; - if (!ts_value.from_date_format_str((const char*)format.ptr, format.len, - (const char*)str.ptr, str.len)) { + if (!ts_value.from_date_format_str((const char*)format.ptr, format.len, (const char*)str.ptr, + str.len)) { return 
DateTimeVal::null(); } DateTimeVal ts_val; @@ -220,8 +208,7 @@ DateTimeVal TimestampFunctions::str_to_date( return ts_val; } -StringVal TimestampFunctions::month_name( - FunctionContext* ctx, const DateTimeVal& ts_val) { +StringVal TimestampFunctions::month_name(FunctionContext* ctx, const DateTimeVal& ts_val) { if (ts_val.is_null) { return StringVal::null(); } @@ -233,8 +220,7 @@ StringVal TimestampFunctions::month_name( return AnyValUtil::from_string_temp(ctx, name); } -StringVal TimestampFunctions::day_name( - FunctionContext* ctx, const DateTimeVal& ts_val) { +StringVal TimestampFunctions::day_name(FunctionContext* ctx, const DateTimeVal& ts_val) { if (ts_val.is_null) { return StringVal::null(); } @@ -246,106 +232,89 @@ StringVal TimestampFunctions::day_name( return AnyValUtil::from_string_temp(ctx, name); } -DateTimeVal TimestampFunctions::years_add( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::years_add(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return timestamp_time_op(ctx, ts_val, count, true); } -DateTimeVal TimestampFunctions::years_sub( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::years_sub(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return timestamp_time_op(ctx, ts_val, count, false); } -DateTimeVal TimestampFunctions::months_add( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::months_add(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return timestamp_time_op(ctx, ts_val, count, true); } -DateTimeVal TimestampFunctions::months_sub( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::months_sub(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return timestamp_time_op(ctx, ts_val, count, false); } 
-DateTimeVal TimestampFunctions::weeks_add( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::weeks_add(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return timestamp_time_op(ctx, ts_val, count, true); } -DateTimeVal TimestampFunctions::weeks_sub( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::weeks_sub(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return timestamp_time_op(ctx, ts_val, count, false); } -DateTimeVal TimestampFunctions::days_add( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::days_add(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return timestamp_time_op(ctx, ts_val, count, true); } -DateTimeVal TimestampFunctions::days_sub( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::days_sub(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return timestamp_time_op(ctx, ts_val, count, false); } -DateTimeVal TimestampFunctions::hours_add( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::hours_add(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return timestamp_time_op(ctx, ts_val, count, true); } -DateTimeVal TimestampFunctions::hours_sub( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::hours_sub(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return timestamp_time_op(ctx, ts_val, count, false); } -DateTimeVal TimestampFunctions::minutes_add( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::minutes_add(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return 
timestamp_time_op(ctx, ts_val, count, true); } -DateTimeVal TimestampFunctions::minutes_sub( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::minutes_sub(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return timestamp_time_op(ctx, ts_val, count, false); } -DateTimeVal TimestampFunctions::seconds_add( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::seconds_add(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return timestamp_time_op(ctx, ts_val, count, true); } -DateTimeVal TimestampFunctions::seconds_sub( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::seconds_sub(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return timestamp_time_op(ctx, ts_val, count, false); } -DateTimeVal TimestampFunctions::micros_add( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::micros_add(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return timestamp_time_op(ctx, ts_val, count, true); } -DateTimeVal TimestampFunctions::micros_sub( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count) { +DateTimeVal TimestampFunctions::micros_sub(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count) { return timestamp_time_op(ctx, ts_val, count, false); } template -DateTimeVal TimestampFunctions::timestamp_time_op( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& count, bool is_add) { +DateTimeVal TimestampFunctions::timestamp_time_op(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& count, bool is_add) { if (ts_val.is_null || count.is_null) { return DateTimeVal::null(); } @@ -361,114 +330,113 @@ DateTimeVal TimestampFunctions::timestamp_time_op( return new_ts_val; } -BigIntVal 
TimestampFunctions::years_diff( - FunctionContext* ctx, const DateTimeVal& ts_val1, const DateTimeVal& ts_val2) { +BigIntVal TimestampFunctions::years_diff(FunctionContext* ctx, const DateTimeVal& ts_val1, + const DateTimeVal& ts_val2) { return timestamp_diff(ctx, ts_val1, ts_val2); } -BigIntVal TimestampFunctions::months_diff( - FunctionContext* ctx, const DateTimeVal& ts_val1, const DateTimeVal& ts_val2) { +BigIntVal TimestampFunctions::months_diff(FunctionContext* ctx, const DateTimeVal& ts_val1, + const DateTimeVal& ts_val2) { return timestamp_diff(ctx, ts_val1, ts_val2); } -BigIntVal TimestampFunctions::weeks_diff( - FunctionContext* ctx, const DateTimeVal& ts_val1, const DateTimeVal& ts_val2) { +BigIntVal TimestampFunctions::weeks_diff(FunctionContext* ctx, const DateTimeVal& ts_val1, + const DateTimeVal& ts_val2) { return timestamp_diff(ctx, ts_val1, ts_val2); } -BigIntVal TimestampFunctions::days_diff( - FunctionContext* ctx, const DateTimeVal& ts_val1, const DateTimeVal& ts_val2) { +BigIntVal TimestampFunctions::days_diff(FunctionContext* ctx, const DateTimeVal& ts_val1, + const DateTimeVal& ts_val2) { return timestamp_diff(ctx, ts_val1, ts_val2); } -BigIntVal TimestampFunctions::hours_diff( - FunctionContext* ctx, const DateTimeVal& ts_val1, const DateTimeVal& ts_val2) { +BigIntVal TimestampFunctions::hours_diff(FunctionContext* ctx, const DateTimeVal& ts_val1, + const DateTimeVal& ts_val2) { return timestamp_diff(ctx, ts_val1, ts_val2); } -BigIntVal TimestampFunctions::minutes_diff( - FunctionContext* ctx, const DateTimeVal& ts_val1, const DateTimeVal& ts_val2) { +BigIntVal TimestampFunctions::minutes_diff(FunctionContext* ctx, const DateTimeVal& ts_val1, + const DateTimeVal& ts_val2) { return timestamp_diff(ctx, ts_val1, ts_val2); } -BigIntVal TimestampFunctions::seconds_diff( - FunctionContext* ctx, const DateTimeVal& ts_val1, const DateTimeVal& ts_val2) { +BigIntVal TimestampFunctions::seconds_diff(FunctionContext* ctx, const DateTimeVal& ts_val1, + 
const DateTimeVal& ts_val2) { return timestamp_diff(ctx, ts_val1, ts_val2); } template -BigIntVal TimestampFunctions::timestamp_diff(FunctionContext* ctx, const DateTimeVal& ts_val2, const DateTimeVal& ts_val1) { +BigIntVal TimestampFunctions::timestamp_diff(FunctionContext* ctx, const DateTimeVal& ts_val2, + const DateTimeVal& ts_val1) { if (ts_val1.is_null || ts_val2.is_null) { - return BigIntVal::null(); + return BigIntVal::null(); } DateTimeValue ts_value1 = DateTimeValue::from_datetime_val(ts_val1); DateTimeValue ts_value2 = DateTimeValue::from_datetime_val(ts_val2); switch (unit) { - case YEAR: { - int year = (ts_value2.year() - ts_value1.year()); - if (year > 0) { - year -= (ts_value2.to_int64() % 10000000000 - ts_value1.to_int64() % 10000000000) < 0; - } else if (year < 0) { - year += (ts_value2.to_int64() % 10000000000 - ts_value1.to_int64() % 10000000000) > 0; - } - return year; - } - case MONTH: { - int month = (ts_value2.year() - ts_value1.year()) * 12 + (ts_value2.month() - ts_value1.month()); - if (month > 0) { - month -= (ts_value2.to_int64() % 100000000 - ts_value1.to_int64() % 100000000) < 0; - } else if (month < 0) { - month += (ts_value2.to_int64() % 100000000 - ts_value1.to_int64() % 100000000) > 0; - } - return month; + case YEAR: { + int year = (ts_value2.year() - ts_value1.year()); + if (year > 0) { + year -= (ts_value2.to_int64() % 10000000000 - ts_value1.to_int64() % 10000000000) < 0; + } else if (year < 0) { + year += (ts_value2.to_int64() % 10000000000 - ts_value1.to_int64() % 10000000000) > 0; } - case WEEK: { - int day = ts_value2.daynr() - ts_value1.daynr(); - if (day > 0) { - day -= ts_value2.time_part_diff(ts_value1) < 0; - } else if (day < 0) { - day += ts_value2.time_part_diff(ts_value1) > 0; - } - return day / 7; + return year; + } + case MONTH: { + int month = (ts_value2.year() - ts_value1.year()) * 12 + + (ts_value2.month() - ts_value1.month()); + if (month > 0) { + month -= (ts_value2.to_int64() % 100000000 - 
ts_value1.to_int64() % 100000000) < 0; + } else if (month < 0) { + month += (ts_value2.to_int64() % 100000000 - ts_value1.to_int64() % 100000000) > 0; } - case DAY: { - int day = ts_value2.daynr() - ts_value1.daynr(); - if (day > 0) { - day -= ts_value2.time_part_diff(ts_value1) < 0; - } else if (day < 0) { - day += ts_value2.time_part_diff(ts_value1) > 0; - } - return day; + return month; + } + case WEEK: { + int day = ts_value2.daynr() - ts_value1.daynr(); + if (day > 0) { + day -= ts_value2.time_part_diff(ts_value1) < 0; + } else if (day < 0) { + day += ts_value2.time_part_diff(ts_value1) > 0; } - case HOUR: { - int64_t second = ts_value2.second_diff(ts_value1); - int64_t hour = second / 60 / 60; - return hour; + return day / 7; + } + case DAY: { + int day = ts_value2.daynr() - ts_value1.daynr(); + if (day > 0) { + day -= ts_value2.time_part_diff(ts_value1) < 0; + } else if (day < 0) { + day += ts_value2.time_part_diff(ts_value1) > 0; } - case MINUTE: { - int64_t second = ts_value2.second_diff(ts_value1); - int64_t minute = second / 60; - return minute; - } - case SECOND: { - int64_t second = ts_value2.second_diff(ts_value1); - return second; - } - default: - return BigIntVal::null(); + return day; + } + case HOUR: { + int64_t second = ts_value2.second_diff(ts_value1); + int64_t hour = second / 60 / 60; + return hour; + } + case MINUTE: { + int64_t second = ts_value2.second_diff(ts_value1); + int64_t minute = second / 60; + return minute; + } + case SECOND: { + int64_t second = ts_value2.second_diff(ts_value1); + return second; + } + default: + return BigIntVal::null(); } } -void TimestampFunctions::format_prepare( - doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope scope) { - - if (scope != FunctionContext::FRAGMENT_LOCAL - || context->get_num_args() < 2 - || context->get_arg_type(1)->type != doris_udf::FunctionContext::Type::TYPE_VARCHAR - || !context->is_arg_constant(1)) { +void 
TimestampFunctions::format_prepare(doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope) { + if (scope != FunctionContext::FRAGMENT_LOCAL || context->get_num_args() < 2 || + context->get_arg_type(1)->type != doris_udf::FunctionContext::Type::TYPE_VARCHAR || + !context->is_arg_constant(1)) { VLOG(10) << "format_prepare returned"; return; } @@ -483,7 +451,7 @@ void TimestampFunctions::format_prepare( } fc->fmt = convert_format(context, *format); - int format_len = DateTimeValue::compute_format_len((const char*) fc->fmt.ptr, fc->fmt.len); + int format_len = DateTimeValue::compute_format_len((const char*)fc->fmt.ptr, fc->fmt.len); if (UNLIKELY(format_len >= 128)) { fc->is_valid = false; return; @@ -493,14 +461,14 @@ void TimestampFunctions::format_prepare( return; } -void TimestampFunctions::format_close( - doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope scope) { +void TimestampFunctions::format_close(doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope) { if (scope != FunctionContext::FRAGMENT_LOCAL) { return; } - FormatCtx* fc = reinterpret_cast(context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + FormatCtx* fc = reinterpret_cast( + context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); if (fc != nullptr) { delete fc; } @@ -517,36 +485,37 @@ DateTimeVal from_olap_datetime(uint64_t datetime) { return ts_val; } -#define _TR_4(TYPE, type, UNIT, unit) \ - DateTimeVal TimestampFunctions::unit##_##type( \ - FunctionContext* ctx, const DateTimeVal& ts_val, \ - const IntVal& period, const DateTimeVal& origin) { \ - return time_round(ctx, ts_val, period, origin); \ - } \ - DateTimeVal TimestampFunctions::unit##_##type( \ - FunctionContext* ctx, const DateTimeVal& ts_val, const DateTimeVal& origin) { \ - return time_round(ctx, ts_val, IntVal(1), origin); \ +#define _TR_4(TYPE, type, UNIT, unit) \ + DateTimeVal 
TimestampFunctions::unit##_##type(FunctionContext* ctx, const DateTimeVal& ts_val, \ + const IntVal& period, \ + const DateTimeVal& origin) { \ + return time_round(ctx, ts_val, period, origin); \ + } \ + DateTimeVal TimestampFunctions::unit##_##type(FunctionContext* ctx, const DateTimeVal& ts_val, \ + const DateTimeVal& origin) { \ + return time_round(ctx, ts_val, IntVal(1), origin); \ } -#define _TR_5(TYPE, type, UNIT, unit, ORIGIN) \ - DateTimeVal TimestampFunctions::unit##_##type( \ - FunctionContext* ctx, const DateTimeVal& ts_val) { \ - return time_round(ctx, ts_val, IntVal(1), ORIGIN); \ - } \ - DateTimeVal TimestampFunctions::unit##_##type( \ - FunctionContext* ctx, const DateTimeVal& ts_val, const IntVal& period) { \ - return time_round(ctx, ts_val, period, ORIGIN); \ +#define _TR_5(TYPE, type, UNIT, unit, ORIGIN) \ + DateTimeVal TimestampFunctions::unit##_##type(FunctionContext* ctx, \ + const DateTimeVal& ts_val) { \ + return time_round(ctx, ts_val, IntVal(1), ORIGIN); \ + } \ + DateTimeVal TimestampFunctions::unit##_##type(FunctionContext* ctx, const DateTimeVal& ts_val, \ + const IntVal& period) { \ + return time_round(ctx, ts_val, period, ORIGIN); \ } #define FLOOR 0 -#define CEIL 1 +#define CEIL 1 -static const DateTimeVal FIRST_DAY = from_olap_datetime(19700101000000); +static const DateTimeVal FIRST_DAY = from_olap_datetime(19700101000000); static const DateTimeVal FIRST_SUNDAY = from_olap_datetime(19700104000000); -#define TIME_ROUND(UNIT, unit, ORIGIN) \ - _TR_4(FLOOR, floor, UNIT, unit) _TR_4(CEIL, ceil, UNIT, unit) \ - _TR_5(FLOOR, floor, UNIT, unit, ORIGIN) _TR_5(CEIL, ceil, UNIT, unit, ORIGIN) +#define TIME_ROUND(UNIT, unit, ORIGIN) \ + _TR_4(FLOOR, floor, UNIT, unit) \ + _TR_4(CEIL, ceil, UNIT, unit) _TR_5(FLOOR, floor, UNIT, unit, ORIGIN) \ + _TR_5(CEIL, ceil, UNIT, unit, ORIGIN) TIME_ROUND(YEAR, year, FIRST_DAY) TIME_ROUND(MONTH, month, FIRST_DAY) @@ -557,9 +526,8 @@ TIME_ROUND(MINUTE, minute, FIRST_DAY) TIME_ROUND(SECOND, second, 
FIRST_DAY) template -DateTimeVal TimestampFunctions::time_round( - FunctionContext* ctx, const DateTimeVal& ts_val, - const IntVal& period, const DateTimeVal& origin) { +DateTimeVal TimestampFunctions::time_round(FunctionContext* ctx, const DateTimeVal& ts_val, + const IntVal& period, const DateTimeVal& origin) { if (ts_val.is_null || period.is_null || period.val < 1 || origin.is_null) { return DateTimeVal::null(); } @@ -568,43 +536,43 @@ DateTimeVal TimestampFunctions::time_round( DateTimeValue ts2 = DateTimeValue::from_datetime_val(ts_val); int64_t diff; switch (unit) { - case YEAR: { - int year = (ts2.year() - ts1.year()); - diff = year - (ts2.to_int64() % 10000000000 < ts1.to_int64() % 10000000000); - break; - } - case MONTH: { - int month = (ts2.year() - ts1.year()) * 12 + (ts2.month() - ts1.month()); - diff = month - (ts2.to_int64() % 100000000 < ts1.to_int64() % 100000000); - break; - } - case WEEK: { - int week = ts2.daynr() / 7 - ts1.daynr() / 7; - diff = week - (ts2.daynr() % 7 < ts1.daynr() % 7 + (ts2.time_part_diff(ts1) < 0)); - break; - } - case DAY: { - int day = ts2.daynr() - ts1.daynr(); - diff = day - (ts2.time_part_diff(ts1) < 0); - break; - } - case HOUR: { - int hour = (ts2.daynr() - ts1.daynr()) * 24 + (ts2.hour() - ts1.hour()); - diff = hour - ((ts2.minute() * 60 + ts2.second()) < (ts1.minute() * 60 - ts1.second())); - break; - } - case MINUTE: { - int minute = (ts2.daynr() - ts1.daynr()) * 24 * 60 + - (ts2.hour() - ts1.hour()) * 60 + (ts2.minute() - ts1.minute()); - diff = minute - (ts2.second() < ts1.second()); - break; - } - case SECOND: { - diff = ts2.second_diff(ts1); - break; - } - default: - return DateTimeVal::null(); + case YEAR: { + int year = (ts2.year() - ts1.year()); + diff = year - (ts2.to_int64() % 10000000000 < ts1.to_int64() % 10000000000); + break; + } + case MONTH: { + int month = (ts2.year() - ts1.year()) * 12 + (ts2.month() - ts1.month()); + diff = month - (ts2.to_int64() % 100000000 < ts1.to_int64() % 100000000); + break; 
+ } + case WEEK: { + int week = ts2.daynr() / 7 - ts1.daynr() / 7; + diff = week - (ts2.daynr() % 7 < ts1.daynr() % 7 + (ts2.time_part_diff(ts1) < 0)); + break; + } + case DAY: { + int day = ts2.daynr() - ts1.daynr(); + diff = day - (ts2.time_part_diff(ts1) < 0); + break; + } + case HOUR: { + int hour = (ts2.daynr() - ts1.daynr()) * 24 + (ts2.hour() - ts1.hour()); + diff = hour - ((ts2.minute() * 60 + ts2.second()) < (ts1.minute() * 60 - ts1.second())); + break; + } + case MINUTE: { + int minute = (ts2.daynr() - ts1.daynr()) * 24 * 60 + (ts2.hour() - ts1.hour()) * 60 + + (ts2.minute() - ts1.minute()); + diff = minute - (ts2.second() < ts1.second()); + break; + } + case SECOND: { + diff = ts2.second_diff(ts1); + break; + } + default: + return DateTimeVal::null(); } int64_t count = period.val; int64_t step = diff - (diff % count + count) % count + (type == FLOOR ? 0 : count); @@ -619,23 +587,24 @@ DateTimeVal TimestampFunctions::time_round( return new_ts_val; } -StringVal TimestampFunctions::date_format( - FunctionContext* ctx, const DateTimeVal& ts_val, const StringVal& format) { +StringVal TimestampFunctions::date_format(FunctionContext* ctx, const DateTimeVal& ts_val, + const StringVal& format) { if (ts_val.is_null || format.is_null) { return StringVal::null(); } DateTimeValue ts_value = DateTimeValue::from_datetime_val(ts_val); - FormatCtx* fc = reinterpret_cast(ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + FormatCtx* fc = + reinterpret_cast(ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); if (UNLIKELY(fc == nullptr)) { // prepare phase failed, calculate at runtime StringVal new_fmt = convert_format(ctx, format); - if (DateTimeValue::compute_format_len((const char*) new_fmt.ptr, new_fmt.len) >= 128) { + if (DateTimeValue::compute_format_len((const char*)new_fmt.ptr, new_fmt.len) >= 128) { return StringVal::null(); } char buf[128]; - if (!ts_value.to_format_string((const char*) new_fmt.ptr, new_fmt.len, buf)) { + if 
(!ts_value.to_format_string((const char*)new_fmt.ptr, new_fmt.len, buf)) { return StringVal::null(); } return AnyValUtil::from_string_temp(ctx, buf); @@ -646,14 +615,13 @@ StringVal TimestampFunctions::date_format( } char buf[128]; - if (!ts_value.to_format_string((const char*) fc->fmt.ptr, fc->fmt.len, buf)) { + if (!ts_value.to_format_string((const char*)fc->fmt.ptr, fc->fmt.len, buf)) { return StringVal::null(); } return AnyValUtil::from_string_temp(ctx, buf); } -DateTimeVal TimestampFunctions::from_days( - FunctionContext* ctx, const IntVal& days) { +DateTimeVal TimestampFunctions::from_days(FunctionContext* ctx, const IntVal& days) { if (days.is_null) { return DateTimeVal::null(); } @@ -666,8 +634,7 @@ DateTimeVal TimestampFunctions::from_days( return ts_val; } -IntVal TimestampFunctions::to_days( - FunctionContext* ctx, const DateTimeVal& ts_val) { +IntVal TimestampFunctions::to_days(FunctionContext* ctx, const DateTimeVal& ts_val) { if (ts_val.is_null) { return IntVal::null(); } @@ -675,8 +642,8 @@ IntVal TimestampFunctions::to_days( return IntVal(ts_value.daynr()); } -DoubleVal TimestampFunctions::time_diff( - FunctionContext* ctx, const DateTimeVal& ts_val1, const DateTimeVal& ts_val2) { +DoubleVal TimestampFunctions::time_diff(FunctionContext* ctx, const DateTimeVal& ts_val1, + const DateTimeVal& ts_val2) { if (ts_val1.is_null || ts_val2.is_null) { return DoubleVal::null(); } @@ -689,8 +656,8 @@ DoubleVal TimestampFunctions::time_diff( return DoubleVal::null(); } -IntVal TimestampFunctions::date_diff( - FunctionContext* ctx, const DateTimeVal& ts_val1, const DateTimeVal& ts_val2) { +IntVal TimestampFunctions::date_diff(FunctionContext* ctx, const DateTimeVal& ts_val1, + const DateTimeVal& ts_val2) { if (ts_val1.is_null || ts_val2.is_null) { return IntVal::null(); } @@ -700,14 +667,12 @@ IntVal TimestampFunctions::date_diff( } // TimeZone correlation functions. 
-DateTimeVal TimestampFunctions::timestamp( - FunctionContext* ctx, const DateTimeVal& val) { +DateTimeVal TimestampFunctions::timestamp(FunctionContext* ctx, const DateTimeVal& val) { return val; } // FROM_UNIXTIME() without format -StringVal TimestampFunctions::from_unix( - FunctionContext* context, const IntVal& unix_time) { +StringVal TimestampFunctions::from_unix(FunctionContext* context, const IntVal& unix_time) { if (unix_time.is_null || unix_time.val < 0 || unix_time.val > INT_MAX) { return StringVal::null(); } @@ -722,8 +687,8 @@ StringVal TimestampFunctions::from_unix( } // FROM_UNIXTIME() with format -StringVal TimestampFunctions::from_unix( - FunctionContext* context, const IntVal& unix_time, const StringVal& fmt) { +StringVal TimestampFunctions::from_unix(FunctionContext* context, const IntVal& unix_time, + const StringVal& fmt) { if (unix_time.is_null || fmt.is_null || unix_time.val < 0 || unix_time.val > INT_MAX) { return StringVal::null(); } @@ -733,7 +698,8 @@ StringVal TimestampFunctions::from_unix( return StringVal::null(); } - FormatCtx* fc = reinterpret_cast(context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + FormatCtx* fc = reinterpret_cast( + context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); if (UNLIKELY(fc == nullptr)) { // prepare phase failed, calculate at runtime StringVal new_fmt = convert_format(context, fmt); @@ -749,7 +715,7 @@ StringVal TimestampFunctions::from_unix( } char buf[128]; - if (!dtv.to_format_string((const char*) fc->fmt.ptr, fc->fmt.len, buf)) { + if (!dtv.to_format_string((const char*)fc->fmt.ptr, fc->fmt.len, buf)) { return StringVal::null(); } return AnyValUtil::from_string_temp(context, buf); @@ -761,10 +727,9 @@ IntVal TimestampFunctions::to_unix(FunctionContext* context) { } // UNIX_TIMESTAMP() -IntVal TimestampFunctions::to_unix( - FunctionContext* context, const DateTimeValue& ts_value) { +IntVal TimestampFunctions::to_unix(FunctionContext* context, const DateTimeValue& ts_value) { 
int64_t timestamp; - if(!ts_value.unix_timestamp(&timestamp, context->impl()->state()->timezone_obj())) { + if (!ts_value.unix_timestamp(&timestamp, context->impl()->state()->timezone_obj())) { return IntVal::null(); } else { //To compatible to mysql, timestamp not between 1970-01-01 00:00:00 ~ 2038-01-01 00:00:00 return 0 @@ -775,22 +740,21 @@ IntVal TimestampFunctions::to_unix( } // UNIX_TIMESTAMP() -IntVal TimestampFunctions::to_unix( - FunctionContext* context, const StringVal& string_val, const StringVal& fmt) { +IntVal TimestampFunctions::to_unix(FunctionContext* context, const StringVal& string_val, + const StringVal& fmt) { if (string_val.is_null || fmt.is_null) { return IntVal::null(); } DateTimeValue tv; - if (!tv.from_date_format_str( - (const char *)fmt.ptr, fmt.len, (const char *)string_val.ptr, string_val.len)) { + if (!tv.from_date_format_str((const char*)fmt.ptr, fmt.len, (const char*)string_val.ptr, + string_val.len)) { return IntVal::null(); } return to_unix(context, tv); } // UNIX_TIMESTAMP() -IntVal TimestampFunctions::to_unix( - FunctionContext* context, const DateTimeVal& ts_val) { +IntVal TimestampFunctions::to_unix(FunctionContext* context, const DateTimeVal& ts_val) { if (ts_val.is_null) { return IntVal::null(); } @@ -811,7 +775,7 @@ DateTimeVal TimestampFunctions::utc_timestamp(FunctionContext* context) { DateTimeVal TimestampFunctions::now(FunctionContext* context) { DateTimeValue dtv; if (!dtv.from_unixtime(context->impl()->state()->timestamp_ms() / 1000, - context->impl()->state()->timezone_obj())) { + context->impl()->state()->timezone_obj())) { return DateTimeVal::null(); } @@ -823,7 +787,7 @@ DateTimeVal TimestampFunctions::now(FunctionContext* context) { DoubleVal TimestampFunctions::curtime(FunctionContext* context) { DateTimeValue dtv; if (!dtv.from_unixtime(context->impl()->state()->timestamp_ms() / 1000, - context->impl()->state()->timezone_obj())) { + context->impl()->state()->timezone_obj())) { return DoubleVal::null(); } @@ -833,7
+797,7 @@ DoubleVal TimestampFunctions::curtime(FunctionContext* context) { DateTimeVal TimestampFunctions::curdate(FunctionContext* context) { DateTimeValue dtv; if (!dtv.from_unixtime(context->impl()->state()->timestamp_ms() / 1000, - context->impl()->state()->timezone_obj())) { + context->impl()->state()->timezone_obj())) { return DateTimeVal::null(); } dtv.set_type(TIME_DATE); @@ -843,16 +807,12 @@ DateTimeVal TimestampFunctions::curdate(FunctionContext* context) { return return_val; } -void TimestampFunctions::convert_tz_prepare( - doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope scope) { - - if (scope != FunctionContext::FRAGMENT_LOCAL - || context->get_num_args() != 3 - || context->get_arg_type(1)->type != doris_udf::FunctionContext::Type::TYPE_VARCHAR - || context->get_arg_type(2)->type != doris_udf::FunctionContext::Type::TYPE_VARCHAR - || !context->is_arg_constant(1) - || !context->is_arg_constant(2)) { +void TimestampFunctions::convert_tz_prepare(doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope) { + if (scope != FunctionContext::FRAGMENT_LOCAL || context->get_num_args() != 3 || + context->get_arg_type(1)->type != doris_udf::FunctionContext::Type::TYPE_VARCHAR || + context->get_arg_type(2)->type != doris_udf::FunctionContext::Type::TYPE_VARCHAR || + !context->is_arg_constant(1) || !context->is_arg_constant(2)) { return; } @@ -865,7 +825,8 @@ void TimestampFunctions::convert_tz_prepare( ctc->is_valid = false; return; } - if (!TimezoneUtils::find_cctz_time_zone(std::string((char*) from->ptr, from->len), ctc->from_tz)) { + if (!TimezoneUtils::find_cctz_time_zone(std::string((char*)from->ptr, from->len), + ctc->from_tz)) { ctc->is_valid = false; return; } @@ -876,7 +837,7 @@ void TimestampFunctions::convert_tz_prepare( ctc->is_valid = false; return; } - if (!TimezoneUtils::find_cctz_time_zone(std::string((char*) to->ptr, to->len), ctc->to_tz)) { + if 
(!TimezoneUtils::find_cctz_time_zone(std::string((char*)to->ptr, to->len), ctc->to_tz)) { ctc->is_valid = false; return; } @@ -886,16 +847,17 @@ void TimestampFunctions::convert_tz_prepare( } DateTimeVal TimestampFunctions::convert_tz(FunctionContext* ctx, const DateTimeVal& ts_val, - const StringVal& from_tz, const StringVal& to_tz) { - const DateTimeValue &ts_value = DateTimeValue::from_datetime_val(ts_val); - ConvertTzCtx* ctc = reinterpret_cast(ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + const StringVal& from_tz, const StringVal& to_tz) { + const DateTimeValue& ts_value = DateTimeValue::from_datetime_val(ts_val); + ConvertTzCtx* ctc = reinterpret_cast( + ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); if (UNLIKELY(ctc == nullptr)) { int64_t timestamp; - if(!ts_value.unix_timestamp(&timestamp, std::string((char *)from_tz.ptr, from_tz.len))) { + if (!ts_value.unix_timestamp(&timestamp, std::string((char*)from_tz.ptr, from_tz.len))) { return DateTimeVal::null(); } DateTimeValue ts_value2; - if (!ts_value2.from_unixtime(timestamp, std::string((char *)to_tz.ptr, to_tz.len))) { + if (!ts_value2.from_unixtime(timestamp, std::string((char*)to_tz.ptr, to_tz.len))) { return DateTimeVal::null(); } @@ -909,7 +871,7 @@ DateTimeVal TimestampFunctions::convert_tz(FunctionContext* ctx, const DateTimeV } int64_t timestamp; - if(!ts_value.unix_timestamp(&timestamp, ctc->from_tz)) { + if (!ts_value.unix_timestamp(&timestamp, ctc->from_tz)) { return DateTimeVal::null(); } DateTimeValue ts_value2; @@ -922,17 +884,17 @@ DateTimeVal TimestampFunctions::convert_tz(FunctionContext* ctx, const DateTimeV return return_val; } -void TimestampFunctions::convert_tz_close( - doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope scope) { +void TimestampFunctions::convert_tz_close(doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope) { if (scope != FunctionContext::FRAGMENT_LOCAL) { return; } - ConvertTzCtx* ctc =
reinterpret_cast(context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + ConvertTzCtx* ctc = reinterpret_cast( + context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); if (ctc != nullptr) { delete ctc; } } -} +} // namespace doris diff --git a/be/src/exprs/timestamp_functions.h b/be/src/exprs/timestamp_functions.h index 5aeafeec53c4a9..a0dffd55091951 100644 --- a/be/src/exprs/timestamp_functions.h +++ b/be/src/exprs/timestamp_functions.h @@ -18,13 +18,14 @@ #ifndef DORIS_BE_SRC_QUERY_EXPRS_TIMESTAMP_FUNCTIONS_H #define DORIS_BE_SRC_QUERY_EXPRS_TIMESTAMP_FUNCTIONS_H -#include #include -#include #include +#include +#include #include -#include "runtime/string_value.h" + #include "runtime/datetime_value.h" +#include "runtime/string_value.h" namespace doris { @@ -54,326 +55,349 @@ class TimestampFunctions { static void init(); // Functions to extract parts of the timestamp, return integers. - static doris_udf::IntVal year( - doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val); - static doris_udf::IntVal quarter( - doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val); - static doris_udf::IntVal month( - doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val); - static doris_udf::IntVal day_of_week( - doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val); - static doris_udf::IntVal day_of_month( - doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val); - static doris_udf::IntVal day_of_year( - doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val); - static doris_udf::IntVal week_of_year( - doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val); - static doris_udf::IntVal hour( - doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val); - static doris_udf::IntVal minute( - doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val); - static doris_udf::IntVal second( - 
doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val); + static doris_udf::IntVal year(doris_udf::FunctionContext* context, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::IntVal quarter(doris_udf::FunctionContext* context, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::IntVal month(doris_udf::FunctionContext* context, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::IntVal day_of_week(doris_udf::FunctionContext* context, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::IntVal day_of_month(doris_udf::FunctionContext* context, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::IntVal day_of_year(doris_udf::FunctionContext* context, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::IntVal week_of_year(doris_udf::FunctionContext* context, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::IntVal hour(doris_udf::FunctionContext* context, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::IntVal minute(doris_udf::FunctionContext* context, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::IntVal second(doris_udf::FunctionContext* context, + const doris_udf::DateTimeVal& ts_val); // Date/time functions. 
- static doris_udf::DateTimeVal to_date( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::IntVal date_diff( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, - const doris_udf::DateTimeVal& ts_val2); - static doris_udf::DoubleVal time_diff( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, - const doris_udf::DateTimeVal& ts_val2); - static doris_udf::DateTimeVal years_add( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::DateTimeVal years_sub( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::DateTimeVal months_add( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::DateTimeVal months_sub( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::DateTimeVal weeks_add( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::DateTimeVal weeks_sub( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::DateTimeVal days_add( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::DateTimeVal days_sub( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::DateTimeVal hours_add( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::DateTimeVal hours_sub( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::DateTimeVal minutes_add( - 
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::DateTimeVal minutes_sub( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::DateTimeVal seconds_add( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::DateTimeVal seconds_sub( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::DateTimeVal micros_add( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::DateTimeVal micros_sub( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count); - static doris_udf::StringVal date_format( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::StringVal& format); - static doris_udf::DateTimeVal from_days( - doris_udf::FunctionContext* ctx, const doris_udf::IntVal& days); - static doris_udf::IntVal to_days( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::DateTimeVal str_to_date( - doris_udf::FunctionContext* ctx, const doris_udf::StringVal& str, - const doris_udf::StringVal& format); - static doris_udf::StringVal month_name( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::StringVal day_name( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal to_date(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::IntVal date_diff(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val1, + const doris_udf::DateTimeVal& ts_val2); + static doris_udf::DoubleVal time_diff(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& 
ts_val1, + const doris_udf::DateTimeVal& ts_val2); + static doris_udf::DateTimeVal years_add(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count); + static doris_udf::DateTimeVal years_sub(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count); + static doris_udf::DateTimeVal months_add(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count); + static doris_udf::DateTimeVal months_sub(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count); + static doris_udf::DateTimeVal weeks_add(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count); + static doris_udf::DateTimeVal weeks_sub(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count); + static doris_udf::DateTimeVal days_add(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count); + static doris_udf::DateTimeVal days_sub(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count); + static doris_udf::DateTimeVal hours_add(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count); + static doris_udf::DateTimeVal hours_sub(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count); + static doris_udf::DateTimeVal minutes_add(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count); + static doris_udf::DateTimeVal minutes_sub(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count); + static doris_udf::DateTimeVal seconds_add(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& 
count); + static doris_udf::DateTimeVal seconds_sub(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count); + static doris_udf::DateTimeVal micros_add(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count); + static doris_udf::DateTimeVal micros_sub(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count); + static doris_udf::StringVal date_format(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::StringVal& format); + static doris_udf::DateTimeVal from_days(doris_udf::FunctionContext* ctx, + const doris_udf::IntVal& days); + static doris_udf::IntVal to_days(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal str_to_date(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& str, + const doris_udf::StringVal& format); + static doris_udf::StringVal month_name(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::StringVal day_name(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); // timestamp function template - static doris_udf::BigIntVal timestamp_diff( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const doris_udf::DateTimeVal& ts_val2); - static doris_udf::BigIntVal years_diff( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const doris_udf::DateTimeVal& ts_val2); - static doris_udf::BigIntVal months_diff( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const doris_udf::DateTimeVal& ts_val2); - static doris_udf::BigIntVal weeks_diff( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const doris_udf::DateTimeVal& ts_val2); - static doris_udf::BigIntVal days_diff( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const 
doris_udf::DateTimeVal& ts_val2); - static doris_udf::BigIntVal hours_diff( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const doris_udf::DateTimeVal& ts_val2); - static doris_udf::BigIntVal minutes_diff( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const doris_udf::DateTimeVal& ts_val2); - static doris_udf::BigIntVal seconds_diff( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const doris_udf::DateTimeVal& ts_val2); + static doris_udf::BigIntVal timestamp_diff(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val1, + const doris_udf::DateTimeVal& ts_val2); + static doris_udf::BigIntVal years_diff(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val1, + const doris_udf::DateTimeVal& ts_val2); + static doris_udf::BigIntVal months_diff(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val1, + const doris_udf::DateTimeVal& ts_val2); + static doris_udf::BigIntVal weeks_diff(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val1, + const doris_udf::DateTimeVal& ts_val2); + static doris_udf::BigIntVal days_diff(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val1, + const doris_udf::DateTimeVal& ts_val2); + static doris_udf::BigIntVal hours_diff(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val1, + const doris_udf::DateTimeVal& ts_val2); + static doris_udf::BigIntVal minutes_diff(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val1, + const doris_udf::DateTimeVal& ts_val2); + static doris_udf::BigIntVal seconds_diff(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val1, + const doris_udf::DateTimeVal& ts_val2); // Period functions. 
template - static doris_udf::DateTimeVal time_round( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period, const doris_udf::DateTimeVal& origin); - - static doris_udf::DateTimeVal year_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::DateTimeVal year_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period); - static doris_udf::DateTimeVal year_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::DateTimeVal& origin); - static doris_udf::DateTimeVal year_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period, const doris_udf::DateTimeVal& origin); - - static doris_udf::DateTimeVal year_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::DateTimeVal year_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period); - static doris_udf::DateTimeVal year_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::DateTimeVal& origin); - static doris_udf::DateTimeVal year_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period, const doris_udf::DateTimeVal& origin); - - static doris_udf::DateTimeVal month_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::DateTimeVal month_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period); - static doris_udf::DateTimeVal month_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::DateTimeVal& origin); - static doris_udf::DateTimeVal month_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const 
doris_udf::IntVal& period, const doris_udf::DateTimeVal& origin); - - static doris_udf::DateTimeVal month_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::DateTimeVal month_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period); - static doris_udf::DateTimeVal month_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::DateTimeVal& origin); - static doris_udf::DateTimeVal month_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period, const doris_udf::DateTimeVal& origin); - - static doris_udf::DateTimeVal week_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::DateTimeVal week_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period); - static doris_udf::DateTimeVal week_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::DateTimeVal& origin); - static doris_udf::DateTimeVal week_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period, const doris_udf::DateTimeVal& origin); - - static doris_udf::DateTimeVal week_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::DateTimeVal week_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period); - static doris_udf::DateTimeVal week_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::DateTimeVal& origin); - static doris_udf::DateTimeVal week_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period, const doris_udf::DateTimeVal& origin); - - static doris_udf::DateTimeVal day_floor( - 
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::DateTimeVal day_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period); - static doris_udf::DateTimeVal day_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::DateTimeVal& origin); - static doris_udf::DateTimeVal day_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period, const doris_udf::DateTimeVal& origin); - - static doris_udf::DateTimeVal day_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::DateTimeVal day_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period); - static doris_udf::DateTimeVal day_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::DateTimeVal& origin); - static doris_udf::DateTimeVal day_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period, const doris_udf::DateTimeVal& origin); - - static doris_udf::DateTimeVal hour_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::DateTimeVal hour_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period); - static doris_udf::DateTimeVal hour_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::DateTimeVal& origin); - static doris_udf::DateTimeVal hour_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period, const doris_udf::DateTimeVal& origin); - - static doris_udf::DateTimeVal hour_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::DateTimeVal hour_ceil( - doris_udf::FunctionContext* 
ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period); - static doris_udf::DateTimeVal hour_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::DateTimeVal& origin); - static doris_udf::DateTimeVal hour_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period, const doris_udf::DateTimeVal& origin); - - static doris_udf::DateTimeVal minute_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::DateTimeVal minute_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period); - static doris_udf::DateTimeVal minute_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::DateTimeVal& origin); - static doris_udf::DateTimeVal minute_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period, const doris_udf::DateTimeVal& origin); - - static doris_udf::DateTimeVal minute_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::DateTimeVal minute_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period); - static doris_udf::DateTimeVal minute_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::DateTimeVal& origin); - static doris_udf::DateTimeVal minute_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period, const doris_udf::DateTimeVal& origin); - - static doris_udf::DateTimeVal second_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::DateTimeVal second_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period); - static doris_udf::DateTimeVal 
second_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::DateTimeVal& origin); - static doris_udf::DateTimeVal second_floor( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period, const doris_udf::DateTimeVal& origin); - - static doris_udf::DateTimeVal second_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val); - static doris_udf::DateTimeVal second_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period); - static doris_udf::DateTimeVal second_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::DateTimeVal& origin); - static doris_udf::DateTimeVal second_ceil( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& period, const doris_udf::DateTimeVal& origin); - + static doris_udf::DateTimeVal time_round(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period, + const doris_udf::DateTimeVal& origin); + + static doris_udf::DateTimeVal year_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal year_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period); + static doris_udf::DateTimeVal year_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::DateTimeVal& origin); + static doris_udf::DateTimeVal year_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period, + const doris_udf::DateTimeVal& origin); + + static doris_udf::DateTimeVal year_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal year_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + 
const doris_udf::IntVal& period); + static doris_udf::DateTimeVal year_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::DateTimeVal& origin); + static doris_udf::DateTimeVal year_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period, + const doris_udf::DateTimeVal& origin); + + static doris_udf::DateTimeVal month_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal month_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period); + static doris_udf::DateTimeVal month_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::DateTimeVal& origin); + static doris_udf::DateTimeVal month_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period, + const doris_udf::DateTimeVal& origin); + + static doris_udf::DateTimeVal month_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal month_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period); + static doris_udf::DateTimeVal month_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::DateTimeVal& origin); + static doris_udf::DateTimeVal month_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period, + const doris_udf::DateTimeVal& origin); + + static doris_udf::DateTimeVal week_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal week_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period); + static doris_udf::DateTimeVal week_floor(doris_udf::FunctionContext* ctx, + const 
doris_udf::DateTimeVal& ts_val, + const doris_udf::DateTimeVal& origin); + static doris_udf::DateTimeVal week_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period, + const doris_udf::DateTimeVal& origin); + + static doris_udf::DateTimeVal week_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal week_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period); + static doris_udf::DateTimeVal week_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::DateTimeVal& origin); + static doris_udf::DateTimeVal week_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period, + const doris_udf::DateTimeVal& origin); + + static doris_udf::DateTimeVal day_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal day_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period); + static doris_udf::DateTimeVal day_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::DateTimeVal& origin); + static doris_udf::DateTimeVal day_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period, + const doris_udf::DateTimeVal& origin); + + static doris_udf::DateTimeVal day_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal day_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period); + static doris_udf::DateTimeVal day_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::DateTimeVal& origin); + static doris_udf::DateTimeVal day_ceil(doris_udf::FunctionContext* ctx, + 
const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period, + const doris_udf::DateTimeVal& origin); + + static doris_udf::DateTimeVal hour_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal hour_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period); + static doris_udf::DateTimeVal hour_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::DateTimeVal& origin); + static doris_udf::DateTimeVal hour_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period, + const doris_udf::DateTimeVal& origin); + + static doris_udf::DateTimeVal hour_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal hour_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period); + static doris_udf::DateTimeVal hour_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::DateTimeVal& origin); + static doris_udf::DateTimeVal hour_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period, + const doris_udf::DateTimeVal& origin); + + static doris_udf::DateTimeVal minute_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal minute_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period); + static doris_udf::DateTimeVal minute_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::DateTimeVal& origin); + static doris_udf::DateTimeVal minute_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period, + const doris_udf::DateTimeVal& origin); + + static 
doris_udf::DateTimeVal minute_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal minute_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period); + static doris_udf::DateTimeVal minute_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::DateTimeVal& origin); + static doris_udf::DateTimeVal minute_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period, + const doris_udf::DateTimeVal& origin); + + static doris_udf::DateTimeVal second_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal second_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period); + static doris_udf::DateTimeVal second_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::DateTimeVal& origin); + static doris_udf::DateTimeVal second_floor(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period, + const doris_udf::DateTimeVal& origin); + + static doris_udf::DateTimeVal second_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val); + static doris_udf::DateTimeVal second_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period); + static doris_udf::DateTimeVal second_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::DateTimeVal& origin); + static doris_udf::DateTimeVal second_ceil(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& period, + const doris_udf::DateTimeVal& origin); + // TimeZone correlation functions. 
- static doris_udf::DateTimeVal timestamp( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& val); + static doris_udf::DateTimeVal timestamp(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& val); // Helper for add/sub functions on the time portion. template - static doris_udf::DateTimeVal timestamp_time_op( - doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val, - const doris_udf::IntVal& count, bool is_add); + static doris_udf::DateTimeVal timestamp_time_op(doris_udf::FunctionContext* ctx, + const doris_udf::DateTimeVal& ts_val, + const doris_udf::IntVal& count, bool is_add); static doris_udf::DateTimeVal now(doris_udf::FunctionContext* context); static doris_udf::DoubleVal curtime(doris_udf::FunctionContext* context); static doris_udf::DateTimeVal curdate(doris_udf::FunctionContext* context); static doris_udf::DateTimeVal utc_timestamp(doris_udf::FunctionContext* context); /// Returns the current time. - static doris_udf::IntVal to_unix( - FunctionContext* context, const DateTimeValue& ts_value); + static doris_udf::IntVal to_unix(FunctionContext* context, const DateTimeValue& ts_value); static doris_udf::IntVal to_unix(doris_udf::FunctionContext* context); /// Converts 'tv_val' to a unix time_t - static doris_udf::IntVal to_unix( - doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& tv_val); + static doris_udf::IntVal to_unix(doris_udf::FunctionContext* context, + const doris_udf::DateTimeVal& tv_val); /// Parses 'string_val' based on the format 'fmt'. - static doris_udf::IntVal to_unix( - doris_udf::FunctionContext* context, const doris_udf::StringVal& string_val, - const doris_udf::StringVal& fmt); + static doris_udf::IntVal to_unix(doris_udf::FunctionContext* context, + const doris_udf::StringVal& string_val, + const doris_udf::StringVal& fmt); /// Return a timestamp string from a unix time_t /// Optional second argument is the format of the string. 
/// TIME is the integer type of the unix time argument. - static doris_udf::StringVal from_unix( - doris_udf::FunctionContext* context, const doris_udf::IntVal& unix_time); - static doris_udf::StringVal from_unix( - doris_udf::FunctionContext* context, const doris_udf::IntVal& unix_time, - const doris_udf::StringVal& fmt); + static doris_udf::StringVal from_unix(doris_udf::FunctionContext* context, + const doris_udf::IntVal& unix_time); + static doris_udf::StringVal from_unix(doris_udf::FunctionContext* context, + const doris_udf::IntVal& unix_time, + const doris_udf::StringVal& fmt); static doris_udf::DateTimeVal convert_tz(doris_udf::FunctionContext* ctx, - const doris_udf::DateTimeVal& ts_val, const doris_udf::StringVal& from_tz, - const doris_udf::StringVal& to_tz); + const doris_udf::DateTimeVal& ts_val, + const doris_udf::StringVal& from_tz, + const doris_udf::StringVal& to_tz); // Helper function to check date/time format strings. // TODO: eventually return format converted from Java to Boost. @@ -386,22 +410,18 @@ class TimestampFunctions { // Issue a warning for a bad format string. 
static void report_bad_format(const StringVal* format); - static void format_prepare( - doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope scope); + static void format_prepare(doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope); - static void format_close( - doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope scope); + static void format_close(doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope); - static void convert_tz_prepare( - doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope scope); + static void convert_tz_prepare(doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope); - static void convert_tz_close( - doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope scope); + static void convert_tz_close(doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope); }; -} +} // namespace doris #endif diff --git a/be/src/exprs/tuple_is_null_predicate.cpp b/be/src/exprs/tuple_is_null_predicate.cpp index a0dfce41255cda..31ca79ad84f1b8 100644 --- a/be/src/exprs/tuple_is_null_predicate.cpp +++ b/be/src/exprs/tuple_is_null_predicate.cpp @@ -23,14 +23,13 @@ namespace doris { -TupleIsNullPredicate::TupleIsNullPredicate(const TExprNode& node) : - Predicate(node), - _tuple_ids(node.tuple_is_null_pred.tuple_ids.begin(), - node.tuple_is_null_pred.tuple_ids.end()) { -} +TupleIsNullPredicate::TupleIsNullPredicate(const TExprNode& node) + : Predicate(node), + _tuple_ids(node.tuple_is_null_pred.tuple_ids.begin(), + node.tuple_is_null_pred.tuple_ids.end()) {} -Status TupleIsNullPredicate::prepare( - RuntimeState* state, const RowDescriptor& row_desc, ExprContext* ctx) { +Status TupleIsNullPredicate::prepare(RuntimeState* state, const RowDescriptor& row_desc, + ExprContext* ctx) { 
RETURN_IF_ERROR(Expr::prepare(state, row_desc, ctx)); DCHECK_EQ(0, _children.size()); @@ -65,4 +64,4 @@ std::string TupleIsNullPredicate::debug_string() const { return out.str(); } -} +} // namespace doris diff --git a/be/src/exprs/tuple_is_null_predicate.h b/be/src/exprs/tuple_is_null_predicate.h index f891c6225b87ef..21258871960523 100644 --- a/be/src/exprs/tuple_is_null_predicate.h +++ b/be/src/exprs/tuple_is_null_predicate.h @@ -25,9 +25,9 @@ namespace doris { class TExprNode; -class TupleIsNullPredicate: public Predicate { +class TupleIsNullPredicate : public Predicate { public: - virtual Expr* clone(ObjectPool* pool) const override { + virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new TupleIsNullPredicate(*this)); } @@ -36,8 +36,7 @@ class TupleIsNullPredicate: public Predicate { TupleIsNullPredicate(const TExprNode& node); - virtual Status prepare( - RuntimeState* state, const RowDescriptor& row_desc, ExprContext* ctx); + virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc, ExprContext* ctx); virtual BooleanVal get_boolean_val(ExprContext* ctx, TupleRow* row); virtual std::string debug_string() const; @@ -47,6 +46,6 @@ class TupleIsNullPredicate: public Predicate { std::vector _tuple_idxs; }; -} +} // namespace doris #endif diff --git a/be/src/exprs/udf_builtins.cpp b/be/src/exprs/udf_builtins.cpp old mode 100755 new mode 100644 index b04b7d22767e33..ba218b4bf940de --- a/be/src/exprs/udf_builtins.cpp +++ b/be/src/exprs/udf_builtins.cpp @@ -19,6 +19,7 @@ #include #include + #include "common/logging.h" namespace doris { @@ -62,12 +63,9 @@ DecimalV2Val UdfBuiltins::decimal_abs(FunctionContext* context, const DecimalV2V return result; } - //for test -BigIntVal UdfBuiltins::add_two_number( - FunctionContext* context, - const BigIntVal& v1, - const BigIntVal& v2) { +BigIntVal UdfBuiltins::add_two_number(FunctionContext* context, const BigIntVal& v1, + const BigIntVal& v2) { if (v1.is_null || v2.is_null) { return 
BigIntVal::null(); } @@ -76,17 +74,14 @@ BigIntVal UdfBuiltins::add_two_number( } //for test -StringVal UdfBuiltins::sub_string( - FunctionContext* context, - const StringVal& v1, - const IntVal& begin, - const IntVal& len) { +StringVal UdfBuiltins::sub_string(FunctionContext* context, const StringVal& v1, + const IntVal& begin, const IntVal& len) { if (v1.is_null || begin.is_null || len.is_null) { return StringVal::null(); } int substring_len = (len.val > v1.len) ? v1.len : len.val; - StringVal v = StringVal(context, substring_len); + StringVal v = StringVal(context, substring_len); memcpy(v.ptr, v1.ptr + begin.val, substring_len); return v; } @@ -109,5 +104,4 @@ StringVal UdfBuiltins::lower(FunctionContext* context, const StringVal& v) { return result; } -} - +} // namespace doris diff --git a/be/src/exprs/udf_builtins.h b/be/src/exprs/udf_builtins.h old mode 100755 new mode 100644 index 7781ae77e40597..40b994d1baef91 --- a/be/src/exprs/udf_builtins.h +++ b/be/src/exprs/udf_builtins.h @@ -28,27 +28,25 @@ namespace doris { // implemented. 
class UdfBuiltins { public: - static doris_udf::DoubleVal abs(doris_udf::FunctionContext* context, - const doris_udf::DoubleVal& v); - static doris_udf::DecimalVal decimal_abs(doris_udf::FunctionContext* context, - const doris_udf::DecimalVal& v); - static doris_udf::DecimalV2Val decimal_abs(doris_udf::FunctionContext* context, - const doris_udf::DecimalV2Val& v); - static doris_udf::BigIntVal add_two_number( - doris_udf::FunctionContext* context, - const doris_udf::BigIntVal& v1, - const doris_udf::BigIntVal& v2); - static doris_udf::StringVal sub_string( - doris_udf::FunctionContext* context, - const doris_udf::StringVal& v1, - const doris_udf::IntVal& begin, - const doris_udf::IntVal& len); + static doris_udf::DoubleVal abs(doris_udf::FunctionContext* context, + const doris_udf::DoubleVal& v); + static doris_udf::DecimalVal decimal_abs(doris_udf::FunctionContext* context, + const doris_udf::DecimalVal& v); + static doris_udf::DecimalV2Val decimal_abs(doris_udf::FunctionContext* context, + const doris_udf::DecimalV2Val& v); + static doris_udf::BigIntVal add_two_number(doris_udf::FunctionContext* context, + const doris_udf::BigIntVal& v1, + const doris_udf::BigIntVal& v2); + static doris_udf::StringVal sub_string(doris_udf::FunctionContext* context, + const doris_udf::StringVal& v1, + const doris_udf::IntVal& begin, + const doris_udf::IntVal& len); static doris_udf::DoubleVal pi(doris_udf::FunctionContext* context); - static doris_udf::StringVal lower(doris_udf::FunctionContext* context, - const doris_udf::StringVal&); + static doris_udf::StringVal lower(doris_udf::FunctionContext* context, + const doris_udf::StringVal&); }; -} +} // namespace doris #endif diff --git a/be/src/geo/geo_common.cpp b/be/src/geo/geo_common.cpp index 0857069dd4c5e3..a7d6bbf141c6ae 100644 --- a/be/src/geo/geo_common.cpp +++ b/be/src/geo/geo_common.cpp @@ -51,4 +51,4 @@ std::ostream& operator<<(std::ostream& os, GeoParseStatus status) { return os; } -} +} // namespace doris diff --git 
a/be/src/geo/geo_common.h b/be/src/geo/geo_common.h index a5e79f131313a0..b51fc8b5a8026a 100644 --- a/be/src/geo/geo_common.h +++ b/be/src/geo/geo_common.h @@ -49,4 +49,4 @@ enum GeoParseStatus { std::string to_string(GeoParseStatus status); std::ostream& operator<<(std::ostream& os, GeoParseStatus status); -} +} // namespace doris diff --git a/be/src/geo/geo_functions.cpp b/be/src/geo/geo_functions.cpp index cd9dcad272d3e5..4baf9245cca23a 100644 --- a/be/src/geo/geo_functions.cpp +++ b/be/src/geo/geo_functions.cpp @@ -17,8 +17,8 @@ #include "geo/geo_functions.h" -#include #include +#include #include "common/logging.h" #include "geo/geo_types.h" @@ -30,10 +30,9 @@ void GeoFunctions::init() { FLAGS_s2debug = false; } -DoubleVal GeoFunctions::st_distance_sphere(FunctionContext* ctx, - const DoubleVal& x_lng, const DoubleVal& x_lat, - const DoubleVal& y_lng, const DoubleVal& y_lat) { - +DoubleVal GeoFunctions::st_distance_sphere(FunctionContext* ctx, const DoubleVal& x_lng, + const DoubleVal& x_lat, const DoubleVal& y_lng, + const DoubleVal& y_lat) { if (x_lng.is_null || x_lat.is_null || y_lng.is_null || y_lat.is_null) { return DoubleVal::null(); } @@ -108,8 +107,8 @@ StringVal GeoFunctions::st_as_wkt(doris_udf::FunctionContext* ctx, } struct StConstructState { - StConstructState() : is_null(false) { } - ~StConstructState() { } + StConstructState() : is_null(false) {} + ~StConstructState() {} bool is_null; std::string encoded_buf; @@ -139,7 +138,8 @@ void GeoFunctions::st_from_wkt_prepare_common(FunctionContext* ctx, str->is_null = true; } else { GeoParseStatus status; - std::unique_ptr shape(GeoShape::from_wkt((const char*)str->ptr, str->len, &status)); + std::unique_ptr shape( + GeoShape::from_wkt((const char*)str->ptr, str->len, &status)); if (shape == nullptr || (shape_type != GEO_SHAPE_ANY && shape->type() != shape_type)) { state->is_null = true; } else { @@ -149,13 +149,13 @@ void GeoFunctions::st_from_wkt_prepare_common(FunctionContext* ctx, 
ctx->set_function_state(scope, state.release()); } -StringVal GeoFunctions::st_from_wkt_common(FunctionContext* ctx, - const StringVal& wkt, +StringVal GeoFunctions::st_from_wkt_common(FunctionContext* ctx, const StringVal& wkt, GeoShapeType shape_type) { if (wkt.is_null) { return StringVal::null(); } - StConstructState* state = (StConstructState*)ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL); + StConstructState* state = + (StConstructState*)ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL); if (state == nullptr) { GeoParseStatus status; std::unique_ptr shape(GeoShape::from_wkt((const char*)wkt.ptr, wkt.len, &status)); @@ -202,14 +202,13 @@ void GeoFunctions::st_circle_prepare(doris_udf::FunctionContext* ctx, ctx->set_function_state(scope, state.release()); } -doris_udf::StringVal GeoFunctions::st_circle(FunctionContext* ctx, - const DoubleVal& lng, - const DoubleVal& lat, - const DoubleVal& radius) { +doris_udf::StringVal GeoFunctions::st_circle(FunctionContext* ctx, const DoubleVal& lng, + const DoubleVal& lat, const DoubleVal& radius) { if (lng.is_null || lat.is_null || radius.is_null) { return StringVal::null(); } - StConstructState* state = (StConstructState*) ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL); + StConstructState* state = + (StConstructState*)ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL); if (state == nullptr) { std::unique_ptr circle(new GeoCircle()); auto res = circle->init(lng.val, lat.val, radius.val); @@ -231,7 +230,7 @@ doris_udf::StringVal GeoFunctions::st_circle(FunctionContext* ctx, } struct StContainsState { - StContainsState() : is_null(false), shapes{nullptr, nullptr} { } + StContainsState() : is_null(false), shapes{nullptr, nullptr} {} ~StContainsState() { delete shapes[0]; delete shapes[1]; @@ -270,19 +269,19 @@ void GeoFunctions::st_contains_close(doris_udf::FunctionContext* ctx, if (scope != FunctionContext::FRAGMENT_LOCAL) { return; } - StContainsState* contains_ctx = 
reinterpret_cast(ctx->get_function_state(scope)); + StContainsState* contains_ctx = + reinterpret_cast(ctx->get_function_state(scope)); delete contains_ctx; } - -doris_udf::BooleanVal GeoFunctions::st_contains( - doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& lhs, - const doris_udf::StringVal& rhs) { + +doris_udf::BooleanVal GeoFunctions::st_contains(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& lhs, + const doris_udf::StringVal& rhs) { if (lhs.is_null || rhs.is_null) { return BooleanVal::null(); } const StContainsState* state = reinterpret_cast( - ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); if (state != nullptr && state->is_null) { return BooleanVal::null(); } @@ -304,4 +303,4 @@ doris_udf::BooleanVal GeoFunctions::st_contains( return shapes[0]->contains(shapes[1]); } -} +} // namespace doris diff --git a/be/src/geo/geo_functions.h b/be/src/geo/geo_functions.h index 30be9f0e3fda4f..ef1e896e2ffb37 100644 --- a/be/src/geo/geo_functions.h +++ b/be/src/geo/geo_functions.h @@ -17,9 +17,8 @@ #pragma once -#include "udf/udf.h" - #include "geo/geo_common.h" +#include "udf/udf.h" namespace doris { @@ -28,9 +27,9 @@ class GeoFunctions { static void init(); // compute distance between two points in earth sphere - static DoubleVal st_distance_sphere(FunctionContext* ctx, - const DoubleVal& x_lng, const DoubleVal& x_lat, - const DoubleVal& y_lng, const DoubleVal& y_lat); + static DoubleVal st_distance_sphere(FunctionContext* ctx, const DoubleVal& x_lng, + const DoubleVal& x_lat, const DoubleVal& y_lng, + const DoubleVal& y_lat); // point static doris_udf::StringVal st_point(doris_udf::FunctionContext* ctx, @@ -52,48 +51,46 @@ class GeoFunctions { static void st_from_wkt_close(doris_udf::FunctionContext*, doris_udf::FunctionContext::FunctionStateScope); static doris_udf::StringVal st_from_wkt_common(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& wkt, - 
GeoShapeType shape_type); + const doris_udf::StringVal& wkt, + GeoShapeType shape_type); static void st_from_wkt_prepare(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope) __attribute__ ((used)) { + doris_udf::FunctionContext::FunctionStateScope scope) + __attribute__((used)) { st_from_wkt_prepare_common(ctx, scope, GEO_SHAPE_ANY); } - static doris_udf::StringVal st_from_wkt( - doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& wkt) __attribute__ ((used)) { + static doris_udf::StringVal st_from_wkt(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& wkt) __attribute__((used)) { return st_from_wkt_common(ctx, wkt, GEO_SHAPE_ANY); } // for line - static void st_line_prepare( - doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope) __attribute__ ((used)) { + static void st_line_prepare(doris_udf::FunctionContext* ctx, + doris_udf::FunctionContext::FunctionStateScope scope) + __attribute__((used)) { st_from_wkt_prepare_common(ctx, scope, GEO_SHAPE_LINE_STRING); } - static doris_udf::StringVal st_line( - doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& wkt) __attribute__ ((used)) { + static doris_udf::StringVal st_line(doris_udf::FunctionContext* ctx, + const doris_udf::StringVal& wkt) __attribute__((used)) { return st_from_wkt_common(ctx, wkt, GEO_SHAPE_LINE_STRING); } - + // for polygon - static void st_polygon_prepare( - doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope) __attribute__ ((used)) { + static void st_polygon_prepare(doris_udf::FunctionContext* ctx, + doris_udf::FunctionContext::FunctionStateScope scope) + __attribute__((used)) { st_from_wkt_prepare_common(ctx, scope, GEO_SHAPE_POLYGON); } - static doris_udf::StringVal st_polygon( - doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& wkt) __attribute__ ((used)) { + static doris_udf::StringVal st_polygon(doris_udf::FunctionContext* ctx, + const 
doris_udf::StringVal& wkt) __attribute__((used)) { return st_from_wkt_common(ctx, wkt, GEO_SHAPE_POLYGON); } // for circle static doris_udf::StringVal st_circle(doris_udf::FunctionContext* ctx, - const doris_udf::DoubleVal& center_lng, - const doris_udf::DoubleVal& center_lat, - const doris_udf::DoubleVal& radius_meter); + const doris_udf::DoubleVal& center_lng, + const doris_udf::DoubleVal& center_lat, + const doris_udf::DoubleVal& radius_meter); static void st_circle_prepare(doris_udf::FunctionContext*, doris_udf::FunctionContext::FunctionStateScope); @@ -110,4 +107,4 @@ class GeoFunctions { doris_udf::FunctionContext::FunctionStateScope); }; -} +} // namespace doris diff --git a/be/src/geo/geo_types.cpp b/be/src/geo/geo_types.cpp index 1c1de72954a771..936bb0408848d8 100644 --- a/be/src/geo/geo_types.cpp +++ b/be/src/geo/geo_types.cpp @@ -17,15 +17,15 @@ #include "geo/geo_types.h" -#include -#include -#include - -#include #include #include +#include #include #include +#include + +#include +#include #include "geo/wkt_parse.h" @@ -73,15 +73,14 @@ static void remove_duplicate_points(std::vector* points) { if ((*points)[rhs] != (*points)[lhs]) { lhs++; if (lhs != rhs) { - (*points)[lhs] = (*points)[rhs]; + (*points)[lhs] = (*points)[rhs]; } } } points->resize(lhs + 1); } -static GeoParseStatus to_s2loop(const GeoCoordinateList& coords, - std::unique_ptr* loop) { +static GeoParseStatus to_s2loop(const GeoCoordinateList& coords, std::unique_ptr* loop) { // 1. 
convert all coordinates to points std::vector points(coords.list.size()); for (int i = 0; i < coords.list.size(); ++i) { @@ -326,7 +325,6 @@ bool GeoPolygon::contains(const GeoShape* rhs) const { } return _polygon->MayIntersect(S2Cell(point->point())); #endif - } case GEO_SHAPE_LINE_STRING: { const GeoLine* line = (const GeoLine*)rhs; @@ -395,7 +393,6 @@ bool GeoCircle::contains(const GeoShape* rhs) const { } return _polygon->MayIntersect(S2Cell(point->point())); #endif - } #if 0 case GEO_SHAPE_LINE_STRING: { @@ -517,4 +514,4 @@ bool GeoMultiPolygon::contains(const GeoShape* rhs) { } #endif -} +} // namespace doris diff --git a/be/src/geo/geo_types.h b/be/src/geo/geo_types.h index 6768c01cba68eb..3704f20256ee98 100644 --- a/be/src/geo/geo_types.h +++ b/be/src/geo/geo_types.h @@ -17,14 +17,14 @@ #pragma once -#include -#include -#include - #include #include -#include #include +#include + +#include +#include +#include #include "geo/geo_common.h" #include "geo/wkt_parse_type.h" @@ -33,7 +33,7 @@ namespace doris { class GeoShape { public: - virtual ~GeoShape() { } + virtual ~GeoShape() {} virtual GeoShapeType type() const = 0; @@ -59,8 +59,8 @@ class GeoShape { class GeoPoint : public GeoShape { public: - GeoPoint() { } - ~GeoPoint() override { } + GeoPoint() {} + ~GeoPoint() override {} GeoParseStatus from_coord(double x, double y); GeoParseStatus from_coord(const GeoCoordinate& point); @@ -85,8 +85,8 @@ class GeoPoint : public GeoShape { class GeoLine : public GeoShape { public: - GeoLine() { } - ~GeoLine() override { } + GeoLine() {} + ~GeoLine() override {} GeoParseStatus from_coords(const GeoCoordinateList& list); @@ -94,6 +94,7 @@ class GeoLine : public GeoShape { const S2Polyline* polyline() const { return _polyline.get(); } std::string as_wkt() const override; + protected: void encode(std::string* buf) override; bool decode(const void* data, size_t size) override; @@ -104,8 +105,8 @@ class GeoLine : public GeoShape { class GeoPolygon : public GeoShape { public: 
- GeoPolygon() { } - ~GeoPolygon() override { } + GeoPolygon() {} + ~GeoPolygon() override {} GeoParseStatus from_coords(const GeoCoordinateListList& list); @@ -125,8 +126,8 @@ class GeoPolygon : public GeoShape { class GeoCircle : public GeoShape { public: - GeoCircle() { } - ~GeoCircle() { } + GeoCircle() {} + ~GeoCircle() {} GeoParseStatus init(double lng, double lat, double radius); @@ -195,5 +196,4 @@ class GeoCircle : public GeoShape { #endif -} - +} // namespace doris diff --git a/be/src/geo/wkt_parse.cpp b/be/src/geo/wkt_parse.cpp index 5699ea97afe97e..fba2e203277c60 100644 --- a/be/src/geo/wkt_parse.cpp +++ b/be/src/geo/wkt_parse.cpp @@ -17,8 +17,8 @@ #include "geo/wkt_parse.h" -#include "geo/wkt_parse_ctx.h" #include "geo/geo_types.h" +#include "geo/wkt_parse_ctx.h" #include "geo/wkt_parse_type.h" #include "geo/wkt_yacc.y.hpp" #define YYSTYPE WKT_STYPE @@ -46,4 +46,4 @@ GeoParseStatus WktParse::parse_wkt(const char* str, size_t len, GeoShape** shape return ctx.parse_status; } -} +} // namespace doris diff --git a/be/src/geo/wkt_parse.h b/be/src/geo/wkt_parse.h index 891c3e73c0145b..4adb84a355b8a1 100644 --- a/be/src/geo/wkt_parse.h +++ b/be/src/geo/wkt_parse.h @@ -32,4 +32,4 @@ class WktParse { static GeoParseStatus parse_wkt(const char* str, size_t len, GeoShape** shape); }; -} +} // namespace doris diff --git a/be/src/geo/wkt_parse_ctx.h b/be/src/geo/wkt_parse_ctx.h index 43092e04c9e689..0c0f77adeb9e8c 100644 --- a/be/src/geo/wkt_parse_ctx.h +++ b/be/src/geo/wkt_parse_ctx.h @@ -29,4 +29,3 @@ struct WktParseContext { doris::GeoShape* shape = nullptr; doris::GeoParseStatus parse_status = doris::GEO_PARSE_OK; }; - diff --git a/be/src/geo/wkt_parse_type.h b/be/src/geo/wkt_parse_type.h index 67c66e45740096..8d4204049a22c6 100644 --- a/be/src/geo/wkt_parse_type.h +++ b/be/src/geo/wkt_parse_type.h @@ -19,7 +19,7 @@ #include -// This file include +// This file include namespace doris { struct GeoCoordinate { @@ -28,9 +28,7 @@ struct GeoCoordinate { }; struct 
GeoCoordinateList { - void add(const GeoCoordinate& coordinate) { - list.push_back(coordinate); - } + void add(const GeoCoordinate& coordinate) { list.push_back(coordinate); } std::vector list; }; @@ -40,11 +38,8 @@ struct GeoCoordinateListList { delete item; } } - void add(GeoCoordinateList* coordinates) { - list.push_back(coordinates); - } + void add(GeoCoordinateList* coordinates) { list.push_back(coordinates); } std::vector list; }; -} - +} // namespace doris diff --git a/be/src/gutil/arm_instruction_set_select.h b/be/src/gutil/arm_instruction_set_select.h index 87bc183358b813..e2cc71eacc4f3c 100644 --- a/be/src/gutil/arm_instruction_set_select.h +++ b/be/src/gutil/arm_instruction_set_select.h @@ -8,45 +8,31 @@ #ifndef ARM_INSTRUCTION_SET_SELECT_H_ #define ARM_INSTRUCTION_SET_SELECT_H_ -#if defined(__ARM_ARCH_7__) || \ - defined(__ARM_ARCH_7R__) || \ - defined(__ARM_ARCH_7A__) -# define ARMV7 1 +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7A__) +#define ARMV7 1 #endif -#if defined(ARMV7) || \ - defined(__ARM_ARCH_6__) || \ - defined(__ARM_ARCH_6J__) || \ - defined(__ARM_ARCH_6K__) || \ - defined(__ARM_ARCH_6Z__) || \ - defined(__ARM_ARCH_6T2__) || \ - defined(__ARM_ARCH_6ZK__) -# define ARMV6 1 +#if defined(ARMV7) || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6ZK__) +#define ARMV6 1 #endif -#if defined(ARMV6) || \ - defined(__ARM_ARCH_5T__) || \ - defined(__ARM_ARCH_5E__) || \ - defined(__ARM_ARCH_5TE__) || \ - defined(__ARM_ARCH_5TEJ__) -# define ARMV5 1 +#if defined(ARMV6) || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5E__) || \ + defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) +#define ARMV5 1 #endif -#if defined(ARMV5) || \ - defined(__ARM_ARCH_4__) || \ - defined(__ARM_ARCH_4T__) -# define ARMV4 1 +#if defined(ARMV5) || defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__) +#define ARMV4 1 
#endif -#if defined(ARMV4) || \ - defined(__ARM_ARCH_3__) || \ - defined(__ARM_ARCH_3M__) -# define ARMV3 1 +#if defined(ARMV4) || defined(__ARM_ARCH_3__) || defined(__ARM_ARCH_3M__) +#define ARMV3 1 #endif -#if defined(ARMV3) || \ - defined(__ARM_ARCH_2__) -# define ARMV2 1 +#if defined(ARMV3) || defined(__ARM_ARCH_2__) +#define ARMV2 1 #endif -#endif // ARM_INSTRUCTION_SET_SELECT_H_ +#endif // ARM_INSTRUCTION_SET_SELECT_H_ diff --git a/be/src/gutil/atomic_refcount.h b/be/src/gutil/atomic_refcount.h index 2b5cb36099998f..640951b71b06a6 100644 --- a/be/src/gutil/atomic_refcount.h +++ b/be/src/gutil/atomic_refcount.h @@ -50,104 +50,100 @@ namespace base { // more than 1, in which case they may use RefCountIncN/RefCountDecN. // Increment a reference count by "increment", which must exceed 0. -inline void RefCountIncN(volatile Atomic32 *ptr, Atomic32 increment) { - DCHECK_GT(increment, 0); - base::subtle::NoBarrier_AtomicIncrement(ptr, increment); +inline void RefCountIncN(volatile Atomic32* ptr, Atomic32 increment) { + DCHECK_GT(increment, 0); + base::subtle::NoBarrier_AtomicIncrement(ptr, increment); } // Decrement a reference count by "decrement", which must exceed 0, // and return whether the result is non-zero. // Insert barriers to ensure that state written before the reference count // became zero will be visible to a thread that has just made the count zero. -inline bool RefCountDecN(volatile Atomic32 *ptr, Atomic32 decrement) { - DCHECK_GT(decrement, 0); - bool res = base::subtle::Barrier_AtomicIncrement(ptr, -decrement) != 0; - return res; +inline bool RefCountDecN(volatile Atomic32* ptr, Atomic32 decrement) { + DCHECK_GT(decrement, 0); + bool res = base::subtle::Barrier_AtomicIncrement(ptr, -decrement) != 0; + return res; } // Increment a reference count by 1. 
-inline void RefCountInc(volatile Atomic32 *ptr) { - base::RefCountIncN(ptr, 1); +inline void RefCountInc(volatile Atomic32* ptr) { + base::RefCountIncN(ptr, 1); } // Decrement a reference count by 1 and return whether the result is non-zero. // Insert barriers to ensure that state written before the reference count // became zero will be visible to a thread that has just made the count zero. -inline bool RefCountDec(volatile Atomic32 *ptr) { - return base::RefCountDecN(ptr, 1); +inline bool RefCountDec(volatile Atomic32* ptr) { + return base::RefCountDecN(ptr, 1); } // Return whether the reference count is one. // If the reference count is used in the conventional way, a -// refrerence count of 1 implies that the current thread owns the -// reference and no other thread shares it. +// refrerence count of 1 implies that the current thread owns the +// reference and no other thread shares it. // This call performs the test for a referenece count of one, and // performs the memory barrier needed for the owning thread // to act on the object, knowing that it has exclusive access to the // object. -inline bool RefCountIsOne(const volatile Atomic32 *ptr) { - return base::subtle::Acquire_Load(ptr) == 1; +inline bool RefCountIsOne(const volatile Atomic32* ptr) { + return base::subtle::Acquire_Load(ptr) == 1; } // Return whether the reference count is zero. With conventional object // referencing counting, the object will be destroyed, so the reference count // should never be zero. Hence this is generally used for a debug check. -inline bool RefCountIsZero(const volatile Atomic32 *ptr) { - return subtle::Acquire_Load(ptr) == 0; +inline bool RefCountIsZero(const volatile Atomic32* ptr) { + return subtle::Acquire_Load(ptr) == 0; } #if BASE_HAS_ATOMIC64 // Implementations for Atomic64, if available. 
-inline void RefCountIncN(volatile base::subtle::Atomic64 *ptr, - base::subtle::Atomic64 increment) { - DCHECK_GT(increment, 0); - base::subtle::NoBarrier_AtomicIncrement(ptr, increment); +inline void RefCountIncN(volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 increment) { + DCHECK_GT(increment, 0); + base::subtle::NoBarrier_AtomicIncrement(ptr, increment); } -inline bool RefCountDecN(volatile base::subtle::Atomic64 *ptr, - base::subtle::Atomic64 decrement) { - DCHECK_GT(decrement, 0); - return base::subtle::Barrier_AtomicIncrement(ptr, -decrement) != 0; +inline bool RefCountDecN(volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 decrement) { + DCHECK_GT(decrement, 0); + return base::subtle::Barrier_AtomicIncrement(ptr, -decrement) != 0; } -inline void RefCountInc(volatile base::subtle::Atomic64 *ptr) { - base::RefCountIncN(ptr, 1); +inline void RefCountInc(volatile base::subtle::Atomic64* ptr) { + base::RefCountIncN(ptr, 1); } -inline bool RefCountDec(volatile base::subtle::Atomic64 *ptr) { - return base::RefCountDecN(ptr, 1); +inline bool RefCountDec(volatile base::subtle::Atomic64* ptr) { + return base::RefCountDecN(ptr, 1); } -inline bool RefCountIsOne(const volatile base::subtle::Atomic64 *ptr) { - return base::subtle::Acquire_Load(ptr) == 1; +inline bool RefCountIsOne(const volatile base::subtle::Atomic64* ptr) { + return base::subtle::Acquire_Load(ptr) == 1; } -inline bool RefCountIsZero(const volatile base::subtle::Atomic64 *ptr) { - return base::subtle::Acquire_Load(ptr) == 0; +inline bool RefCountIsZero(const volatile base::subtle::Atomic64* ptr) { + return base::subtle::Acquire_Load(ptr) == 0; } #endif #ifdef AtomicWordCastType // Implementations for AtomicWord, if it's a different type from the above. 
-inline void RefCountIncN(volatile AtomicWord *ptr, AtomicWord increment) { - base::RefCountIncN( - reinterpret_cast(ptr), increment); +inline void RefCountIncN(volatile AtomicWord* ptr, AtomicWord increment) { + base::RefCountIncN(reinterpret_cast(ptr), increment); } -inline bool RefCountDecN(volatile AtomicWord *ptr, AtomicWord decrement) { - return base::RefCountDecN( - reinterpret_cast(ptr), decrement); +inline bool RefCountDecN(volatile AtomicWord* ptr, AtomicWord decrement) { + return base::RefCountDecN(reinterpret_cast(ptr), decrement); } -inline void RefCountInc(volatile AtomicWord *ptr) { - base::RefCountIncN(ptr, 1); +inline void RefCountInc(volatile AtomicWord* ptr) { + base::RefCountIncN(ptr, 1); } -inline bool RefCountDec(volatile AtomicWord *ptr) { - return base::RefCountDecN(ptr, 1); +inline bool RefCountDec(volatile AtomicWord* ptr) { + return base::RefCountDecN(ptr, 1); } -inline bool RefCountIsOne(const volatile AtomicWord *ptr) { - return base::subtle::Acquire_Load( - reinterpret_cast(ptr)) == 1; +inline bool RefCountIsOne(const volatile AtomicWord* ptr) { + return base::subtle::Acquire_Load(reinterpret_cast(ptr)) == + 1; } -inline bool RefCountIsZero(const volatile AtomicWord *ptr) { - return base::subtle::Acquire_Load( - reinterpret_cast(ptr)) == 0; +inline bool RefCountIsZero(const volatile AtomicWord* ptr) { + return base::subtle::Acquire_Load(reinterpret_cast(ptr)) == + 0; } #endif } // namespace base -#endif // BASE_ATOMIC_REFCOUNT_H_ +#endif // BASE_ATOMIC_REFCOUNT_H_ diff --git a/be/src/gutil/atomicops-internals-gcc.h b/be/src/gutil/atomicops-internals-gcc.h index 26e32e08cc059d..ebac0dca8b48a6 100644 --- a/be/src/gutil/atomicops-internals-gcc.h +++ b/be/src/gutil/atomicops-internals-gcc.h @@ -40,6 +40,7 @@ #include #include + #include "gutil/basictypes.h" typedef int32_t Atomic32; @@ -53,170 +54,148 @@ inline void MemoryBarrier() { __sync_synchronize(); } -inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 
old_value, +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value, Atomic32 new_value) { - Atomic32 prev_value = old_value; - __atomic_compare_exchange_n(ptr, &prev_value, new_value, - 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); - return prev_value; + Atomic32 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + return prev_value; } -inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, - Atomic32 new_value) { - return __atomic_exchange_n(const_cast(ptr), new_value, __ATOMIC_RELAXED); +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value) { + return __atomic_exchange_n(const_cast(ptr), new_value, __ATOMIC_RELAXED); } -inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, - Atomic32 new_value) { - return __atomic_exchange_n(const_cast(ptr), new_value, __ATOMIC_ACQUIRE); +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value) { + return __atomic_exchange_n(const_cast(ptr), new_value, __ATOMIC_ACQUIRE); } -inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, - Atomic32 new_value) { - return __atomic_exchange_n(const_cast(ptr), new_value, __ATOMIC_RELEASE); +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value) { + return __atomic_exchange_n(const_cast(ptr), new_value, __ATOMIC_RELEASE); } -inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, - Atomic32 increment) { - return __atomic_add_fetch(ptr, increment, __ATOMIC_RELAXED); +inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment) { + return __atomic_add_fetch(ptr, increment, __ATOMIC_RELAXED); } -inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, - Atomic32 increment) { - return __atomic_add_fetch(ptr, increment, __ATOMIC_SEQ_CST); +inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment) { + return 
__atomic_add_fetch(ptr, increment, __ATOMIC_SEQ_CST); } -inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value, Atomic32 new_value) { - Atomic32 prev_value = old_value; - __atomic_compare_exchange_n(ptr, &prev_value, new_value, - 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); - return prev_value; + Atomic32 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return prev_value; } -inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value, Atomic32 new_value) { - Atomic32 prev_value = old_value; - __atomic_compare_exchange_n(ptr, &prev_value, new_value, - 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); - return prev_value; + Atomic32 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); + return prev_value; } inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { - *ptr = value; + *ptr = value; } inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { - *ptr = value; - MemoryBarrier(); + *ptr = value; + MemoryBarrier(); } inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { - MemoryBarrier(); - *ptr = value; + MemoryBarrier(); + *ptr = value; } inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { - return *ptr; + return *ptr; } inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { - Atomic32 value = *ptr; - MemoryBarrier(); - return value; + Atomic32 value = *ptr; + MemoryBarrier(); + return value; } inline Atomic32 Release_Load(volatile const Atomic32* ptr) { - MemoryBarrier(); - return *ptr; + MemoryBarrier(); + return *ptr; } // 64-bit versions -inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, +inline Atomic64 
NoBarrier_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_value, Atomic64 new_value) { - Atomic64 prev_value = old_value; - __atomic_compare_exchange_n(ptr, &prev_value, new_value, - 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); - return prev_value; + Atomic64 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + return prev_value; } -inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - return __atomic_exchange_n(const_cast(ptr), new_value, __ATOMIC_RELAXED); +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value) { + return __atomic_exchange_n(const_cast(ptr), new_value, __ATOMIC_RELAXED); } -inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - return __atomic_exchange_n(const_cast(ptr), new_value, __ATOMIC_ACQUIRE); +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value) { + return __atomic_exchange_n(const_cast(ptr), new_value, __ATOMIC_ACQUIRE); } -inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - return __atomic_exchange_n(const_cast(ptr), new_value, __ATOMIC_RELEASE); +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value) { + return __atomic_exchange_n(const_cast(ptr), new_value, __ATOMIC_RELEASE); } -inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, - Atomic64 increment) { - return __atomic_add_fetch(ptr, increment, __ATOMIC_RELAXED); +inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) { + return __atomic_add_fetch(ptr, increment, __ATOMIC_RELAXED); } -inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, - Atomic64 increment) { - return __sync_add_and_fetch(ptr, increment); +inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) { + return __sync_add_and_fetch(ptr, increment); } -inline Atomic64 
Acquire_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_value, Atomic64 new_value) { - Atomic64 prev_value = old_value; - __atomic_compare_exchange_n(ptr, &prev_value, new_value, - 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); - return prev_value; + Atomic64 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return prev_value; } -inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_value, Atomic64 new_value) { - Atomic64 prev_value = old_value; - __atomic_compare_exchange_n(ptr, &prev_value, new_value, - 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); - return prev_value; + Atomic64 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); + return prev_value; } inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { - *ptr = value; + *ptr = value; } inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { - *ptr = value; - MemoryBarrier(); + *ptr = value; + MemoryBarrier(); } inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { - MemoryBarrier(); - *ptr = value; + MemoryBarrier(); + *ptr = value; } inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { - return *ptr; + return *ptr; } inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { - Atomic64 value = *ptr; - MemoryBarrier(); - return value; + Atomic64 value = *ptr; + MemoryBarrier(); + return value; } inline Atomic64 Release_Load(volatile const Atomic64* ptr) { - MemoryBarrier(); - return *ptr; + MemoryBarrier(); + return *ptr; } -} // namespace base::subtle -} // namespace base +} // namespace subtle +} // namespace base -#endif // BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_ +#endif // BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_ diff --git 
a/be/src/gutil/atomicops-internals-tsan.h b/be/src/gutil/atomicops-internals-tsan.h index aecaefc3b4a91f..4f0efcb5c4af88 100644 --- a/be/src/gutil/atomicops-internals-tsan.h +++ b/be/src/gutil/atomicops-internals-tsan.h @@ -19,12 +19,11 @@ // Features of this x86. Values may not be correct before main() is run, // but are set conservatively. struct AtomicOps_x86CPUFeatureStruct { - bool has_amd_lock_mb_bug; // Processor has AMD memory-barrier bug; do lfence - // after acquire compare-and-swap. - bool has_sse2; // Processor has SSE2. + bool has_amd_lock_mb_bug; // Processor has AMD memory-barrier bug; do lfence + // after acquire compare-and-swap. + bool has_sse2; // Processor has SSE2. }; -BASE_EXPORT extern struct AtomicOps_x86CPUFeatureStruct - AtomicOps_Internalx86CPUFeatures; +BASE_EXPORT extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures; #define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory") @@ -39,179 +38,155 @@ namespace subtle { typedef int32_t Atomic32; typedef int64_t Atomic64; -inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32 *ptr, - Atomic32 old_value, - Atomic32 new_value) { - Atomic32 cmp = old_value; - __tsan_atomic32_compare_exchange_strong(ptr, &cmp, new_value, - __tsan_memory_order_relaxed, __tsan_memory_order_relaxed); - return cmp; +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value, + Atomic32 new_value) { + Atomic32 cmp = old_value; + __tsan_atomic32_compare_exchange_strong(ptr, &cmp, new_value, __tsan_memory_order_relaxed, + __tsan_memory_order_relaxed); + return cmp; } -inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32 *ptr, - Atomic32 new_value) { - return __tsan_atomic32_exchange(ptr, new_value, - __tsan_memory_order_relaxed); +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value) { + return __tsan_atomic32_exchange(ptr, new_value, __tsan_memory_order_relaxed); } -inline Atomic32 
Acquire_AtomicExchange(volatile Atomic32 *ptr, - Atomic32 new_value) { - return __tsan_atomic32_exchange(ptr, new_value, - __tsan_memory_order_acquire); +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value) { + return __tsan_atomic32_exchange(ptr, new_value, __tsan_memory_order_acquire); } -inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr, - Atomic32 new_value) { - return __tsan_atomic32_exchange(ptr, new_value, - __tsan_memory_order_release); +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value) { + return __tsan_atomic32_exchange(ptr, new_value, __tsan_memory_order_release); } -inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr, - Atomic32 increment) { - return increment + __tsan_atomic32_fetch_add(ptr, increment, - __tsan_memory_order_relaxed); +inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment) { + return increment + __tsan_atomic32_fetch_add(ptr, increment, __tsan_memory_order_relaxed); } -inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32 *ptr, - Atomic32 increment) { - return increment + __tsan_atomic32_fetch_add(ptr, increment, - __tsan_memory_order_acq_rel); +inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment) { + return increment + __tsan_atomic32_fetch_add(ptr, increment, __tsan_memory_order_acq_rel); } -inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr, - Atomic32 old_value, - Atomic32 new_value) { - Atomic32 cmp = old_value; - __tsan_atomic32_compare_exchange_strong(ptr, &cmp, new_value, - __tsan_memory_order_acquire, __tsan_memory_order_acquire); - return cmp; +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value, + Atomic32 new_value) { + Atomic32 cmp = old_value; + __tsan_atomic32_compare_exchange_strong(ptr, &cmp, new_value, __tsan_memory_order_acquire, + __tsan_memory_order_acquire); + return cmp; } -inline Atomic32 Release_CompareAndSwap(volatile 
Atomic32 *ptr, - Atomic32 old_value, - Atomic32 new_value) { - Atomic32 cmp = old_value; - __tsan_atomic32_compare_exchange_strong(ptr, &cmp, new_value, - __tsan_memory_order_release, __tsan_memory_order_relaxed); - return cmp; +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value, + Atomic32 new_value) { + Atomic32 cmp = old_value; + __tsan_atomic32_compare_exchange_strong(ptr, &cmp, new_value, __tsan_memory_order_release, + __tsan_memory_order_relaxed); + return cmp; } -inline void NoBarrier_Store(volatile Atomic32 *ptr, Atomic32 value) { - __tsan_atomic32_store(ptr, value, __tsan_memory_order_relaxed); +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + __tsan_atomic32_store(ptr, value, __tsan_memory_order_relaxed); } -inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) { - __tsan_atomic32_store(ptr, value, __tsan_memory_order_relaxed); - __tsan_atomic_thread_fence(__tsan_memory_order_seq_cst); +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + __tsan_atomic32_store(ptr, value, __tsan_memory_order_relaxed); + __tsan_atomic_thread_fence(__tsan_memory_order_seq_cst); } -inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) { - __tsan_atomic32_store(ptr, value, __tsan_memory_order_release); +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + __tsan_atomic32_store(ptr, value, __tsan_memory_order_release); } -inline Atomic32 NoBarrier_Load(volatile const Atomic32 *ptr) { - return __tsan_atomic32_load(ptr, __tsan_memory_order_relaxed); +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return __tsan_atomic32_load(ptr, __tsan_memory_order_relaxed); } -inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) { - return __tsan_atomic32_load(ptr, __tsan_memory_order_acquire); +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + return __tsan_atomic32_load(ptr, __tsan_memory_order_acquire); } -inline Atomic32 Release_Load(volatile 
const Atomic32 *ptr) { - __tsan_atomic_thread_fence(__tsan_memory_order_seq_cst); - return __tsan_atomic32_load(ptr, __tsan_memory_order_relaxed); +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + __tsan_atomic_thread_fence(__tsan_memory_order_seq_cst); + return __tsan_atomic32_load(ptr, __tsan_memory_order_relaxed); } -inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64 *ptr, - Atomic64 old_value, - Atomic64 new_value) { - Atomic64 cmp = old_value; - __tsan_atomic64_compare_exchange_strong(ptr, &cmp, new_value, - __tsan_memory_order_relaxed, __tsan_memory_order_relaxed); - return cmp; +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_value, + Atomic64 new_value) { + Atomic64 cmp = old_value; + __tsan_atomic64_compare_exchange_strong(ptr, &cmp, new_value, __tsan_memory_order_relaxed, + __tsan_memory_order_relaxed); + return cmp; } -inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64 *ptr, - Atomic64 new_value) { - return __tsan_atomic64_exchange(ptr, new_value, __tsan_memory_order_relaxed); +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value) { + return __tsan_atomic64_exchange(ptr, new_value, __tsan_memory_order_relaxed); } -inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr, - Atomic64 new_value) { - return __tsan_atomic64_exchange(ptr, new_value, __tsan_memory_order_acquire); +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value) { + return __tsan_atomic64_exchange(ptr, new_value, __tsan_memory_order_acquire); } -inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr, - Atomic64 new_value) { - return __tsan_atomic64_exchange(ptr, new_value, __tsan_memory_order_release); +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value) { + return __tsan_atomic64_exchange(ptr, new_value, __tsan_memory_order_release); } -inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr, - Atomic64 increment) { - 
return increment + __tsan_atomic64_fetch_add(ptr, increment, - __tsan_memory_order_relaxed); +inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) { + return increment + __tsan_atomic64_fetch_add(ptr, increment, __tsan_memory_order_relaxed); } -inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64 *ptr, - Atomic64 increment) { - return increment + __tsan_atomic64_fetch_add(ptr, increment, - __tsan_memory_order_acq_rel); +inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) { + return increment + __tsan_atomic64_fetch_add(ptr, increment, __tsan_memory_order_acq_rel); } -inline void NoBarrier_Store(volatile Atomic64 *ptr, Atomic64 value) { - __tsan_atomic64_store(ptr, value, __tsan_memory_order_relaxed); +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + __tsan_atomic64_store(ptr, value, __tsan_memory_order_relaxed); } -inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) { - __tsan_atomic64_store(ptr, value, __tsan_memory_order_relaxed); - __tsan_atomic_thread_fence(__tsan_memory_order_seq_cst); +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + __tsan_atomic64_store(ptr, value, __tsan_memory_order_relaxed); + __tsan_atomic_thread_fence(__tsan_memory_order_seq_cst); } -inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) { - __tsan_atomic64_store(ptr, value, __tsan_memory_order_release); +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + __tsan_atomic64_store(ptr, value, __tsan_memory_order_release); } -inline Atomic64 NoBarrier_Load(volatile const Atomic64 *ptr) { - return __tsan_atomic64_load(ptr, __tsan_memory_order_relaxed); +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + return __tsan_atomic64_load(ptr, __tsan_memory_order_relaxed); } -inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) { - return __tsan_atomic64_load(ptr, __tsan_memory_order_acquire); +inline Atomic64 
Acquire_Load(volatile const Atomic64* ptr) { + return __tsan_atomic64_load(ptr, __tsan_memory_order_acquire); } -inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { - __tsan_atomic_thread_fence(__tsan_memory_order_seq_cst); - return __tsan_atomic64_load(ptr, __tsan_memory_order_relaxed); +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + __tsan_atomic_thread_fence(__tsan_memory_order_seq_cst); + return __tsan_atomic64_load(ptr, __tsan_memory_order_relaxed); } -inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64 *ptr, - Atomic64 old_value, - Atomic64 new_value) { - Atomic64 cmp = old_value; - __tsan_atomic64_compare_exchange_strong(ptr, &cmp, new_value, - __tsan_memory_order_acquire, __tsan_memory_order_acquire); - return cmp; +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_value, + Atomic64 new_value) { + Atomic64 cmp = old_value; + __tsan_atomic64_compare_exchange_strong(ptr, &cmp, new_value, __tsan_memory_order_acquire, + __tsan_memory_order_acquire); + return cmp; } -inline Atomic64 Release_CompareAndSwap(volatile Atomic64 *ptr, - Atomic64 old_value, - Atomic64 new_value) { - Atomic64 cmp = old_value; - __tsan_atomic64_compare_exchange_strong(ptr, &cmp, new_value, - __tsan_memory_order_release, __tsan_memory_order_relaxed); - return cmp; +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_value, + Atomic64 new_value) { + Atomic64 cmp = old_value; + __tsan_atomic64_compare_exchange_strong(ptr, &cmp, new_value, __tsan_memory_order_release, + __tsan_memory_order_relaxed); + return cmp; } inline void MemoryBarrier() { - __tsan_atomic_thread_fence(__tsan_memory_order_seq_cst); + __tsan_atomic_thread_fence(__tsan_memory_order_seq_cst); } -inline void PauseCPU() { -} +inline void PauseCPU() {} -} // namespace base::subtle -} // namespace base +} // namespace subtle +} // namespace base #undef ATOMICOPS_COMPILER_BARRIER -#endif // BASE_ATOMICOPS_INTERNALS_TSAN_H_ +#endif // 
BASE_ATOMICOPS_INTERNALS_TSAN_H_ diff --git a/be/src/gutil/atomicops-internals-x86.cc b/be/src/gutil/atomicops-internals-x86.cc index ce35f1a82c4069..68b970471eb1de 100644 --- a/be/src/gutil/atomicops-internals-x86.cc +++ b/be/src/gutil/atomicops-internals-x86.cc @@ -19,15 +19,14 @@ // // All rights reserved. - // This module gets enough CPU information to optimize the // atomicops module on x86. #include "gutil/atomicops-internals-x86.h" +#include #include -#include #include "gutil/integral_types.h" // This file only makes sense with atomicops-internals-x86.h -- it @@ -41,71 +40,71 @@ // of the global offset table. To avoid breaking such executables, this code // must preserve that register's value across cpuid instructions. #if defined(__i386__) -#define cpuid(a, b, c, d, inp) \ - asm("mov %%ebx, %%edi\n" \ - "cpuid\n" \ - "xchg %%edi, %%ebx\n" \ - : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp)) +#define cpuid(a, b, c, d, inp) \ + asm("mov %%ebx, %%edi\n" \ + "cpuid\n" \ + "xchg %%edi, %%ebx\n" \ + : "=a"(a), "=D"(b), "=c"(c), "=d"(d) \ + : "a"(inp)) #elif defined(__x86_64__) -#define cpuid(a, b, c, d, inp) \ - asm("mov %%rbx, %%rdi\n" \ - "cpuid\n" \ - "xchg %%rdi, %%rbx\n" \ - : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp)) +#define cpuid(a, b, c, d, inp) \ + asm("mov %%rbx, %%rdi\n" \ + "cpuid\n" \ + "xchg %%rdi, %%rbx\n" \ + : "=a"(a), "=D"(b), "=c"(c), "=d"(d) \ + : "a"(inp)) #endif -#if defined(cpuid) // initialize the struct only on x86 +#if defined(cpuid) // initialize the struct only on x86 // Set the flags so that code will run correctly and conservatively // until InitGoogle() is called. struct GutilAtomicOps_x86CPUFeatureStruct GutilAtomicOps_Internalx86CPUFeatures = { - false, // no SSE2 - false // no cmpxchg16b + false, // no SSE2 + false // no cmpxchg16b }; // Initialize the GutilAtomicOps_Internalx86CPUFeatures struct. 
static void AtomicOps_Internalx86CPUFeaturesInit() { - uint32 eax; - uint32 ebx; - uint32 ecx; - uint32 edx; - - // Get vendor string (issue CPUID with eax = 0) - cpuid(eax, ebx, ecx, edx, 0); - char vendor[13]; - memcpy(vendor, &ebx, 4); - memcpy(vendor + 4, &edx, 4); - memcpy(vendor + 8, &ecx, 4); - vendor[12] = 0; - - // get feature flags in ecx/edx, and family/model in eax - cpuid(eax, ebx, ecx, edx, 1); - - int family = (eax >> 8) & 0xf; // family and model fields - int model = (eax >> 4) & 0xf; - if (family == 0xf) { // use extended family and model fields - family += (eax >> 20) & 0xff; - model += ((eax >> 16) & 0xf) << 4; - } - - // edx bit 26 is SSE2 which we use to tell use whether we can use mfence - GutilAtomicOps_Internalx86CPUFeatures.has_sse2 = ((edx >> 26) & 1); - - // ecx bit 13 indicates whether the cmpxchg16b instruction is supported - GutilAtomicOps_Internalx86CPUFeatures.has_cmpxchg16b = ((ecx >> 13) & 1); - - VLOG(1) << "vendor " << vendor << - " family " << family << - " model " << model << - " sse2 " << GutilAtomicOps_Internalx86CPUFeatures.has_sse2 << - " cmpxchg16b " << GutilAtomicOps_Internalx86CPUFeatures.has_cmpxchg16b; + uint32 eax; + uint32 ebx; + uint32 ecx; + uint32 edx; + + // Get vendor string (issue CPUID with eax = 0) + cpuid(eax, ebx, ecx, edx, 0); + char vendor[13]; + memcpy(vendor, &ebx, 4); + memcpy(vendor + 4, &edx, 4); + memcpy(vendor + 8, &ecx, 4); + vendor[12] = 0; + + // get feature flags in ecx/edx, and family/model in eax + cpuid(eax, ebx, ecx, edx, 1); + + int family = (eax >> 8) & 0xf; // family and model fields + int model = (eax >> 4) & 0xf; + if (family == 0xf) { // use extended family and model fields + family += (eax >> 20) & 0xff; + model += ((eax >> 16) & 0xf) << 4; + } + + // edx bit 26 is SSE2 which we use to tell use whether we can use mfence + GutilAtomicOps_Internalx86CPUFeatures.has_sse2 = ((edx >> 26) & 1); + + // ecx bit 13 indicates whether the cmpxchg16b instruction is supported + 
GutilAtomicOps_Internalx86CPUFeatures.has_cmpxchg16b = ((ecx >> 13) & 1); + + VLOG(1) << "vendor " << vendor << " family " << family << " model " << model << " sse2 " + << GutilAtomicOps_Internalx86CPUFeatures.has_sse2 << " cmpxchg16b " + << GutilAtomicOps_Internalx86CPUFeatures.has_cmpxchg16b; } // AtomicOps initialisation routine for external use. void AtomicOps_x86CPUFeaturesInit() { - AtomicOps_Internalx86CPUFeaturesInit(); + AtomicOps_Internalx86CPUFeaturesInit(); } #endif -#endif // GUTIL_ATOMICOPS_INTERNALS_X86_H_ +#endif // GUTIL_ATOMICOPS_INTERNALS_X86_H_ diff --git a/be/src/gutil/atomicops-internals-x86.h b/be/src/gutil/atomicops-internals-x86.h index 5c41356bfa7331..1d8ee431d11fd7 100644 --- a/be/src/gutil/atomicops-internals-x86.h +++ b/be/src/gutil/atomicops-internals-x86.h @@ -27,29 +27,25 @@ #ifndef GUTIL_ATOMICOPS_INTERNALS_X86_H_ #define GUTIL_ATOMICOPS_INTERNALS_X86_H_ -#include - #include +#include -#define BASE_HAS_ATOMIC64 1 // Use only in tests and base/atomic* - +#define BASE_HAS_ATOMIC64 1 // Use only in tests and base/atomic* // NOTE(user): x86 does not need to define AtomicWordCastType, because it // already matches Atomic32 or Atomic64, depending on the platform. - // This struct is not part of the public API of this module; clients may not // use it. // Features of this x86. Values may not be correct before InitGoogle() is run, // but are set conservatively. // Modify AtomicOps_x86CPUFeatureStruct to GutilAtomicOps_x86CPUFeatureStruct for brpc struct GutilAtomicOps_x86CPUFeatureStruct { - bool has_sse2; // Processor has SSE2. - bool has_cmpxchg16b; // Processor supports cmpxchg16b instruction. + bool has_sse2; // Processor has SSE2. + bool has_cmpxchg16b; // Processor supports cmpxchg16b instruction. }; extern struct GutilAtomicOps_x86CPUFeatureStruct GutilAtomicOps_Internalx86CPUFeatures; - #define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory") // AtomicOps initialisation for open source use. 
@@ -67,100 +63,87 @@ typedef int64_t Atomic64; // These atomic primitives don't work atomically, and can cause really nasty // hard-to-track-down bugs, if the pointer isn't naturally aligned. Check alignment // in debug mode. -template -inline void CheckNaturalAlignment(const T *ptr) { - DCHECK_EQ(0, reinterpret_cast(ptr) & (sizeof(T) - 1)) - << "unaligned pointer not allowed for atomics"; +template +inline void CheckNaturalAlignment(const T* ptr) { + DCHECK_EQ(0, reinterpret_cast(ptr) & (sizeof(T) - 1)) + << "unaligned pointer not allowed for atomics"; } // 32-bit low-level operations on any platform. -inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value, Atomic32 new_value) { - CheckNaturalAlignment(ptr); - Atomic32 prev; - __asm__ __volatile__("lock; cmpxchgl %1,%2" - : "=a" (prev) - : "q" (new_value), "m" (*ptr), "0" (old_value) - : "memory"); - return prev; + CheckNaturalAlignment(ptr); + Atomic32 prev; + __asm__ __volatile__("lock; cmpxchgl %1,%2" + : "=a"(prev) + : "q"(new_value), "m"(*ptr), "0"(old_value) + : "memory"); + return prev; } -inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, - Atomic32 new_value) { - CheckNaturalAlignment(ptr); - __asm__ __volatile__("xchgl %1,%0" // The lock prefix is implicit for xchg. - : "=r" (new_value) - : "m" (*ptr), "0" (new_value) - : "memory"); - return new_value; // Now it's the previous value. +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value) { + CheckNaturalAlignment(ptr); + __asm__ __volatile__("xchgl %1,%0" // The lock prefix is implicit for xchg. + : "=r"(new_value) + : "m"(*ptr), "0"(new_value) + : "memory"); + return new_value; // Now it's the previous value. 
} -inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, - Atomic32 new_value) { - CheckNaturalAlignment(ptr); - Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value); - return old_val; +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value) { + CheckNaturalAlignment(ptr); + Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value); + return old_val; } -inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, - Atomic32 new_value) { - return NoBarrier_AtomicExchange(ptr, new_value); +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value) { + return NoBarrier_AtomicExchange(ptr, new_value); } -inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, - Atomic32 increment) { - CheckNaturalAlignment(ptr); - Atomic32 temp = increment; - __asm__ __volatile__("lock; xaddl %0,%1" - : "+r" (temp), "+m" (*ptr) - : : "memory"); - // temp now holds the old value of *ptr - return temp + increment; +inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment) { + CheckNaturalAlignment(ptr); + Atomic32 temp = increment; + __asm__ __volatile__("lock; xaddl %0,%1" : "+r"(temp), "+m"(*ptr) : : "memory"); + // temp now holds the old value of *ptr + return temp + increment; } -inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, - Atomic32 increment) { - CheckNaturalAlignment(ptr); - Atomic32 temp = increment; - __asm__ __volatile__("lock; xaddl %0,%1" - : "+r" (temp), "+m" (*ptr) - : : "memory"); - // temp now holds the old value of *ptr - return temp + increment; +inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment) { + CheckNaturalAlignment(ptr); + Atomic32 temp = increment; + __asm__ __volatile__("lock; xaddl %0,%1" : "+r"(temp), "+m"(*ptr) : : "memory"); + // temp now holds the old value of *ptr + return temp + increment; } // On x86, the NoBarrier_CompareAndSwap() uses a locked instruction and so also // provides both acquire 
and release barriers. -inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value, Atomic32 new_value) { - return NoBarrier_CompareAndSwap(ptr, old_value, new_value); + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); } -inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value, Atomic32 new_value) { - return NoBarrier_CompareAndSwap(ptr, old_value, new_value); + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); } -inline Atomic32 Barrier_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, +inline Atomic32 Barrier_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value, Atomic32 new_value) { - return NoBarrier_CompareAndSwap(ptr, old_value, new_value); + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); } inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { - CheckNaturalAlignment(ptr); - *ptr = value; + CheckNaturalAlignment(ptr); + *ptr = value; } // Issue the x86 "pause" instruction, which tells the CPU that we // are in a spinlock wait loop and should allow other hyperthreads // to run, not speculate memory access, etc. inline void PauseCPU() { - __asm__ __volatile__("pause" : : : "memory"); + __asm__ __volatile__("pause" : : : "memory"); } #if defined(__x86_64__) @@ -168,175 +151,165 @@ inline void PauseCPU() { // 64-bit implementations of memory barrier can be simpler, because it // "mfence" is guaranteed to exist. 
inline void MemoryBarrier() { - __asm__ __volatile__("mfence" : : : "memory"); + __asm__ __volatile__("mfence" : : : "memory"); } inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { - CheckNaturalAlignment(ptr); - *ptr = value; - MemoryBarrier(); + CheckNaturalAlignment(ptr); + *ptr = value; + MemoryBarrier(); } #else inline void MemoryBarrier() { - if (GutilAtomicOps_Internalx86CPUFeatures.has_sse2) { - __asm__ __volatile__("mfence" : : : "memory"); - } else { // mfence is faster but not present on PIII - Atomic32 x = 0; - Acquire_AtomicExchange(&x, 0); - } + if (GutilAtomicOps_Internalx86CPUFeatures.has_sse2) { + __asm__ __volatile__("mfence" : : : "memory"); + } else { // mfence is faster but not present on PIII + Atomic32 x = 0; + Acquire_AtomicExchange(&x, 0); + } } inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { - if (GutilAtomicOps_Internalx86CPUFeatures.has_sse2) { - CheckNaturalAlignment(ptr); - *ptr = value; - __asm__ __volatile__("mfence" : : : "memory"); - } else { - Acquire_AtomicExchange(ptr, value); - } + if (GutilAtomicOps_Internalx86CPUFeatures.has_sse2) { + CheckNaturalAlignment(ptr); + *ptr = value; + __asm__ __volatile__("mfence" : : : "memory"); + } else { + Acquire_AtomicExchange(ptr, value); + } } #endif inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { - CheckNaturalAlignment(ptr); - ATOMICOPS_COMPILER_BARRIER(); - *ptr = value; // An x86 store acts as a release barrier. - // See comments in Atomic64 version of Release_Store(), below. + CheckNaturalAlignment(ptr); + ATOMICOPS_COMPILER_BARRIER(); + *ptr = value; // An x86 store acts as a release barrier. + // See comments in Atomic64 version of Release_Store(), below. 
} inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { - CheckNaturalAlignment(ptr); - return *ptr; + CheckNaturalAlignment(ptr); + return *ptr; } inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { - CheckNaturalAlignment(ptr); - Atomic32 value = *ptr; // An x86 load acts as a acquire barrier. - // See comments in Atomic64 version of Release_Store(), below. - ATOMICOPS_COMPILER_BARRIER(); - return value; + CheckNaturalAlignment(ptr); + Atomic32 value = *ptr; // An x86 load acts as a acquire barrier. + // See comments in Atomic64 version of Release_Store(), below. + ATOMICOPS_COMPILER_BARRIER(); + return value; } inline Atomic32 Release_Load(volatile const Atomic32* ptr) { - CheckNaturalAlignment(ptr); - MemoryBarrier(); - return *ptr; + CheckNaturalAlignment(ptr); + MemoryBarrier(); + return *ptr; } #if defined(__x86_64__) // 64-bit low-level operations on 64-bit platform. -inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_value, Atomic64 new_value) { - Atomic64 prev; - CheckNaturalAlignment(ptr); - __asm__ __volatile__("lock; cmpxchgq %1,%2" - : "=a" (prev) - : "q" (new_value), "m" (*ptr), "0" (old_value) - : "memory"); - return prev; + Atomic64 prev; + CheckNaturalAlignment(ptr); + __asm__ __volatile__("lock; cmpxchgq %1,%2" + : "=a"(prev) + : "q"(new_value), "m"(*ptr), "0"(old_value) + : "memory"); + return prev; } -inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - CheckNaturalAlignment(ptr); - __asm__ __volatile__("xchgq %1,%0" // The lock prefix is implicit for xchg. - : "=r" (new_value) - : "m" (*ptr), "0" (new_value) - : "memory"); - return new_value; // Now it's the previous value. +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value) { + CheckNaturalAlignment(ptr); + __asm__ __volatile__("xchgq %1,%0" // The lock prefix is implicit for xchg. 
+ : "=r"(new_value) + : "m"(*ptr), "0"(new_value) + : "memory"); + return new_value; // Now it's the previous value. } -inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value); - return old_val; +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value) { + Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value); + return old_val; } -inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_value) { - return NoBarrier_AtomicExchange(ptr, new_value); +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value) { + return NoBarrier_AtomicExchange(ptr, new_value); } -inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, - Atomic64 increment) { - Atomic64 temp = increment; - CheckNaturalAlignment(ptr); - __asm__ __volatile__("lock; xaddq %0,%1" - : "+r" (temp), "+m" (*ptr) - : : "memory"); - // temp now contains the previous value of *ptr - return temp + increment; +inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) { + Atomic64 temp = increment; + CheckNaturalAlignment(ptr); + __asm__ __volatile__("lock; xaddq %0,%1" : "+r"(temp), "+m"(*ptr) : : "memory"); + // temp now contains the previous value of *ptr + return temp + increment; } -inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, - Atomic64 increment) { - Atomic64 temp = increment; - CheckNaturalAlignment(ptr); - __asm__ __volatile__("lock; xaddq %0,%1" - : "+r" (temp), "+m" (*ptr) - : : "memory"); - // temp now contains the previous value of *ptr - return temp + increment; +inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) { + Atomic64 temp = increment; + CheckNaturalAlignment(ptr); + __asm__ __volatile__("lock; xaddq %0,%1" : "+r"(temp), "+m"(*ptr) : : "memory"); + // temp now contains the previous value of *ptr + return temp + increment; } inline void 
NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { - CheckNaturalAlignment(ptr); - *ptr = value; + CheckNaturalAlignment(ptr); + *ptr = value; } inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { - CheckNaturalAlignment(ptr); - *ptr = value; - MemoryBarrier(); + CheckNaturalAlignment(ptr); + *ptr = value; + MemoryBarrier(); } inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { - ATOMICOPS_COMPILER_BARRIER(); - CheckNaturalAlignment(ptr); - *ptr = value; // An x86 store acts as a release barrier - // for current AMD/Intel chips as of Jan 2008. - // See also Acquire_Load(), below. - - // When new chips come out, check: - // IA-32 Intel Architecture Software Developer's Manual, Volume 3: - // System Programming Guide, Chatper 7: Multiple-processor management, - // Section 7.2, Memory Ordering. - // Last seen at: - // http://developer.intel.com/design/pentium4/manuals/index_new.htm - // - // x86 stores/loads fail to act as barriers for a few instructions (clflush - // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are - // not generated by the compiler, and are rare. Users of these instructions - // need to know about cache behaviour in any case since all of these involve - // either flushing cache lines or non-temporal cache hints. + ATOMICOPS_COMPILER_BARRIER(); + CheckNaturalAlignment(ptr); + *ptr = value; // An x86 store acts as a release barrier + // for current AMD/Intel chips as of Jan 2008. + // See also Acquire_Load(), below. + + // When new chips come out, check: + // IA-32 Intel Architecture Software Developer's Manual, Volume 3: + // System Programming Guide, Chatper 7: Multiple-processor management, + // Section 7.2, Memory Ordering. 
+ // Last seen at: + // http://developer.intel.com/design/pentium4/manuals/index_new.htm + // + // x86 stores/loads fail to act as barriers for a few instructions (clflush + // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are + // not generated by the compiler, and are rare. Users of these instructions + // need to know about cache behaviour in any case since all of these involve + // either flushing cache lines or non-temporal cache hints. } inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { - CheckNaturalAlignment(ptr); - return *ptr; + CheckNaturalAlignment(ptr); + return *ptr; } inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { - CheckNaturalAlignment(ptr); - Atomic64 value = *ptr; // An x86 load acts as a acquire barrier, - // for current AMD/Intel chips as of Jan 2008. - // See also Release_Store(), above. - ATOMICOPS_COMPILER_BARRIER(); - return value; + CheckNaturalAlignment(ptr); + Atomic64 value = *ptr; // An x86 load acts as a acquire barrier, + // for current AMD/Intel chips as of Jan 2008. + // See also Release_Store(), above. + ATOMICOPS_COMPILER_BARRIER(); + return value; } inline Atomic64 Release_Load(volatile const Atomic64* ptr) { - CheckNaturalAlignment(ptr); - MemoryBarrier(); - return *ptr; + CheckNaturalAlignment(ptr); + MemoryBarrier(); + return *ptr; } -#else // defined(__x86_64__) +#else // defined(__x86_64__) // 64-bit low-level operations on 32-bit platform. @@ -352,149 +325,140 @@ inline Atomic64 Release_Load(volatile const Atomic64* ptr) { // the same register for another argument which has constraints // that allow it ("r" for example). 
-inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr, - Atomic64 old_value, +inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr, Atomic64 old_value, Atomic64 new_value) { - CheckNaturalAlignment(ptr); - Atomic64 prev; - __asm__ __volatile__("push %%ebx\n\t" - "movl (%3), %%ebx\n\t" // Move 64-bit new_value into - "movl 4(%3), %%ecx\n\t" // ecx:ebx - "lock; cmpxchg8b (%1)\n\t" // If edx:eax (old_value) same - "pop %%ebx\n\t" - : "=A" (prev) // as contents of ptr: - : "D" (ptr), // ecx:ebx => ptr - "0" (old_value) , // else: - "S" (&new_value) // old *ptr => edx:eax - : "memory", "%ecx"); - return prev; -} -#endif // Compiler < gcc-4.1 - -inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_val, + CheckNaturalAlignment(ptr); + Atomic64 prev; + __asm__ __volatile__( + "push %%ebx\n\t" + "movl (%3), %%ebx\n\t" // Move 64-bit new_value into + "movl 4(%3), %%ecx\n\t" // ecx:ebx + "lock; cmpxchg8b (%1)\n\t" // If edx:eax (old_value) same + "pop %%ebx\n\t" + : "=A"(prev) // as contents of ptr: + : "D"(ptr), // ecx:ebx => ptr + "0"(old_value), // else: + "S"(&new_value) // old *ptr => edx:eax + : "memory", "%ecx"); + return prev; +} +#endif // Compiler < gcc-4.1 + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_val, Atomic64 new_val) { - CheckNaturalAlignment(ptr); - return __sync_val_compare_and_swap(ptr, old_val, new_val); + CheckNaturalAlignment(ptr); + return __sync_val_compare_and_swap(ptr, old_val, new_val); } -inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_val) { - Atomic64 old_val; - CheckNaturalAlignment(ptr); +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_val) { + Atomic64 old_val; + CheckNaturalAlignment(ptr); - do { - old_val = *ptr; - } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val); + do { + old_val = *ptr; + } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val); - 
return old_val; + return old_val; } -inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_val) { - CheckNaturalAlignment(ptr); - Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val); - return old_val; +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_val) { + CheckNaturalAlignment(ptr); + Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val); + return old_val; } -inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, - Atomic64 new_val) { - return NoBarrier_AtomicExchange(ptr, new_val); +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_val) { + return NoBarrier_AtomicExchange(ptr, new_val); } -inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, - Atomic64 increment) { - CheckNaturalAlignment(ptr); - Atomic64 old_val, new_val; +inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) { + CheckNaturalAlignment(ptr); + Atomic64 old_val, new_val; - do { - old_val = *ptr; - new_val = old_val + increment; - } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val); + do { + old_val = *ptr; + new_val = old_val + increment; + } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val); - return old_val + increment; + return old_val + increment; } -inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, - Atomic64 increment) { - CheckNaturalAlignment(ptr); - Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment); - return new_val; +inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) { + CheckNaturalAlignment(ptr); + Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment); + return new_val; } inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { - CheckNaturalAlignment(ptr); - __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic - "movq %%mm0, %0\n\t" // moves (ptr could be read-only) - "emms\n\t" // Empty mmx state/Reset FP 
regs - : "=m" (*ptr) - : "m" (value) - : // mark the FP stack and mmx registers as clobbered - "st", "st(1)", "st(2)", "st(3)", "st(4)", - "st(5)", "st(6)", "st(7)", "mm0", "mm1", - "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); + CheckNaturalAlignment(ptr); + __asm__ __volatile__( + "movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic + "movq %%mm0, %0\n\t" // moves (ptr could be read-only) + "emms\n\t" // Empty mmx state/Reset FP regs + : "=m"(*ptr) + : "m"(value) + : // mark the FP stack and mmx registers as clobbered + "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)", "mm0", "mm1", + "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); } inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { - NoBarrier_Store(ptr, value); - MemoryBarrier(); + NoBarrier_Store(ptr, value); + MemoryBarrier(); } inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { - ATOMICOPS_COMPILER_BARRIER(); - NoBarrier_Store(ptr, value); + ATOMICOPS_COMPILER_BARRIER(); + NoBarrier_Store(ptr, value); } inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { - CheckNaturalAlignment(ptr); - Atomic64 value; - __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic - "movq %%mm0, %0\n\t" // moves (ptr could be read-only) - "emms\n\t" // Empty mmx state/Reset FP regs - : "=m" (value) - : "m" (*ptr) - : // mark the FP stack and mmx registers as clobbered - "st", "st(1)", "st(2)", "st(3)", "st(4)", - "st(5)", "st(6)", "st(7)", "mm0", "mm1", - "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); - return value; + CheckNaturalAlignment(ptr); + Atomic64 value; + __asm__ __volatile__( + "movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic + "movq %%mm0, %0\n\t" // moves (ptr could be read-only) + "emms\n\t" // Empty mmx state/Reset FP regs + : "=m"(value) + : "m"(*ptr) + : // mark the FP stack and mmx registers as clobbered + "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)", "mm0", "mm1", + "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); + 
return value; } inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { - CheckNaturalAlignment(ptr); - Atomic64 value = NoBarrier_Load(ptr); - ATOMICOPS_COMPILER_BARRIER(); - return value; + CheckNaturalAlignment(ptr); + Atomic64 value = NoBarrier_Load(ptr); + ATOMICOPS_COMPILER_BARRIER(); + return value; } inline Atomic64 Release_Load(volatile const Atomic64* ptr) { - MemoryBarrier(); - return NoBarrier_Load(ptr); + MemoryBarrier(); + return NoBarrier_Load(ptr); } -#endif // defined(__x86_64__) +#endif // defined(__x86_64__) -inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_value, Atomic64 new_value) { - return NoBarrier_CompareAndSwap(ptr, old_value, new_value); + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); } -inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_value, Atomic64 new_value) { - return NoBarrier_CompareAndSwap(ptr, old_value, new_value); + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); } -inline Atomic64 Barrier_CompareAndSwap(volatile Atomic64* ptr, - Atomic64 old_value, +inline Atomic64 Barrier_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_value, Atomic64 new_value) { - return NoBarrier_CompareAndSwap(ptr, old_value, new_value); + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); } -} // namespace subtle -} // namespace base +} // namespace subtle +} // namespace base #undef ATOMICOPS_COMPILER_BARRIER -#endif // GUTIL_ATOMICOPS_INTERNALS_X87_H_ +#endif // GUTIL_ATOMICOPS_INTERNALS_X87_H_ diff --git a/be/src/gutil/atomicops.h b/be/src/gutil/atomicops.h index ab8abd4554023a..bcf78f0552345f 100644 --- a/be/src/gutil/atomicops.h +++ b/be/src/gutil/atomicops.h @@ -67,13 +67,9 @@ // #endif // ------------------------------------------------------------------------ -#define GCC_VERSION 
(__GNUC__ * 10000 \ - + __GNUC_MINOR__ * 100 \ - + __GNUC_PATCHLEVEL__) +#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) -#define CLANG_VERSION (__clang_major__ * 10000 \ - + __clang_minor__ * 100 \ - + __clang_patchlevel__) +#define CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) // ThreadSanitizer provides own implementation of atomicops. #if defined(THREAD_SANITIZER) @@ -112,98 +108,77 @@ namespace subtle { // Always return the old value of "*ptr" // // This routine implies no memory barriers. -inline AtomicWord NoBarrier_CompareAndSwap(volatile AtomicWord* ptr, - AtomicWord old_value, +inline AtomicWord NoBarrier_CompareAndSwap(volatile AtomicWord* ptr, AtomicWord old_value, AtomicWord new_value) { - return NoBarrier_CompareAndSwap( - reinterpret_cast(ptr), - old_value, new_value); + return NoBarrier_CompareAndSwap(reinterpret_cast(ptr), old_value, + new_value); } // Atomically store new_value into *ptr, returning the previous value held in // *ptr. This routine implies no memory barriers. 
-inline AtomicWord NoBarrier_AtomicExchange(volatile AtomicWord* ptr, - AtomicWord new_value) { - return NoBarrier_AtomicExchange( - reinterpret_cast(ptr), new_value); +inline AtomicWord NoBarrier_AtomicExchange(volatile AtomicWord* ptr, AtomicWord new_value) { + return NoBarrier_AtomicExchange(reinterpret_cast(ptr), new_value); } -inline AtomicWord Acquire_AtomicExchange(volatile AtomicWord* ptr, - AtomicWord new_value) { - return Acquire_AtomicExchange( - reinterpret_cast(ptr), new_value); +inline AtomicWord Acquire_AtomicExchange(volatile AtomicWord* ptr, AtomicWord new_value) { + return Acquire_AtomicExchange(reinterpret_cast(ptr), new_value); } -inline AtomicWord Release_AtomicExchange(volatile AtomicWord* ptr, - AtomicWord new_value) { - return Release_AtomicExchange( - reinterpret_cast(ptr), new_value); +inline AtomicWord Release_AtomicExchange(volatile AtomicWord* ptr, AtomicWord new_value) { + return Release_AtomicExchange(reinterpret_cast(ptr), new_value); } // Atomically increment *ptr by "increment". Returns the new value of // *ptr with the increment applied. This routine implies no memory // barriers. 
-inline AtomicWord NoBarrier_AtomicIncrement(volatile AtomicWord* ptr, - AtomicWord increment) { - return NoBarrier_AtomicIncrement( - reinterpret_cast(ptr), increment); +inline AtomicWord NoBarrier_AtomicIncrement(volatile AtomicWord* ptr, AtomicWord increment) { + return NoBarrier_AtomicIncrement(reinterpret_cast(ptr), + increment); } -inline AtomicWord Barrier_AtomicIncrement(volatile AtomicWord* ptr, - AtomicWord increment) { - return Barrier_AtomicIncrement( - reinterpret_cast(ptr), increment); +inline AtomicWord Barrier_AtomicIncrement(volatile AtomicWord* ptr, AtomicWord increment) { + return Barrier_AtomicIncrement(reinterpret_cast(ptr), increment); } -inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr, - AtomicWord old_value, +inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr, AtomicWord old_value, AtomicWord new_value) { - return base::subtle::Acquire_CompareAndSwap( - reinterpret_cast(ptr), - old_value, new_value); + return base::subtle::Acquire_CompareAndSwap(reinterpret_cast(ptr), + old_value, new_value); } -inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr, - AtomicWord old_value, +inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr, AtomicWord old_value, AtomicWord new_value) { - return base::subtle::Release_CompareAndSwap( - reinterpret_cast(ptr), - old_value, new_value); + return base::subtle::Release_CompareAndSwap(reinterpret_cast(ptr), + old_value, new_value); } -inline void NoBarrier_Store(volatile AtomicWord *ptr, AtomicWord value) { - NoBarrier_Store( - reinterpret_cast(ptr), value); +inline void NoBarrier_Store(volatile AtomicWord* ptr, AtomicWord value) { + NoBarrier_Store(reinterpret_cast(ptr), value); } inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) { - return base::subtle::Acquire_Store( - reinterpret_cast(ptr), value); + return base::subtle::Acquire_Store(reinterpret_cast(ptr), value); } inline void Release_Store(volatile AtomicWord* ptr, AtomicWord 
value) { - return base::subtle::Release_Store( - reinterpret_cast(ptr), value); + return base::subtle::Release_Store(reinterpret_cast(ptr), value); } -inline AtomicWord NoBarrier_Load(volatile const AtomicWord *ptr) { - return NoBarrier_Load( - reinterpret_cast(ptr)); +inline AtomicWord NoBarrier_Load(volatile const AtomicWord* ptr) { + return NoBarrier_Load(reinterpret_cast(ptr)); } inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) { - return base::subtle::Acquire_Load( - reinterpret_cast(ptr)); + return base::subtle::Acquire_Load(reinterpret_cast(ptr)); } inline AtomicWord Release_Load(volatile const AtomicWord* ptr) { - return base::subtle::Release_Load( - reinterpret_cast(ptr)); + return base::subtle::Release_Load(reinterpret_cast(ptr)); } -} // namespace base::subtle -} // namespace base -#endif // AtomicWordCastType +} // namespace subtle +} // namespace base +#endif // AtomicWordCastType // ------------------------------------------------------------------------ // Commented out type definitions and method declarations for documentation @@ -277,8 +252,7 @@ void MemoryBarrier(); void PauseCPU(); -#endif // 0 - +#endif // 0 // ------------------------------------------------------------------------ // The following are to be deprecated when all uses have been changed to @@ -287,87 +261,79 @@ void PauseCPU(); #ifdef AtomicWordCastType // AtomicWord versions to be deprecated -inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr, - AtomicWord old_value, +inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr, AtomicWord old_value, AtomicWord new_value) { - return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); + return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); } -inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr, - AtomicWord old_value, +inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr, AtomicWord old_value, AtomicWord new_value) { - return 
base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); + return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); } inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) { - return base::subtle::Acquire_Store(ptr, value); + return base::subtle::Acquire_Store(ptr, value); } inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) { - return base::subtle::Release_Store(ptr, value); + return base::subtle::Release_Store(ptr, value); } inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) { - return base::subtle::Acquire_Load(ptr); + return base::subtle::Acquire_Load(ptr); } inline AtomicWord Release_Load(volatile const AtomicWord* ptr) { - return base::subtle::Release_Load(ptr); + return base::subtle::Release_Load(ptr); } -#endif // AtomicWordCastType +#endif // AtomicWordCastType // 32-bit Acquire/Release operations to be deprecated. -inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value, Atomic32 new_value) { - return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); + return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); } -inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, - Atomic32 old_value, +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value, Atomic32 new_value) { - return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); + return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); } inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { - base::subtle::Acquire_Store(ptr, value); + base::subtle::Acquire_Store(ptr, value); } inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { - return base::subtle::Release_Store(ptr, value); + return base::subtle::Release_Store(ptr, value); } inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { - return 
base::subtle::Acquire_Load(ptr); + return base::subtle::Acquire_Load(ptr); } inline Atomic32 Release_Load(volatile const Atomic32* ptr) { - return base::subtle::Release_Load(ptr); + return base::subtle::Release_Load(ptr); } // 64-bit Acquire/Release operations to be deprecated. -inline base::subtle::Atomic64 Acquire_CompareAndSwap( - volatile base::subtle::Atomic64* ptr, - base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) { - return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); +inline base::subtle::Atomic64 Acquire_CompareAndSwap(volatile base::subtle::Atomic64* ptr, + base::subtle::Atomic64 old_value, + base::subtle::Atomic64 new_value) { + return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); } -inline base::subtle::Atomic64 Release_CompareAndSwap( - volatile base::subtle::Atomic64* ptr, - base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) { - return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); +inline base::subtle::Atomic64 Release_CompareAndSwap(volatile base::subtle::Atomic64* ptr, + base::subtle::Atomic64 old_value, + base::subtle::Atomic64 new_value) { + return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); } -inline void Acquire_Store( - volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) { - base::subtle::Acquire_Store(ptr, value); +inline void Acquire_Store(volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) { + base::subtle::Acquire_Store(ptr, value); } -inline void Release_Store( - volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) { - return base::subtle::Release_Store(ptr, value); +inline void Release_Store(volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) { + return base::subtle::Release_Store(ptr, value); } -inline base::subtle::Atomic64 Acquire_Load( - volatile const base::subtle::Atomic64* ptr) { - return base::subtle::Acquire_Load(ptr); +inline base::subtle::Atomic64 
Acquire_Load(volatile const base::subtle::Atomic64* ptr) { + return base::subtle::Acquire_Load(ptr); } -inline base::subtle::Atomic64 Release_Load( - volatile const base::subtle::Atomic64* ptr) { - return base::subtle::Release_Load(ptr); +inline base::subtle::Atomic64 Release_Load(volatile const base::subtle::Atomic64* ptr) { + return base::subtle::Release_Load(ptr); } -#endif // THREAD_ATOMICOPS_H_ +#endif // THREAD_ATOMICOPS_H_ diff --git a/be/src/gutil/basictypes.h b/be/src/gutil/basictypes.h index 2f21d037d1a3e7..4378514ecd9c8d 100644 --- a/be/src/gutil/basictypes.h +++ b/be/src/gutil/basictypes.h @@ -11,10 +11,7 @@ // Argument type used in interfaces that can optionally take ownership // of a passed in argument. If TAKE_OWNERSHIP is passed, the called // object takes ownership of the argument. Otherwise it does not. -enum Ownership { - DO_NOT_TAKE_OWNERSHIP, - TAKE_OWNERSHIP -}; +enum Ownership { DO_NOT_TAKE_OWNERSHIP, TAKE_OWNERSHIP }; // Used to explicitly mark the return value of a function as unused. If you are // really sure you don't want to do anything with the return value of a function @@ -24,9 +21,7 @@ enum Ownership { // if (TakeOwnership(my_var.get()) == SUCCESS) // ignore_result(my_var.release()); // -template -inline void ignore_result(const T&) { -} +template +inline void ignore_result(const T&) {} - -#endif // BASE_BASICTYPES_H_ +#endif // BASE_BASICTYPES_H_ diff --git a/be/src/gutil/bits.cc b/be/src/gutil/bits.cc index bc4e451c31c302..bc55b4da12e6b9 100644 --- a/be/src/gutil/bits.cc +++ b/be/src/gutil/bits.cc @@ -10,92 +10,80 @@ // (We could make these ints. The tradeoff is size (eg does it overwhelm // the cache?) 
vs efficiency in referencing sub-word-sized array elements) const char Bits::num_bits[] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 }; + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, + 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, + 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, + 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, + 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, + 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, + 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, + 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; -int Bits::Count(const void *m, int num_bytes) { - int nbits = 0; - const uint8 *s = (const uint8 *) m; - for (int i = 0; i < num_bytes; i++) - nbits += num_bits[*s++]; - return nbits; +int Bits::Count(const void* m, int num_bytes) { + int nbits = 0; + const uint8* s = (const uint8*)m; + for (int i = 0; i < 
num_bytes; i++) nbits += num_bits[*s++]; + return nbits; } -int Bits::Difference(const void *m1, const void *m2, int num_bytes) { - int nbits = 0; - const uint8 *s1 = (const uint8 *) m1; - const uint8 *s2 = (const uint8 *) m2; - for (int i = 0; i < num_bytes; i++) - nbits += num_bits[(*s1++) ^ (*s2++)]; - return nbits; +int Bits::Difference(const void* m1, const void* m2, int num_bytes) { + int nbits = 0; + const uint8* s1 = (const uint8*)m1; + const uint8* s2 = (const uint8*)m2; + for (int i = 0; i < num_bytes; i++) nbits += num_bits[(*s1++) ^ (*s2++)]; + return nbits; } -int Bits::CappedDifference(const void *m1, const void *m2, - int num_bytes, int cap) { - int nbits = 0; - const uint8 *s1 = (const uint8 *) m1; - const uint8 *s2 = (const uint8 *) m2; - for (int i = 0; i < num_bytes && nbits <= cap; i++) - nbits += num_bits[(*s1++) ^ (*s2++)]; - return nbits; +int Bits::CappedDifference(const void* m1, const void* m2, int num_bytes, int cap) { + int nbits = 0; + const uint8* s1 = (const uint8*)m1; + const uint8* s2 = (const uint8*)m2; + for (int i = 0; i < num_bytes && nbits <= cap; i++) nbits += num_bits[(*s1++) ^ (*s2++)]; + return nbits; } int Bits::Log2Floor_Portable(uint32 n) { - if (n == 0) - return -1; - int log = 0; - uint32 value = n; - for (int i = 4; i >= 0; --i) { - int shift = (1 << i); - uint32 x = value >> shift; - if (x != 0) { - value = x; - log += shift; + if (n == 0) return -1; + int log = 0; + uint32 value = n; + for (int i = 4; i >= 0; --i) { + int shift = (1 << i); + uint32 x = value >> shift; + if (x != 0) { + value = x; + log += shift; + } } - } - assert(value == 1); - return log; + assert(value == 1); + return log; } int Bits::Log2Ceiling(uint32 n) { - int floor = Log2Floor(n); - if (n == (n &~ (n - 1))) // zero or a power of two - return floor; - else - return floor + 1; + int floor = Log2Floor(n); + if (n == (n & ~(n - 1))) // zero or a power of two + return floor; + else + return floor + 1; } int Bits::Log2Ceiling64(uint64 n) { - int 
floor = Log2Floor64(n); - if (n == (n &~ (n - 1))) // zero or a power of two - return floor; - else - return floor + 1; + int floor = Log2Floor64(n); + if (n == (n & ~(n - 1))) // zero or a power of two + return floor; + else + return floor + 1; } int Bits::FindLSBSetNonZero_Portable(uint32 n) { - int rc = 31; - for (int i = 4, shift = 1 << 4; i >= 0; --i) { - const uint32 x = n << shift; - if (x != 0) { - n = x; - rc -= shift; + int rc = 31; + for (int i = 4, shift = 1 << 4; i >= 0; --i) { + const uint32 x = n << shift; + if (x != 0) { + n = x; + rc -= shift; + } + shift >>= 1; } - shift >>= 1; - } - return rc; + return rc; } diff --git a/be/src/gutil/bits.h b/be/src/gutil/bits.h index 7405c23acdab4f..acd44c9cfd6c16 100644 --- a/be/src/gutil/bits.h +++ b/be/src/gutil/bits.h @@ -12,116 +12,117 @@ #define _BITS_H_ class Bits { - public: - // Return the number of one bits in the given integer. - static int CountOnesInByte(unsigned char n); - - static int CountOnes(uint32 n) { - n -= ((n >> 1) & 0x55555555); - n = ((n >> 2) & 0x33333333) + (n & 0x33333333); - return (((n + (n >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; - } - - // Count bits using sideways addition [WWG'57]. See Knuth TAOCP v4 7.1.3(59) - static inline int CountOnes64(uint64 n) { +public: + // Return the number of one bits in the given integer. + static int CountOnesInByte(unsigned char n); + + static int CountOnes(uint32 n) { + n -= ((n >> 1) & 0x55555555); + n = ((n >> 2) & 0x33333333) + (n & 0x33333333); + return (((n + (n >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; + } + + // Count bits using sideways addition [WWG'57]. 
See Knuth TAOCP v4 7.1.3(59) + static inline int CountOnes64(uint64 n) { #if defined(__x86_64__) - n -= (n >> 1) & 0x5555555555555555ULL; - n = ((n >> 2) & 0x3333333333333333ULL) + (n & 0x3333333333333333ULL); - return (((n + (n >> 4)) & 0xF0F0F0F0F0F0F0FULL) - * 0x101010101010101ULL) >> 56; + n -= (n >> 1) & 0x5555555555555555ULL; + n = ((n >> 2) & 0x3333333333333333ULL) + (n & 0x3333333333333333ULL); + return (((n + (n >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56; #else - return CountOnes(n >> 32) + CountOnes(n & 0xffffffff); + return CountOnes(n >> 32) + CountOnes(n & 0xffffffff); #endif - } + } - // Count bits using popcnt instruction (available on argo machines). - // Doesn't check if the instruction exists. - // Please use TestCPUFeature(POPCNT) from base/cpuid/cpuid.h before using this. - static inline int CountOnes64withPopcount(uint64 n) { + // Count bits using popcnt instruction (available on argo machines). + // Doesn't check if the instruction exists. + // Please use TestCPUFeature(POPCNT) from base/cpuid/cpuid.h before using this. + static inline int CountOnes64withPopcount(uint64 n) { #if defined(__x86_64__) && defined __GNUC__ - int64 count = 0; - asm("popcnt %1,%0" : "=r"(count) : "rm"(n) : "cc"); - return count; + int64 count = 0; + asm("popcnt %1,%0" : "=r"(count) : "rm"(n) : "cc"); + return count; #else - return CountOnes64(n); + return CountOnes64(n); #endif - } - - // Reverse the bits in the given integer. - static uint8 ReverseBits8(uint8 n); - static uint32 ReverseBits32(uint32 n); - static uint64 ReverseBits64(uint64 n); - - // Return the number of one bits in the byte sequence. - static int Count(const void *m, int num_bytes); - - // Return the number of different bits in the given byte sequences. - // (i.e., the Hamming distance) - static int Difference(const void *m1, const void *m2, int num_bytes); - - // Return the number of different bits in the given byte sequences, - // up to a maximum. 
Values larger than the maximum may be returned - // (because multiple bits are checked at a time), but the function - // may exit early if the cap is exceeded. - static int CappedDifference(const void *m1, const void *m2, - int num_bytes, int cap); - - // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. - static int Log2Floor(uint32 n); - static int Log2Floor64(uint64 n); - - // Potentially faster version of Log2Floor() that returns an - // undefined value if n == 0 - static int Log2FloorNonZero(uint32 n); - static int Log2FloorNonZero64(uint64 n); - - // Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0. - static int Log2Ceiling(uint32 n); - static int Log2Ceiling64(uint64 n); - - // Return the first set least / most significant bit, 0-indexed. Returns an - // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except - // that it's 0-indexed, while FindMSBSetNonZero() is the same as - // Log2FloorNonZero(). - static int FindLSBSetNonZero(uint32 n); - static int FindLSBSetNonZero64(uint64 n); - static int FindMSBSetNonZero(uint32 n) { return Log2FloorNonZero(n); } - static int FindMSBSetNonZero64(uint64 n) { return Log2FloorNonZero64(n); } - - // Portable implementations - static int Log2Floor_Portable(uint32 n); - static int Log2FloorNonZero_Portable(uint32 n); - static int FindLSBSetNonZero_Portable(uint32 n); - static int Log2Floor64_Portable(uint64 n); - static int Log2FloorNonZero64_Portable(uint64 n); - static int FindLSBSetNonZero64_Portable(uint64 n); - - // Viewing bytes as a stream of unsigned bytes, does that stream - // contain any byte equal to c? - template static bool BytesContainByte(T bytes, uint8 c); - - // Viewing bytes as a stream of unsigned bytes, does that stream - // contain any byte b < c? - template static bool BytesContainByteLessThan(T bytes, uint8 c); - - // Viewing bytes as a stream of unsigned bytes, are all elements of that - // stream in [lo, hi]? 
- template static bool BytesAllInRange(T bytes, uint8 lo, uint8 hi); - - private: - static const char num_bits[]; - static const unsigned char bit_reverse_table[]; - DISALLOW_COPY_AND_ASSIGN(Bits); + } + + // Reverse the bits in the given integer. + static uint8 ReverseBits8(uint8 n); + static uint32 ReverseBits32(uint32 n); + static uint64 ReverseBits64(uint64 n); + + // Return the number of one bits in the byte sequence. + static int Count(const void* m, int num_bytes); + + // Return the number of different bits in the given byte sequences. + // (i.e., the Hamming distance) + static int Difference(const void* m1, const void* m2, int num_bytes); + + // Return the number of different bits in the given byte sequences, + // up to a maximum. Values larger than the maximum may be returned + // (because multiple bits are checked at a time), but the function + // may exit early if the cap is exceeded. + static int CappedDifference(const void* m1, const void* m2, int num_bytes, int cap); + + // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. + static int Log2Floor(uint32 n); + static int Log2Floor64(uint64 n); + + // Potentially faster version of Log2Floor() that returns an + // undefined value if n == 0 + static int Log2FloorNonZero(uint32 n); + static int Log2FloorNonZero64(uint64 n); + + // Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0. + static int Log2Ceiling(uint32 n); + static int Log2Ceiling64(uint64 n); + + // Return the first set least / most significant bit, 0-indexed. Returns an + // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except + // that it's 0-indexed, while FindMSBSetNonZero() is the same as + // Log2FloorNonZero(). 
+ static int FindLSBSetNonZero(uint32 n); + static int FindLSBSetNonZero64(uint64 n); + static int FindMSBSetNonZero(uint32 n) { return Log2FloorNonZero(n); } + static int FindMSBSetNonZero64(uint64 n) { return Log2FloorNonZero64(n); } + + // Portable implementations + static int Log2Floor_Portable(uint32 n); + static int Log2FloorNonZero_Portable(uint32 n); + static int FindLSBSetNonZero_Portable(uint32 n); + static int Log2Floor64_Portable(uint64 n); + static int Log2FloorNonZero64_Portable(uint64 n); + static int FindLSBSetNonZero64_Portable(uint64 n); + + // Viewing bytes as a stream of unsigned bytes, does that stream + // contain any byte equal to c? + template + static bool BytesContainByte(T bytes, uint8 c); + + // Viewing bytes as a stream of unsigned bytes, does that stream + // contain any byte b < c? + template + static bool BytesContainByteLessThan(T bytes, uint8 c); + + // Viewing bytes as a stream of unsigned bytes, are all elements of that + // stream in [lo, hi]? + template + static bool BytesAllInRange(T bytes, uint8 lo, uint8 hi); + +private: + static const char num_bits[]; + static const unsigned char bit_reverse_table[]; + DISALLOW_COPY_AND_ASSIGN(Bits); }; // A utility class for some handy bit patterns. The names l and h // were chosen to match Knuth Volume 4: l is 0x010101... and h is 0x808080...; // half_ones is ones in the lower half only. We assume sizeof(T) is 1 or even. -template struct BitPattern { - static const T half_ones = (static_cast(1) << (sizeof(T)*4)) - 1; - static const T l = (sizeof(T) == 1) ? 1 : - (half_ones / 0xff * (half_ones + 2)); - static const T h = ~(l * 0x7f); +template +struct BitPattern { + static const T half_ones = (static_cast(1) << (sizeof(T) * 4)) - 1; + static const T l = (sizeof(T) == 1) ? 
1 : (half_ones / 0xff * (half_ones + 2)); + static const T h = ~(l * 0x7f); }; // ------------------------------------------------------------------------ @@ -129,30 +130,29 @@ template struct BitPattern { // ------------------------------------------------------------------------ // use GNU builtins where available -#if defined(__GNUC__) && \ - ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) +#if defined(__GNUC__) && ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) inline int Bits::Log2Floor(uint32 n) { - return n == 0 ? -1 : 31 ^ __builtin_clz(n); + return n == 0 ? -1 : 31 ^ __builtin_clz(n); } inline int Bits::Log2FloorNonZero(uint32 n) { - return 31 ^ __builtin_clz(n); + return 31 ^ __builtin_clz(n); } inline int Bits::FindLSBSetNonZero(uint32 n) { - return __builtin_ctz(n); + return __builtin_ctz(n); } inline int Bits::Log2Floor64(uint64 n) { - return n == 0 ? -1 : 63 ^ __builtin_clzll(n); + return n == 0 ? -1 : 63 ^ __builtin_clzll(n); } inline int Bits::Log2FloorNonZero64(uint64 n) { - return 63 ^ __builtin_clzll(n); + return 63 ^ __builtin_clzll(n); } inline int Bits::FindLSBSetNonZero64(uint64 n) { - return __builtin_ctzll(n); + return __builtin_ctzll(n); } #elif defined(_MSC_VER) #include "gutil/bits-internal-windows.h" @@ -161,106 +161,104 @@ inline int Bits::FindLSBSetNonZero64(uint64 n) { #endif inline int Bits::CountOnesInByte(unsigned char n) { - return num_bits[n]; + return num_bits[n]; } inline uint8 Bits::ReverseBits8(unsigned char n) { - n = ((n >> 1) & 0x55) | ((n & 0x55) << 1); - n = ((n >> 2) & 0x33) | ((n & 0x33) << 2); - return ((n >> 4) & 0x0f) | ((n & 0x0f) << 4); + n = ((n >> 1) & 0x55) | ((n & 0x55) << 1); + n = ((n >> 2) & 0x33) | ((n & 0x33) << 2); + return ((n >> 4) & 0x0f) | ((n & 0x0f) << 4); } inline uint32 Bits::ReverseBits32(uint32 n) { - n = ((n >> 1) & 0x55555555) | ((n & 0x55555555) << 1); - n = ((n >> 2) & 0x33333333) | ((n & 0x33333333) << 2); - n = ((n >> 4) & 0x0F0F0F0F) | ((n & 0x0F0F0F0F) << 4); - n = 
((n >> 8) & 0x00FF00FF) | ((n & 0x00FF00FF) << 8); - return ( n >> 16 ) | ( n << 16); + n = ((n >> 1) & 0x55555555) | ((n & 0x55555555) << 1); + n = ((n >> 2) & 0x33333333) | ((n & 0x33333333) << 2); + n = ((n >> 4) & 0x0F0F0F0F) | ((n & 0x0F0F0F0F) << 4); + n = ((n >> 8) & 0x00FF00FF) | ((n & 0x00FF00FF) << 8); + return (n >> 16) | (n << 16); } inline uint64 Bits::ReverseBits64(uint64 n) { #if defined(__x86_64__) - n = ((n >> 1) & 0x5555555555555555ULL) | ((n & 0x5555555555555555ULL) << 1); - n = ((n >> 2) & 0x3333333333333333ULL) | ((n & 0x3333333333333333ULL) << 2); - n = ((n >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((n & 0x0F0F0F0F0F0F0F0FULL) << 4); - n = ((n >> 8) & 0x00FF00FF00FF00FFULL) | ((n & 0x00FF00FF00FF00FFULL) << 8); - n = ((n >> 16) & 0x0000FFFF0000FFFFULL) | ((n & 0x0000FFFF0000FFFFULL) << 16); - return ( n >> 32 ) | ( n << 32); + n = ((n >> 1) & 0x5555555555555555ULL) | ((n & 0x5555555555555555ULL) << 1); + n = ((n >> 2) & 0x3333333333333333ULL) | ((n & 0x3333333333333333ULL) << 2); + n = ((n >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((n & 0x0F0F0F0F0F0F0F0FULL) << 4); + n = ((n >> 8) & 0x00FF00FF00FF00FFULL) | ((n & 0x00FF00FF00FF00FFULL) << 8); + n = ((n >> 16) & 0x0000FFFF0000FFFFULL) | ((n & 0x0000FFFF0000FFFFULL) << 16); + return (n >> 32) | (n << 32); #else - return ReverseBits32( n >> 32 ) | - (static_cast(ReverseBits32( n & 0xffffffff )) << 32); + return ReverseBits32(n >> 32) | (static_cast(ReverseBits32(n & 0xffffffff)) << 32); #endif } inline int Bits::Log2FloorNonZero_Portable(uint32 n) { - // Just use the common routine - return Log2Floor(n); + // Just use the common routine + return Log2Floor(n); } // Log2Floor64() is defined in terms of Log2Floor32(), Log2FloorNonZero32() inline int Bits::Log2Floor64_Portable(uint64 n) { - const uint32 topbits = static_cast(n >> 32); - if (topbits == 0) { - // Top bits are zero, so scan in bottom bits - return Log2Floor(static_cast(n)); - } else { - return 32 + Log2FloorNonZero(topbits); - } + const uint32 topbits = 
static_cast(n >> 32); + if (topbits == 0) { + // Top bits are zero, so scan in bottom bits + return Log2Floor(static_cast(n)); + } else { + return 32 + Log2FloorNonZero(topbits); + } } // Log2FloorNonZero64() is defined in terms of Log2FloorNonZero32() inline int Bits::Log2FloorNonZero64_Portable(uint64 n) { - const uint32 topbits = static_cast(n >> 32); - if (topbits == 0) { - // Top bits are zero, so scan in bottom bits - return Log2FloorNonZero(static_cast(n)); - } else { - return 32 + Log2FloorNonZero(topbits); - } + const uint32 topbits = static_cast(n >> 32); + if (topbits == 0) { + // Top bits are zero, so scan in bottom bits + return Log2FloorNonZero(static_cast(n)); + } else { + return 32 + Log2FloorNonZero(topbits); + } } // FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero() inline int Bits::FindLSBSetNonZero64_Portable(uint64 n) { - const uint32 bottombits = static_cast(n); - if (bottombits == 0) { - // Bottom bits are zero, so scan in top bits - return 32 + FindLSBSetNonZero(static_cast(n >> 32)); - } else { - return FindLSBSetNonZero(bottombits); - } + const uint32 bottombits = static_cast(n); + if (bottombits == 0) { + // Bottom bits are zero, so scan in top bits + return 32 + FindLSBSetNonZero(static_cast(n >> 32)); + } else { + return FindLSBSetNonZero(bottombits); + } } template inline bool Bits::BytesContainByteLessThan(T bytes, uint8 c) { - T l = BitPattern::l; - T h = BitPattern::h; - // The c <= 0x80 code is straight out of Knuth Volume 4. - // Usually c will be manifestly constant. - return c <= 0x80 ? - ((h & (bytes - l * c) & ~bytes) != 0) : - ((((bytes - l * c) | (bytes ^ h)) & h) != 0); + T l = BitPattern::l; + T h = BitPattern::h; + // The c <= 0x80 code is straight out of Knuth Volume 4. + // Usually c will be manifestly constant. + return c <= 0x80 ? 
((h & (bytes - l * c) & ~bytes) != 0) + : ((((bytes - l * c) | (bytes ^ h)) & h) != 0); } -template inline bool Bits::BytesContainByte(T bytes, uint8 c) { - // Usually c will be manifestly constant. - return Bits::BytesContainByteLessThan(bytes ^ (c * BitPattern::l), 1); +template +inline bool Bits::BytesContainByte(T bytes, uint8 c) { + // Usually c will be manifestly constant. + return Bits::BytesContainByteLessThan(bytes ^ (c * BitPattern::l), 1); } template inline bool Bits::BytesAllInRange(T bytes, uint8 lo, uint8 hi) { - T l = BitPattern::l; - T h = BitPattern::h; - // In the common case, lo and hi are manifest constants. - if (lo > hi) { - return false; - } - if (hi - lo < 128) { - T x = bytes - l * lo; - T y = bytes + l * (127 - hi); - return ((x | y) & h) == 0; - } - return !Bits::BytesContainByteLessThan(bytes + (255 - hi) * l, - lo + (255 - hi)); + T l = BitPattern::l; + T h = BitPattern::h; + // In the common case, lo and hi are manifest constants. + if (lo > hi) { + return false; + } + if (hi - lo < 128) { + T x = bytes - l * lo; + T y = bytes + l * (127 - hi); + return ((x | y) & h) == 0; + } + return !Bits::BytesContainByteLessThan(bytes + (255 - hi) * l, lo + (255 - hi)); } #endif // _BITS_H_ diff --git a/be/src/gutil/casts.h b/be/src/gutil/casts.h index 4c132ae47a0308..94a3a14faf3299 100644 --- a/be/src/gutil/casts.h +++ b/be/src/gutil/casts.h @@ -10,11 +10,10 @@ #ifndef BASE_CASTS_H_ #define BASE_CASTS_H_ -#include // for use with down_cast<> -#include // for memcpy -#include // for enumeration casts and tests - +#include // for use with down_cast<> #include +#include // for enumeration casts and tests +#include // for memcpy #include "gutil/macros.h" #include "gutil/template_util.h" @@ -34,19 +33,18 @@ // // base::identity_ is used to make a non-deduced context, which // forces all callers to explicitly specify the template argument. 
-template +template inline To implicit_cast(typename base::identity_::type to) { - return to; + return to; } // This version of implicit_cast is used when two template arguments // are specified. It's obsolete and should not be used. -template -inline To implicit_cast(typename base::identity_::type const &f) { - return f; +template +inline To implicit_cast(typename base::identity_::type const& f) { + return f; } - // When you upcast (that is, cast a pointer from type Foo to type // SuperclassOfFoo), it's fine to use implicit_cast<>, since upcasts // always succeed. When you downcast (that is, cast a pointer from @@ -65,21 +63,21 @@ inline To implicit_cast(typename base::identity_::type const &f) { // if (dynamic_cast(foo)) HandleASubclass2Object(foo); // You should design the code some other way not to need this. -template // use like this: down_cast(foo); -inline To down_cast(From* f) { // so we only accept pointers - // Ensures that To is a sub-type of From *. This test is here only - // for compile-time type checking, and has no overhead in an - // optimized build at run-time, as it will be optimized away - // completely. - - // TODO(user): This should use COMPILE_ASSERT. - if (false) { - ::implicit_cast(NULL); - } - - // uses RTTI in dbg and fastbuild. asserts are disabled in opt builds. - assert(f == NULL || dynamic_cast(f) != NULL); - return static_cast(f); +template // use like this: down_cast(foo); +inline To down_cast(From* f) { // so we only accept pointers + // Ensures that To is a sub-type of From *. This test is here only + // for compile-time type checking, and has no overhead in an + // optimized build at run-time, as it will be optimized away + // completely. + + // TODO(user): This should use COMPILE_ASSERT. + if (false) { + ::implicit_cast(NULL); + } + + // uses RTTI in dbg and fastbuild. asserts are disabled in opt builds. + assert(f == NULL || dynamic_cast(f) != NULL); + return static_cast(f); } // Overload of down_cast for references. 
Use like this: down_cast(foo). @@ -90,17 +88,17 @@ inline To down_cast(From* f) { // so we only accept pointers // There's no need for a special const overload either for the pointer // or the reference form. If you call down_cast with a const T&, the // compiler will just bind From to const T. -template +template inline To down_cast(From& f) { - COMPILE_ASSERT(base::is_reference::value, target_type_not_a_reference); - typedef typename base::remove_reference::type* ToAsPointer; - if (false) { - // Compile-time check that To inherits from From. See above for details. - ::implicit_cast(NULL); - } - - assert(dynamic_cast(&f) != NULL); // RTTI: debug mode only - return static_cast(f); + COMPILE_ASSERT(base::is_reference::value, target_type_not_a_reference); + typedef typename base::remove_reference::type* ToAsPointer; + if (false) { + // Compile-time check that To inherits from From. See above for details. + ::implicit_cast(NULL); + } + + assert(dynamic_cast(&f) != NULL); // RTTI: debug mode only + return static_cast(f); } // bit_cast is a template function that implements the @@ -164,16 +162,15 @@ inline To down_cast(From& f) { template inline Dest bit_cast(const Source& source) { - // Compile time assertion: sizeof(Dest) == sizeof(Source) - // A compile error here means your Dest and Source have different sizes. - COMPILE_ASSERT(sizeof(Dest) == sizeof(Source), VerifySizesAreEqual); + // Compile time assertion: sizeof(Dest) == sizeof(Source) + // A compile error here means your Dest and Source have different sizes. 
+ COMPILE_ASSERT(sizeof(Dest) == sizeof(Source), VerifySizesAreEqual); - Dest dest; - memcpy(&dest, &source, sizeof(dest)); - return dest; + Dest dest; + memcpy(&dest, &source, sizeof(dest)); + return dest; } - // **** Enumeration Casts and Tests // // C++ requires that the value of an integer that is converted to an @@ -235,26 +232,26 @@ inline Dest bit_cast(const Source& source) { template class enum_limits { - public: - static const Enum min_enumerator = 0; - static const Enum max_enumerator = 0; - static const bool is_specialized = false; +public: + static const Enum min_enumerator = 0; + static const Enum max_enumerator = 0; + static const bool is_specialized = false; }; // Now we define the macro to define the specialization for enum_limits. // The specialization checks that the enumerators fit within an int. // This checking relies on integral promotion. -#define MAKE_ENUM_LIMITS(ENUM_TYPE, ENUM_MIN, ENUM_MAX) \ -template <> \ -class enum_limits { \ - public: \ - static const ENUM_TYPE min_enumerator = ENUM_MIN; \ - static const ENUM_TYPE max_enumerator = ENUM_MAX; \ - static const bool is_specialized = true; \ - COMPILE_ASSERT(ENUM_MIN >= INT_MIN, enumerator_too_negative_for_int); \ - COMPILE_ASSERT(ENUM_MAX <= INT_MAX, enumerator_too_positive_for_int); \ -}; +#define MAKE_ENUM_LIMITS(ENUM_TYPE, ENUM_MIN, ENUM_MAX) \ + template <> \ + class enum_limits { \ + public: \ + static const ENUM_TYPE min_enumerator = ENUM_MIN; \ + static const ENUM_TYPE max_enumerator = ENUM_MAX; \ + static const bool is_specialized = true; \ + COMPILE_ASSERT(ENUM_MIN >= INT_MIN, enumerator_too_negative_for_int); \ + COMPILE_ASSERT(ENUM_MAX <= INT_MAX, enumerator_too_positive_for_int); \ + }; // The loose enum test/cast is actually the more complicated one, // because of the problem of finding the bounds. 
@@ -283,110 +280,110 @@ class enum_limits { \ template inline bool loose_enum_test(int e_val) { - COMPILE_ASSERT(enum_limits::is_specialized, missing_MAKE_ENUM_LIMITS); - const Enum e_min = enum_limits::min_enumerator; - const Enum e_max = enum_limits::max_enumerator; - COMPILE_ASSERT(sizeof(e_val) == 4 || sizeof(e_val) == 8, unexpected_int_size); - - // Find the unary bounding negative number of e_min and e_max. - - // Find the unary bounding negative number of e_max. - // This would be b_min = e_max < 0 ? e_max : ~e_max, - // but we want to avoid branches to help the compiler. - int e_max_sign = e_max >> (sizeof(e_val)*8 - 1); - int b_min = ~e_max_sign ^ e_max; - - // Find the binary bounding negative of both e_min and e_max. - b_min &= e_min; - - // However, if e_min is positive, the result will be positive. - // Now clear all bits right of the most significant clear bit, - // which is a negative saturation for negative numbers. - // In the case of positive numbers, this is flush to zero. - b_min &= b_min >> 1; - b_min &= b_min >> 2; - b_min &= b_min >> 4; - b_min &= b_min >> 8; - b_min &= b_min >> 16; + COMPILE_ASSERT(enum_limits::is_specialized, missing_MAKE_ENUM_LIMITS); + const Enum e_min = enum_limits::min_enumerator; + const Enum e_max = enum_limits::max_enumerator; + COMPILE_ASSERT(sizeof(e_val) == 4 || sizeof(e_val) == 8, unexpected_int_size); + + // Find the unary bounding negative number of e_min and e_max. + + // Find the unary bounding negative number of e_max. + // This would be b_min = e_max < 0 ? e_max : ~e_max, + // but we want to avoid branches to help the compiler. + int e_max_sign = e_max >> (sizeof(e_val) * 8 - 1); + int b_min = ~e_max_sign ^ e_max; + + // Find the binary bounding negative of both e_min and e_max. + b_min &= e_min; + + // However, if e_min is positive, the result will be positive. + // Now clear all bits right of the most significant clear bit, + // which is a negative saturation for negative numbers. 
+ // In the case of positive numbers, this is flush to zero. + b_min &= b_min >> 1; + b_min &= b_min >> 2; + b_min &= b_min >> 4; + b_min &= b_min >> 8; + b_min &= b_min >> 16; #if INT_MAX > 2147483647 - b_min &= b_min >> 32; + b_min &= b_min >> 32; #endif - // Find the unary bounding positive number of e_max. - int b_max = e_max_sign ^ e_max; - - // Find the binary bounding positive number of that - // and the unary bounding positive number of e_min. - int e_min_sign = e_min >> (sizeof(e_val)*8 - 1); - b_max |= e_min_sign ^ e_min; - - // Now set all bits right of the most significant set bit, - // which is a positive saturation for positive numbers. - b_max |= b_max >> 1; - b_max |= b_max >> 2; - b_max |= b_max >> 4; - b_max |= b_max >> 8; - b_max |= b_max >> 16; + // Find the unary bounding positive number of e_max. + int b_max = e_max_sign ^ e_max; + + // Find the binary bounding positive number of that + // and the unary bounding positive number of e_min. + int e_min_sign = e_min >> (sizeof(e_val) * 8 - 1); + b_max |= e_min_sign ^ e_min; + + // Now set all bits right of the most significant set bit, + // which is a positive saturation for positive numbers. + b_max |= b_max >> 1; + b_max |= b_max >> 2; + b_max |= b_max >> 4; + b_max |= b_max >> 8; + b_max |= b_max >> 16; #if INT_MAX > 2147483647 - b_max |= b_max >> 32; + b_max |= b_max >> 32; #endif - // Finally test the bounds. - return b_min <= e_val && e_val <= b_max; + // Finally test the bounds. 
+ return b_min <= e_val && e_val <= b_max; } template inline bool tight_enum_test(int e_val) { - COMPILE_ASSERT(enum_limits::is_specialized, missing_MAKE_ENUM_LIMITS); - const Enum e_min = enum_limits::min_enumerator; - const Enum e_max = enum_limits::max_enumerator; - return e_min <= e_val && e_val <= e_max; + COMPILE_ASSERT(enum_limits::is_specialized, missing_MAKE_ENUM_LIMITS); + const Enum e_min = enum_limits::min_enumerator; + const Enum e_max = enum_limits::max_enumerator; + return e_min <= e_val && e_val <= e_max; } template inline bool loose_enum_test_cast(int e_val, Enum* e_var) { - if (loose_enum_test(e_val)) { - *e_var = static_cast(e_val); - return true; - } else { - return false; - } + if (loose_enum_test(e_val)) { + *e_var = static_cast(e_val); + return true; + } else { + return false; + } } template inline bool tight_enum_test_cast(int e_val, Enum* e_var) { - if (tight_enum_test(e_val)) { - *e_var = static_cast(e_val); - return true; - } else { - return false; - } + if (tight_enum_test(e_val)) { + *e_var = static_cast(e_val); + return true; + } else { + return false; + } } namespace base { namespace internal { inline void WarnEnumCastError(int value_of_int) { - LOG(DFATAL) << "Bad enum value " << value_of_int; + LOG(DFATAL) << "Bad enum value " << value_of_int; } -} // namespace internal -} // namespace base +} // namespace internal +} // namespace base template inline Enum loose_enum_cast(int e_val) { - if (!loose_enum_test(e_val)) { - base::internal::WarnEnumCastError(e_val); - } - return static_cast(e_val); + if (!loose_enum_test(e_val)) { + base::internal::WarnEnumCastError(e_val); + } + return static_cast(e_val); } template inline Enum tight_enum_cast(int e_val) { - if (!tight_enum_test(e_val)) { - base::internal::WarnEnumCastError(e_val); - } - return static_cast(e_val); + if (!tight_enum_test(e_val)) { + base::internal::WarnEnumCastError(e_val); + } + return static_cast(e_val); } -#endif // BASE_CASTS_H_ +#endif // BASE_CASTS_H_ diff --git 
a/be/src/gutil/charmap.h b/be/src/gutil/charmap.h index 1b8d3aa6907915..bb9deba0986b94 100644 --- a/be/src/gutil/charmap.h +++ b/be/src/gutil/charmap.h @@ -23,65 +23,57 @@ #include "gutil/type_traits.h" class Charmap { - public: - // Initializes with given uint32 values. For instance, the first - // variable contains bits for values 0x1F (US) down to 0x00 (NUL). - Charmap(uint32 b0, uint32 b1, uint32 b2, uint32 b3, - uint32 b4, uint32 b5, uint32 b6, uint32 b7) { - m_[0] = b0; - m_[1] = b1; - m_[2] = b2; - m_[3] = b3; - m_[4] = b4; - m_[5] = b5; - m_[6] = b6; - m_[7] = b7; - } +public: + // Initializes with given uint32 values. For instance, the first + // variable contains bits for values 0x1F (US) down to 0x00 (NUL). + Charmap(uint32 b0, uint32 b1, uint32 b2, uint32 b3, uint32 b4, uint32 b5, uint32 b6, + uint32 b7) { + m_[0] = b0; + m_[1] = b1; + m_[2] = b2; + m_[3] = b3; + m_[4] = b4; + m_[5] = b5; + m_[6] = b6; + m_[7] = b7; + } - // Initializes with a given char*. Note that NUL is not treated as - // a terminator, but rather a char to be flicked. - Charmap(const char* str, int len) { - Init(str, len); - } + // Initializes with a given char*. Note that NUL is not treated as + // a terminator, but rather a char to be flicked. + Charmap(const char* str, int len) { Init(str, len); } - // Initializes with a given char*. NUL is treated as a terminator - // and will not be in the charmap. - explicit Charmap(const char* str) { - Init(str, strlen(str)); - } + // Initializes with a given char*. NUL is treated as a terminator + // and will not be in the charmap. + explicit Charmap(const char* str) { Init(str, strlen(str)); } - bool contains(unsigned char c) const { - return (m_[c >> 5] >> (c & 0x1f)) & 0x1; - } + bool contains(unsigned char c) const { return (m_[c >> 5] >> (c & 0x1f)) & 0x1; } - // Returns true if and only if a character exists in both maps. 
- bool IntersectsWith(const Charmap & c) const { - for (int i = 0; i < 8; ++i) { - if ((m_[i] & c.m_[i]) != 0) - return true; + // Returns true if and only if a character exists in both maps. + bool IntersectsWith(const Charmap& c) const { + for (int i = 0; i < 8; ++i) { + if ((m_[i] & c.m_[i]) != 0) return true; + } + return false; } - return false; - } - bool IsZero() const { - for (uint32 c : m_) { - if (c != 0) - return false; + bool IsZero() const { + for (uint32 c : m_) { + if (c != 0) return false; + } + return true; } - return true; - } - protected: - uint32 m_[8]; +protected: + uint32 m_[8]; - void Init(const char* str, int len) { - memset(&m_, 0, sizeof m_); - for (int i = 0; i < len; ++i) { - unsigned char value = static_cast(str[i]); - m_[value >> 5] |= 1UL << (value & 0x1f); + void Init(const char* str, int len) { + memset(&m_, 0, sizeof m_); + for (int i = 0; i < len; ++i) { + unsigned char value = static_cast(str[i]); + m_[value >> 5] |= 1UL << (value & 0x1f); + } } - } }; DECLARE_POD(Charmap); -#endif // UTIL_GTL_CHARMAP_H_ +#endif // UTIL_GTL_CHARMAP_H_ diff --git a/be/src/gutil/cpu.cc b/be/src/gutil/cpu.cc index c50e142c7d3680..91cf92ef6302b4 100644 --- a/be/src/gutil/cpu.cc +++ b/be/src/gutil/cpu.cc @@ -15,259 +15,252 @@ #if defined(__x86_64__) #if defined(_MSC_VER) +#include // For _xgetbv() #include -#include // For _xgetbv() #endif #endif namespace base { #if defined(ARCH_CPU_X86_FAMILY) namespace internal { -std::tuple ComputeX86FamilyAndModel( - const std::string& vendor, - int signature) { - int family = (signature >> 8) & 0xf; - int model = (signature >> 4) & 0xf; - int ext_family = 0; - int ext_model = 0; - // The "Intel 64 and IA-32 Architectures Developer's Manual: Vol. 2A" - // specifies the Extended Model is defined only when the Base Family is - // 06h or 0Fh. - // The "AMD CPUID Specification" specifies that the Extended Model is - // defined only when Base Family is 0Fh. 
- // Both manuals define the display model as - // {ExtendedModel[3:0],BaseModel[3:0]} in that case. - if (family == 0xf || (family == 0x6 && vendor == "GenuineIntel")) { - ext_model = (signature >> 16) & 0xf; - model += ext_model << 4; - } - // Both the "Intel 64 and IA-32 Architectures Developer's Manual: Vol. 2A" - // and the "AMD CPUID Specification" specify that the Extended Family is - // defined only when the Base Family is 0Fh. - // Both manuals define the display family as {0000b,BaseFamily[3:0]} + - // ExtendedFamily[7:0] in that case. - if (family == 0xf) { - ext_family = (signature >> 20) & 0xff; - family += ext_family; - } - return {family, model, ext_family, ext_model}; +std::tuple ComputeX86FamilyAndModel(const std::string& vendor, int signature) { + int family = (signature >> 8) & 0xf; + int model = (signature >> 4) & 0xf; + int ext_family = 0; + int ext_model = 0; + // The "Intel 64 and IA-32 Architectures Developer's Manual: Vol. 2A" + // specifies the Extended Model is defined only when the Base Family is + // 06h or 0Fh. + // The "AMD CPUID Specification" specifies that the Extended Model is + // defined only when Base Family is 0Fh. + // Both manuals define the display model as + // {ExtendedModel[3:0],BaseModel[3:0]} in that case. + if (family == 0xf || (family == 0x6 && vendor == "GenuineIntel")) { + ext_model = (signature >> 16) & 0xf; + model += ext_model << 4; + } + // Both the "Intel 64 and IA-32 Architectures Developer's Manual: Vol. 2A" + // and the "AMD CPUID Specification" specify that the Extended Family is + // defined only when the Base Family is 0Fh. + // Both manuals define the display family as {0000b,BaseFamily[3:0]} + + // ExtendedFamily[7:0] in that case. 
+ if (family == 0xf) { + ext_family = (signature >> 20) & 0xff; + family += ext_family; + } + return {family, model, ext_family, ext_model}; } -} // namespace internal -#endif // defined(ARCH_CPU_X86_FAMILY) +} // namespace internal +#endif // defined(ARCH_CPU_X86_FAMILY) CPU::CPU() - : signature_(0), - type_(0), - family_(0), - model_(0), - stepping_(0), - ext_model_(0), - ext_family_(0), - has_mmx_(false), - has_sse_(false), - has_sse2_(false), - has_sse3_(false), - has_ssse3_(false), - has_sse41_(false), - has_sse42_(false), - has_popcnt_(false), - has_avx_(false), - has_avx2_(false), - has_aesni_(false), - has_non_stop_time_stamp_counter_(false), - is_running_in_vm_(false), - cpu_vendor_("unknown") { - Initialize(); + : signature_(0), + type_(0), + family_(0), + model_(0), + stepping_(0), + ext_model_(0), + ext_family_(0), + has_mmx_(false), + has_sse_(false), + has_sse2_(false), + has_sse3_(false), + has_ssse3_(false), + has_sse41_(false), + has_sse42_(false), + has_popcnt_(false), + has_avx_(false), + has_avx2_(false), + has_aesni_(false), + has_non_stop_time_stamp_counter_(false), + is_running_in_vm_(false), + cpu_vendor_("unknown") { + Initialize(); } namespace { #if defined(ARCH_CPU_X86_FAMILY) #if !defined(COMPILER_MSVC) #if defined(__pic__) && defined(__i386__) void __cpuid(int cpu_info[4], int info_type) { - __asm__ volatile( - "mov %%ebx, %%edi\n" - "cpuid\n" - "xchg %%edi, %%ebx\n" - : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), - "=d"(cpu_info[3]) - : "a"(info_type), "c"(0)); + __asm__ volatile( + "mov %%ebx, %%edi\n" + "cpuid\n" + "xchg %%edi, %%ebx\n" + : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) + : "a"(info_type), "c"(0)); } #else void __cpuid(int cpu_info[4], int info_type) { - __asm__ volatile("cpuid\n" - : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), - "=d"(cpu_info[3]) - : "a"(info_type), "c"(0)); + __asm__ volatile("cpuid\n" + : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), 
"=d"(cpu_info[3]) + : "a"(info_type), "c"(0)); } #endif -#endif // !defined(COMPILER_MSVC) +#endif // !defined(COMPILER_MSVC) // xgetbv returns the value of an Intel Extended Control Register (XCR). // Currently only XCR0 is defined by Intel so |xcr| should always be zero. uint64_t xgetbv(uint32_t xcr) { #if defined(COMPILER_MSVC) - return _xgetbv(xcr); + return _xgetbv(xcr); #else - uint32_t eax, edx; - __asm__ volatile ( - "xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr)); - return (static_cast(edx) << 32) | eax; -#endif // defined(COMPILER_MSVC) + uint32_t eax, edx; + __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr)); + return (static_cast(edx) << 32) | eax; +#endif // defined(COMPILER_MSVC) } -#endif // ARCH_CPU_X86_FAMILY +#endif // ARCH_CPU_X86_FAMILY #if defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX)) std::string* CpuInfoBrand() { - static std::string* brand = []() { - // This function finds the value from /proc/cpuinfo under the key "model - // name" or "Processor". "model name" is used in Linux 3.8 and later (3.7 - // and later for arm64) and is shown once per CPU. "Processor" is used in - // earler versions and is shown only once at the top of /proc/cpuinfo - // regardless of the number CPUs. 
- const char kModelNamePrefix[] = "model name\t: "; - const char kProcessorPrefix[] = "Processor\t: "; - std::ifstream info("/proc/cpuinfo"); - std::string contents; - contents.assign(std::istreambuf_iterator(info), std::istreambuf_iterator()); - std::istringstream iss(contents); - std::string line; - while (std::getline(iss, line)) { - if (line.compare(0, strlen(kModelNamePrefix), kModelNamePrefix) == 0) - return new std::string(line.substr(strlen(kModelNamePrefix))); - if (line.compare(0, strlen(kProcessorPrefix), kProcessorPrefix) == 0) - return new std::string(line.substr(strlen(kProcessorPrefix))); - } - return new std::string(); - }(); - return brand; + static std::string* brand = []() { + // This function finds the value from /proc/cpuinfo under the key "model + // name" or "Processor". "model name" is used in Linux 3.8 and later (3.7 + // and later for arm64) and is shown once per CPU. "Processor" is used in + // earler versions and is shown only once at the top of /proc/cpuinfo + // regardless of the number CPUs. 
+ const char kModelNamePrefix[] = "model name\t: "; + const char kProcessorPrefix[] = "Processor\t: "; + std::ifstream info("/proc/cpuinfo"); + std::string contents; + contents.assign(std::istreambuf_iterator(info), std::istreambuf_iterator()); + std::istringstream iss(contents); + std::string line; + while (std::getline(iss, line)) { + if (line.compare(0, strlen(kModelNamePrefix), kModelNamePrefix) == 0) + return new std::string(line.substr(strlen(kModelNamePrefix))); + if (line.compare(0, strlen(kProcessorPrefix), kProcessorPrefix) == 0) + return new std::string(line.substr(strlen(kProcessorPrefix))); + } + return new std::string(); + }(); + return brand; } -#endif // defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || - // defined(OS_LINUX)) -} // namespace +#endif // defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX)) + +} // namespace void CPU::Initialize() { #if defined(ARCH_CPU_X86_FAMILY) - int cpu_info[4] = {-1}; - // This array is used to temporarily hold the vendor name and then the brand - // name. Thus it has to be big enough for both use cases. There are - // static_asserts below for each of the use cases to make sure this array is - // big enough. - char cpu_string[sizeof(cpu_info) * 3 + 1]; - // __cpuid with an InfoType argument of 0 returns the number of - // valid Ids in CPUInfo[0] and the CPU identification string in - // the other three array elements. The CPU identification string is - // not in linear order. The code below arranges the information - // in a human readable form. The human readable order is CPUInfo[1] | - // CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped - // before using memcpy() to copy these three array elements to |cpu_string|. 
- __cpuid(cpu_info, 0); - int num_ids = cpu_info[0]; - std::swap(cpu_info[2], cpu_info[3]); - static constexpr size_t kVendorNameSize = 3 * sizeof(cpu_info[1]); - static_assert(kVendorNameSize < sizeof(cpu_string) / sizeof(cpu_string[0]), - "cpu_string too small"); - memcpy(cpu_string, &cpu_info[1], kVendorNameSize); - cpu_string[kVendorNameSize] = '\0'; - cpu_vendor_ = cpu_string; - // Interpret CPU feature information. - if (num_ids > 0) { - int cpu_info7[4] = {0}; - __cpuid(cpu_info, 1); - if (num_ids >= 7) { - __cpuid(cpu_info7, 7); + int cpu_info[4] = {-1}; + // This array is used to temporarily hold the vendor name and then the brand + // name. Thus it has to be big enough for both use cases. There are + // static_asserts below for each of the use cases to make sure this array is + // big enough. + char cpu_string[sizeof(cpu_info) * 3 + 1]; + // __cpuid with an InfoType argument of 0 returns the number of + // valid Ids in CPUInfo[0] and the CPU identification string in + // the other three array elements. The CPU identification string is + // not in linear order. The code below arranges the information + // in a human readable form. The human readable order is CPUInfo[1] | + // CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped + // before using memcpy() to copy these three array elements to |cpu_string|. + __cpuid(cpu_info, 0); + int num_ids = cpu_info[0]; + std::swap(cpu_info[2], cpu_info[3]); + static constexpr size_t kVendorNameSize = 3 * sizeof(cpu_info[1]); + static_assert(kVendorNameSize < sizeof(cpu_string) / sizeof(cpu_string[0]), + "cpu_string too small"); + memcpy(cpu_string, &cpu_info[1], kVendorNameSize); + cpu_string[kVendorNameSize] = '\0'; + cpu_vendor_ = cpu_string; + // Interpret CPU feature information. 
+ if (num_ids > 0) { + int cpu_info7[4] = {0}; + __cpuid(cpu_info, 1); + if (num_ids >= 7) { + __cpuid(cpu_info7, 7); + } + signature_ = cpu_info[0]; + stepping_ = cpu_info[0] & 0xf; + type_ = (cpu_info[0] >> 12) & 0x3; + std::tie(family_, model_, ext_family_, ext_model_) = + internal::ComputeX86FamilyAndModel(cpu_vendor_, signature_); + has_mmx_ = (cpu_info[3] & 0x00800000) != 0; + has_sse_ = (cpu_info[3] & 0x02000000) != 0; + has_sse2_ = (cpu_info[3] & 0x04000000) != 0; + has_sse3_ = (cpu_info[2] & 0x00000001) != 0; + has_ssse3_ = (cpu_info[2] & 0x00000200) != 0; + has_sse41_ = (cpu_info[2] & 0x00080000) != 0; + has_sse42_ = (cpu_info[2] & 0x00100000) != 0; + has_popcnt_ = (cpu_info[2] & 0x00800000) != 0; + // "Hypervisor Present Bit: Bit 31 of ECX of CPUID leaf 0x1." + // See https://lwn.net/Articles/301888/ + // This is checking for any hypervisor. Hypervisors may choose not to + // announce themselves. Hypervisors trap CPUID and sometimes return + // different results to underlying hardware. + is_running_in_vm_ = (cpu_info[2] & 0x80000000) != 0; + // AVX instructions will generate an illegal instruction exception unless + // a) they are supported by the CPU, + // b) XSAVE is supported by the CPU and + // c) XSAVE is enabled by the kernel. + // See http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled + // + // In addition, we have observed some crashes with the xgetbv instruction + // even after following Intel's example code. (See crbug.com/375968.) + // Because of that, we also test the XSAVE bit because its description in + // the CPUID documentation suggests that it signals xgetbv support. + has_avx_ = (cpu_info[2] & 0x10000000) != 0 && (cpu_info[2] & 0x04000000) != 0 /* XSAVE */ && + (cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ && + (xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */; + has_aesni_ = (cpu_info[2] & 0x02000000) != 0; + has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0; + } + // Get the brand string of the cpu. 
+ __cpuid(cpu_info, 0x80000000); + const int max_parameter = cpu_info[0]; + static constexpr int kParameterStart = 0x80000002; + static constexpr int kParameterEnd = 0x80000004; + static constexpr int kParameterSize = kParameterEnd - kParameterStart + 1; + static_assert( + kParameterSize * sizeof(cpu_info) + 1 == sizeof(cpu_string) / sizeof(cpu_string[0]), + "cpu_string has wrong size"); + if (max_parameter >= kParameterEnd) { + size_t i = 0; + for (int parameter = kParameterStart; parameter <= kParameterEnd; ++parameter) { + __cpuid(cpu_info, parameter); + memcpy(&cpu_string[i], cpu_info, sizeof(cpu_info)); + i += sizeof(cpu_info); + } + cpu_string[i] = '\0'; + cpu_brand_ = cpu_string; } - signature_ = cpu_info[0]; - stepping_ = cpu_info[0] & 0xf; - type_ = (cpu_info[0] >> 12) & 0x3; - std::tie(family_, model_, ext_family_, ext_model_) = - internal::ComputeX86FamilyAndModel(cpu_vendor_, signature_); - has_mmx_ = (cpu_info[3] & 0x00800000) != 0; - has_sse_ = (cpu_info[3] & 0x02000000) != 0; - has_sse2_ = (cpu_info[3] & 0x04000000) != 0; - has_sse3_ = (cpu_info[2] & 0x00000001) != 0; - has_ssse3_ = (cpu_info[2] & 0x00000200) != 0; - has_sse41_ = (cpu_info[2] & 0x00080000) != 0; - has_sse42_ = (cpu_info[2] & 0x00100000) != 0; - has_popcnt_ = (cpu_info[2] & 0x00800000) != 0; - // "Hypervisor Present Bit: Bit 31 of ECX of CPUID leaf 0x1." - // See https://lwn.net/Articles/301888/ - // This is checking for any hypervisor. Hypervisors may choose not to - // announce themselves. Hypervisors trap CPUID and sometimes return - // different results to underlying hardware. - is_running_in_vm_ = (cpu_info[2] & 0x80000000) != 0; - // AVX instructions will generate an illegal instruction exception unless - // a) they are supported by the CPU, - // b) XSAVE is supported by the CPU and - // c) XSAVE is enabled by the kernel. 
- // See http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled - // - // In addition, we have observed some crashes with the xgetbv instruction - // even after following Intel's example code. (See crbug.com/375968.) - // Because of that, we also test the XSAVE bit because its description in - // the CPUID documentation suggests that it signals xgetbv support. - has_avx_ = - (cpu_info[2] & 0x10000000) != 0 && - (cpu_info[2] & 0x04000000) != 0 /* XSAVE */ && - (cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ && - (xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */; - has_aesni_ = (cpu_info[2] & 0x02000000) != 0; - has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0; - } - // Get the brand string of the cpu. - __cpuid(cpu_info, 0x80000000); - const int max_parameter = cpu_info[0]; - static constexpr int kParameterStart = 0x80000002; - static constexpr int kParameterEnd = 0x80000004; - static constexpr int kParameterSize = kParameterEnd - kParameterStart + 1; - static_assert(kParameterSize * sizeof(cpu_info) + 1 == sizeof(cpu_string) / sizeof(cpu_string[0]), - "cpu_string has wrong size"); - if (max_parameter >= kParameterEnd) { - size_t i = 0; - for (int parameter = kParameterStart; parameter <= kParameterEnd; - ++parameter) { - __cpuid(cpu_info, parameter); - memcpy(&cpu_string[i], cpu_info, sizeof(cpu_info)); - i += sizeof(cpu_info); + static constexpr int kParameterContainingNonStopTimeStampCounter = 0x80000007; + if (max_parameter >= kParameterContainingNonStopTimeStampCounter) { + __cpuid(cpu_info, kParameterContainingNonStopTimeStampCounter); + has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0; } - cpu_string[i] = '\0'; - cpu_brand_ = cpu_string; - } - static constexpr int kParameterContainingNonStopTimeStampCounter = 0x80000007; - if (max_parameter >= kParameterContainingNonStopTimeStampCounter) { - __cpuid(cpu_info, kParameterContainingNonStopTimeStampCounter); - has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0; - } - 
if (!has_non_stop_time_stamp_counter_ && is_running_in_vm_) { - int cpu_info_hv[4] = {}; - __cpuid(cpu_info_hv, 0x40000000); - if (cpu_info_hv[1] == 0x7263694D && // Micr - cpu_info_hv[2] == 0x666F736F && // osof - cpu_info_hv[3] == 0x76482074) { // t Hv - // If CPUID says we have a variant TSC and a hypervisor has identified - // itself and the hypervisor says it is Microsoft Hyper-V, then treat - // TSC as invariant. - // - // Microsoft Hyper-V hypervisor reports variant TSC as there are some - // scenarios (eg. VM live migration) where the TSC is variant, but for - // our purposes we can treat it as invariant. - has_non_stop_time_stamp_counter_ = true; + if (!has_non_stop_time_stamp_counter_ && is_running_in_vm_) { + int cpu_info_hv[4] = {}; + __cpuid(cpu_info_hv, 0x40000000); + if (cpu_info_hv[1] == 0x7263694D && // Micr + cpu_info_hv[2] == 0x666F736F && // osof + cpu_info_hv[3] == 0x76482074) { // t Hv + // If CPUID says we have a variant TSC and a hypervisor has identified + // itself and the hypervisor says it is Microsoft Hyper-V, then treat + // TSC as invariant. + // + // Microsoft Hyper-V hypervisor reports variant TSC as there are some + // scenarios (eg. VM live migration) where the TSC is variant, but for + // our purposes we can treat it as invariant. + has_non_stop_time_stamp_counter_ = true; + } } - } #elif defined(ARCH_CPU_ARM_FAMILY) #if (defined(OS_ANDROID) || defined(OS_LINUX)) - cpu_brand_ = *CpuInfoBrand(); + cpu_brand_ = *CpuInfoBrand(); #elif defined(OS_WIN) - // Windows makes high-resolution thread timing information available in - // user-space. - has_non_stop_time_stamp_counter_ = true; + // Windows makes high-resolution thread timing information available in + // user-space. 
+ has_non_stop_time_stamp_counter_ = true; #endif #endif } CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const { - if (has_avx2()) return AVX2; - if (has_avx()) return AVX; - if (has_sse42()) return SSE42; - if (has_sse41()) return SSE41; - if (has_ssse3()) return SSSE3; - if (has_sse3()) return SSE3; - if (has_sse2()) return SSE2; - if (has_sse()) return SSE; - return PENTIUM; + if (has_avx2()) return AVX2; + if (has_avx()) return AVX; + if (has_sse42()) return SSE42; + if (has_sse41()) return SSE41; + if (has_ssse3()) return SSSE3; + if (has_sse3()) return SSE3; + if (has_sse2()) return SSE2; + if (has_sse()) return SSE; + return PENTIUM; } -} // namespace base +} // namespace base diff --git a/be/src/gutil/cpu.h b/be/src/gutil/cpu.h index 2115251a42f36d..37c636b5bffd21 100644 --- a/be/src/gutil/cpu.h +++ b/be/src/gutil/cpu.h @@ -43,78 +43,75 @@ namespace base { namespace internal { // Compute the CPU family and model based on the vendor and CPUID signature. // Returns in order: family, model, extended family, extended model. -std::tuple ComputeX86FamilyAndModel( - const std::string& vendor, - int signature); -} // namespace internal -#endif // defined(ARCH_CPU_X86_FAMILY) +std::tuple ComputeX86FamilyAndModel(const std::string& vendor, int signature); +} // namespace internal +#endif // defined(ARCH_CPU_X86_FAMILY) // Query information about the processor. class CPU final { - public: - CPU(); - enum IntelMicroArchitecture { - PENTIUM, - SSE, - SSE2, - SSE3, - SSSE3, - SSE41, - SSE42, - AVX, - AVX2, - MAX_INTEL_MICRO_ARCHITECTURE - }; - // Accessors for CPU information. 
- const std::string& vendor_name() const { return cpu_vendor_; } - int signature() const { return signature_; } - int stepping() const { return stepping_; } - int model() const { return model_; } - int family() const { return family_; } - int type() const { return type_; } - int extended_model() const { return ext_model_; } - int extended_family() const { return ext_family_; } - bool has_mmx() const { return has_mmx_; } - bool has_sse() const { return has_sse_; } - bool has_sse2() const { return has_sse2_; } - bool has_sse3() const { return has_sse3_; } - bool has_ssse3() const { return has_ssse3_; } - bool has_sse41() const { return has_sse41_; } - bool has_sse42() const { return has_sse42_; } - bool has_popcnt() const { return has_popcnt_; } - bool has_avx() const { return has_avx_; } - bool has_avx2() const { return has_avx2_; } - bool has_aesni() const { return has_aesni_; } - bool has_non_stop_time_stamp_counter() const { - return has_non_stop_time_stamp_counter_; - } - bool is_running_in_vm() const { return is_running_in_vm_; } - IntelMicroArchitecture GetIntelMicroArchitecture() const; - const std::string& cpu_brand() const { return cpu_brand_; } - private: - // Query the processor for CPUID information. 
- void Initialize(); - int signature_; // raw form of type, family, model, and stepping - int type_; // process type - int family_; // family of the processor - int model_; // model of processor - int stepping_; // processor revision number - int ext_model_; - int ext_family_; - bool has_mmx_; - bool has_sse_; - bool has_sse2_; - bool has_sse3_; - bool has_ssse3_; - bool has_sse41_; - bool has_sse42_; - bool has_popcnt_; - bool has_avx_; - bool has_avx2_; - bool has_aesni_; - bool has_non_stop_time_stamp_counter_; - bool is_running_in_vm_; - std::string cpu_vendor_; - std::string cpu_brand_; +public: + CPU(); + enum IntelMicroArchitecture { + PENTIUM, + SSE, + SSE2, + SSE3, + SSSE3, + SSE41, + SSE42, + AVX, + AVX2, + MAX_INTEL_MICRO_ARCHITECTURE + }; + // Accessors for CPU information. + const std::string& vendor_name() const { return cpu_vendor_; } + int signature() const { return signature_; } + int stepping() const { return stepping_; } + int model() const { return model_; } + int family() const { return family_; } + int type() const { return type_; } + int extended_model() const { return ext_model_; } + int extended_family() const { return ext_family_; } + bool has_mmx() const { return has_mmx_; } + bool has_sse() const { return has_sse_; } + bool has_sse2() const { return has_sse2_; } + bool has_sse3() const { return has_sse3_; } + bool has_ssse3() const { return has_ssse3_; } + bool has_sse41() const { return has_sse41_; } + bool has_sse42() const { return has_sse42_; } + bool has_popcnt() const { return has_popcnt_; } + bool has_avx() const { return has_avx_; } + bool has_avx2() const { return has_avx2_; } + bool has_aesni() const { return has_aesni_; } + bool has_non_stop_time_stamp_counter() const { return has_non_stop_time_stamp_counter_; } + bool is_running_in_vm() const { return is_running_in_vm_; } + IntelMicroArchitecture GetIntelMicroArchitecture() const; + const std::string& cpu_brand() const { return cpu_brand_; } + +private: + // Query the 
processor for CPUID information. + void Initialize(); + int signature_; // raw form of type, family, model, and stepping + int type_; // process type + int family_; // family of the processor + int model_; // model of processor + int stepping_; // processor revision number + int ext_model_; + int ext_family_; + bool has_mmx_; + bool has_sse_; + bool has_sse2_; + bool has_sse3_; + bool has_ssse3_; + bool has_sse41_; + bool has_sse42_; + bool has_popcnt_; + bool has_avx_; + bool has_avx2_; + bool has_aesni_; + bool has_non_stop_time_stamp_counter_; + bool is_running_in_vm_; + std::string cpu_vendor_; + std::string cpu_brand_; }; -} // namespace base -#endif // BASE_CPU_H_ +} // namespace base +#endif // BASE_CPU_H_ diff --git a/be/src/gutil/cycleclock-inl.h b/be/src/gutil/cycleclock-inl.h index 063b397a36bb3f..093d301b6ebb47 100644 --- a/be/src/gutil/cycleclock-inl.h +++ b/be/src/gutil/cycleclock-inl.h @@ -36,8 +36,8 @@ #include -#include "gutil/port.h" #include "gutil/arm_instruction_set_select.h" +#include "gutil/port.h" // Please do not nest #if directives. Keep one section, and one #if per // platform. @@ -50,32 +50,32 @@ #if defined(__APPLE__) #include inline int64 CycleClock::Now() { - // this goes at the top because we need ALL Macs, regardless of - // architecture, to return the number of "mach time units" that - // have passed since startup. See sysinfo.cc where - // InitializeSystemInfo() sets the supposed cpu clock frequency of - // macs to the number of mach time units per second, not actual - // CPU clock frequency (which can change in the face of CPU - // frequency scaling). Also note that when the Mac sleeps, this - // counter pauses; it does not continue counting, nor does it - // reset to zero. - return mach_absolute_time(); + // this goes at the top because we need ALL Macs, regardless of + // architecture, to return the number of "mach time units" that + // have passed since startup. 
See sysinfo.cc where + // InitializeSystemInfo() sets the supposed cpu clock frequency of + // macs to the number of mach time units per second, not actual + // CPU clock frequency (which can change in the face of CPU + // frequency scaling). Also note that when the Mac sleeps, this + // counter pauses; it does not continue counting, nor does it + // reset to zero. + return mach_absolute_time(); } // ---------------------------------------------------------------- #elif defined(__i386__) inline int64 CycleClock::Now() { - int64 ret; - __asm__ volatile("rdtsc" : "=A" (ret)); - return ret; + int64 ret; + __asm__ volatile("rdtsc" : "=A"(ret)); + return ret; } // ---------------------------------------------------------------- #elif defined(__x86_64__) || defined(__amd64__) inline int64 CycleClock::Now() { - uint64 low, high; - __asm__ volatile("rdtsc" : "=a" (low), "=d" (high)); - return (high << 32) | low; + uint64 low, high; + __asm__ volatile("rdtsc" : "=a"(low), "=d"(high)); + return (high << 32) | low; } // ---------------------------------------------------------------- @@ -83,52 +83,52 @@ inline int64 CycleClock::Now() { #define SPR_TB 268 #define SPR_TBU 269 inline int64 CycleClock::Now() { - uint64 time_base_value; - if (sizeof(void*) == 8) { - // On PowerPC64, time base can be read with one SPR read. - asm volatile("mfspr %0, %1" : "=r" (time_base_value) : "i"(SPR_TB)); - } else { - uint32 tbl, tbu0, tbu1; - asm volatile (" mfspr %0, %3\n" - " mfspr %1, %4\n" - " mfspr %2, %3\n" : - "=r"(tbu0), "=r"(tbl), "=r"(tbu1) : - "i"(SPR_TBU), "i"(SPR_TB)); - // If there is a carry into the upper half, it is okay to return - // (tbu1, 0) since it must be between the 2 TBU reads. 
- tbl &= -static_cast(tbu0 == tbu1); - // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) - time_base_value = - (static_cast(tbu1) << 32) | static_cast(tbl); - } - return static_cast(time_base_value); + uint64 time_base_value; + if (sizeof(void*) == 8) { + // On PowerPC64, time base can be read with one SPR read. + asm volatile("mfspr %0, %1" : "=r"(time_base_value) : "i"(SPR_TB)); + } else { + uint32 tbl, tbu0, tbu1; + asm volatile( + " mfspr %0, %3\n" + " mfspr %1, %4\n" + " mfspr %2, %3\n" + : "=r"(tbu0), "=r"(tbl), "=r"(tbu1) + : "i"(SPR_TBU), "i"(SPR_TB)); + // If there is a carry into the upper half, it is okay to return + // (tbu1, 0) since it must be between the 2 TBU reads. + tbl &= -static_cast(tbu0 == tbu1); + // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) + time_base_value = (static_cast(tbu1) << 32) | static_cast(tbl); + } + return static_cast(time_base_value); } // ---------------------------------------------------------------- #elif defined(__sparc__) inline int64 CycleClock::Now() { - int64 tick; - asm(".byte 0x83, 0x41, 0x00, 0x00"); - asm("mov %%g1, %0" : "=r" (tick)); - return tick; + int64 tick; + asm(".byte 0x83, 0x41, 0x00, 0x00"); + asm("mov %%g1, %0" : "=r"(tick)); + return tick; } // ---------------------------------------------------------------- #elif defined(__ia64__) inline int64 CycleClock::Now() { - int64 itc; - asm("mov %0 = ar.itc" : "=r" (itc)); - return itc; + int64 itc; + asm("mov %0 = ar.itc" : "=r"(itc)); + return itc; } // ---------------------------------------------------------------- #elif defined(_MSC_VER) && defined(_M_IX86) inline int64 CycleClock::Now() { - // Older MSVC compilers (like 7.x) don't seem to support the - // __rdtsc intrinsic properly, so I prefer to use _asm instead - // when I know it will work. Otherwise, I'll use __rdtsc and hope - // the code is being compiled with a non-ancient compiler. 
- _asm rdtsc + // Older MSVC compilers (like 7.x) don't seem to support the + // __rdtsc intrinsic properly, so I prefer to use _asm instead + // when I know it will work. Otherwise, I'll use __rdtsc and hope + // the code is being compiled with a non-ancient compiler. + _asm rdtsc } // ---------------------------------------------------------------- @@ -143,65 +143,62 @@ inline int64 CycleClock::Now() { extern "C" uint64 __rdtsc(); #pragma intrinsic(__rdtsc) inline int64 CycleClock::Now() { - return __rdtsc(); + return __rdtsc(); } // ---------------------------------------------------------------- -#elif defined(ARMV6) // V6 is the earliest arm that has a standard cyclecount +#elif defined(ARMV6) // V6 is the earliest arm that has a standard cyclecount #include "gutil/sysinfo.h" inline int64 CycleClock::Now() { - uint32 pmccntr; - uint32 pmuseren; - uint32 pmcntenset; - // Read the user mode perf monitor counter access permissions. - asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren)); - if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset)); - if (pmcntenset & 0x80000000ul) { // Is it counting? - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr)); - // The counter is set up to count every 64th cycle - return static_cast(pmccntr) * 64; // Should optimize to << 6 + uint32 pmccntr; + uint32 pmuseren; + uint32 pmcntenset; + // Read the user mode perf monitor counter access permissions. + asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren)); + if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset)); + if (pmcntenset & 0x80000000ul) { // Is it counting? 
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr)); + // The counter is set up to count every 64th cycle + return static_cast(pmccntr) * 64; // Should optimize to << 6 + } } - } - struct timeval tv; - gettimeofday(&tv, NULL); - return static_cast((tv.tv_sec + tv.tv_usec * 0.000001) - * CyclesPerSecond()); + struct timeval tv; + gettimeofday(&tv, NULL); + return static_cast((tv.tv_sec + tv.tv_usec * 0.000001) * CyclesPerSecond()); } // ---------------------------------------------------------------- #elif defined(ARMV3) -#include "gutil/sysinfo.h" // for CyclesPerSecond() +#include "gutil/sysinfo.h" // for CyclesPerSecond() inline int64 CycleClock::Now() { - struct timeval tv; - gettimeofday(&tv, NULL); - return static_cast((tv.tv_sec + tv.tv_usec * 0.000001) - * CyclesPerSecond()); + struct timeval tv; + gettimeofday(&tv, NULL); + return static_cast((tv.tv_sec + tv.tv_usec * 0.000001) * CyclesPerSecond()); } // ---------------------------------------------------------------- #elif defined(__mips__) #include "gutil/sysinfo.h" inline int64 CycleClock::Now() { - // mips apparently only allows rdtsc for superusers, so we fall - // back to gettimeofday. It's possible clock_gettime would be better. - struct timeval tv; - gettimeofday(&tv, NULL); - return static_cast((tv.tv_sec + tv.tv_usec * 0.000001) - * CyclesPerSecond()); + // mips apparently only allows rdtsc for superusers, so we fall + // back to gettimeofday. It's possible clock_gettime would be better. + struct timeval tv; + gettimeofday(&tv, NULL); + return static_cast((tv.tv_sec + tv.tv_usec * 0.000001) * CyclesPerSecond()); } // ---------------------------------------------------------------- #elif defined(__aarch64__) #include "gutil/sysinfo.h" inline int64 CycleClock::Now() { - // System timer of ARMv8 runs at a different frequency than the CPU's. - // The frequency is fixed, typically in the range 1-50MHz. It can be - // read at CNTFRQ special register. 
We assume the OS has set up - // the virtual timer properly. - int64_t virtual_timer_value; - asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); - return virtual_timer_value; + // System timer of ARMv8 runs at a different frequency than the CPU's. + // The frequency is fixed, typically in the range 1-50MHz. It can be + // read at CNTFRQ special register. We assume the OS has set up + // the virtual timer properly. + int64_t virtual_timer_value; + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); + return virtual_timer_value; } // ---------------------------------------------------------------- #else @@ -212,4 +209,4 @@ inline int64 CycleClock::Now() { #error You need to define CycleTimer for your O/S and CPU #endif -#endif // GUTIL_CYCLECLOCK_INL_H_ +#endif // GUTIL_CYCLECLOCK_INL_H_ diff --git a/be/src/gutil/dynamic_annotations.h b/be/src/gutil/dynamic_annotations.h index dc2571f3b6592f..f0016d20ae8295 100644 --- a/be/src/gutil/dynamic_annotations.h +++ b/be/src/gutil/dynamic_annotations.h @@ -58,12 +58,12 @@ #define __DYNAMIC_ANNOTATIONS_H__ #ifndef DYNAMIC_ANNOTATIONS_ENABLED -# define DYNAMIC_ANNOTATIONS_ENABLED 0 +#define DYNAMIC_ANNOTATIONS_ENABLED 0 #endif #if DYNAMIC_ANNOTATIONS_ENABLED != 0 - /* ------------------------------------------------------------- +/* ------------------------------------------------------------- Annotations useful when implementing condition variables such as CondVar, using conditional critical sections (Await/LockWhen) and when constructing user-defined synchronization mechanisms. @@ -101,295 +101,263 @@ Note: when possible, please use the existing reference counting and message queue implementations instead of inventing new ones. */ - /* Report that wait on the condition variable at address "cv" has succeeded +/* Report that wait on the condition variable at address "cv" has succeeded and the lock at address "lock" is held. 
*/ - #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \ - AnnotateCondVarWait(__FILE__, __LINE__, cv, lock) +#define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) AnnotateCondVarWait(__FILE__, __LINE__, cv, lock) - /* Report that wait on the condition variable at "cv" has succeeded. Variant +/* Report that wait on the condition variable at "cv" has succeeded. Variant w/o lock. */ - #define ANNOTATE_CONDVAR_WAIT(cv) \ - AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL) +#define ANNOTATE_CONDVAR_WAIT(cv) AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL) - /* Report that we are about to signal on the condition variable at address +/* Report that we are about to signal on the condition variable at address "cv". */ - #define ANNOTATE_CONDVAR_SIGNAL(cv) \ - AnnotateCondVarSignal(__FILE__, __LINE__, cv) +#define ANNOTATE_CONDVAR_SIGNAL(cv) AnnotateCondVarSignal(__FILE__, __LINE__, cv) - /* Report that we are about to signal_all on the condition variable at "cv". */ - #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \ - AnnotateCondVarSignalAll(__FILE__, __LINE__, cv) +/* Report that we are about to signal_all on the condition variable at "cv". */ +#define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) AnnotateCondVarSignalAll(__FILE__, __LINE__, cv) - /* Annotations for user-defined synchronization mechanisms. */ - #define ANNOTATE_HAPPENS_BEFORE(obj) ANNOTATE_CONDVAR_SIGNAL(obj) - #define ANNOTATE_HAPPENS_AFTER(obj) ANNOTATE_CONDVAR_WAIT(obj) +/* Annotations for user-defined synchronization mechanisms. */ +#define ANNOTATE_HAPPENS_BEFORE(obj) ANNOTATE_CONDVAR_SIGNAL(obj) +#define ANNOTATE_HAPPENS_AFTER(obj) ANNOTATE_CONDVAR_WAIT(obj) - /* Report that the bytes in the range [pointer, pointer+size) are about +/* Report that the bytes in the range [pointer, pointer+size) are about to be published safely. The race checker will create a happens-before arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to subsequent accesses to this memory. 
Note: this annotation may not work properly if the race detector uses sampling, i.e. does not observe all memory accesses. */ - #define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \ +#define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \ AnnotatePublishMemoryRange(__FILE__, __LINE__, pointer, size) - /* DEPRECATED. Don't use it. */ - #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size) \ +/* DEPRECATED. Don't use it. */ +#define ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size) \ AnnotateUnpublishMemoryRange(__FILE__, __LINE__, pointer, size) - /* DEPRECATED. Don't use it. */ - #define ANNOTATE_SWAP_MEMORY_RANGE(pointer, size) \ - do { \ - ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size); \ - ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size); \ +/* DEPRECATED. Don't use it. */ +#define ANNOTATE_SWAP_MEMORY_RANGE(pointer, size) \ + do { \ + ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size); \ + ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size); \ } while (0) - /* Instruct the tool to create a happens-before arc between mu->Unlock() and +/* Instruct the tool to create a happens-before arc between mu->Unlock() and mu->Lock(). This annotation may slow down the race detector and hide real races. Normally it is used only when it would be difficult to annotate each of the mutex's critical sections individually using the annotations above. This annotation makes sense only for hybrid race detectors. For pure happens-before detectors this is a no-op. For more details see http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . */ - #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \ - AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) +#define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) - /* Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. */ - #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \ - AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) +/* Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. 
*/ +#define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) - /* ------------------------------------------------------------- +/* ------------------------------------------------------------- Annotations useful when defining memory allocators, or when memory that was protected in one way starts to be protected in another. */ - /* Report that a new memory at "address" of size "size" has been allocated. +/* Report that a new memory at "address" of size "size" has been allocated. This might be used when the memory has been retrieved from a free list and is about to be reused, or when a the locking discipline for a variable changes. */ - #define ANNOTATE_NEW_MEMORY(address, size) \ - AnnotateNewMemory(__FILE__, __LINE__, address, size) +#define ANNOTATE_NEW_MEMORY(address, size) AnnotateNewMemory(__FILE__, __LINE__, address, size) - /* ------------------------------------------------------------- +/* ------------------------------------------------------------- Annotations useful when defining FIFO queues that transfer data between threads. */ - /* Report that the producer-consumer queue (such as ProducerConsumerQueue) at +/* Report that the producer-consumer queue (such as ProducerConsumerQueue) at address "pcq" has been created. The ANNOTATE_PCQ_* annotations should be used only for FIFO queues. For non-FIFO queues use ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). */ - #define ANNOTATE_PCQ_CREATE(pcq) \ - AnnotatePCQCreate(__FILE__, __LINE__, pcq) +#define ANNOTATE_PCQ_CREATE(pcq) AnnotatePCQCreate(__FILE__, __LINE__, pcq) - /* Report that the queue at address "pcq" is about to be destroyed. */ - #define ANNOTATE_PCQ_DESTROY(pcq) \ - AnnotatePCQDestroy(__FILE__, __LINE__, pcq) +/* Report that the queue at address "pcq" is about to be destroyed. 
*/ +#define ANNOTATE_PCQ_DESTROY(pcq) AnnotatePCQDestroy(__FILE__, __LINE__, pcq) - /* Report that we are about to put an element into a FIFO queue at address +/* Report that we are about to put an element into a FIFO queue at address "pcq". */ - #define ANNOTATE_PCQ_PUT(pcq) \ - AnnotatePCQPut(__FILE__, __LINE__, pcq) +#define ANNOTATE_PCQ_PUT(pcq) AnnotatePCQPut(__FILE__, __LINE__, pcq) - /* Report that we've just got an element from a FIFO queue at address "pcq". */ - #define ANNOTATE_PCQ_GET(pcq) \ - AnnotatePCQGet(__FILE__, __LINE__, pcq) +/* Report that we've just got an element from a FIFO queue at address "pcq". */ +#define ANNOTATE_PCQ_GET(pcq) AnnotatePCQGet(__FILE__, __LINE__, pcq) - /* ------------------------------------------------------------- +/* ------------------------------------------------------------- Annotations that suppress errors. It is usually better to express the program's synchronization using the other annotations, but these can be used when all else fails. */ - /* Report that we may have a benign race at "pointer", with size +/* Report that we may have a benign race at "pointer", with size "sizeof(*(pointer))". "pointer" must be a non-void* pointer. Insert at the point where "pointer" has been allocated, preferably close to the point where the race happens. See also ANNOTATE_BENIGN_RACE_STATIC. */ - #define ANNOTATE_BENIGN_RACE(pointer, description) \ - AnnotateBenignRaceSized(__FILE__, __LINE__, pointer, \ - sizeof(*(pointer)), description) +#define ANNOTATE_BENIGN_RACE(pointer, description) \ + AnnotateBenignRaceSized(__FILE__, __LINE__, pointer, sizeof(*(pointer)), description) - /* Same as ANNOTATE_BENIGN_RACE(address, description), but applies to +/* Same as ANNOTATE_BENIGN_RACE(address, description), but applies to the memory range [address, address+size). 
*/ - #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) \ +#define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) \ AnnotateBenignRaceSized(__FILE__, __LINE__, address, size, description) - /* Request the analysis tool to ignore all reads in the current thread +/* Request the analysis tool to ignore all reads in the current thread until ANNOTATE_IGNORE_READS_END is called. Useful to ignore intentional racey reads, while still checking other reads and all writes. See also ANNOTATE_UNPROTECTED_READ. */ - #define ANNOTATE_IGNORE_READS_BEGIN() \ - AnnotateIgnoreReadsBegin(__FILE__, __LINE__) - - /* Stop ignoring reads. */ - #define ANNOTATE_IGNORE_READS_END() \ - AnnotateIgnoreReadsEnd(__FILE__, __LINE__) - - /* Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. */ - #define ANNOTATE_IGNORE_WRITES_BEGIN() \ - AnnotateIgnoreWritesBegin(__FILE__, __LINE__) - - /* Stop ignoring writes. */ - #define ANNOTATE_IGNORE_WRITES_END() \ - AnnotateIgnoreWritesEnd(__FILE__, __LINE__) - - /* Start ignoring all memory accesses (reads and writes). */ - #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ - do {\ - ANNOTATE_IGNORE_READS_BEGIN();\ - ANNOTATE_IGNORE_WRITES_BEGIN();\ - }while(0)\ - - /* Stop ignoring all memory accesses. */ - #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ - do {\ - ANNOTATE_IGNORE_WRITES_END();\ - ANNOTATE_IGNORE_READS_END();\ - }while(0)\ - - /* Start ignoring all synchronization until ANNOTATE_IGNORE_SYNC_END +#define ANNOTATE_IGNORE_READS_BEGIN() AnnotateIgnoreReadsBegin(__FILE__, __LINE__) + +/* Stop ignoring reads. */ +#define ANNOTATE_IGNORE_READS_END() AnnotateIgnoreReadsEnd(__FILE__, __LINE__) + +/* Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. */ +#define ANNOTATE_IGNORE_WRITES_BEGIN() AnnotateIgnoreWritesBegin(__FILE__, __LINE__) + +/* Stop ignoring writes. */ +#define ANNOTATE_IGNORE_WRITES_END() AnnotateIgnoreWritesEnd(__FILE__, __LINE__) + +/* Start ignoring all memory accesses (reads and writes). 
*/ +#define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ + do { \ + ANNOTATE_IGNORE_READS_BEGIN(); \ + ANNOTATE_IGNORE_WRITES_BEGIN(); \ + } while (0) + +/* Stop ignoring all memory accesses. */ +#define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ + do { \ + ANNOTATE_IGNORE_WRITES_END(); \ + ANNOTATE_IGNORE_READS_END(); \ + } while (0) + +/* Start ignoring all synchronization until ANNOTATE_IGNORE_SYNC_END is called. */ - #define ANNOTATE_IGNORE_SYNC_BEGIN() \ - AnnotateIgnoreSyncBegin(__FILE__, __LINE__) +#define ANNOTATE_IGNORE_SYNC_BEGIN() AnnotateIgnoreSyncBegin(__FILE__, __LINE__) - /* Stop ignoring all synchronization. */ - #define ANNOTATE_IGNORE_SYNC_END() \ - AnnotateIgnoreSyncEnd(__FILE__, __LINE__) +/* Stop ignoring all synchronization. */ +#define ANNOTATE_IGNORE_SYNC_END() AnnotateIgnoreSyncEnd(__FILE__, __LINE__) - /* Enable (enable!=0) or disable (enable==0) race detection for all threads. +/* Enable (enable!=0) or disable (enable==0) race detection for all threads. This annotation could be useful if you want to skip expensive race analysis during some period of program execution, e.g. during initialization. */ - #define ANNOTATE_ENABLE_RACE_DETECTION(enable) \ +#define ANNOTATE_ENABLE_RACE_DETECTION(enable) \ AnnotateEnableRaceDetection(__FILE__, __LINE__, enable) - /* ------------------------------------------------------------- +/* ------------------------------------------------------------- Annotations useful for debugging. */ - /* Request to trace every access to "address". */ - #define ANNOTATE_TRACE_MEMORY(address) \ - AnnotateTraceMemory(__FILE__, __LINE__, address) +/* Request to trace every access to "address". */ +#define ANNOTATE_TRACE_MEMORY(address) AnnotateTraceMemory(__FILE__, __LINE__, address) - /* Report the current thread name to a race detector. */ - #define ANNOTATE_THREAD_NAME(name) \ - AnnotateThreadName(__FILE__, __LINE__, name) +/* Report the current thread name to a race detector. 
*/ +#define ANNOTATE_THREAD_NAME(name) AnnotateThreadName(__FILE__, __LINE__, name) - /* ------------------------------------------------------------- +/* ------------------------------------------------------------- Annotations useful when implementing locks. They are not normally needed by modules that merely use locks. The "lock" argument is a pointer to the lock object. */ - /* Report that a lock has been created at address "lock". */ - #define ANNOTATE_RWLOCK_CREATE(lock) \ - AnnotateRWLockCreate(__FILE__, __LINE__, lock) +/* Report that a lock has been created at address "lock". */ +#define ANNOTATE_RWLOCK_CREATE(lock) AnnotateRWLockCreate(__FILE__, __LINE__, lock) - /* Report that a linker initialized lock has been created at address "lock". +/* Report that a linker initialized lock has been created at address "lock". */ #ifdef THREAD_SANITIZER - #define ANNOTATE_RWLOCK_CREATE_STATIC(lock) \ - AnnotateRWLockCreateStatic(__FILE__, __LINE__, lock) +#define ANNOTATE_RWLOCK_CREATE_STATIC(lock) AnnotateRWLockCreateStatic(__FILE__, __LINE__, lock) #else - #define ANNOTATE_RWLOCK_CREATE_STATIC(lock) ANNOTATE_RWLOCK_CREATE(lock) +#define ANNOTATE_RWLOCK_CREATE_STATIC(lock) ANNOTATE_RWLOCK_CREATE(lock) #endif - /* Report that the lock at address "lock" is about to be destroyed. */ - #define ANNOTATE_RWLOCK_DESTROY(lock) \ - AnnotateRWLockDestroy(__FILE__, __LINE__, lock) +/* Report that the lock at address "lock" is about to be destroyed. */ +#define ANNOTATE_RWLOCK_DESTROY(lock) AnnotateRWLockDestroy(__FILE__, __LINE__, lock) - /* Report that the lock at address "lock" has been acquired. +/* Report that the lock at address "lock" has been acquired. is_w=1 for writer lock, is_w=0 for reader lock. 
*/ - #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \ - AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w) +#define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w) - /* Report that the lock at address "lock" is about to be released. */ - #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \ - AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w) +/* Report that the lock at address "lock" is about to be released. */ +#define ANNOTATE_RWLOCK_RELEASED(lock, is_w) AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w) - /* ------------------------------------------------------------- +/* ------------------------------------------------------------- Annotations useful when implementing barriers. They are not normally needed by modules that merely use barriers. The "barrier" argument is a pointer to the barrier object. */ - /* Report that the "barrier" has been initialized with initial "count". +/* Report that the "barrier" has been initialized with initial "count". If 'reinitialization_allowed' is true, initialization is allowed to happen multiple times w/o calling barrier_destroy() */ - #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) \ - AnnotateBarrierInit(__FILE__, __LINE__, barrier, count, \ - reinitialization_allowed) +#define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) \ + AnnotateBarrierInit(__FILE__, __LINE__, barrier, count, reinitialization_allowed) - /* Report that we are about to enter barrier_wait("barrier"). */ - #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) \ - AnnotateBarrierWaitBefore(__FILE__, __LINE__, barrier) +/* Report that we are about to enter barrier_wait("barrier"). */ +#define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) AnnotateBarrierWaitBefore(__FILE__, __LINE__, barrier) - /* Report that we just exited barrier_wait("barrier"). 
*/ - #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) \ - AnnotateBarrierWaitAfter(__FILE__, __LINE__, barrier) +/* Report that we just exited barrier_wait("barrier"). */ +#define ANNOTATE_BARRIER_WAIT_AFTER(barrier) AnnotateBarrierWaitAfter(__FILE__, __LINE__, barrier) - /* Report that the "barrier" has been destroyed. */ - #define ANNOTATE_BARRIER_DESTROY(barrier) \ - AnnotateBarrierDestroy(__FILE__, __LINE__, barrier) +/* Report that the "barrier" has been destroyed. */ +#define ANNOTATE_BARRIER_DESTROY(barrier) AnnotateBarrierDestroy(__FILE__, __LINE__, barrier) - /* ------------------------------------------------------------- +/* ------------------------------------------------------------- Annotations useful for testing race detectors. */ - /* Report that we expect a race on the variable at "address". +/* Report that we expect a race on the variable at "address". Use only in unit tests for a race detector. */ - #define ANNOTATE_EXPECT_RACE(address, description) \ +#define ANNOTATE_EXPECT_RACE(address, description) \ AnnotateExpectRace(__FILE__, __LINE__, address, description) - /* A no-op. Insert where you like to test the interceptors. */ - #define ANNOTATE_NO_OP(arg) \ - AnnotateNoOp(__FILE__, __LINE__, arg) +/* A no-op. Insert where you like to test the interceptors. */ +#define ANNOTATE_NO_OP(arg) AnnotateNoOp(__FILE__, __LINE__, arg) - /* Force the race detector to flush its state. The actual effect depends on +/* Force the race detector to flush its state. The actual effect depends on * the implementation of the detector. 
*/ - #define ANNOTATE_FLUSH_STATE() \ - AnnotateFlushState(__FILE__, __LINE__) - - -#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ - - #define ANNOTATE_RWLOCK_CREATE(lock) /* empty */ - #define ANNOTATE_RWLOCK_CREATE_STATIC(lock) /* empty */ - #define ANNOTATE_RWLOCK_DESTROY(lock) /* empty */ - #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) /* empty */ - #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) /* empty */ - #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) /* */ - #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) /* empty */ - #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) /* empty */ - #define ANNOTATE_BARRIER_DESTROY(barrier) /* empty */ - #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) /* empty */ - #define ANNOTATE_CONDVAR_WAIT(cv) /* empty */ - #define ANNOTATE_CONDVAR_SIGNAL(cv) /* empty */ - #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) /* empty */ - #define ANNOTATE_HAPPENS_BEFORE(obj) /* empty */ - #define ANNOTATE_HAPPENS_AFTER(obj) /* empty */ - #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) /* empty */ - #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) /* empty */ - #define ANNOTATE_SWAP_MEMORY_RANGE(address, size) /* empty */ - #define ANNOTATE_PCQ_CREATE(pcq) /* empty */ - #define ANNOTATE_PCQ_DESTROY(pcq) /* empty */ - #define ANNOTATE_PCQ_PUT(pcq) /* empty */ - #define ANNOTATE_PCQ_GET(pcq) /* empty */ - #define ANNOTATE_NEW_MEMORY(address, size) /* empty */ - #define ANNOTATE_EXPECT_RACE(address, description) /* empty */ - #define ANNOTATE_BENIGN_RACE(address, description) /* empty */ - #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) /* empty */ - #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) /* empty */ - #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) /* empty */ - #define ANNOTATE_TRACE_MEMORY(arg) /* empty */ - #define ANNOTATE_THREAD_NAME(name) /* empty */ - #define ANNOTATE_IGNORE_READS_BEGIN() /* empty */ - #define ANNOTATE_IGNORE_READS_END() /* empty */ - #define ANNOTATE_IGNORE_WRITES_BEGIN() /* empty */ - 
#define ANNOTATE_IGNORE_WRITES_END() /* empty */ - #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() /* empty */ - #define ANNOTATE_IGNORE_READS_AND_WRITES_END() /* empty */ - #define ANNOTATE_IGNORE_SYNC_BEGIN() /* empty */ - #define ANNOTATE_IGNORE_SYNC_END() /* empty */ - #define ANNOTATE_ENABLE_RACE_DETECTION(enable) /* empty */ - #define ANNOTATE_NO_OP(arg) /* empty */ - #define ANNOTATE_FLUSH_STATE() /* empty */ - -#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ +#define ANNOTATE_FLUSH_STATE() AnnotateFlushState(__FILE__, __LINE__) + +#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ + +#define ANNOTATE_RWLOCK_CREATE(lock) /* empty */ +#define ANNOTATE_RWLOCK_CREATE_STATIC(lock) /* empty */ +#define ANNOTATE_RWLOCK_DESTROY(lock) /* empty */ +#define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) /* empty */ +#define ANNOTATE_RWLOCK_RELEASED(lock, is_w) /* empty */ +#define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) /* */ +#define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) /* empty */ +#define ANNOTATE_BARRIER_WAIT_AFTER(barrier) /* empty */ +#define ANNOTATE_BARRIER_DESTROY(barrier) /* empty */ +#define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) /* empty */ +#define ANNOTATE_CONDVAR_WAIT(cv) /* empty */ +#define ANNOTATE_CONDVAR_SIGNAL(cv) /* empty */ +#define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) /* empty */ +#define ANNOTATE_HAPPENS_BEFORE(obj) /* empty */ +#define ANNOTATE_HAPPENS_AFTER(obj) /* empty */ +#define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) /* empty */ +#define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) /* empty */ +#define ANNOTATE_SWAP_MEMORY_RANGE(address, size) /* empty */ +#define ANNOTATE_PCQ_CREATE(pcq) /* empty */ +#define ANNOTATE_PCQ_DESTROY(pcq) /* empty */ +#define ANNOTATE_PCQ_PUT(pcq) /* empty */ +#define ANNOTATE_PCQ_GET(pcq) /* empty */ +#define ANNOTATE_NEW_MEMORY(address, size) /* empty */ +#define ANNOTATE_EXPECT_RACE(address, description) /* empty */ +#define ANNOTATE_BENIGN_RACE(address, description) /* empty */ +#define 
ANNOTATE_BENIGN_RACE_SIZED(address, size, description) /* empty */ +#define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) /* empty */ +#define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) /* empty */ +#define ANNOTATE_TRACE_MEMORY(arg) /* empty */ +#define ANNOTATE_THREAD_NAME(name) /* empty */ +#define ANNOTATE_IGNORE_READS_BEGIN() /* empty */ +#define ANNOTATE_IGNORE_READS_END() /* empty */ +#define ANNOTATE_IGNORE_WRITES_BEGIN() /* empty */ +#define ANNOTATE_IGNORE_WRITES_END() /* empty */ +#define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() /* empty */ +#define ANNOTATE_IGNORE_READS_AND_WRITES_END() /* empty */ +#define ANNOTATE_IGNORE_SYNC_BEGIN() /* empty */ +#define ANNOTATE_IGNORE_SYNC_END() /* empty */ +#define ANNOTATE_ENABLE_RACE_DETECTION(enable) /* empty */ +#define ANNOTATE_NO_OP(arg) /* empty */ +#define ANNOTATE_FLUSH_STATE() /* empty */ + +#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ /* Macro definitions for GCC attributes that allow static thread safety analysis to recognize and use some of the dynamic annotations as @@ -412,39 +380,39 @@ #undef ANNOTALYSIS_STATIC_INLINE #define ANNOTALYSIS_STATIC_INLINE inline //static inline #undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY -#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY { (void)file; (void)line; } +#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY \ + { \ + (void)file; \ + (void)line; \ + } #endif /* Only emit attributes when annotalysis is enabled. 
*/ #if defined(__SUPPORT_TS_ANNOTATION__) && defined(__SUPPORT_DYN_ANNOTATION__) -#undef ANNOTALYSIS_IGNORE_READS_BEGIN -#define ANNOTALYSIS_IGNORE_READS_BEGIN __attribute__ ((ignore_reads_begin)) -#undef ANNOTALYSIS_IGNORE_READS_END -#define ANNOTALYSIS_IGNORE_READS_END __attribute__ ((ignore_reads_end)) -#undef ANNOTALYSIS_IGNORE_WRITES_BEGIN -#define ANNOTALYSIS_IGNORE_WRITES_BEGIN __attribute__ ((ignore_writes_begin)) -#undef ANNOTALYSIS_IGNORE_WRITES_END -#define ANNOTALYSIS_IGNORE_WRITES_END __attribute__ ((ignore_writes_end)) -#undef ANNOTALYSIS_UNPROTECTED_READ -#define ANNOTALYSIS_UNPROTECTED_READ __attribute__ ((unprotected_read)) +#undef ANNOTALYSIS_IGNORE_READS_BEGIN +#define ANNOTALYSIS_IGNORE_READS_BEGIN __attribute__((ignore_reads_begin)) +#undef ANNOTALYSIS_IGNORE_READS_END +#define ANNOTALYSIS_IGNORE_READS_END __attribute__((ignore_reads_end)) +#undef ANNOTALYSIS_IGNORE_WRITES_BEGIN +#define ANNOTALYSIS_IGNORE_WRITES_BEGIN __attribute__((ignore_writes_begin)) +#undef ANNOTALYSIS_IGNORE_WRITES_END +#define ANNOTALYSIS_IGNORE_WRITES_END __attribute__((ignore_writes_end)) +#undef ANNOTALYSIS_UNPROTECTED_READ +#define ANNOTALYSIS_UNPROTECTED_READ __attribute__((unprotected_read)) #endif #endif // defined(__GNUC__) && (!defined(SWIG)) && (!defined(__clang__)) - /* TODO(user) -- Replace __CLANG_SUPPORT_DYN_ANNOTATION__ with the appropriate feature ID. */ -#if defined(__clang__) && (!defined(SWIG)) \ - && defined(__CLANG_SUPPORT_DYN_ANNOTATION__) +#if defined(__clang__) && (!defined(SWIG)) && defined(__CLANG_SUPPORT_DYN_ANNOTATION__) /* TODO(user) -- The exclusive lock here ignores writes as well, but allows INGORE_READS_AND_WRITES to work properly. 
*/ -#undef ANNOTALYSIS_IGNORE_READS_BEGIN -#define ANNOTALYSIS_IGNORE_READS_BEGIN \ - __attribute__((exclusive_lock_function("*"))) -#undef ANNOTALYSIS_IGNORE_READS_END -#define ANNOTALYSIS_IGNORE_READS_END \ - __attribute__((unlock_function("*"))) +#undef ANNOTALYSIS_IGNORE_READS_BEGIN +#define ANNOTALYSIS_IGNORE_READS_BEGIN __attribute__((exclusive_lock_function("*"))) +#undef ANNOTALYSIS_IGNORE_READS_END +#define ANNOTALYSIS_IGNORE_READS_END __attribute__((unlock_function("*"))) #if DYNAMIC_ANNOTATIONS_ENABLED == 0 /* Turn on certain macros for static analysis, even if dynamic annotations are @@ -454,93 +422,65 @@ #undef ANNOTALYSIS_STATIC_INLINE #define ANNOTALYSIS_STATIC_INLINE inline //static inline #undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY -#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY { (void)file; (void)line; } - -#endif /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ -#endif /* defined(__clang__) && (!defined(SWIG)) */ +#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY \ + { \ + (void)file; \ + (void)line; \ + } +#endif /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ +#endif /* defined(__clang__) && (!defined(SWIG)) */ /* Use the macros above rather than using these functions directly. 
*/ #ifdef __cplusplus extern "C" { #endif -void AnnotateRWLockCreate(const char *file, int line, - const volatile void *lock); -void AnnotateRWLockCreateStatic(const char *file, int line, - const volatile void *lock); -void AnnotateRWLockDestroy(const char *file, int line, - const volatile void *lock); -void AnnotateRWLockAcquired(const char *file, int line, - const volatile void *lock, long is_w); -void AnnotateRWLockReleased(const char *file, int line, - const volatile void *lock, long is_w); -void AnnotateBarrierInit(const char *file, int line, - const volatile void *barrier, long count, +void AnnotateRWLockCreate(const char* file, int line, const volatile void* lock); +void AnnotateRWLockCreateStatic(const char* file, int line, const volatile void* lock); +void AnnotateRWLockDestroy(const char* file, int line, const volatile void* lock); +void AnnotateRWLockAcquired(const char* file, int line, const volatile void* lock, long is_w); +void AnnotateRWLockReleased(const char* file, int line, const volatile void* lock, long is_w); +void AnnotateBarrierInit(const char* file, int line, const volatile void* barrier, long count, long reinitialization_allowed); -void AnnotateBarrierWaitBefore(const char *file, int line, - const volatile void *barrier); -void AnnotateBarrierWaitAfter(const char *file, int line, - const volatile void *barrier); -void AnnotateBarrierDestroy(const char *file, int line, - const volatile void *barrier); -void AnnotateCondVarWait(const char *file, int line, - const volatile void *cv, - const volatile void *lock); -void AnnotateCondVarSignal(const char *file, int line, - const volatile void *cv); -void AnnotateCondVarSignalAll(const char *file, int line, - const volatile void *cv); -void AnnotatePublishMemoryRange(const char *file, int line, - const volatile void *address, +void AnnotateBarrierWaitBefore(const char* file, int line, const volatile void* barrier); +void AnnotateBarrierWaitAfter(const char* file, int line, const volatile void* 
barrier); +void AnnotateBarrierDestroy(const char* file, int line, const volatile void* barrier); +void AnnotateCondVarWait(const char* file, int line, const volatile void* cv, + const volatile void* lock); +void AnnotateCondVarSignal(const char* file, int line, const volatile void* cv); +void AnnotateCondVarSignalAll(const char* file, int line, const volatile void* cv); +void AnnotatePublishMemoryRange(const char* file, int line, const volatile void* address, long size); -void AnnotateUnpublishMemoryRange(const char *file, int line, - const volatile void *address, +void AnnotateUnpublishMemoryRange(const char* file, int line, const volatile void* address, long size); -void AnnotatePCQCreate(const char *file, int line, - const volatile void *pcq); -void AnnotatePCQDestroy(const char *file, int line, - const volatile void *pcq); -void AnnotatePCQPut(const char *file, int line, - const volatile void *pcq); -void AnnotatePCQGet(const char *file, int line, - const volatile void *pcq); -void AnnotateNewMemory(const char *file, int line, - const volatile void *address, - long size); -void AnnotateExpectRace(const char *file, int line, - const volatile void *address, - const char *description); -void AnnotateBenignRace(const char *file, int line, - const volatile void *address, - const char *description); -void AnnotateBenignRaceSized(const char *file, int line, - const volatile void *address, - long size, - const char *description); -void AnnotateMutexIsUsedAsCondVar(const char *file, int line, - const volatile void *mu); -void AnnotateTraceMemory(const char *file, int line, - const volatile void *arg); -void AnnotateThreadName(const char *file, int line, - const char *name); -ANNOTALYSIS_STATIC_INLINE -void AnnotateIgnoreReadsBegin(const char *file, int line) - ANNOTALYSIS_IGNORE_READS_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +void AnnotatePCQCreate(const char* file, int line, const volatile void* pcq); +void AnnotatePCQDestroy(const char* file, int line, const 
volatile void* pcq); +void AnnotatePCQPut(const char* file, int line, const volatile void* pcq); +void AnnotatePCQGet(const char* file, int line, const volatile void* pcq); +void AnnotateNewMemory(const char* file, int line, const volatile void* address, long size); +void AnnotateExpectRace(const char* file, int line, const volatile void* address, + const char* description); +void AnnotateBenignRace(const char* file, int line, const volatile void* address, + const char* description); +void AnnotateBenignRaceSized(const char* file, int line, const volatile void* address, long size, + const char* description); +void AnnotateMutexIsUsedAsCondVar(const char* file, int line, const volatile void* mu); +void AnnotateTraceMemory(const char* file, int line, const volatile void* arg); +void AnnotateThreadName(const char* file, int line, const char* name); ANNOTALYSIS_STATIC_INLINE -void AnnotateIgnoreReadsEnd(const char *file, int line) - ANNOTALYSIS_IGNORE_READS_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY -ANNOTALYSIS_STATIC_INLINE -void AnnotateIgnoreWritesBegin(const char *file, int line) - ANNOTALYSIS_IGNORE_WRITES_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY -ANNOTALYSIS_STATIC_INLINE -void AnnotateIgnoreWritesEnd(const char *file, int line) - ANNOTALYSIS_IGNORE_WRITES_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY -void AnnotateIgnoreSyncBegin(const char *file, int line); -void AnnotateIgnoreSyncEnd(const char *file, int line); -void AnnotateEnableRaceDetection(const char *file, int line, int enable); -void AnnotateNoOp(const char *file, int line, - const volatile void *arg); -void AnnotateFlushState(const char *file, int line); +void AnnotateIgnoreReadsBegin(const char* file, int line) + ANNOTALYSIS_IGNORE_READS_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY ANNOTALYSIS_STATIC_INLINE + void AnnotateIgnoreReadsEnd(const char* file, int line) ANNOTALYSIS_IGNORE_READS_END + ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY ANNOTALYSIS_STATIC_INLINE + void AnnotateIgnoreWritesBegin(const char* file, int 
line) ANNOTALYSIS_IGNORE_WRITES_BEGIN + ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY ANNOTALYSIS_STATIC_INLINE + void AnnotateIgnoreWritesEnd(const char* file, int line) + ANNOTALYSIS_IGNORE_WRITES_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY + void AnnotateIgnoreSyncBegin(const char* file, int line); +void AnnotateIgnoreSyncEnd(const char* file, int line); +void AnnotateEnableRaceDetection(const char* file, int line, int enable); +void AnnotateNoOp(const char* file, int line, const volatile void* arg); +void AnnotateFlushState(const char* file, int line); /* Return non-zero value if running under valgrind. @@ -572,10 +512,8 @@ int RunningOnValgrind(void); */ double ValgrindSlowdown(void); - /* AddressSanitizer annotations from LLVM asan_interface.h */ - #if defined(__SANITIZE_ADDRESS__) || defined(ADDRESS_SANITIZER) // Marks memory region [addr, addr+size) as unaddressable. // This memory must be previously allocated by the user program. Accessing @@ -585,7 +523,7 @@ double ValgrindSlowdown(void); // to ASan alignment restrictions. // Method is NOT thread-safe in the sense that no two threads can // (un)poison memory in the same memory region simultaneously. -void __asan_poison_memory_region(void const volatile *addr, size_t size); +void __asan_poison_memory_region(void const volatile* addr, size_t size); // Marks memory region [addr, addr+size) as addressable. // This memory must be previously allocated by the user program. Accessing // addresses in this region is allowed until this region is poisoned again. @@ -593,18 +531,14 @@ void __asan_poison_memory_region(void const volatile *addr, size_t size); // ASan alignment restrictions. // Method is NOT thread-safe in the sense that no two threads can // (un)poison memory in the same memory region simultaneously. -void __asan_unpoison_memory_region(void const volatile *addr, size_t size); +void __asan_unpoison_memory_region(void const volatile* addr, size_t size); // User code should use macros instead of functions. 
-#define ASAN_POISON_MEMORY_REGION(addr, size) \ - __asan_poison_memory_region((addr), (size)) -#define ASAN_UNPOISON_MEMORY_REGION(addr, size) \ - __asan_unpoison_memory_region((addr), (size)) +#define ASAN_POISON_MEMORY_REGION(addr, size) __asan_poison_memory_region((addr), (size)) +#define ASAN_UNPOISON_MEMORY_REGION(addr, size) __asan_unpoison_memory_region((addr), (size)) #else -#define ASAN_POISON_MEMORY_REGION(addr, size) \ - ((void)(addr), (void)(size)) -#define ASAN_UNPOISON_MEMORY_REGION(addr, size) \ - ((void)(addr), (void)(size)) +#define ASAN_POISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size)) +#define ASAN_UNPOISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size)) #endif // Sets the callback to be called right before death on error. @@ -612,11 +546,9 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); void __asan_set_death_callback(void (*callback)(void)); #if defined(__SANITIZE_ADDRESS__) || defined(ADDRESS_SANITIZER) -#define ASAN_SET_DEATH_CALLBACK(cb) \ - __asan_set_death_callback((cb)) +#define ASAN_SET_DEATH_CALLBACK(cb) __asan_set_death_callback((cb)) #else -#define ASAN_SET_DEATH_CALLBACK(cb) \ - ((void)(cb)) +#define ASAN_SET_DEATH_CALLBACK(cb) ((void)(cb)) #endif #ifdef __cplusplus @@ -625,7 +557,7 @@ void __asan_set_death_callback(void (*callback)(void)); #if DYNAMIC_ANNOTATIONS_ENABLED != 0 && defined(__cplusplus) - /* ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. +/* ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. Instead of doing ANNOTATE_IGNORE_READS_BEGIN(); @@ -633,31 +565,29 @@ void __asan_set_death_callback(void (*callback)(void)); ANNOTATE_IGNORE_READS_END(); one can use ... 
= ANNOTATE_UNPROTECTED_READ(x); */ - template - inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) - ANNOTALYSIS_UNPROTECTED_READ { +template +inline T ANNOTATE_UNPROTECTED_READ(const volatile T& x) ANNOTALYSIS_UNPROTECTED_READ { ANNOTATE_IGNORE_READS_BEGIN(); T res = x; ANNOTATE_IGNORE_READS_END(); return res; - } - /* Apply ANNOTATE_BENIGN_RACE_SIZED to a static variable. */ - #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \ - namespace { \ - class static_var ## _annotator { \ - public: \ - static_var ## _annotator() { \ - ANNOTATE_BENIGN_RACE_SIZED(&static_var, \ - sizeof(static_var), \ - # static_var ": " description); \ - } \ - }; \ - static static_var ## _annotator the ## static_var ## _annotator;\ +} +/* Apply ANNOTATE_BENIGN_RACE_SIZED to a static variable. */ +#define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \ + namespace { \ + class static_var##_annotator { \ + public: \ + static_var##_annotator() { \ + ANNOTATE_BENIGN_RACE_SIZED(&static_var, sizeof(static_var), \ + #static_var ": " description); \ + } \ + }; \ + static static_var##_annotator the##static_var##_annotator; \ } #else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ - #define ANNOTATE_UNPROTECTED_READ(x) (x) - #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) /* empty */ +#define ANNOTATE_UNPROTECTED_READ(x) (x) +#define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) /* empty */ #endif /* DYNAMIC_ANNOTATIONS_ENABLED */ @@ -673,53 +603,47 @@ void __asan_set_death_callback(void (*callback)(void)); #ifdef ANNOTALYSIS_ONLY - #undef ANNOTALYSIS_ONLY - - /* Undefine and re-define the macros that the static analyzer understands. 
*/ - #undef ANNOTATE_IGNORE_READS_BEGIN - #define ANNOTATE_IGNORE_READS_BEGIN() \ - AnnotateIgnoreReadsBegin(__FILE__, __LINE__) - - #undef ANNOTATE_IGNORE_READS_END - #define ANNOTATE_IGNORE_READS_END() \ - AnnotateIgnoreReadsEnd(__FILE__, __LINE__) - - #undef ANNOTATE_IGNORE_WRITES_BEGIN - #define ANNOTATE_IGNORE_WRITES_BEGIN() \ - AnnotateIgnoreWritesBegin(__FILE__, __LINE__) - - #undef ANNOTATE_IGNORE_WRITES_END - #define ANNOTATE_IGNORE_WRITES_END() \ - AnnotateIgnoreWritesEnd(__FILE__, __LINE__) - - #undef ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN - #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ - do { \ - ANNOTATE_IGNORE_READS_BEGIN(); \ - ANNOTATE_IGNORE_WRITES_BEGIN(); \ - }while(0) \ - - #undef ANNOTATE_IGNORE_READS_AND_WRITES_END - #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ - do { \ - ANNOTATE_IGNORE_WRITES_END(); \ - ANNOTATE_IGNORE_READS_END(); \ - }while(0) \ - - #if defined(__cplusplus) - #undef ANNOTATE_UNPROTECTED_READ - template - inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) - ANNOTALYSIS_UNPROTECTED_READ { - ANNOTATE_IGNORE_READS_BEGIN(); - T res = x; - ANNOTATE_IGNORE_READS_END(); - return res; - } - #endif /* __cplusplus */ +#undef ANNOTALYSIS_ONLY -#endif /* ANNOTALYSIS_ONLY */ +/* Undefine and re-define the macros that the static analyzer understands. 
*/ +#undef ANNOTATE_IGNORE_READS_BEGIN +#define ANNOTATE_IGNORE_READS_BEGIN() AnnotateIgnoreReadsBegin(__FILE__, __LINE__) + +#undef ANNOTATE_IGNORE_READS_END +#define ANNOTATE_IGNORE_READS_END() AnnotateIgnoreReadsEnd(__FILE__, __LINE__) + +#undef ANNOTATE_IGNORE_WRITES_BEGIN +#define ANNOTATE_IGNORE_WRITES_BEGIN() AnnotateIgnoreWritesBegin(__FILE__, __LINE__) + +#undef ANNOTATE_IGNORE_WRITES_END +#define ANNOTATE_IGNORE_WRITES_END() AnnotateIgnoreWritesEnd(__FILE__, __LINE__) + +#undef ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN +#define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ + do { \ + ANNOTATE_IGNORE_READS_BEGIN(); \ + ANNOTATE_IGNORE_WRITES_BEGIN(); \ + } while (0) + +#undef ANNOTATE_IGNORE_READS_AND_WRITES_END +#define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ + do { \ + ANNOTATE_IGNORE_WRITES_END(); \ + ANNOTATE_IGNORE_READS_END(); \ + } while (0) + +#if defined(__cplusplus) +#undef ANNOTATE_UNPROTECTED_READ +template +inline T ANNOTATE_UNPROTECTED_READ(const volatile T& x) ANNOTALYSIS_UNPROTECTED_READ { + ANNOTATE_IGNORE_READS_BEGIN(); + T res = x; + ANNOTATE_IGNORE_READS_END(); + return res; +} +#endif /* __cplusplus */ +#endif /* ANNOTALYSIS_ONLY */ #ifdef CLANG_ANNOTALYSIS_ONLY @@ -728,44 +652,41 @@ void __asan_set_death_callback(void (*callback)(void)); /* Turn on macros that the static analyzer understands. These should be on * even if dynamic annotations are off. 
*/ - #undef ANNOTATE_IGNORE_READS_BEGIN - #define ANNOTATE_IGNORE_READS_BEGIN() \ - AnnotateIgnoreReadsBegin(__FILE__, __LINE__) - - #undef ANNOTATE_IGNORE_READS_END - #define ANNOTATE_IGNORE_READS_END() \ - AnnotateIgnoreReadsEnd(__FILE__, __LINE__) - - #undef ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN - #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ - do { \ - ANNOTATE_IGNORE_READS_BEGIN(); \ - ANNOTATE_IGNORE_WRITES_BEGIN(); \ - } while (0) \ - - #undef ANNOTATE_IGNORE_READS_AND_WRITES_END - #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ - do { \ - ANNOTATE_IGNORE_WRITES_END(); \ - ANNOTATE_IGNORE_READS_END(); \ - } while (0) \ - - #if defined(__cplusplus) - #undef ANNOTATE_UNPROTECTED_READ - template - inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) { +#undef ANNOTATE_IGNORE_READS_BEGIN +#define ANNOTATE_IGNORE_READS_BEGIN() AnnotateIgnoreReadsBegin(__FILE__, __LINE__) + +#undef ANNOTATE_IGNORE_READS_END +#define ANNOTATE_IGNORE_READS_END() AnnotateIgnoreReadsEnd(__FILE__, __LINE__) + +#undef ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN +#define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ + do { \ + ANNOTATE_IGNORE_READS_BEGIN(); \ + ANNOTATE_IGNORE_WRITES_BEGIN(); \ + } while (0) + +#undef ANNOTATE_IGNORE_READS_AND_WRITES_END +#define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ + do { \ + ANNOTATE_IGNORE_WRITES_END(); \ + ANNOTATE_IGNORE_READS_END(); \ + } while (0) + +#if defined(__cplusplus) +#undef ANNOTATE_UNPROTECTED_READ +template +inline T ANNOTATE_UNPROTECTED_READ(const volatile T& x) { ANNOTATE_IGNORE_READS_BEGIN(); T res = x; ANNOTATE_IGNORE_READS_END(); return res; - } - #endif - -#endif /* CLANG_ANNOTALYSIS_ONLY */ +} +#endif +#endif /* CLANG_ANNOTALYSIS_ONLY */ /* Undefine the macros intended only in this file. 
*/ #undef ANNOTALYSIS_STATIC_INLINE #undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY -#endif /* __DYNAMIC_ANNOTATIONS_H__ */ +#endif /* __DYNAMIC_ANNOTATIONS_H__ */ diff --git a/be/src/gutil/endian.h b/be/src/gutil/endian.h index f957df74aa5859..4ad709fc581d4b 100644 --- a/be/src/gutil/endian.h +++ b/be/src/gutil/endian.h @@ -38,32 +38,30 @@ inline uint64 gbswap_64(uint64 host_int) { #if defined(__GNUC__) && defined(__x86_64__) && !defined(__APPLE__) - // Adapted from /usr/include/byteswap.h. Not available on Mac. - if (__builtin_constant_p(host_int)) { - return __bswap_constant_64(host_int); - } else { - uint64 result; - __asm__("bswap %0" : "=r" (result) : "0" (host_int)); - return result; - } + // Adapted from /usr/include/byteswap.h. Not available on Mac. + if (__builtin_constant_p(host_int)) { + return __bswap_constant_64(host_int); + } else { + uint64 result; + __asm__("bswap %0" : "=r"(result) : "0"(host_int)); + return result; + } #elif defined(bswap_64) - return bswap_64(host_int); + return bswap_64(host_int); #else - return static_cast(bswap_32(static_cast(host_int >> 32))) | - (static_cast(bswap_32(static_cast(host_int))) << 32); -#endif // bswap_64 + return static_cast(bswap_32(static_cast(host_int >> 32))) | + (static_cast(bswap_32(static_cast(host_int))) << 32); +#endif // bswap_64 } inline unsigned __int128 gbswap_128(unsigned __int128 host_int) { - return static_cast(bswap_64(static_cast(host_int >> 64))) | - (static_cast(bswap_64(static_cast(host_int))) << 64); + return static_cast(bswap_64(static_cast(host_int >> 64))) | + (static_cast(bswap_64(static_cast(host_int))) << 64); } // Swap bytes of a 24-bit value. 
inline uint32_t bswap_24(uint32_t x) { - return ((x & 0x0000ffULL) << 16) | - ((x & 0x00ff00ULL)) | - ((x & 0xff0000ULL) >> 16); + return ((x & 0x0000ffULL) << 16) | ((x & 0x00ff00ULL)) | ((x & 0xff0000ULL) >> 16); } #ifdef IS_LITTLE_ENDIAN @@ -74,22 +72,34 @@ inline uint32_t bswap_24(uint32_t x) { // correctly handle the (rather involved) definitions of bswap_32. // gcc guarantees that inline functions are as fast as macros, so // this isn't a performance hit. -inline uint16 ghtons(uint16 x) { return bswap_16(x); } -inline uint32 ghtonl(uint32 x) { return bswap_32(x); } -inline uint64 ghtonll(uint64 x) { return gbswap_64(x); } +inline uint16 ghtons(uint16 x) { + return bswap_16(x); +} +inline uint32 ghtonl(uint32 x) { + return bswap_32(x); +} +inline uint64 ghtonll(uint64 x) { + return gbswap_64(x); +} #elif defined IS_BIG_ENDIAN // These definitions are simpler on big-endian machines // These are functions instead of macros to avoid self-assignment warnings // on calls such as "i = ghtnol(i);". This also provides type checking. -inline uint16 ghtons(uint16 x) { return x; } -inline uint32 ghtonl(uint32 x) { return x; } -inline uint64 ghtonll(uint64 x) { return x; } +inline uint16 ghtons(uint16 x) { + return x; +} +inline uint32 ghtonl(uint32 x) { + return x; +} +inline uint64 ghtonll(uint64 x) { + return x; +} #else #error "Unsupported bytesex: Either IS_BIG_ENDIAN or IS_LITTLE_ENDIAN must be defined" // NOLINT -#endif // bytesex +#endif // bytesex // ntoh* and hton* are the same thing for any size and bytesex, // since the function is an involution, i.e., its own inverse. @@ -107,134 +117,119 @@ inline uint64 ghtonll(uint64 x) { return x; } // // Load/Store methods are alignment safe class LittleEndian { - public: - // Conversion functions. +public: + // Conversion functions. 
#ifdef IS_LITTLE_ENDIAN - static uint16 FromHost16(uint16 x) { return x; } - static uint16 ToHost16(uint16 x) { return x; } + static uint16 FromHost16(uint16 x) { return x; } + static uint16 ToHost16(uint16 x) { return x; } - static uint32 FromHost32(uint32 x) { return x; } - static uint32 ToHost32(uint32 x) { return x; } + static uint32 FromHost32(uint32 x) { return x; } + static uint32 ToHost32(uint32 x) { return x; } - static uint64 FromHost64(uint64 x) { return x; } - static uint64 ToHost64(uint64 x) { return x; } + static uint64 FromHost64(uint64 x) { return x; } + static uint64 ToHost64(uint64 x) { return x; } - static unsigned __int128 FromHost128(unsigned __int128 x) { return x; } - static unsigned __int128 ToHost128(unsigned __int128 x) { return x; } + static unsigned __int128 FromHost128(unsigned __int128 x) { return x; } + static unsigned __int128 ToHost128(unsigned __int128 x) { return x; } - static bool IsLittleEndian() { return true; } + static bool IsLittleEndian() { return true; } #elif defined IS_BIG_ENDIAN - static uint16 FromHost16(uint16 x) { return bswap_16(x); } - static uint16 ToHost16(uint16 x) { return bswap_16(x); } + static uint16 FromHost16(uint16 x) { return bswap_16(x); } + static uint16 ToHost16(uint16 x) { return bswap_16(x); } - static uint32 FromHost32(uint32 x) { return bswap_32(x); } - static uint32 ToHost32(uint32 x) { return bswap_32(x); } + static uint32 FromHost32(uint32 x) { return bswap_32(x); } + static uint32 ToHost32(uint32 x) { return bswap_32(x); } - static uint64 FromHost64(uint64 x) { return gbswap_64(x); } - static uint64 ToHost64(uint64 x) { return gbswap_64(x); } + static uint64 FromHost64(uint64 x) { return gbswap_64(x); } + static uint64 ToHost64(uint64 x) { return gbswap_64(x); } - static bool IsLittleEndian() { return false; } + static bool IsLittleEndian() { return false; } #endif /* ENDIAN */ - // Functions to do unaligned loads and stores in little-endian order. 
- static uint16 Load16(const void *p) { - return ToHost16(UNALIGNED_LOAD16(p)); - } - - static void Store16(void *p, uint16 v) { - UNALIGNED_STORE16(p, FromHost16(v)); - } - - static uint32 Load32(const void *p) { - return ToHost32(UNALIGNED_LOAD32(p)); - } - - static void Store32(void *p, uint32 v) { - UNALIGNED_STORE32(p, FromHost32(v)); - } - - static uint64 Load64(const void *p) { - return ToHost64(UNALIGNED_LOAD64(p)); - } - - // Build a uint64 from 1-8 bytes. - // 8 * len least significant bits are loaded from the memory with - // LittleEndian order. The 64 - 8 * len most significant bits are - // set all to 0. - // In latex-friendly words, this function returns: - // $\sum_{i=0}^{len-1} p[i] 256^{i}$, where p[i] is unsigned. - // - // This function is equivalent with: - // uint64 val = 0; - // memcpy(&val, p, len); - // return ToHost64(val); - // TODO(user): write a small benchmark and benchmark the speed - // of a memcpy based approach. - // - // For speed reasons this function does not work for len == 0. - // The caller needs to guarantee that 1 <= len <= 8. - static uint64 Load64VariableLength(const void * const p, int len) { - assert(len >= 1 && len <= 8); - const char * const buf = static_cast(p); - uint64 val = 0; - --len; - do { - val = (val << 8) | buf[len]; - // (--len >= 0) is about 10 % faster than (len--) in some benchmarks. - } while (--len >= 0); - // No ToHost64(...) needed. The bytes are accessed in little-endian manner - // on every architecture. - return val; - } - - static void Store64(void *p, uint64 v) { - UNALIGNED_STORE64(p, FromHost64(v)); - } - - static uint128 Load128(const void *p) { - return uint128( - ToHost64(UNALIGNED_LOAD64(reinterpret_cast(p) + 1)), - ToHost64(UNALIGNED_LOAD64(p))); - } - - static void Store128(void *p, const uint128 v) { - UNALIGNED_STORE64(p, FromHost64(Uint128Low64(v))); - UNALIGNED_STORE64(reinterpret_cast(p) + 1, - FromHost64(Uint128High64(v))); - } - - // Build a uint128 from 1-16 bytes. 
- // 8 * len least significant bits are loaded from the memory with - // LittleEndian order. The 128 - 8 * len most significant bits are - // set all to 0. - static uint128 Load128VariableLength(const void *p, int len) { - if (len <= 8) { - return uint128(Load64VariableLength(p, len)); - } else { - return uint128( - Load64VariableLength(static_cast(p) + 8, len - 8), - Load64(p)); + // Functions to do unaligned loads and stores in little-endian order. + static uint16 Load16(const void* p) { return ToHost16(UNALIGNED_LOAD16(p)); } + + static void Store16(void* p, uint16 v) { UNALIGNED_STORE16(p, FromHost16(v)); } + + static uint32 Load32(const void* p) { return ToHost32(UNALIGNED_LOAD32(p)); } + + static void Store32(void* p, uint32 v) { UNALIGNED_STORE32(p, FromHost32(v)); } + + static uint64 Load64(const void* p) { return ToHost64(UNALIGNED_LOAD64(p)); } + + // Build a uint64 from 1-8 bytes. + // 8 * len least significant bits are loaded from the memory with + // LittleEndian order. The 64 - 8 * len most significant bits are + // set all to 0. + // In latex-friendly words, this function returns: + // $\sum_{i=0}^{len-1} p[i] 256^{i}$, where p[i] is unsigned. + // + // This function is equivalent with: + // uint64 val = 0; + // memcpy(&val, p, len); + // return ToHost64(val); + // TODO(user): write a small benchmark and benchmark the speed + // of a memcpy based approach. + // + // For speed reasons this function does not work for len == 0. + // The caller needs to guarantee that 1 <= len <= 8. + static uint64 Load64VariableLength(const void* const p, int len) { + assert(len >= 1 && len <= 8); + const char* const buf = static_cast(p); + uint64 val = 0; + --len; + do { + val = (val << 8) | buf[len]; + // (--len >= 0) is about 10 % faster than (len--) in some benchmarks. + } while (--len >= 0); + // No ToHost64(...) needed. The bytes are accessed in little-endian manner + // on every architecture. 
+ return val; + } + + static void Store64(void* p, uint64 v) { UNALIGNED_STORE64(p, FromHost64(v)); } + + static uint128 Load128(const void* p) { + return uint128(ToHost64(UNALIGNED_LOAD64(reinterpret_cast(p) + 1)), + ToHost64(UNALIGNED_LOAD64(p))); + } + + static void Store128(void* p, const uint128 v) { + UNALIGNED_STORE64(p, FromHost64(Uint128Low64(v))); + UNALIGNED_STORE64(reinterpret_cast(p) + 1, FromHost64(Uint128High64(v))); + } + + // Build a uint128 from 1-16 bytes. + // 8 * len least significant bits are loaded from the memory with + // LittleEndian order. The 128 - 8 * len most significant bits are + // set all to 0. + static uint128 Load128VariableLength(const void* p, int len) { + if (len <= 8) { + return uint128(Load64VariableLength(p, len)); + } else { + return uint128(Load64VariableLength(static_cast(p) + 8, len - 8), + Load64(p)); + } + } + + // Load & Store in machine's word size. + static uword_t LoadUnsignedWord(const void* p) { + if (sizeof(uword_t) == 8) + return Load64(p); + else + return Load32(p); + } + + static void StoreUnsignedWord(void* p, uword_t v) { + if (sizeof(v) == 8) + Store64(p, v); + else + Store32(p, v); } - } - - // Load & Store in machine's word size. 
- static uword_t LoadUnsignedWord(const void *p) { - if (sizeof(uword_t) == 8) - return Load64(p); - else - return Load32(p); - } - - static void StoreUnsignedWord(void *p, uword_t v) { - if (sizeof(v) == 8) - Store64(p, v); - else - Store32(p, v); - } }; // Utilities to convert numbers between the current hosts's native byte @@ -242,143 +237,127 @@ class LittleEndian { // // Load/Store methods are alignment safe class BigEndian { - public: +public: #ifdef IS_LITTLE_ENDIAN - static uint16 FromHost16(uint16 x) { return bswap_16(x); } - static uint16 ToHost16(uint16 x) { return bswap_16(x); } + static uint16 FromHost16(uint16 x) { return bswap_16(x); } + static uint16 ToHost16(uint16 x) { return bswap_16(x); } - static uint32 FromHost24(uint32 x) { return bswap_24(x); } - static uint32 ToHost24(uint32 x) { return bswap_24(x); } + static uint32 FromHost24(uint32 x) { return bswap_24(x); } + static uint32 ToHost24(uint32 x) { return bswap_24(x); } - static uint32 FromHost32(uint32 x) { return bswap_32(x); } - static uint32 ToHost32(uint32 x) { return bswap_32(x); } + static uint32 FromHost32(uint32 x) { return bswap_32(x); } + static uint32 ToHost32(uint32 x) { return bswap_32(x); } - static uint64 FromHost64(uint64 x) { return gbswap_64(x); } - static uint64 ToHost64(uint64 x) { return gbswap_64(x); } + static uint64 FromHost64(uint64 x) { return gbswap_64(x); } + static uint64 ToHost64(uint64 x) { return gbswap_64(x); } - static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); } - static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); } + static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); } + static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); } - static bool IsLittleEndian() { return true; } + static bool IsLittleEndian() { return true; } #elif defined IS_BIG_ENDIAN - static uint16 FromHost16(uint16 x) { return x; } - static uint16 ToHost16(uint16 x) { 
return x; } + static uint16 FromHost16(uint16 x) { return x; } + static uint16 ToHost16(uint16 x) { return x; } static uint32 FromHost24(uint32 x) { return x; } static uint32 ToHost24(uint32 x) { return x; } - static uint32 FromHost32(uint32 x) { return x; } - static uint32 ToHost32(uint32 x) { return x; } + static uint32 FromHost32(uint32 x) { return x; } + static uint32 ToHost32(uint32 x) { return x; } - static uint64 FromHost64(uint64 x) { return x; } - static uint64 ToHost64(uint64 x) { return x; } + static uint64 FromHost64(uint64 x) { return x; } + static uint64 ToHost64(uint64 x) { return x; } - static uint128 FromHost128(uint128 x) { return x; } - static uint128 ToHost128(uint128 x) { return x; } + static uint128 FromHost128(uint128 x) { return x; } + static uint128 ToHost128(uint128 x) { return x; } - static bool IsLittleEndian() { return false; } + static bool IsLittleEndian() { return false; } #endif /* ENDIAN */ - // Functions to do unaligned loads and stores in little-endian order. - static uint16 Load16(const void *p) { - return ToHost16(UNALIGNED_LOAD16(p)); - } - - static void Store16(void *p, uint16 v) { - UNALIGNED_STORE16(p, FromHost16(v)); - } - - static uint32 Load32(const void *p) { - return ToHost32(UNALIGNED_LOAD32(p)); - } - - static void Store32(void *p, uint32 v) { - UNALIGNED_STORE32(p, FromHost32(v)); - } - - static uint64 Load64(const void *p) { - return ToHost64(UNALIGNED_LOAD64(p)); - } - - // Build a uint64 from 1-8 bytes. - // 8 * len least significant bits are loaded from the memory with - // BigEndian order. The 64 - 8 * len most significant bits are - // set all to 0. - // In latex-friendly words, this function returns: - // $\sum_{i=0}^{len-1} p[i] 256^{i}$, where p[i] is unsigned. - // - // This function is equivalent with: - // uint64 val = 0; - // memcpy(&val, p, len); - // return ToHost64(val); - // TODO(user): write a small benchmark and benchmark the speed - // of a memcpy based approach. 
- // - // For speed reasons this function does not work for len == 0. - // The caller needs to guarantee that 1 <= len <= 8. - static uint64 Load64VariableLength(const void * const p, int len) { - assert(len >= 1 && len <= 8); - uint64 val = Load64(p); - uint64 mask = 0; - --len; - do { - mask = (mask << 8) | 0xff; - // (--len >= 0) is about 10 % faster than (len--) in some benchmarks. - } while (--len >= 0); - return val & mask; - } - - static void Store64(void *p, uint64 v) { - UNALIGNED_STORE64(p, FromHost64(v)); - } - - static uint128 Load128(const void *p) { - return uint128( - ToHost64(UNALIGNED_LOAD64(p)), - ToHost64(UNALIGNED_LOAD64(reinterpret_cast(p) + 1))); - } - - static void Store128(void *p, const uint128 v) { - UNALIGNED_STORE64(p, FromHost64(Uint128High64(v))); - UNALIGNED_STORE64(reinterpret_cast(p) + 1, - FromHost64(Uint128Low64(v))); - } - - // Build a uint128 from 1-16 bytes. - // 8 * len least significant bits are loaded from the memory with - // BigEndian order. The 128 - 8 * len most significant bits are - // set all to 0. - static uint128 Load128VariableLength(const void *p, int len) { - if (len <= 8) { - return uint128(Load64VariableLength(static_cast(p)+8, - len)); - } else { - return uint128( - Load64VariableLength(p, len-8), - Load64(static_cast(p)+8)); + // Functions to do unaligned loads and stores in little-endian order. + static uint16 Load16(const void* p) { return ToHost16(UNALIGNED_LOAD16(p)); } + + static void Store16(void* p, uint16 v) { UNALIGNED_STORE16(p, FromHost16(v)); } + + static uint32 Load32(const void* p) { return ToHost32(UNALIGNED_LOAD32(p)); } + + static void Store32(void* p, uint32 v) { UNALIGNED_STORE32(p, FromHost32(v)); } + + static uint64 Load64(const void* p) { return ToHost64(UNALIGNED_LOAD64(p)); } + + // Build a uint64 from 1-8 bytes. + // 8 * len least significant bits are loaded from the memory with + // BigEndian order. The 64 - 8 * len most significant bits are + // set all to 0. 
+ // In latex-friendly words, this function returns: + // $\sum_{i=0}^{len-1} p[i] 256^{i}$, where p[i] is unsigned. + // + // This function is equivalent with: + // uint64 val = 0; + // memcpy(&val, p, len); + // return ToHost64(val); + // TODO(user): write a small benchmark and benchmark the speed + // of a memcpy based approach. + // + // For speed reasons this function does not work for len == 0. + // The caller needs to guarantee that 1 <= len <= 8. + static uint64 Load64VariableLength(const void* const p, int len) { + assert(len >= 1 && len <= 8); + uint64 val = Load64(p); + uint64 mask = 0; + --len; + do { + mask = (mask << 8) | 0xff; + // (--len >= 0) is about 10 % faster than (len--) in some benchmarks. + } while (--len >= 0); + return val & mask; + } + + static void Store64(void* p, uint64 v) { UNALIGNED_STORE64(p, FromHost64(v)); } + + static uint128 Load128(const void* p) { + return uint128(ToHost64(UNALIGNED_LOAD64(p)), + ToHost64(UNALIGNED_LOAD64(reinterpret_cast(p) + 1))); + } + + static void Store128(void* p, const uint128 v) { + UNALIGNED_STORE64(p, FromHost64(Uint128High64(v))); + UNALIGNED_STORE64(reinterpret_cast(p) + 1, FromHost64(Uint128Low64(v))); + } + + // Build a uint128 from 1-16 bytes. + // 8 * len least significant bits are loaded from the memory with + // BigEndian order. The 128 - 8 * len most significant bits are + // set all to 0. + static uint128 Load128VariableLength(const void* p, int len) { + if (len <= 8) { + return uint128(Load64VariableLength(static_cast(p) + 8, len)); + } else { + return uint128(Load64VariableLength(p, len - 8), + Load64(static_cast(p) + 8)); + } + } + + // Load & Store in machine's word size. + static uword_t LoadUnsignedWord(const void* p) { + if (sizeof(uword_t) == 8) + return Load64(p); + else + return Load32(p); + } + + static void StoreUnsignedWord(void* p, uword_t v) { + if (sizeof(uword_t) == 8) + Store64(p, v); + else + Store32(p, v); } - } - - // Load & Store in machine's word size. 
- static uword_t LoadUnsignedWord(const void *p) { - if (sizeof(uword_t) == 8) - return Load64(p); - else - return Load32(p); - } - - static void StoreUnsignedWord(void *p, uword_t v) { - if (sizeof(uword_t) == 8) - Store64(p, v); - else - Store32(p, v); - } -}; // BigEndian +}; // BigEndian // Network byte order is big-endian typedef BigEndian NetworkByteOrder; -#endif // UTIL_ENDIAN_ENDIAN_H_ +#endif // UTIL_ENDIAN_ENDIAN_H_ diff --git a/be/src/gutil/gscoped_ptr.h b/be/src/gutil/gscoped_ptr.h index 5472e67b8262e3..f6630c223a9e86 100644 --- a/be/src/gutil/gscoped_ptr.h +++ b/be/src/gutil/gscoped_ptr.h @@ -102,73 +102,75 @@ #include #include -#include // For std::swap(). +#include // For std::swap(). #include "gutil/basictypes.h" +#include "gutil/move.h" #include "gutil/template_util.h" #include "gutil/type_traits.h" -#include "gutil/move.h" namespace doris { namespace subtle { class RefCountedBase; class RefCountedThreadSafeBase; -} // namespace subtle +} // namespace subtle // Function object which deletes its parameter, which must be a pointer. // If C is an array type, invokes 'delete[]' on the parameter; otherwise, // invokes 'delete'. The default deleter for gscoped_ptr. template struct DefaultDeleter { - DefaultDeleter() {} - template DefaultDeleter(const DefaultDeleter& other) { - // IMPLEMENTATION NOTE: C++11 20.7.1.1.2p2 only provides this constructor - // if U* is implicitly convertible to T* and U is not an array type. - // - // Correct implementation should use SFINAE to disable this - // constructor. However, since there are no other 1-argument constructors, - // using a COMPILE_ASSERT() based on is_convertible<> and requiring - // complete types is simpler and will cause compile failures for equivalent - // misuses. - // - // Note, the is_convertible check also ensures that U is not an - // array. T is guaranteed to be a non-array, so any U* where U is an array - // cannot convert to T*. 
- enum { T_must_be_complete = sizeof(T) }; - enum { U_must_be_complete = sizeof(U) }; - COMPILE_ASSERT((base::is_convertible::value), - U_ptr_must_implicitly_convert_to_T_ptr); - } - inline void operator()(T* ptr) const { - enum { type_must_be_complete = sizeof(T) }; - delete ptr; - } + DefaultDeleter() {} + template + DefaultDeleter(const DefaultDeleter& other) { + // IMPLEMENTATION NOTE: C++11 20.7.1.1.2p2 only provides this constructor + // if U* is implicitly convertible to T* and U is not an array type. + // + // Correct implementation should use SFINAE to disable this + // constructor. However, since there are no other 1-argument constructors, + // using a COMPILE_ASSERT() based on is_convertible<> and requiring + // complete types is simpler and will cause compile failures for equivalent + // misuses. + // + // Note, the is_convertible check also ensures that U is not an + // array. T is guaranteed to be a non-array, so any U* where U is an array + // cannot convert to T*. + enum { T_must_be_complete = sizeof(T) }; + enum { U_must_be_complete = sizeof(U) }; + COMPILE_ASSERT((base::is_convertible::value), + U_ptr_must_implicitly_convert_to_T_ptr); + } + inline void operator()(T* ptr) const { + enum { type_must_be_complete = sizeof(T) }; + delete ptr; + } }; // Specialization of DefaultDeleter for array types. template struct DefaultDeleter { - inline void operator()(T* ptr) const { - enum { type_must_be_complete = sizeof(T) }; - delete[] ptr; - } - - private: - // Disable this operator for any U != T because it is undefined to execute - // an array delete when the static type of the array mismatches the dynamic - // type. 
- // - // References: - // C++98 [expr.delete]p3 - // http://cplusplus.github.com/LWG/lwg-defects.html#938 - template void operator()(U* array) const; + inline void operator()(T* ptr) const { + enum { type_must_be_complete = sizeof(T) }; + delete[] ptr; + } + +private: + // Disable this operator for any U != T because it is undefined to execute + // an array delete when the static type of the array mismatches the dynamic + // type. + // + // References: + // C++98 [expr.delete]p3 + // http://cplusplus.github.com/LWG/lwg-defects.html#938 + template + void operator()(U* array) const; }; template struct DefaultDeleter { - // Never allow someone to declare something like gscoped_ptr. - COMPILE_ASSERT(sizeof(T) == -1, do_not_use_array_with_size_as_type); + // Never allow someone to declare something like gscoped_ptr. + COMPILE_ASSERT(sizeof(T) == -1, do_not_use_array_with_size_as_type); }; // Function object which invokes 'free' on its parameter, which must be @@ -177,126 +179,123 @@ struct DefaultDeleter { // gscoped_ptr foo_ptr( // static_cast(malloc(sizeof(int)))); struct FreeDeleter { - inline void operator()(void* ptr) const { - free(ptr); - } + inline void operator()(void* ptr) const { free(ptr); } }; namespace internal { -template struct IsNotRefCounted { - enum { - value = !base::is_convertible::value && - !base::is_convertible:: - value - }; +template +struct IsNotRefCounted { + enum { + value = !base::is_convertible::value && + !base::is_convertible::value + }; }; // Minimal implementation of the core logic of gscoped_ptr, suitable for // reuse in both gscoped_ptr and its specializations. template class gscoped_ptr_impl { - public: - explicit gscoped_ptr_impl(T* p) : data_(p) { } - - // Initializer for deleters that have data parameters. - gscoped_ptr_impl(T* p, const D& d) : data_(p, d) {} - - // Templated constructor that destructively takes the value from another - // gscoped_ptr_impl. 
- template - gscoped_ptr_impl(gscoped_ptr_impl* other) - : data_(other->release(), other->get_deleter()) { - // We do not support move-only deleters. We could modify our move - // emulation to have base::subtle::move() and base::subtle::forward() - // functions that are imperfect emulations of their C++11 equivalents, - // but until there's a requirement, just assume deleters are copyable. - } - - template - void TakeState(gscoped_ptr_impl* other) { - // See comment in templated constructor above regarding lack of support - // for move-only deleters. - reset(other->release()); - get_deleter() = other->get_deleter(); - } - - ~gscoped_ptr_impl() { - if (data_.ptr != NULL) { - // Not using get_deleter() saves one function call in non-optimized - // builds. - static_cast(data_)(data_.ptr); +public: + explicit gscoped_ptr_impl(T* p) : data_(p) {} + + // Initializer for deleters that have data parameters. + gscoped_ptr_impl(T* p, const D& d) : data_(p, d) {} + + // Templated constructor that destructively takes the value from another + // gscoped_ptr_impl. + template + gscoped_ptr_impl(gscoped_ptr_impl* other) + : data_(other->release(), other->get_deleter()) { + // We do not support move-only deleters. We could modify our move + // emulation to have base::subtle::move() and base::subtle::forward() + // functions that are imperfect emulations of their C++11 equivalents, + // but until there's a requirement, just assume deleters are copyable. + } + + template + void TakeState(gscoped_ptr_impl* other) { + // See comment in templated constructor above regarding lack of support + // for move-only deleters. + reset(other->release()); + get_deleter() = other->get_deleter(); } - } - void reset(T* p) { - // This is a self-reset, which is no longer allowed: http://crbug.com/162971 - if (p != NULL && p == data_.ptr) - abort(); + ~gscoped_ptr_impl() { + if (data_.ptr != NULL) { + // Not using get_deleter() saves one function call in non-optimized + // builds. 
+ static_cast(data_)(data_.ptr); + } + } - // Note that running data_.ptr = p can lead to undefined behavior if - // get_deleter()(get()) deletes this. In order to pevent this, reset() - // should update the stored pointer before deleting its old value. - // - // However, changing reset() to use that behavior may cause current code to - // break in unexpected ways. If the destruction of the owned object - // dereferences the gscoped_ptr when it is destroyed by a call to reset(), - // then it will incorrectly dispatch calls to |p| rather than the original - // value of |data_.ptr|. - // - // During the transition period, set the stored pointer to NULL while - // deleting the object. Eventually, this safety check will be removed to - // prevent the scenario initially described from occuring and - // http://crbug.com/176091 can be closed. - T* old = data_.ptr; - data_.ptr = NULL; - if (old != NULL) - static_cast(data_)(old); - data_.ptr = p; - } - - T* get() const { return data_.ptr; } - - D& get_deleter() { return data_; } - const D& get_deleter() const { return data_; } - - void swap(gscoped_ptr_impl& p2) { - // Standard swap idiom: 'using std::swap' ensures that std::swap is - // present in the overload set, but we call swap unqualified so that - // any more-specific overloads can be used, if available. - using std::swap; - swap(static_cast(data_), static_cast(p2.data_)); - swap(data_.ptr, p2.data_.ptr); - } - - T* release() { - T* old_ptr = data_.ptr; - data_.ptr = NULL; - return old_ptr; - } - - private: - // Needed to allow type-converting constructor. - template friend class gscoped_ptr_impl; - - // Use the empty base class optimization to allow us to have a D - // member, while avoiding any space overhead for it when D is an - // empty class. See e.g. http://www.cantrip.org/emptyopt.html for a good - // discussion of this technique. 
- struct Data : public D { - explicit Data(T* ptr_in) : ptr(ptr_in) {} - Data(T* ptr_in, D other) : D(std::move(other)), ptr(ptr_in) {} - T* ptr; - }; - - Data data_; - - DISALLOW_COPY_AND_ASSIGN(gscoped_ptr_impl); + void reset(T* p) { + // This is a self-reset, which is no longer allowed: http://crbug.com/162971 + if (p != NULL && p == data_.ptr) abort(); + + // Note that running data_.ptr = p can lead to undefined behavior if + // get_deleter()(get()) deletes this. In order to pevent this, reset() + // should update the stored pointer before deleting its old value. + // + // However, changing reset() to use that behavior may cause current code to + // break in unexpected ways. If the destruction of the owned object + // dereferences the gscoped_ptr when it is destroyed by a call to reset(), + // then it will incorrectly dispatch calls to |p| rather than the original + // value of |data_.ptr|. + // + // During the transition period, set the stored pointer to NULL while + // deleting the object. Eventually, this safety check will be removed to + // prevent the scenario initially described from occuring and + // http://crbug.com/176091 can be closed. + T* old = data_.ptr; + data_.ptr = NULL; + if (old != NULL) static_cast(data_)(old); + data_.ptr = p; + } + + T* get() const { return data_.ptr; } + + D& get_deleter() { return data_; } + const D& get_deleter() const { return data_; } + + void swap(gscoped_ptr_impl& p2) { + // Standard swap idiom: 'using std::swap' ensures that std::swap is + // present in the overload set, but we call swap unqualified so that + // any more-specific overloads can be used, if available. + using std::swap; + swap(static_cast(data_), static_cast(p2.data_)); + swap(data_.ptr, p2.data_.ptr); + } + + T* release() { + T* old_ptr = data_.ptr; + data_.ptr = NULL; + return old_ptr; + } + +private: + // Needed to allow type-converting constructor. 
+ template + friend class gscoped_ptr_impl; + + // Use the empty base class optimization to allow us to have a D + // member, while avoiding any space overhead for it when D is an + // empty class. See e.g. http://www.cantrip.org/emptyopt.html for a good + // discussion of this technique. + struct Data : public D { + explicit Data(T* ptr_in) : ptr(ptr_in) {} + Data(T* ptr_in, D other) : D(std::move(other)), ptr(ptr_in) {} + T* ptr; + }; + + Data data_; + + DISALLOW_COPY_AND_ASSIGN(gscoped_ptr_impl); }; -} // namespace internal +} // namespace internal -} // namespace doris +} // namespace doris // A gscoped_ptr is like a T*, except that the destructor of gscoped_ptr // automatically deletes the pointer it holds (if any). @@ -316,258 +315,255 @@ class gscoped_ptr_impl { // types. template > class gscoped_ptr { - MOVE_ONLY_TYPE_FOR_CPP_03(gscoped_ptr, RValue) - - COMPILE_ASSERT(doris::internal::IsNotRefCounted::value, - T_is_refcounted_type_and_needs_scoped_refptr); - - public: - // The element and deleter types. - typedef T element_type; - typedef D deleter_type; - - // Constructor. Defaults to initializing with NULL. - gscoped_ptr() : impl_(NULL) { } - - // Constructor. Takes ownership of p. - explicit gscoped_ptr(element_type* p) : impl_(p) { } - - // Constructor. Allows initialization of a stateful deleter. - gscoped_ptr(element_type* p, const D& d) : impl_(p, d) { } - - // Constructor. Allows construction from a gscoped_ptr rvalue for a - // convertible type and deleter. - // - // IMPLEMENTATION NOTE: C++11 unique_ptr<> keeps this constructor distinct - // from the normal move constructor. By C++11 20.7.1.2.1.21, this constructor - // has different post-conditions if D is a reference type. Since this - // implementation does not support deleters with reference type, - // we do not need a separate move constructor allowing us to avoid one - // use of SFINAE. You only need to care about this if you modify the - // implementation of gscoped_ptr. 
- template - gscoped_ptr(gscoped_ptr other) : impl_(&other.impl_) { - COMPILE_ASSERT(!base::is_array::value, U_cannot_be_an_array); - } - - // Constructor. Move constructor for C++03 move emulation of this type. - gscoped_ptr(RValue rvalue) : impl_(&rvalue.object->impl_) { } - - // operator=. Allows assignment from a gscoped_ptr rvalue for a convertible - // type and deleter. - // - // IMPLEMENTATION NOTE: C++11 unique_ptr<> keeps this operator= distinct from - // the normal move assignment operator. By C++11 20.7.1.2.3.4, this templated - // form has different requirements on for move-only Deleters. Since this - // implementation does not support move-only Deleters, we do not need a - // separate move assignment operator allowing us to avoid one use of SFINAE. - // You only need to care about this if you modify the implementation of - // gscoped_ptr. - template - gscoped_ptr& operator=(gscoped_ptr rhs) { - COMPILE_ASSERT(!base::is_array::value, U_cannot_be_an_array); - impl_.TakeState(&rhs.impl_); - return *this; - } - - // Reset. Deletes the currently owned object, if any. - // Then takes ownership of a new object, if given. - void reset(element_type* p = NULL) { impl_.reset(p); } - - // Accessors to get the owned object. - // operator* and operator-> will assert() if there is no current object. - element_type& operator*() const { - assert(impl_.get() != NULL); - return *impl_.get(); - } - element_type* operator->() const { - assert(impl_.get() != NULL); - return impl_.get(); - } - element_type* get() const { return impl_.get(); } - - // Access to the deleter. - deleter_type& get_deleter() { return impl_.get_deleter(); } - const deleter_type& get_deleter() const { return impl_.get_deleter(); } - - // Allow gscoped_ptr to be used in boolean expressions, but not - // implicitly convertible to a real bool (which is dangerous). 
- private: - typedef doris::internal::gscoped_ptr_impl - gscoped_ptr::*Testable; - - public: - operator Testable() const { return impl_.get() ? &gscoped_ptr::impl_ : NULL; } - - // Comparison operators. - // These return whether two gscoped_ptr refer to the same object, not just to - // two different but equal objects. - bool operator==(const element_type* p) const { return impl_.get() == p; } - bool operator!=(const element_type* p) const { return impl_.get() != p; } - - // Swap two scoped pointers. - void swap(gscoped_ptr& p2) { - impl_.swap(p2.impl_); - } - - // Release a pointer. - // The return value is the current pointer held by this object. - // If this object holds a NULL pointer, the return value is NULL. - // After this operation, this object will hold a NULL pointer, - // and will not own the object any more. - element_type* release() WARN_UNUSED_RESULT { - return impl_.release(); - } - - // C++98 doesn't support functions templates with default parameters which - // makes it hard to write a PassAs() that understands converting the deleter - // while preserving simple calling semantics. - // - // Until there is a use case for PassAs() with custom deleters, just ignore - // the custom deleter. - template - gscoped_ptr PassAs() { - return gscoped_ptr(Pass()); - } - - private: - // Needed to reach into |impl_| in the constructor. - template friend class gscoped_ptr; - doris::internal::gscoped_ptr_impl impl_; - - // Forbid comparison of gscoped_ptr types. If U != T, it totally - // doesn't make sense, and if U == T, it still doesn't make sense - // because you should never have the same object owned by two different - // gscoped_ptrs. - template bool operator==(gscoped_ptr const& p2) const; - template bool operator!=(gscoped_ptr const& p2) const; + MOVE_ONLY_TYPE_FOR_CPP_03(gscoped_ptr, RValue) + + COMPILE_ASSERT(doris::internal::IsNotRefCounted::value, + T_is_refcounted_type_and_needs_scoped_refptr); + +public: + // The element and deleter types. 
+ typedef T element_type; + typedef D deleter_type; + + // Constructor. Defaults to initializing with NULL. + gscoped_ptr() : impl_(NULL) {} + + // Constructor. Takes ownership of p. + explicit gscoped_ptr(element_type* p) : impl_(p) {} + + // Constructor. Allows initialization of a stateful deleter. + gscoped_ptr(element_type* p, const D& d) : impl_(p, d) {} + + // Constructor. Allows construction from a gscoped_ptr rvalue for a + // convertible type and deleter. + // + // IMPLEMENTATION NOTE: C++11 unique_ptr<> keeps this constructor distinct + // from the normal move constructor. By C++11 20.7.1.2.1.21, this constructor + // has different post-conditions if D is a reference type. Since this + // implementation does not support deleters with reference type, + // we do not need a separate move constructor allowing us to avoid one + // use of SFINAE. You only need to care about this if you modify the + // implementation of gscoped_ptr. + template + gscoped_ptr(gscoped_ptr other) : impl_(&other.impl_) { + COMPILE_ASSERT(!base::is_array::value, U_cannot_be_an_array); + } + + // Constructor. Move constructor for C++03 move emulation of this type. + gscoped_ptr(RValue rvalue) : impl_(&rvalue.object->impl_) {} + + // operator=. Allows assignment from a gscoped_ptr rvalue for a convertible + // type and deleter. + // + // IMPLEMENTATION NOTE: C++11 unique_ptr<> keeps this operator= distinct from + // the normal move assignment operator. By C++11 20.7.1.2.3.4, this templated + // form has different requirements on for move-only Deleters. Since this + // implementation does not support move-only Deleters, we do not need a + // separate move assignment operator allowing us to avoid one use of SFINAE. + // You only need to care about this if you modify the implementation of + // gscoped_ptr. + template + gscoped_ptr& operator=(gscoped_ptr rhs) { + COMPILE_ASSERT(!base::is_array::value, U_cannot_be_an_array); + impl_.TakeState(&rhs.impl_); + return *this; + } + + // Reset. 
Deletes the currently owned object, if any. + // Then takes ownership of a new object, if given. + void reset(element_type* p = NULL) { impl_.reset(p); } + + // Accessors to get the owned object. + // operator* and operator-> will assert() if there is no current object. + element_type& operator*() const { + assert(impl_.get() != NULL); + return *impl_.get(); + } + element_type* operator->() const { + assert(impl_.get() != NULL); + return impl_.get(); + } + element_type* get() const { return impl_.get(); } + + // Access to the deleter. + deleter_type& get_deleter() { return impl_.get_deleter(); } + const deleter_type& get_deleter() const { return impl_.get_deleter(); } + + // Allow gscoped_ptr to be used in boolean expressions, but not + // implicitly convertible to a real bool (which is dangerous). +private: + typedef doris::internal::gscoped_ptr_impl gscoped_ptr::*Testable; + +public: + operator Testable() const { return impl_.get() ? &gscoped_ptr::impl_ : NULL; } + + // Comparison operators. + // These return whether two gscoped_ptr refer to the same object, not just to + // two different but equal objects. + bool operator==(const element_type* p) const { return impl_.get() == p; } + bool operator!=(const element_type* p) const { return impl_.get() != p; } + + // Swap two scoped pointers. + void swap(gscoped_ptr& p2) { impl_.swap(p2.impl_); } + + // Release a pointer. + // The return value is the current pointer held by this object. + // If this object holds a NULL pointer, the return value is NULL. + // After this operation, this object will hold a NULL pointer, + // and will not own the object any more. + element_type* release() WARN_UNUSED_RESULT { return impl_.release(); } + + // C++98 doesn't support functions templates with default parameters which + // makes it hard to write a PassAs() that understands converting the deleter + // while preserving simple calling semantics. 
+ // + // Until there is a use case for PassAs() with custom deleters, just ignore + // the custom deleter. + template + gscoped_ptr PassAs() { + return gscoped_ptr(Pass()); + } + +private: + // Needed to reach into |impl_| in the constructor. + template + friend class gscoped_ptr; + doris::internal::gscoped_ptr_impl impl_; + + // Forbid comparison of gscoped_ptr types. If U != T, it totally + // doesn't make sense, and if U == T, it still doesn't make sense + // because you should never have the same object owned by two different + // gscoped_ptrs. + template + bool operator==(gscoped_ptr const& p2) const; + template + bool operator!=(gscoped_ptr const& p2) const; }; template class gscoped_ptr { - MOVE_ONLY_TYPE_FOR_CPP_03(gscoped_ptr, RValue) - - public: - // The element and deleter types. - typedef T element_type; - typedef D deleter_type; - - // Constructor. Defaults to initializing with NULL. - gscoped_ptr() : impl_(NULL) { } - - // Constructor. Stores the given array. Note that the argument's type - // must exactly match T*. In particular: - // - it cannot be a pointer to a type derived from T, because it is - // inherently unsafe in the general case to access an array through a - // pointer whose dynamic type does not match its static type (eg., if - // T and the derived types had different sizes access would be - // incorrectly calculated). Deletion is also always undefined - // (C++98 [expr.delete]p3). If you're doing this, fix your code. - // - it cannot be NULL, because NULL is an integral expression, not a - // pointer to T. Use the no-argument version instead of explicitly - // passing NULL. - // - it cannot be const-qualified differently from T per unique_ptr spec - // (http://cplusplus.github.com/LWG/lwg-active.html#2118). Users wanting - // to work around this may use implicit_cast(). - // However, because of the first bullet in this comment, users MUST - // NOT use implicit_cast() to upcast the static type of the array. 
- explicit gscoped_ptr(element_type* array) : impl_(array) { } - - // Constructor. Move constructor for C++03 move emulation of this type. - gscoped_ptr(RValue rvalue) : impl_(&rvalue.object->impl_) { } - - // operator=. Move operator= for C++03 move emulation of this type. - gscoped_ptr& operator=(RValue rhs) { - impl_.TakeState(&rhs.object->impl_); - return *this; - } - - // Reset. Deletes the currently owned array, if any. - // Then takes ownership of a new object, if given. - void reset(element_type* array = NULL) { impl_.reset(array); } - - // Accessors to get the owned array. - element_type& operator[](size_t i) const { - assert(impl_.get() != NULL); - return impl_.get()[i]; - } - element_type* get() const { return impl_.get(); } - - // Access to the deleter. - deleter_type& get_deleter() { return impl_.get_deleter(); } - const deleter_type& get_deleter() const { return impl_.get_deleter(); } - - // Allow gscoped_ptr to be used in boolean expressions, but not - // implicitly convertible to a real bool (which is dangerous). - private: - typedef doris::internal::gscoped_ptr_impl - gscoped_ptr::*Testable; - - public: - operator Testable() const { return impl_.get() ? &gscoped_ptr::impl_ : NULL; } - - // Comparison operators. - // These return whether two gscoped_ptr refer to the same object, not just to - // two different but equal objects. - bool operator==(element_type* array) const { return impl_.get() == array; } - bool operator!=(element_type* array) const { return impl_.get() != array; } - - // Swap two scoped pointers. - void swap(gscoped_ptr& p2) { - impl_.swap(p2.impl_); - } - - // Release a pointer. - // The return value is the current pointer held by this object. - // If this object holds a NULL pointer, the return value is NULL. - // After this operation, this object will hold a NULL pointer, - // and will not own the object any more. 
- element_type* release() WARN_UNUSED_RESULT { - return impl_.release(); - } - - private: - // Force element_type to be a complete type. - enum { type_must_be_complete = sizeof(element_type) }; - - // Actually hold the data. - doris::internal::gscoped_ptr_impl impl_; - - // Disable initialization from any type other than element_type*, by - // providing a constructor that matches such an initialization, but is - // private and has no definition. This is disabled because it is not safe to - // call delete[] on an array whose static type does not match its dynamic - // type. - template explicit gscoped_ptr(U* array); - explicit gscoped_ptr(int disallow_construction_from_null); - - // Disable reset() from any type other than element_type*, for the same - // reasons as the constructor above. - template void reset(U* array); - void reset(int disallow_reset_from_null); - - // Forbid comparison of gscoped_ptr types. If U != T, it totally - // doesn't make sense, and if U == T, it still doesn't make sense - // because you should never have the same object owned by two different - // gscoped_ptrs. - template bool operator==(gscoped_ptr const& p2) const; - template bool operator!=(gscoped_ptr const& p2) const; + MOVE_ONLY_TYPE_FOR_CPP_03(gscoped_ptr, RValue) + +public: + // The element and deleter types. + typedef T element_type; + typedef D deleter_type; + + // Constructor. Defaults to initializing with NULL. + gscoped_ptr() : impl_(NULL) {} + + // Constructor. Stores the given array. Note that the argument's type + // must exactly match T*. In particular: + // - it cannot be a pointer to a type derived from T, because it is + // inherently unsafe in the general case to access an array through a + // pointer whose dynamic type does not match its static type (eg., if + // T and the derived types had different sizes access would be + // incorrectly calculated). Deletion is also always undefined + // (C++98 [expr.delete]p3). If you're doing this, fix your code. 
+ // - it cannot be NULL, because NULL is an integral expression, not a + // pointer to T. Use the no-argument version instead of explicitly + // passing NULL. + // - it cannot be const-qualified differently from T per unique_ptr spec + // (http://cplusplus.github.com/LWG/lwg-active.html#2118). Users wanting + // to work around this may use implicit_cast(). + // However, because of the first bullet in this comment, users MUST + // NOT use implicit_cast() to upcast the static type of the array. + explicit gscoped_ptr(element_type* array) : impl_(array) {} + + // Constructor. Move constructor for C++03 move emulation of this type. + gscoped_ptr(RValue rvalue) : impl_(&rvalue.object->impl_) {} + + // operator=. Move operator= for C++03 move emulation of this type. + gscoped_ptr& operator=(RValue rhs) { + impl_.TakeState(&rhs.object->impl_); + return *this; + } + + // Reset. Deletes the currently owned array, if any. + // Then takes ownership of a new object, if given. + void reset(element_type* array = NULL) { impl_.reset(array); } + + // Accessors to get the owned array. + element_type& operator[](size_t i) const { + assert(impl_.get() != NULL); + return impl_.get()[i]; + } + element_type* get() const { return impl_.get(); } + + // Access to the deleter. + deleter_type& get_deleter() { return impl_.get_deleter(); } + const deleter_type& get_deleter() const { return impl_.get_deleter(); } + + // Allow gscoped_ptr to be used in boolean expressions, but not + // implicitly convertible to a real bool (which is dangerous). +private: + typedef doris::internal::gscoped_ptr_impl gscoped_ptr::*Testable; + +public: + operator Testable() const { return impl_.get() ? &gscoped_ptr::impl_ : NULL; } + + // Comparison operators. + // These return whether two gscoped_ptr refer to the same object, not just to + // two different but equal objects. 
+ bool operator==(element_type* array) const { return impl_.get() == array; } + bool operator!=(element_type* array) const { return impl_.get() != array; } + + // Swap two scoped pointers. + void swap(gscoped_ptr& p2) { impl_.swap(p2.impl_); } + + // Release a pointer. + // The return value is the current pointer held by this object. + // If this object holds a NULL pointer, the return value is NULL. + // After this operation, this object will hold a NULL pointer, + // and will not own the object any more. + element_type* release() WARN_UNUSED_RESULT { return impl_.release(); } + +private: + // Force element_type to be a complete type. + enum { type_must_be_complete = sizeof(element_type) }; + + // Actually hold the data. + doris::internal::gscoped_ptr_impl impl_; + + // Disable initialization from any type other than element_type*, by + // providing a constructor that matches such an initialization, but is + // private and has no definition. This is disabled because it is not safe to + // call delete[] on an array whose static type does not match its dynamic + // type. + template + explicit gscoped_ptr(U* array); + explicit gscoped_ptr(int disallow_construction_from_null); + + // Disable reset() from any type other than element_type*, for the same + // reasons as the constructor above. + template + void reset(U* array); + void reset(int disallow_reset_from_null); + + // Forbid comparison of gscoped_ptr types. If U != T, it totally + // doesn't make sense, and if U == T, it still doesn't make sense + // because you should never have the same object owned by two different + // gscoped_ptrs. 
+ template + bool operator==(gscoped_ptr const& p2) const; + template + bool operator!=(gscoped_ptr const& p2) const; }; // Free functions template void swap(gscoped_ptr& p1, gscoped_ptr& p2) { - p1.swap(p2); + p1.swap(p2); } template bool operator==(T* p1, const gscoped_ptr& p2) { - return p1 == p2.get(); + return p1 == p2.get(); } template bool operator!=(T* p1, const gscoped_ptr& p2) { - return p1 != p2.get(); + return p1 != p2.get(); } // DEPRECATED: Use gscoped_ptr instead. @@ -583,112 +579,109 @@ bool operator!=(T* p1, const gscoped_ptr& p2) { // Size: sizeof(gscoped_array) == sizeof(C*) template class gscoped_array { - MOVE_ONLY_TYPE_FOR_CPP_03(gscoped_array, RValue) - - public: - - // The element type - typedef C element_type; - - // Constructor. Defaults to initializing with NULL. - // There is no way to create an uninitialized gscoped_array. - // The input parameter must be allocated with new []. - explicit gscoped_array(C* p = NULL) : array_(p) { } - - // Constructor. Move constructor for C++03 move emulation of this type. - gscoped_array(RValue rvalue) - : array_(rvalue.object->release()) { - } - - // Destructor. If there is a C object, delete it. - // We don't need to test ptr_ == NULL because C++ does that for us. - ~gscoped_array() { - enum { type_must_be_complete = sizeof(C) }; - delete[] array_; - } - - // operator=. Move operator= for C++03 move emulation of this type. - gscoped_array& operator=(RValue rhs) { - reset(rhs.object->release()); - return *this; - } - - // Reset. Deletes the current owned object, if any. - // Then takes ownership of a new object, if given. - // this->reset(this->get()) works. - void reset(C* p = NULL) { - if (p != array_) { - enum { type_must_be_complete = sizeof(C) }; - delete[] array_; - array_ = p; + MOVE_ONLY_TYPE_FOR_CPP_03(gscoped_array, RValue) + +public: + // The element type + typedef C element_type; + + // Constructor. Defaults to initializing with NULL. 
+ // There is no way to create an uninitialized gscoped_array. + // The input parameter must be allocated with new []. + explicit gscoped_array(C* p = NULL) : array_(p) {} + + // Constructor. Move constructor for C++03 move emulation of this type. + gscoped_array(RValue rvalue) : array_(rvalue.object->release()) {} + + // Destructor. If there is a C object, delete it. + // We don't need to test ptr_ == NULL because C++ does that for us. + ~gscoped_array() { + enum { type_must_be_complete = sizeof(C) }; + delete[] array_; + } + + // operator=. Move operator= for C++03 move emulation of this type. + gscoped_array& operator=(RValue rhs) { + reset(rhs.object->release()); + return *this; + } + + // Reset. Deletes the current owned object, if any. + // Then takes ownership of a new object, if given. + // this->reset(this->get()) works. + void reset(C* p = NULL) { + if (p != array_) { + enum { type_must_be_complete = sizeof(C) }; + delete[] array_; + array_ = p; + } } - } - - // Get one element of the current object. - // Will assert() if there is no current object, or index i is negative. - C& operator[](ptrdiff_t i) const { - assert(i >= 0); - assert(array_ != NULL); - return array_[i]; - } - - // Get a pointer to the zeroth element of the current object. - // If there is no current object, return NULL. - C* get() const { - return array_; - } - - // Allow gscoped_array to be used in boolean expressions, but not - // implicitly convertible to a real bool (which is dangerous). - typedef C* gscoped_array::*Testable; - operator Testable() const { return array_ ? &gscoped_array::array_ : NULL; } - - // Comparison operators. - // These return whether two gscoped_array refer to the same object, not just to - // two different but equal objects. - bool operator==(C* p) const { return array_ == p; } - bool operator!=(C* p) const { return array_ != p; } - - // Swap two scoped arrays. 
- void swap(gscoped_array& p2) { - C* tmp = array_; - array_ = p2.array_; - p2.array_ = tmp; - } - - // Release an array. - // The return value is the current pointer held by this object. - // If this object holds a NULL pointer, the return value is NULL. - // After this operation, this object will hold a NULL pointer, - // and will not own the object any more. - C* release() WARN_UNUSED_RESULT { - C* retVal = array_; - array_ = NULL; - return retVal; - } - - private: - C* array_; - - // Forbid comparison of different gscoped_array types. - template bool operator==(gscoped_array const& p2) const; - template bool operator!=(gscoped_array const& p2) const; + + // Get one element of the current object. + // Will assert() if there is no current object, or index i is negative. + C& operator[](ptrdiff_t i) const { + assert(i >= 0); + assert(array_ != NULL); + return array_[i]; + } + + // Get a pointer to the zeroth element of the current object. + // If there is no current object, return NULL. + C* get() const { return array_; } + + // Allow gscoped_array to be used in boolean expressions, but not + // implicitly convertible to a real bool (which is dangerous). + typedef C* gscoped_array::*Testable; + operator Testable() const { return array_ ? &gscoped_array::array_ : NULL; } + + // Comparison operators. + // These return whether two gscoped_array refer to the same object, not just to + // two different but equal objects. + bool operator==(C* p) const { return array_ == p; } + bool operator!=(C* p) const { return array_ != p; } + + // Swap two scoped arrays. + void swap(gscoped_array& p2) { + C* tmp = array_; + array_ = p2.array_; + p2.array_ = tmp; + } + + // Release an array. + // The return value is the current pointer held by this object. + // If this object holds a NULL pointer, the return value is NULL. + // After this operation, this object will hold a NULL pointer, + // and will not own the object any more. 
+ C* release() WARN_UNUSED_RESULT { + C* retVal = array_; + array_ = NULL; + return retVal; + } + +private: + C* array_; + + // Forbid comparison of different gscoped_array types. + template + bool operator==(gscoped_array const& p2) const; + template + bool operator!=(gscoped_array const& p2) const; }; // Free functions template void swap(gscoped_array& p1, gscoped_array& p2) { - p1.swap(p2); + p1.swap(p2); } template bool operator==(C* p1, const gscoped_array& p2) { - return p1 == p2.get(); + return p1 == p2.get(); } template bool operator!=(C* p1, const gscoped_array& p2) { - return p1 != p2.get(); + return p1 != p2.get(); } // DEPRECATED: Use gscoped_ptr instead. @@ -696,127 +689,116 @@ bool operator!=(C* p1, const gscoped_array& p2) { // gscoped_ptr_malloc<> is similar to gscoped_ptr<>, but it accepts a // second template argument, the functor used to free the object. -template +template class gscoped_ptr_malloc { - MOVE_ONLY_TYPE_FOR_CPP_03(gscoped_ptr_malloc, RValue) - - public: - - // The element type - typedef C element_type; - - // Constructor. Defaults to initializing with NULL. - // There is no way to create an uninitialized gscoped_ptr. - // The input parameter must be allocated with an allocator that matches the - // Free functor. For the default Free functor, this is malloc, calloc, or - // realloc. - explicit gscoped_ptr_malloc(C* p = NULL): ptr_(p) {} - - // Constructor. Move constructor for C++03 move emulation of this type. - gscoped_ptr_malloc(RValue rvalue) - : ptr_(rvalue.object->release()) { - } - - // Destructor. If there is a C object, call the Free functor. - ~gscoped_ptr_malloc() { - reset(); - } - - // operator=. Move operator= for C++03 move emulation of this type. - gscoped_ptr_malloc& operator=(RValue rhs) { - reset(rhs.object->release()); - return *this; - } - - // Reset. Calls the Free functor on the current owned object, if any. - // Then takes ownership of a new object, if given. - // this->reset(this->get()) works. 
- void reset(C* p = NULL) { - if (ptr_ != p) { - if (ptr_ != NULL) { - FreeProc free_proc; - free_proc(ptr_); - } - ptr_ = p; + MOVE_ONLY_TYPE_FOR_CPP_03(gscoped_ptr_malloc, RValue) + +public: + // The element type + typedef C element_type; + + // Constructor. Defaults to initializing with NULL. + // There is no way to create an uninitialized gscoped_ptr. + // The input parameter must be allocated with an allocator that matches the + // Free functor. For the default Free functor, this is malloc, calloc, or + // realloc. + explicit gscoped_ptr_malloc(C* p = NULL) : ptr_(p) {} + + // Constructor. Move constructor for C++03 move emulation of this type. + gscoped_ptr_malloc(RValue rvalue) : ptr_(rvalue.object->release()) {} + + // Destructor. If there is a C object, call the Free functor. + ~gscoped_ptr_malloc() { reset(); } + + // operator=. Move operator= for C++03 move emulation of this type. + gscoped_ptr_malloc& operator=(RValue rhs) { + reset(rhs.object->release()); + return *this; + } + + // Reset. Calls the Free functor on the current owned object, if any. + // Then takes ownership of a new object, if given. + // this->reset(this->get()) works. + void reset(C* p = NULL) { + if (ptr_ != p) { + if (ptr_ != NULL) { + FreeProc free_proc; + free_proc(ptr_); + } + ptr_ = p; + } + } + + // Get the current object. + // operator* and operator-> will cause an assert() failure if there is + // no current object. + C& operator*() const { + assert(ptr_ != NULL); + return *ptr_; + } + + C* operator->() const { + assert(ptr_ != NULL); + return ptr_; } - } - - // Get the current object. - // operator* and operator-> will cause an assert() failure if there is - // no current object. 
- C& operator*() const { - assert(ptr_ != NULL); - return *ptr_; - } - - C* operator->() const { - assert(ptr_ != NULL); - return ptr_; - } - - C* get() const { - return ptr_; - } - - // Allow gscoped_ptr_malloc to be used in boolean expressions, but not - // implicitly convertible to a real bool (which is dangerous). - typedef C* gscoped_ptr_malloc::*Testable; - operator Testable() const { return ptr_ ? &gscoped_ptr_malloc::ptr_ : NULL; } - - // Comparison operators. - // These return whether a gscoped_ptr_malloc and a plain pointer refer - // to the same object, not just to two different but equal objects. - // For compatibility with the boost-derived implementation, these - // take non-const arguments. - bool operator==(C* p) const { - return ptr_ == p; - } - - bool operator!=(C* p) const { - return ptr_ != p; - } - - // Swap two scoped pointers. - void swap(gscoped_ptr_malloc & b) { - C* tmp = b.ptr_; - b.ptr_ = ptr_; - ptr_ = tmp; - } - - // Release a pointer. - // The return value is the current pointer held by this object. - // If this object holds a NULL pointer, the return value is NULL. - // After this operation, this object will hold a NULL pointer, - // and will not own the object any more. - C* release() WARN_UNUSED_RESULT { - C* tmp = ptr_; - ptr_ = NULL; - return tmp; - } - - private: - C* ptr_; - - // no reason to use these: each gscoped_ptr_malloc should have its own object - template - bool operator==(gscoped_ptr_malloc const& p) const; - template - bool operator!=(gscoped_ptr_malloc const& p) const; + + C* get() const { return ptr_; } + + // Allow gscoped_ptr_malloc to be used in boolean expressions, but not + // implicitly convertible to a real bool (which is dangerous). + typedef C* gscoped_ptr_malloc::*Testable; + operator Testable() const { return ptr_ ? &gscoped_ptr_malloc::ptr_ : NULL; } + + // Comparison operators. 
+ // These return whether a gscoped_ptr_malloc and a plain pointer refer + // to the same object, not just to two different but equal objects. + // For compatibility with the boost-derived implementation, these + // take non-const arguments. + bool operator==(C* p) const { return ptr_ == p; } + + bool operator!=(C* p) const { return ptr_ != p; } + + // Swap two scoped pointers. + void swap(gscoped_ptr_malloc& b) { + C* tmp = b.ptr_; + b.ptr_ = ptr_; + ptr_ = tmp; + } + + // Release a pointer. + // The return value is the current pointer held by this object. + // If this object holds a NULL pointer, the return value is NULL. + // After this operation, this object will hold a NULL pointer, + // and will not own the object any more. + C* release() WARN_UNUSED_RESULT { + C* tmp = ptr_; + ptr_ = NULL; + return tmp; + } + +private: + C* ptr_; + + // no reason to use these: each gscoped_ptr_malloc should have its own object + template + bool operator==(gscoped_ptr_malloc const& p) const; + template + bool operator!=(gscoped_ptr_malloc const& p) const; }; -template inline -void swap(gscoped_ptr_malloc& a, gscoped_ptr_malloc& b) { - a.swap(b); +template +inline void swap(gscoped_ptr_malloc& a, gscoped_ptr_malloc& b) { + a.swap(b); } -template inline -bool operator==(C* p, const gscoped_ptr_malloc& b) { - return p == b.get(); +template +inline bool operator==(C* p, const gscoped_ptr_malloc& b) { + return p == b.get(); } -template inline -bool operator!=(C* p, const gscoped_ptr_malloc& b) { - return p != b.get(); +template +inline bool operator!=(C* p, const gscoped_ptr_malloc& b) { + return p != b.get(); } // A function to convert T* into gscoped_ptr @@ -824,7 +806,7 @@ bool operator!=(C* p, const gscoped_ptr_malloc& b) { // for gscoped_ptr>(new FooBarBaz(arg)) template gscoped_ptr make_gscoped_ptr(T* ptr) { - return gscoped_ptr(ptr); + return gscoped_ptr(ptr); } -#endif // GUTIL_GSCOPED_PTR_H_ +#endif // GUTIL_GSCOPED_PTR_H_ diff --git 
a/be/src/gutil/hash/builtin_type_hash.h b/be/src/gutil/hash/builtin_type_hash.h index 173839b762b783..11733744693425 100644 --- a/be/src/gutil/hash/builtin_type_hash.h +++ b/be/src/gutil/hash/builtin_type_hash.h @@ -11,33 +11,33 @@ #include #include "gutil/casts.h" +#include "gutil/hash/jenkins_lookup2.h" #include "gutil/integral_types.h" #include "gutil/macros.h" -#include "gutil/hash/jenkins_lookup2.h" inline uint32 Hash32NumWithSeed(uint32 num, uint32 c) { - uint32 b = 0x9e3779b9UL; // the golden ratio; an arbitrary value - mix(num, b, c); - return c; + uint32 b = 0x9e3779b9UL; // the golden ratio; an arbitrary value + mix(num, b, c); + return c; } inline uint64 Hash64NumWithSeed(uint64 num, uint64 c) { - uint64 b = GG_ULONGLONG(0xe08c1d668b756f82); // more of the golden ratio - mix(num, b, c); - return c; + uint64 b = GG_ULONGLONG(0xe08c1d668b756f82); // more of the golden ratio + mix(num, b, c); + return c; } // This function hashes pointer sized items and returns a 32b hash, // convenienty hiding the fact that pointers may be 32b or 64b, // depending on the architecture. inline uint32 Hash32PointerWithSeed(const void* p, uint32 seed) { - uintptr_t pvalue = reinterpret_cast(p); - uint32 h = seed; - // Hash the pointer 32b at a time. - for (size_t i = 0; i < sizeof(pvalue); i += 4) { - h = Hash32NumWithSeed(static_cast(pvalue >> (i*8)), h); - } - return h; + uintptr_t pvalue = reinterpret_cast(p); + uint32 h = seed; + // Hash the pointer 32b at a time. + for (size_t i = 0; i < sizeof(pvalue); i += 4) { + h = Hash32NumWithSeed(static_cast(pvalue >> (i * 8)), h); + } + return h; } // ---------------------------------------------------------------------- @@ -58,38 +58,38 @@ inline uint32 Hash32PointerWithSeed(const void* p, uint32 seed) { // avoid any reserved values. 
// ---------------------------------------------------------------------- inline uint64 Hash64FloatWithSeed(float num, uint64 seed) { - // +0 and -0 are the only floating point numbers which compare equal but - // have distinct bitwise representations in IEEE 754. To work around this, - // we force 0 to be +0. - if (num == 0) { - num = 0; - } - COMPILE_ASSERT(sizeof(float) == sizeof(uint32), float_has_wrong_size); + // +0 and -0 are the only floating point numbers which compare equal but + // have distinct bitwise representations in IEEE 754. To work around this, + // we force 0 to be +0. + if (num == 0) { + num = 0; + } + COMPILE_ASSERT(sizeof(float) == sizeof(uint32), float_has_wrong_size); - const uint64 kMul = 0xc6a4a7935bd1e995ULL; + const uint64 kMul = 0xc6a4a7935bd1e995ULL; - uint64 a = (bit_cast(num) + seed) * kMul; - a ^= (a >> 47); - a *= kMul; - a ^= (a >> 47); - a *= kMul; - return a; + uint64 a = (bit_cast(num) + seed) * kMul; + a ^= (a >> 47); + a *= kMul; + a ^= (a >> 47); + a *= kMul; + return a; } inline uint64 Hash64DoubleWithSeed(double num, uint64 seed) { - if (num == 0) { - num = 0; - } - COMPILE_ASSERT(sizeof(double) == sizeof(uint64), double_has_wrong_size); + if (num == 0) { + num = 0; + } + COMPILE_ASSERT(sizeof(double) == sizeof(uint64), double_has_wrong_size); - const uint64 kMul = 0xc6a4a7935bd1e995ULL; + const uint64 kMul = 0xc6a4a7935bd1e995ULL; - uint64 a = (bit_cast(num) + seed) * kMul; - a ^= (a >> 47); - a *= kMul; - a ^= (a >> 47); - a *= kMul; - return a; + uint64 a = (bit_cast(num) + seed) * kMul; + a ^= (a >> 47); + a *= kMul; + a ^= (a >> 47); + a *= kMul; + return a; } -#endif // UTIL_HASH_BUILTIN_TYPE_HASH_H_ +#endif // UTIL_HASH_BUILTIN_TYPE_HASH_H_ diff --git a/be/src/gutil/hash/city.cc b/be/src/gutil/hash/city.cc index ff67e982026dfc..155c9e7f7d9ba0 100644 --- a/be/src/gutil/hash/city.cc +++ b/be/src/gutil/hash/city.cc @@ -17,6 +17,7 @@ #include "gutil/hash/city.h" #include + #include using std::copy; using std::max; @@ 
-28,11 +29,12 @@ using std::swap; using std::make_pair; using std::pair; -#include "gutil/int128.h" -#include "gutil/integral_types.h" #include -#include "gutil/hash/hash128to64.h" + #include "gutil/endian.h" +#include "gutil/hash/hash128to64.h" +#include "gutil/int128.h" +#include "gutil/integral_types.h" namespace util_hash { @@ -45,272 +47,259 @@ static const uint64 k3 = 0xc70f6907e782aa0bULL; // Bitwise right rotate. Normally this will compile to a single // instruction, especially if the shift is a manifest constant. static uint64 Rotate(uint64 val, int shift) { - DCHECK_GE(shift, 0); - DCHECK_LE(shift, 63); - // Avoid shifting by 64: doing so yields an undefined result. - return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); + DCHECK_GE(shift, 0); + DCHECK_LE(shift, 63); + // Avoid shifting by 64: doing so yields an undefined result. + return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); } // Equivalent to Rotate(), but requires the second arg to be non-zero. // On x86-64, and probably others, it's possible for this to compile // to a single instruction if both args are already in registers. 
static uint64 RotateByAtLeast1(uint64 val, int shift) { - DCHECK_GE(shift, 1); - DCHECK_LE(shift, 63); - return (val >> shift) | (val << (64 - shift)); + DCHECK_GE(shift, 1); + DCHECK_LE(shift, 63); + return (val >> shift) | (val << (64 - shift)); } static uint64 ShiftMix(uint64 val) { - return val ^ (val >> 47); + return val ^ (val >> 47); } static uint64 HashLen16(uint64 u, uint64 v) { - return Hash128to64(uint128(u, v)); + return Hash128to64(uint128(u, v)); } -static uint64 HashLen0to16(const char *s, size_t len) { - DCHECK_GE(len, 0); - DCHECK_LE(len, 16); - if (len > 8) { - uint64 a = LittleEndian::Load64(s); - uint64 b = LittleEndian::Load64(s + len - 8); - return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b; - } - if (len >= 4) { - uint64 a = LittleEndian::Load32(s); - return HashLen16(len + (a << 3), LittleEndian::Load32(s + len - 4)); - } - if (len > 0) { - uint8 a = s[0]; - uint8 b = s[len >> 1]; - uint8 c = s[len - 1]; - uint32 y = static_cast(a) + (static_cast(b) << 8); - uint32 z = len + (static_cast(c) << 2); - return ShiftMix(y * k2 ^ z * k3) * k2; - } - return k2; +static uint64 HashLen0to16(const char* s, size_t len) { + DCHECK_GE(len, 0); + DCHECK_LE(len, 16); + if (len > 8) { + uint64 a = LittleEndian::Load64(s); + uint64 b = LittleEndian::Load64(s + len - 8); + return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b; + } + if (len >= 4) { + uint64 a = LittleEndian::Load32(s); + return HashLen16(len + (a << 3), LittleEndian::Load32(s + len - 4)); + } + if (len > 0) { + uint8 a = s[0]; + uint8 b = s[len >> 1]; + uint8 c = s[len - 1]; + uint32 y = static_cast(a) + (static_cast(b) << 8); + uint32 z = len + (static_cast(c) << 2); + return ShiftMix(y * k2 ^ z * k3) * k2; + } + return k2; } // This probably works well for 16-byte strings as well, but it may be overkill // in that case. 
-static uint64 HashLen17to32(const char *s, size_t len) { - DCHECK_GE(len, 17); - DCHECK_LE(len, 32); - uint64 a = LittleEndian::Load64(s) * k1; - uint64 b = LittleEndian::Load64(s + 8); - uint64 c = LittleEndian::Load64(s + len - 8) * k2; - uint64 d = LittleEndian::Load64(s + len - 16) * k0; - return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d, - a + Rotate(b ^ k3, 20) - c + len); +static uint64 HashLen17to32(const char* s, size_t len) { + DCHECK_GE(len, 17); + DCHECK_LE(len, 32); + uint64 a = LittleEndian::Load64(s) * k1; + uint64 b = LittleEndian::Load64(s + 8); + uint64 c = LittleEndian::Load64(s + len - 8) * k2; + uint64 d = LittleEndian::Load64(s + len - 16) * k0; + return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d, a + Rotate(b ^ k3, 20) - c + len); } // Return a 16-byte hash for 48 bytes. Quick and dirty. // Callers do best to use "random-looking" values for a and b. // (For more, see the code review discussion of CL 18799087.) -static pair WeakHashLen32WithSeeds( - uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) { - a += w; - b = Rotate(b + a + z, 51); - uint64 c = a; - a += x; - a += y; - b += Rotate(a, 23); - return make_pair(a + z, b + c); +static pair WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, + uint64 b) { + a += w; + b = Rotate(b + a + z, 51); + uint64 c = a; + a += x; + a += y; + b += Rotate(a, 23); + return make_pair(a + z, b + c); } // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. 
-static pair WeakHashLen32WithSeeds( - const char* s, uint64 a, uint64 b) { - return WeakHashLen32WithSeeds(LittleEndian::Load64(s), - LittleEndian::Load64(s + 8), - LittleEndian::Load64(s + 16), - LittleEndian::Load64(s + 24), - a, - b); +static pair WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) { + return WeakHashLen32WithSeeds(LittleEndian::Load64(s), LittleEndian::Load64(s + 8), + LittleEndian::Load64(s + 16), LittleEndian::Load64(s + 24), a, b); } // Return an 8-byte hash for 33 to 64 bytes. -static uint64 HashLen33to64(const char *s, size_t len) { - uint64 z = LittleEndian::Load64(s + 24); - uint64 a = LittleEndian::Load64(s) + - (len + LittleEndian::Load64(s + len - 16)) * k0; - uint64 b = Rotate(a + z, 52); - uint64 c = Rotate(a, 37); - a += LittleEndian::Load64(s + 8); - c += Rotate(a, 7); - a += LittleEndian::Load64(s + 16); - uint64 vf = a + z; - uint64 vs = b + Rotate(a, 31) + c; - a = LittleEndian::Load64(s + 16) + LittleEndian::Load64(s + len - 32); - z += LittleEndian::Load64(s + len - 8); - b = Rotate(a + z, 52); - c = Rotate(a, 37); - a += LittleEndian::Load64(s + len - 24); - c += Rotate(a, 7); - a += LittleEndian::Load64(s + len - 16); - uint64 wf = a + z; - uint64 ws = b + Rotate(a, 31) + c; - uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0); - return ShiftMix(r * k0 + vs) * k2; +static uint64 HashLen33to64(const char* s, size_t len) { + uint64 z = LittleEndian::Load64(s + 24); + uint64 a = LittleEndian::Load64(s) + (len + LittleEndian::Load64(s + len - 16)) * k0; + uint64 b = Rotate(a + z, 52); + uint64 c = Rotate(a, 37); + a += LittleEndian::Load64(s + 8); + c += Rotate(a, 7); + a += LittleEndian::Load64(s + 16); + uint64 vf = a + z; + uint64 vs = b + Rotate(a, 31) + c; + a = LittleEndian::Load64(s + 16) + LittleEndian::Load64(s + len - 32); + z += LittleEndian::Load64(s + len - 8); + b = Rotate(a + z, 52); + c = Rotate(a, 37); + a += LittleEndian::Load64(s + len - 24); + c += Rotate(a, 7); + a += LittleEndian::Load64(s + len - 
16); + uint64 wf = a + z; + uint64 ws = b + Rotate(a, 31) + c; + uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0); + return ShiftMix(r * k0 + vs) * k2; } -uint64 CityHash64(const char *s, size_t len) { - if (len <= 32) { - if (len <= 16) { - return HashLen0to16(s, len); - } else { - return HashLen17to32(s, len); +uint64 CityHash64(const char* s, size_t len) { + if (len <= 32) { + if (len <= 16) { + return HashLen0to16(s, len); + } else { + return HashLen17to32(s, len); + } + } else if (len <= 64) { + return HashLen33to64(s, len); } - } else if (len <= 64) { - return HashLen33to64(s, len); - } - // For strings over 64 bytes we hash the end first, and then as we - // loop we keep 56 bytes of state: v, w, x, y, and z. - uint64 x = LittleEndian::Load64(s + len - 40); - uint64 y = LittleEndian::Load64(s + len - 16) + - LittleEndian::Load64(s + len - 56); - uint64 z = HashLen16(LittleEndian::Load64(s + len - 48) + len, - LittleEndian::Load64(s + len - 24)); - pair v = WeakHashLen32WithSeeds(s + len - 64, len, z); - pair w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); - x = x * k1 + LittleEndian::Load64(s); + // For strings over 64 bytes we hash the end first, and then as we + // loop we keep 56 bytes of state: v, w, x, y, and z. + uint64 x = LittleEndian::Load64(s + len - 40); + uint64 y = LittleEndian::Load64(s + len - 16) + LittleEndian::Load64(s + len - 56); + uint64 z = + HashLen16(LittleEndian::Load64(s + len - 48) + len, LittleEndian::Load64(s + len - 24)); + pair v = WeakHashLen32WithSeeds(s + len - 64, len, z); + pair w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); + x = x * k1 + LittleEndian::Load64(s); - // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. 
- len = (len - 1) & ~static_cast(63); - DCHECK_GT(len, 0); - DCHECK_EQ(len, len / 64 * 64); - do { - x = Rotate(x + y + v.first + LittleEndian::Load64(s + 8), 37) * k1; - y = Rotate(y + v.second + LittleEndian::Load64(s + 48), 42) * k1; - x ^= w.second; - y += v.first + LittleEndian::Load64(s + 40); - z = Rotate(z + w.first, 33) * k1; - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, - y + LittleEndian::Load64(s + 16)); - std::swap(z, x); - s += 64; - len -= 64; - } while (len != 0); - return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, - HashLen16(v.second, w.second) + x); + // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. + len = (len - 1) & ~static_cast(63); + DCHECK_GT(len, 0); + DCHECK_EQ(len, len / 64 * 64); + do { + x = Rotate(x + y + v.first + LittleEndian::Load64(s + 8), 37) * k1; + y = Rotate(y + v.second + LittleEndian::Load64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + LittleEndian::Load64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + LittleEndian::Load64(s + 16)); + std::swap(z, x); + s += 64; + len -= 64; + } while (len != 0); + return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, + HashLen16(v.second, w.second) + x); } -uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) { - return CityHash64WithSeeds(s, len, k2, seed); +uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) { + return CityHash64WithSeeds(s, len, k2, seed); } -uint64 CityHash64WithSeeds(const char *s, size_t len, - uint64 seed0, uint64 seed1) { - return HashLen16(CityHash64(s, len) - seed0, seed1); +uint64 CityHash64WithSeeds(const char* s, size_t len, uint64 seed0, uint64 seed1) { + return HashLen16(CityHash64(s, len) - seed0, seed1); } // A subroutine for CityHash128(). 
Returns a decent 128-bit hash for strings // of any length representable in ssize_t. Based on City and Murmur128. -static uint128 CityMurmur(const char *s, size_t len, uint128 seed) { - uint64 a = Uint128Low64(seed); - uint64 b = Uint128High64(seed); - uint64 c = 0; - uint64 d = 0; - ssize_t l = len - 16; - if (l <= 0) { // len <= 16 - c = b * k1 + HashLen0to16(s, len); - d = Rotate(a + (len >= 8 ? LittleEndian::Load64(s) : c), 32); - } else { // len > 16 - c = HashLen16(LittleEndian::Load64(s + len - 8) + k1, a); - d = HashLen16(b + len, c + LittleEndian::Load64(s + len - 16)); - a += d; - do { - a ^= ShiftMix(LittleEndian::Load64(s) * k1) * k1; - a *= k1; - b ^= a; - c ^= ShiftMix(LittleEndian::Load64(s + 8) * k1) * k1; - c *= k1; - d ^= c; - s += 16; - l -= 16; - } while (l > 0); - } - a = HashLen16(a, c); - b = HashLen16(d, b); - return uint128(a ^ b, HashLen16(b, a)); +static uint128 CityMurmur(const char* s, size_t len, uint128 seed) { + uint64 a = Uint128Low64(seed); + uint64 b = Uint128High64(seed); + uint64 c = 0; + uint64 d = 0; + ssize_t l = len - 16; + if (l <= 0) { // len <= 16 + c = b * k1 + HashLen0to16(s, len); + d = Rotate(a + (len >= 8 ? LittleEndian::Load64(s) : c), 32); + } else { // len > 16 + c = HashLen16(LittleEndian::Load64(s + len - 8) + k1, a); + d = HashLen16(b + len, c + LittleEndian::Load64(s + len - 16)); + a += d; + do { + a ^= ShiftMix(LittleEndian::Load64(s) * k1) * k1; + a *= k1; + b ^= a; + c ^= ShiftMix(LittleEndian::Load64(s + 8) * k1) * k1; + c *= k1; + d ^= c; + s += 16; + l -= 16; + } while (l > 0); + } + a = HashLen16(a, c); + b = HashLen16(d, b); + return uint128(a ^ b, HashLen16(b, a)); } -uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) { - // TODO(user): As of February 2011, there's a beta of Murmur3 that would - // most likely be useful here. E.g., if (len < 900) return Murmur3(...) 
- if (len < 128) { - return CityMurmur(s, len, seed); - } +uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) { + // TODO(user): As of February 2011, there's a beta of Murmur3 that would + // most likely be useful here. E.g., if (len < 900) return Murmur3(...) + if (len < 128) { + return CityMurmur(s, len, seed); + } - // We expect len >= 128 to be the common case. Keep 56 bytes of state: - // v, w, x, y, and z. - pair v, w; - uint64 x = Uint128Low64(seed); - uint64 y = Uint128High64(seed); - uint64 z = len * k1; - v.first = Rotate(y ^ k1, 49) * k1 + LittleEndian::Load64(s); - v.second = Rotate(v.first, 42) * k1 + LittleEndian::Load64(s + 8); - w.first = Rotate(y + z, 35) * k1 + x; - w.second = Rotate(x + LittleEndian::Load64(s + 88), 53) * k1; + // We expect len >= 128 to be the common case. Keep 56 bytes of state: + // v, w, x, y, and z. + pair v, w; + uint64 x = Uint128Low64(seed); + uint64 y = Uint128High64(seed); + uint64 z = len * k1; + v.first = Rotate(y ^ k1, 49) * k1 + LittleEndian::Load64(s); + v.second = Rotate(v.first, 42) * k1 + LittleEndian::Load64(s + 8); + w.first = Rotate(y + z, 35) * k1 + x; + w.second = Rotate(x + LittleEndian::Load64(s + 88), 53) * k1; - // This is similar to the inner loop of CityHash64(), manually unrolled. 
- do { - x = Rotate(x + y + v.first + LittleEndian::Load64(s + 16), 37) * k1; - y = Rotate(y + v.second + LittleEndian::Load64(s + 48), 42) * k1; - x ^= w.second; - y ^= v.first; - z = Rotate(z ^ w.first, 33); - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y); - std::swap(z, x); - s += 64; - x = Rotate(x + y + v.first + LittleEndian::Load64(s + 16), 37) * k1; - y = Rotate(y + v.second + LittleEndian::Load64(s + 48), 42) * k1; - x ^= w.second; - y ^= v.first; - z = Rotate(z ^ w.first, 33); - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y); - std::swap(z, x); - s += 64; - len -= 128; - } while (PREDICT_TRUE(len >= 128)); - y += Rotate(w.first, 37) * k0 + z; - x += Rotate(v.first + z, 49) * k0; - // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. - for (size_t tail_done = 0; tail_done < len; ) { - tail_done += 32; - y = Rotate(y - x, 42) * k0 + v.second; - w.first += LittleEndian::Load64(s + len - tail_done + 16); - x = Rotate(x, 49) * k0 + w.first; - w.first += v.first; - v = WeakHashLen32WithSeeds(s + len - tail_done, v.first, v.second); - } - // At this point our 48 bytes of state should contain more than - // enough information for a strong 128-bit hash. We use two - // different 48-byte-to-8-byte hashes to get a 16-byte final result. - x = HashLen16(x, v.first); - y = HashLen16(y, w.first); - return uint128(HashLen16(x + v.second, w.second) + y, - HashLen16(x + w.second, y + v.second)); + // This is similar to the inner loop of CityHash64(), manually unrolled. 
+ do { + x = Rotate(x + y + v.first + LittleEndian::Load64(s + 16), 37) * k1; + y = Rotate(y + v.second + LittleEndian::Load64(s + 48), 42) * k1; + x ^= w.second; + y ^= v.first; + z = Rotate(z ^ w.first, 33); + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y); + std::swap(z, x); + s += 64; + x = Rotate(x + y + v.first + LittleEndian::Load64(s + 16), 37) * k1; + y = Rotate(y + v.second + LittleEndian::Load64(s + 48), 42) * k1; + x ^= w.second; + y ^= v.first; + z = Rotate(z ^ w.first, 33); + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y); + std::swap(z, x); + s += 64; + len -= 128; + } while (PREDICT_TRUE(len >= 128)); + y += Rotate(w.first, 37) * k0 + z; + x += Rotate(v.first + z, 49) * k0; + // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. + for (size_t tail_done = 0; tail_done < len;) { + tail_done += 32; + y = Rotate(y - x, 42) * k0 + v.second; + w.first += LittleEndian::Load64(s + len - tail_done + 16); + x = Rotate(x, 49) * k0 + w.first; + w.first += v.first; + v = WeakHashLen32WithSeeds(s + len - tail_done, v.first, v.second); + } + // At this point our 48 bytes of state should contain more than + // enough information for a strong 128-bit hash. We use two + // different 48-byte-to-8-byte hashes to get a 16-byte final result. 
+ x = HashLen16(x, v.first); + y = HashLen16(y, w.first); + return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second)); } -uint128 CityHash128(const char *s, size_t len) { - if (len >= 16) { - return CityHash128WithSeed(s + 16, - len - 16, - uint128(LittleEndian::Load64(s) ^ k3, - LittleEndian::Load64(s + 8))); - } else if (len >= 8) { - return CityHash128WithSeed(nullptr, - 0, - uint128(LittleEndian::Load64(s) ^ (len * k0), - LittleEndian::Load64(s + len - 8) ^ k1)); - } else { - return CityHash128WithSeed(s, len, uint128(k0, k1)); - } +uint128 CityHash128(const char* s, size_t len) { + if (len >= 16) { + return CityHash128WithSeed( + s + 16, len - 16, + uint128(LittleEndian::Load64(s) ^ k3, LittleEndian::Load64(s + 8))); + } else if (len >= 8) { + return CityHash128WithSeed(nullptr, 0, + uint128(LittleEndian::Load64(s) ^ (len * k0), + LittleEndian::Load64(s + len - 8) ^ k1)); + } else { + return CityHash128WithSeed(s, len, uint128(k0, k1)); + } } -} // namespace util_hash +} // namespace util_hash diff --git a/be/src/gutil/hash/city.h b/be/src/gutil/hash/city.h index d5925e53c7add0..358092ffbc08e9 100644 --- a/be/src/gutil/hash/city.h +++ b/be/src/gutil/hash/city.h @@ -21,7 +21,7 @@ #ifndef UTIL_HASH_CITY_H_ #define UTIL_HASH_CITY_H_ -#include // for size_t. +#include // for size_t. #include "gutil/int128.h" #include "gutil/integral_types.h" @@ -30,24 +30,23 @@ namespace util_hash { // Hash function for a byte array. // The mapping may change from time to time. -uint64 CityHash64(const char *buf, size_t len); +uint64 CityHash64(const char* buf, size_t len); // Hash function for a byte array. For convenience, a 64-bit seed is also // hashed into the result. The mapping may change from time to time. -uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed); +uint64 CityHash64WithSeed(const char* buf, size_t len, uint64 seed); // Hash function for a byte array. 
For convenience, two seeds are also // hashed into the result. The mapping may change from time to time. -uint64 CityHash64WithSeeds(const char *buf, size_t len, - uint64 seed0, uint64 seed1); +uint64 CityHash64WithSeeds(const char* buf, size_t len, uint64 seed0, uint64 seed1); // Hash function for a byte array. The mapping will never change. -uint128 CityHash128(const char *s, size_t len); +uint128 CityHash128(const char* s, size_t len); // Hash function for a byte array. For convenience, a 128-bit seed is also // hashed into the result. The mapping will never change. -uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed); +uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed); -} // namespace util_hash +} // namespace util_hash -#endif // UTIL_HASH_CITY_H_ +#endif // UTIL_HASH_CITY_H_ diff --git a/be/src/gutil/hash/hash.cc b/be/src/gutil/hash/hash.cc index a3f64bc7da2996..9b1e440ffb9b27 100644 --- a/be/src/gutil/hash/hash.cc +++ b/be/src/gutil/hash/hash.cc @@ -9,10 +9,11 @@ #include "gutil/hash/hash.h" -#include "gutil/integral_types.h" #include + #include "gutil/hash/jenkins.h" #include "gutil/hash/jenkins_lookup2.h" +#include "gutil/integral_types.h" // For components that ship code externally (notably the Google Search // Appliance) we want to change the fingerprint function so that @@ -32,154 +33,158 @@ static const uint32 kFingerprintSeed1 = 102072; #endif static inline uint32 char2unsigned(char c) { - return static_cast(static_cast(c)); + return static_cast(static_cast(c)); } -uint64 FingerprintReferenceImplementation(const char *s, uint32 len) { - uint32 hi = Hash32StringWithSeed(s, len, kFingerprintSeed0); - uint32 lo = Hash32StringWithSeed(s, len, kFingerprintSeed1); - return CombineFingerprintHalves(hi, lo); +uint64 FingerprintReferenceImplementation(const char* s, uint32 len) { + uint32 hi = Hash32StringWithSeed(s, len, kFingerprintSeed0); + uint32 lo = Hash32StringWithSeed(s, len, kFingerprintSeed1); + return 
CombineFingerprintHalves(hi, lo); } // This is a faster version of FingerprintReferenceImplementation(), // making use of the fact that we're hashing the same string twice. // The code is tedious to read, but it's just two interleaved copies of // Hash32StringWithSeed(). -uint64 FingerprintInterleavedImplementation(const char *s, uint32 len) { - uint32 a, b, c = kFingerprintSeed0, d, e, f = kFingerprintSeed1; - uint32 keylen; +uint64 FingerprintInterleavedImplementation(const char* s, uint32 len) { + uint32 a, b, c = kFingerprintSeed0, d, e, f = kFingerprintSeed1; + uint32 keylen; - a = b = d = e = 0x9e3779b9UL; // the golden ratio; an arbitrary value + a = b = d = e = 0x9e3779b9UL; // the golden ratio; an arbitrary value - keylen = len; - if (keylen >= 4 * sizeof(a)) { - uint32 word32AtOffset0 = Google1At(s); - do { - a += word32AtOffset0; - d += word32AtOffset0; - b += Google1At(s + sizeof(a)); - e += Google1At(s + sizeof(a)); - c += Google1At(s + sizeof(a) * 2); - f += Google1At(s + sizeof(a) * 2); - s += 3 * sizeof(a); - word32AtOffset0 = Google1At(s); - mix(a, b, c); - mix(d, e, f); - keylen -= 3 * static_cast(sizeof(a)); - } while (keylen >= 4 * sizeof(a)); - if (keylen >= 3 * sizeof(a)) { - a += word32AtOffset0; - d += word32AtOffset0; - b += Google1At(s + sizeof(a)); - e += Google1At(s + sizeof(a)); - c += Google1At(s + sizeof(a) * 2); - f += Google1At(s + sizeof(a) * 2); - s += 3 * sizeof(a); - mix(a, b, c); - mix(d, e, f); - keylen -= 3 * static_cast(sizeof(a)); - DCHECK_LT(keylen, sizeof(a)); - c += len; - f += len; - switch ( keylen ) { // deal with rest. 
Cases fall through - case 3 : - a += char2unsigned(s[2]) << 16; - d += char2unsigned(s[2]) << 16; - case 2 : - a += char2unsigned(s[1]) << 8; - d += char2unsigned(s[1]) << 8; - case 1 : - a += char2unsigned(s[0]); - d += char2unsigned(s[0]); - } + keylen = len; + if (keylen >= 4 * sizeof(a)) { + uint32 word32AtOffset0 = Google1At(s); + do { + a += word32AtOffset0; + d += word32AtOffset0; + b += Google1At(s + sizeof(a)); + e += Google1At(s + sizeof(a)); + c += Google1At(s + sizeof(a) * 2); + f += Google1At(s + sizeof(a) * 2); + s += 3 * sizeof(a); + word32AtOffset0 = Google1At(s); + mix(a, b, c); + mix(d, e, f); + keylen -= 3 * static_cast(sizeof(a)); + } while (keylen >= 4 * sizeof(a)); + if (keylen >= 3 * sizeof(a)) { + a += word32AtOffset0; + d += word32AtOffset0; + b += Google1At(s + sizeof(a)); + e += Google1At(s + sizeof(a)); + c += Google1At(s + sizeof(a) * 2); + f += Google1At(s + sizeof(a) * 2); + s += 3 * sizeof(a); + mix(a, b, c); + mix(d, e, f); + keylen -= 3 * static_cast(sizeof(a)); + DCHECK_LT(keylen, sizeof(a)); + c += len; + f += len; + switch (keylen) { // deal with rest. Cases fall through + case 3: + a += char2unsigned(s[2]) << 16; + d += char2unsigned(s[2]) << 16; + case 2: + a += char2unsigned(s[1]) << 8; + d += char2unsigned(s[1]) << 8; + case 1: + a += char2unsigned(s[0]); + d += char2unsigned(s[0]); + } + } else { + DCHECK(sizeof(a) <= keylen && keylen < 3 * sizeof(a)); + c += len; + f += len; + switch (keylen) { // deal with rest. 
Cases fall through + case 11: + c += char2unsigned(s[10]) << 24; + f += char2unsigned(s[10]) << 24; + case 10: + c += char2unsigned(s[9]) << 16; + f += char2unsigned(s[9]) << 16; + case 9: + c += char2unsigned(s[8]) << 8; + f += char2unsigned(s[8]) << 8; + case 8: + b += Google1At(s + 4); + a += word32AtOffset0; + e += Google1At(s + 4); + d += word32AtOffset0; + break; + case 7: + b += char2unsigned(s[6]) << 16; + e += char2unsigned(s[6]) << 16; + case 6: + b += char2unsigned(s[5]) << 8; + e += char2unsigned(s[5]) << 8; + case 5: + b += char2unsigned(s[4]); + e += char2unsigned(s[4]); + case 4: + a += word32AtOffset0; + d += word32AtOffset0; + } + } } else { - DCHECK(sizeof(a) <= keylen && keylen < 3 * sizeof(a)); - c += len; - f += len; - switch ( keylen ) { // deal with rest. Cases fall through + if (keylen >= 3 * sizeof(a)) { + a += Google1At(s); + d += Google1At(s); + b += Google1At(s + sizeof(a)); + e += Google1At(s + sizeof(a)); + c += Google1At(s + sizeof(a) * 2); + f += Google1At(s + sizeof(a) * 2); + s += 3 * sizeof(a); + mix(a, b, c); + mix(d, e, f); + keylen -= 3 * static_cast(sizeof(a)); + } + c += len; + f += len; + switch (keylen) { // deal with rest. 
Cases fall through case 11: - c += char2unsigned(s[10]) << 24; - f += char2unsigned(s[10]) << 24; + c += char2unsigned(s[10]) << 24; + f += char2unsigned(s[10]) << 24; case 10: - c += char2unsigned(s[9]) << 16; - f += char2unsigned(s[9]) << 16; - case 9 : - c += char2unsigned(s[8]) << 8; - f += char2unsigned(s[8]) << 8; - case 8 : - b += Google1At(s+4); a += word32AtOffset0; - e += Google1At(s+4); d += word32AtOffset0; - break; - case 7 : - b += char2unsigned(s[6]) << 16; - e += char2unsigned(s[6]) << 16; - case 6 : - b += char2unsigned(s[5]) << 8; - e += char2unsigned(s[5]) << 8; - case 5 : - b += char2unsigned(s[4]); - e += char2unsigned(s[4]); - case 4 : - a += word32AtOffset0; - d += word32AtOffset0; - } - } - } else { - if (keylen >= 3 * sizeof(a)) { - a += Google1At(s); - d += Google1At(s); - b += Google1At(s + sizeof(a)); - e += Google1At(s + sizeof(a)); - c += Google1At(s + sizeof(a) * 2); - f += Google1At(s + sizeof(a) * 2); - s += 3 * sizeof(a); - mix(a, b, c); - mix(d, e, f); - keylen -= 3 * static_cast(sizeof(a)); - } - c += len; - f += len; - switch ( keylen ) { // deal with rest. 
Cases fall through - case 11: - c += char2unsigned(s[10]) << 24; - f += char2unsigned(s[10]) << 24; - case 10: - c += char2unsigned(s[9]) << 16; - f += char2unsigned(s[9]) << 16; - case 9 : - c += char2unsigned(s[8]) << 8; - f += char2unsigned(s[8]) << 8; - case 8 : - b += Google1At(s+4); a += Google1At(s); - e += Google1At(s+4); d += Google1At(s); - break; - case 7 : - b += char2unsigned(s[6]) << 16; - e += char2unsigned(s[6]) << 16; - case 6 : - b += char2unsigned(s[5]) << 8; - e += char2unsigned(s[5]) << 8; - case 5 : - b += char2unsigned(s[4]); - e += char2unsigned(s[4]); - case 4 : - a += Google1At(s); - d += Google1At(s); - break; - case 3 : - a += char2unsigned(s[2]) << 16; - d += char2unsigned(s[2]) << 16; - case 2 : - a += char2unsigned(s[1]) << 8; - d += char2unsigned(s[1]) << 8; - case 1 : - a += char2unsigned(s[0]); - d += char2unsigned(s[0]); + c += char2unsigned(s[9]) << 16; + f += char2unsigned(s[9]) << 16; + case 9: + c += char2unsigned(s[8]) << 8; + f += char2unsigned(s[8]) << 8; + case 8: + b += Google1At(s + 4); + a += Google1At(s); + e += Google1At(s + 4); + d += Google1At(s); + break; + case 7: + b += char2unsigned(s[6]) << 16; + e += char2unsigned(s[6]) << 16; + case 6: + b += char2unsigned(s[5]) << 8; + e += char2unsigned(s[5]) << 8; + case 5: + b += char2unsigned(s[4]); + e += char2unsigned(s[4]); + case 4: + a += Google1At(s); + d += Google1At(s); + break; + case 3: + a += char2unsigned(s[2]) << 16; + d += char2unsigned(s[2]) << 16; + case 2: + a += char2unsigned(s[1]) << 8; + d += char2unsigned(s[1]) << 8; + case 1: + a += char2unsigned(s[0]); + d += char2unsigned(s[0]); + } } - } - mix(a, b, c); - mix(d, e, f); - return CombineFingerprintHalves(c, f); + mix(a, b, c); + mix(d, e, f); + return CombineFingerprintHalves(c, f); } // Extern template definitions. 
@@ -191,6 +196,6 @@ namespace __gnu_cxx { template class hash_set; template class hash_map; -} // namespace __gnu_cxx +} // namespace __gnu_cxx #endif diff --git a/be/src/gutil/hash/hash.h b/be/src/gutil/hash/hash.h index 5bf76e83b226d3..8d76001c653164 100644 --- a/be/src/gutil/hash/hash.h +++ b/be/src/gutil/hash/hash.h @@ -74,34 +74,33 @@ #define UTIL_HASH_HASH_H_ #include -#include // for uintptr_t +#include // for uintptr_t #include + #include #include using __gnu_cxx::hash; -using __gnu_cxx::hash_map; // hacky way to make sure we import standard hash<> fns +using __gnu_cxx::hash_map; // hacky way to make sure we import standard hash<> fns #include using __gnu_cxx::hash; using __gnu_cxx::hash_set; +#include #include #include #include "gutil/casts.h" -#include "gutil/int128.h" -#include "gutil/integral_types.h" -#include "gutil/macros.h" -#include "gutil/port.h" #include "gutil/hash/city.h" #include "gutil/hash/hash128to64.h" #include "gutil/hash/jenkins.h" #include "gutil/hash/jenkins_lookup2.h" #include "gutil/hash/legacy_hash.h" #include "gutil/hash/string_hash.h" - -#include +#include "gutil/int128.h" +#include "gutil/integral_types.h" +#include "gutil/macros.h" +#include "gutil/port.h" namespace __gnu_cxx { - // STLport and MSVC 10.0 above already define these. #if !defined(_STLP_LONG_LONG) && !(defined(_MSC_VER) && _MSC_VER >= 1600) @@ -110,20 +109,18 @@ namespace __gnu_cxx { // this hash struct. STLport already defines this. 
template struct hash { - size_t operator()(const T& t) const; + size_t operator()(const T& t) const; }; -#endif // defined(_MSC_VER) +#endif // defined(_MSC_VER) -#endif // !defined(_STLP_LONG_LONG) && !(defined(_MSC_VER) && _MSC_VER >= 1600) +#endif // !defined(_STLP_LONG_LONG) && !(defined(_MSC_VER) && _MSC_VER >= 1600) -template<> struct hash { - size_t operator()(bool x) const { return static_cast(x); } +template <> +struct hash { + size_t operator()(bool x) const { return static_cast(x); } }; - -} // namespace __gnu_cxx - - +} // namespace __gnu_cxx // ---------------------------------------------------------------------- // Fingerprint() @@ -148,56 +145,56 @@ template<> struct hash { // strings with large edit distances.These issues, among others, // led to the recommendation that new code should avoid Fingerprint(). // ---------------------------------------------------------------------- -extern uint64 FingerprintReferenceImplementation(const char *s, uint32 len); -extern uint64 FingerprintInterleavedImplementation(const char *s, uint32 len); -inline uint64 Fingerprint(const char *s, uint32 len) { - if (sizeof(s) == 8) { // 64-bit systems have 8-byte pointers. - // The better choice when we have a decent number of registers. - return FingerprintInterleavedImplementation(s, len); - } else { - return FingerprintReferenceImplementation(s, len); - } +extern uint64 FingerprintReferenceImplementation(const char* s, uint32 len); +extern uint64 FingerprintInterleavedImplementation(const char* s, uint32 len); +inline uint64 Fingerprint(const char* s, uint32 len) { + if (sizeof(s) == 8) { // 64-bit systems have 8-byte pointers. + // The better choice when we have a decent number of registers. + return FingerprintInterleavedImplementation(s, len); + } else { + return FingerprintReferenceImplementation(s, len); + } } // Routine that combines together the hi/lo part of a fingerprint // and changes the result appropriately to avoid returning 0/1. 
inline uint64 CombineFingerprintHalves(uint32 hi, uint32 lo) { - uint64 result = (static_cast(hi) << 32) | static_cast(lo); - if ((hi == 0) && (lo < 2)) { - result ^= GG_ULONGLONG(0x130f9bef94a0a928); - } - return result; + uint64 result = (static_cast(hi) << 32) | static_cast(lo); + if ((hi == 0) && (lo < 2)) { + result ^= GG_ULONGLONG(0x130f9bef94a0a928); + } + return result; } inline uint64 Fingerprint(const std::string& s) { - return Fingerprint(s.data(), static_cast(s.size())); + return Fingerprint(s.data(), static_cast(s.size())); } inline uint64 Hash64StringWithSeed(const std::string& s, uint64 c) { - return Hash64StringWithSeed(s.data(), static_cast(s.size()), c); + return Hash64StringWithSeed(s.data(), static_cast(s.size()), c); } inline uint64 Fingerprint(schar c) { - return Hash64NumWithSeed(static_cast(c), MIX64); + return Hash64NumWithSeed(static_cast(c), MIX64); } inline uint64 Fingerprint(char c) { - return Hash64NumWithSeed(static_cast(c), MIX64); + return Hash64NumWithSeed(static_cast(c), MIX64); } inline uint64 Fingerprint(uint16 c) { - return Hash64NumWithSeed(static_cast(c), MIX64); + return Hash64NumWithSeed(static_cast(c), MIX64); } inline uint64 Fingerprint(int16 c) { - return Hash64NumWithSeed(static_cast(c), MIX64); + return Hash64NumWithSeed(static_cast(c), MIX64); } inline uint64 Fingerprint(uint32 c) { - return Hash64NumWithSeed(static_cast(c), MIX64); + return Hash64NumWithSeed(static_cast(c), MIX64); } inline uint64 Fingerprint(int32 c) { - return Hash64NumWithSeed(static_cast(c), MIX64); + return Hash64NumWithSeed(static_cast(c), MIX64); } inline uint64 Fingerprint(uint64 c) { - return Hash64NumWithSeed(static_cast(c), MIX64); + return Hash64NumWithSeed(static_cast(c), MIX64); } inline uint64 Fingerprint(int64 c) { - return Hash64NumWithSeed(static_cast(c), MIX64); + return Hash64NumWithSeed(static_cast(c), MIX64); } // This concatenates two 64-bit fingerprints. 
It is a convenience function to @@ -211,36 +208,33 @@ inline uint64 Fingerprint(int64 c) { // FingerprintCat(Fingerprint(x), Fingerprint(y)) to indicate // anything about Fingerprint(StrCat(x, y)). inline uint64 FingerprintCat(uint64 fp1, uint64 fp2) { - return Hash64NumWithSeed(fp1, fp2); + return Hash64NumWithSeed(fp1, fp2); } #include namespace __gnu_cxx { - // This intended to be a "good" hash function. It may change from time to time. -template<> struct hash { - size_t operator()(const uint128& x) const { - if (sizeof(&x) == 8) { // 64-bit systems have 8-byte pointers. - return Hash128to64(x); - } else { - uint32 a = static_cast(Uint128Low64(x)) + - static_cast(0x9e3779b9UL); - uint32 b = static_cast(Uint128Low64(x) >> 32) + - static_cast(0x9e3779b9UL); - uint32 c = static_cast(Uint128High64(x)) + MIX32; - mix(a, b, c); - a += static_cast(Uint128High64(x) >> 32); - mix(a, b, c); - return c; +template <> +struct hash { + size_t operator()(const uint128& x) const { + if (sizeof(&x) == 8) { // 64-bit systems have 8-byte pointers. + return Hash128to64(x); + } else { + uint32 a = static_cast(Uint128Low64(x)) + static_cast(0x9e3779b9UL); + uint32 b = + static_cast(Uint128Low64(x) >> 32) + static_cast(0x9e3779b9UL); + uint32 c = static_cast(Uint128High64(x)) + MIX32; + mix(a, b, c); + a += static_cast(Uint128High64(x) >> 32); + mix(a, b, c); + return c; + } } - } - // Less than operator for MSVC use. - bool operator()(const uint128& a, const uint128& b) const { - return a < b; - } - static const size_t bucket_size = 4; // These are required by MSVC - static const size_t min_buckets = 8; // 4 and 8 are defaults. + // Less than operator for MSVC use. + bool operator()(const uint128& a, const uint128& b) const { return a < b; } + static const size_t bucket_size = 4; // These are required by MSVC + static const size_t min_buckets = 8; // 4 and 8 are defaults. }; // Avoid collision with definition in port_hash.h (via port.h). 
@@ -248,85 +242,77 @@ template<> struct hash { #define HAVE_DEFINED_HASH_FOR_POINTERS // Hash pointers as if they were int's, but bring more entropy to // the lower bits. -template struct hash { - size_t operator()(T *x) const { - size_t k = reinterpret_cast(x); - return k + (k >> 6); - } +template +struct hash { + size_t operator()(T* x) const { + size_t k = reinterpret_cast(x); + return k + (k >> 6); + } }; -#endif // HAVE_DEFINED_HASH_FOR_POINTERS +#endif // HAVE_DEFINED_HASH_FOR_POINTERS #if defined(__GNUC__) // Use our nice hash function for strings -template +template struct hash> { - size_t operator()(const std::basic_string<_CharT, _Traits, _Alloc>& k) const { - return HashTo32(k.data(), static_cast(k.length())); - } + size_t operator()(const std::basic_string<_CharT, _Traits, _Alloc>& k) const { + return HashTo32(k.data(), static_cast(k.length())); + } }; // they don't define a hash for const string at all -template<> struct hash { - size_t operator()(const std::string& k) const { - return HashTo32(k.data(), static_cast(k.length())); - } +template <> +struct hash { + size_t operator()(const std::string& k) const { + return HashTo32(k.data(), static_cast(k.length())); + } }; -#endif // defined(__GNUC__) +#endif // defined(__GNUC__) // MSVC's STL requires an ever-so slightly different decl #if defined(STL_MSVC) -template<> struct hash { - size_t operator()(char const* const k) const { - return HashTo32(k, strlen(k)); - } - // Less than operator: - bool operator()(char const* const a, char const* const b) const { - return strcmp(a, b) < 0; - } - static const size_t bucket_size = 4; // These are required by MSVC - static const size_t min_buckets = 8; // 4 and 8 are defaults. 
+template <> +struct hash { + size_t operator()(char const* const k) const { return HashTo32(k, strlen(k)); } + // Less than operator: + bool operator()(char const* const a, char const* const b) const { return strcmp(a, b) < 0; } + static const size_t bucket_size = 4; // These are required by MSVC + static const size_t min_buckets = 8; // 4 and 8 are defaults. }; // MSVC 10.0 and above have already defined this. #if !defined(_MSC_VER) || _MSC_VER < 1600 -template<> struct hash { - size_t operator()(const std::string& k) const { - return HashTo32(k.data(), k.length()); - } - // Less than operator: - bool operator()(const std::string& a, const std::string& b) const { - return a < b; - } - static const size_t bucket_size = 4; // These are required by MSVC - static const size_t min_buckets = 8; // 4 and 8 are defaults. +template <> +struct hash { + size_t operator()(const std::string& k) const { return HashTo32(k.data(), k.length()); } + // Less than operator: + bool operator()(const std::string& a, const std::string& b) const { return a < b; } + static const size_t bucket_size = 4; // These are required by MSVC + static const size_t min_buckets = 8; // 4 and 8 are defaults. }; -#endif // !defined(_MSC_VER) || _MSC_VER < 1600 +#endif // !defined(_MSC_VER) || _MSC_VER < 1600 -#endif // defined(STL_MSVC) +#endif // defined(STL_MSVC) // Hasher for STL pairs. Requires hashers for both members to be defined -template +template struct hash> { - size_t operator()(const pair& p) const { - size_t h1 = hash()(p.first); - size_t h2 = hash()(p.second); - // The decision below is at compile time - return (sizeof(h1) <= sizeof(uint32)) ? - Hash32NumWithSeed(h1, h2) - : Hash64NumWithSeed(h1, h2); - } - // Less than operator for MSVC. - bool operator()(const pair& a, - const pair& b) const { - return a < b; - } - static const size_t bucket_size = 4; // These are required by MSVC - static const size_t min_buckets = 8; // 4 and 8 are defaults. 
+ size_t operator()(const pair& p) const { + size_t h1 = hash()(p.first); + size_t h2 = hash()(p.second); + // The decision below is at compile time + return (sizeof(h1) <= sizeof(uint32)) ? Hash32NumWithSeed(h1, h2) + : Hash64NumWithSeed(h1, h2); + } + // Less than operator for MSVC. + bool operator()(const pair& a, const pair& b) const { + return a < b; + } + static const size_t bucket_size = 4; // These are required by MSVC + static const size_t min_buckets = 8; // 4 and 8 are defaults. }; - -} // namespace __gnu_cxx - +} // namespace __gnu_cxx // If you want an excellent string hash function, and you don't mind if it // might change when you sync and recompile, please use GoodFastHash<>. @@ -339,62 +325,57 @@ struct hash> { // unsafe to delete *iterator because the hash function may be called on // the next iterator advance. Use STLDeleteContainerPointers(). -template struct GoodFastHash; +template +struct GoodFastHash; // This intended to be a "good" hash function. It may change from time to time. -template<> struct GoodFastHash { - size_t operator()(const char* s) const { - return HashStringThoroughly(s, strlen(s)); - } - // Less than operator for MSVC. - bool operator()(const char* a, const char* b) const { - return strcmp(a, b) < 0; - } - static const size_t bucket_size = 4; // These are required by MSVC - static const size_t min_buckets = 8; // 4 and 8 are defaults. +template <> +struct GoodFastHash { + size_t operator()(const char* s) const { return HashStringThoroughly(s, strlen(s)); } + // Less than operator for MSVC. + bool operator()(const char* a, const char* b) const { return strcmp(a, b) < 0; } + static const size_t bucket_size = 4; // These are required by MSVC + static const size_t min_buckets = 8; // 4 and 8 are defaults. }; // This intended to be a "good" hash function. It may change from time to time. 
-template<> struct GoodFastHash { - size_t operator()(const char* s) const { - return HashStringThoroughly(s, strlen(s)); - } - // Less than operator for MSVC. - bool operator()(const char* a, const char* b) const { - return strcmp(a, b) < 0; - } - static const size_t bucket_size = 4; // These are required by MSVC - static const size_t min_buckets = 8; // 4 and 8 are defaults. +template <> +struct GoodFastHash { + size_t operator()(const char* s) const { return HashStringThoroughly(s, strlen(s)); } + // Less than operator for MSVC. + bool operator()(const char* a, const char* b) const { return strcmp(a, b) < 0; } + static const size_t bucket_size = 4; // These are required by MSVC + static const size_t min_buckets = 8; // 4 and 8 are defaults. }; // This intended to be a "good" hash function. It may change from time to time. -template +template struct GoodFastHash> { - size_t operator()(const std::basic_string<_CharT, _Traits, _Alloc>& k) const { - return HashStringThoroughly(k.data(), k.length() * sizeof(k[0])); - } - // Less than operator for MSVC. - bool operator()(const std::basic_string<_CharT, _Traits, _Alloc>& a, - const std::basic_string<_CharT, _Traits, _Alloc>& b) const { - return a < b; - } - static const size_t bucket_size = 4; // These are required by MSVC - static const size_t min_buckets = 8; // 4 and 8 are defaults. + size_t operator()(const std::basic_string<_CharT, _Traits, _Alloc>& k) const { + return HashStringThoroughly(k.data(), k.length() * sizeof(k[0])); + } + // Less than operator for MSVC. + bool operator()(const std::basic_string<_CharT, _Traits, _Alloc>& a, + const std::basic_string<_CharT, _Traits, _Alloc>& b) const { + return a < b; + } + static const size_t bucket_size = 4; // These are required by MSVC + static const size_t min_buckets = 8; // 4 and 8 are defaults. }; // This intended to be a "good" hash function. It may change from time to time. 
-template +template struct GoodFastHash> { - size_t operator()(const std::basic_string<_CharT, _Traits, _Alloc>& k) const { - return HashStringThoroughly(k.data(), k.length() * sizeof(k[0])); - } - // Less than operator for MSVC. - bool operator()(const std::basic_string<_CharT, _Traits, _Alloc>& a, - const std::basic_string<_CharT, _Traits, _Alloc>& b) const { - return a < b; - } - static const size_t bucket_size = 4; // These are required by MSVC - static const size_t min_buckets = 8; // 4 and 8 are defaults. + size_t operator()(const std::basic_string<_CharT, _Traits, _Alloc>& k) const { + return HashStringThoroughly(k.data(), k.length() * sizeof(k[0])); + } + // Less than operator for MSVC. + bool operator()(const std::basic_string<_CharT, _Traits, _Alloc>& a, + const std::basic_string<_CharT, _Traits, _Alloc>& b) const { + return a < b; + } + static const size_t bucket_size = 4; // These are required by MSVC + static const size_t min_buckets = 8; // 4 and 8 are defaults. }; // Extern template declarations. @@ -412,8 +393,8 @@ namespace __gnu_cxx { extern template class hash_set; extern template class hash_map; -} // namespace __gnu_cxx +} // namespace __gnu_cxx -#endif // defined(__GNUC__) +#endif // defined(__GNUC__) -#endif // UTIL_HASH_HASH_H_ +#endif // UTIL_HASH_HASH_H_ diff --git a/be/src/gutil/hash/hash128to64.h b/be/src/gutil/hash/hash128to64.h index af9566206c28e3..c65a3817217469 100644 --- a/be/src/gutil/hash/hash128to64.h +++ b/be/src/gutil/hash/hash128to64.h @@ -11,14 +11,14 @@ // This is intended to be a reasonably good hash function. // It may change from time to time. inline uint64 Hash128to64(const uint128& x) { - // Murmur-inspired hashing. - const uint64 kMul = 0xc6a4a7935bd1e995ULL; - uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; - a ^= (a >> 47); - uint64 b = (Uint128High64(x) ^ a) * kMul; - b ^= (b >> 47); - b *= kMul; - return b; + // Murmur-inspired hashing. 
+ const uint64 kMul = 0xc6a4a7935bd1e995ULL; + uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; + a ^= (a >> 47); + uint64 b = (Uint128High64(x) ^ a) * kMul; + b ^= (b >> 47); + b *= kMul; + return b; } -#endif // UTIL_HASH_HASH128TO64_H_ +#endif // UTIL_HASH_HASH128TO64_H_ diff --git a/be/src/gutil/hash/jenkins.cc b/be/src/gutil/hash/jenkins.cc index a8169291c124b2..bcd48710ba0549 100644 --- a/be/src/gutil/hash/jenkins.cc +++ b/be/src/gutil/hash/jenkins.cc @@ -18,170 +18,237 @@ #include "gutil/hash/jenkins.h" -#include "gutil/integral_types.h" #include + #include "gutil/hash/jenkins_lookup2.h" +#include "gutil/integral_types.h" static inline uint32 char2unsigned(char c) { - return static_cast(static_cast(c)); + return static_cast(static_cast(c)); } static inline uint64 char2unsigned64(char c) { - return static_cast(static_cast(c)); + return static_cast(static_cast(c)); } -uint32 Hash32StringWithSeedReferenceImplementation(const char *s, uint32 len, - uint32 c) { - uint32 a, b; - uint32 keylen; - - a = b = 0x9e3779b9UL; // the golden ratio; an arbitrary value - - for ( keylen = len; keylen >= 3*sizeof(a); - keylen -= static_cast(3*sizeof(a)), s += 3*sizeof(a) ) { - a += Google1At(s); - b += Google1At(s + sizeof(a)); - c += Google1At(s + sizeof(a)*2); - mix(a,b,c); - } - - c += len; - switch ( keylen ) { // deal with rest. 
Cases fall through - case 11: c += char2unsigned(s[10]) << 24; - case 10: c += char2unsigned(s[9]) << 16; - case 9 : c += char2unsigned(s[8]) << 8; - // the first byte of c is reserved for the length - case 8 : b += Google1At(s+4); a += Google1At(s); break; - case 7 : b += char2unsigned(s[6]) << 16; - case 6 : b += char2unsigned(s[5]) << 8; - case 5 : b += char2unsigned(s[4]); - case 4 : a += Google1At(s); break; - case 3 : a += char2unsigned(s[2]) << 16; - case 2 : a += char2unsigned(s[1]) << 8; - case 1 : a += char2unsigned(s[0]); - // case 0 : nothing left to add - } - mix(a,b,c); - return c; -} +uint32 Hash32StringWithSeedReferenceImplementation(const char* s, uint32 len, uint32 c) { + uint32 a, b; + uint32 keylen; + a = b = 0x9e3779b9UL; // the golden ratio; an arbitrary value -uint32 Hash32StringWithSeed(const char *s, uint32 len, uint32 c) { - uint32 a, b; - uint32 keylen; - - a = b = 0x9e3779b9UL; // the golden ratio; an arbitrary value - - keylen = len; - if (keylen >= 4 * sizeof(a)) { - uint32 word32AtOffset0 = Google1At(s); - do { - a += word32AtOffset0; - b += Google1At(s + sizeof(a)); - c += Google1At(s + sizeof(a) * 2); - s += 3 * sizeof(a); - word32AtOffset0 = Google1At(s); - mix(a, b, c); - keylen -= 3 * static_cast(sizeof(a)); - } while (keylen >= 4 * sizeof(a)); - if (keylen >= 3 * sizeof(a)) { - a += word32AtOffset0; - b += Google1At(s + sizeof(a)); - c += Google1At(s + sizeof(a) * 2); - s += 3 * sizeof(a); - mix(a, b, c); - keylen -= 3 * static_cast(sizeof(a)); - DCHECK_LT(keylen, sizeof(a)); - c += len; - switch ( keylen ) { // deal with rest. Cases fall through - case 3 : a += char2unsigned(s[2]) << 16; - case 2 : a += char2unsigned(s[1]) << 8; - case 1 : a += char2unsigned(s[0]); - } - } else { - DCHECK(sizeof(a) <= keylen && keylen < 3 * sizeof(a)); - c += len; - switch ( keylen ) { // deal with rest. 
Cases fall through - case 11: c += char2unsigned(s[10]) << 24; - case 10: c += char2unsigned(s[9]) << 16; - case 9 : c += char2unsigned(s[8]) << 8; - case 8 : b += Google1At(s+4); a += word32AtOffset0; break; - case 7 : b += char2unsigned(s[6]) << 16; - case 6 : b += char2unsigned(s[5]) << 8; - case 5 : b += char2unsigned(s[4]); - case 4 : a += word32AtOffset0; break; - } - } - } else { - if (keylen >= 3 * sizeof(a)) { - a += Google1At(s); - b += Google1At(s + sizeof(a)); - c += Google1At(s + sizeof(a) * 2); - s += 3 * sizeof(a); - mix(a, b, c); - keylen -= 3 * static_cast(sizeof(a)); + for (keylen = len; keylen >= 3 * sizeof(a); + keylen -= static_cast(3 * sizeof(a)), s += 3 * sizeof(a)) { + a += Google1At(s); + b += Google1At(s + sizeof(a)); + c += Google1At(s + sizeof(a) * 2); + mix(a, b, c); } + c += len; - switch ( keylen ) { // deal with rest. Cases fall through - case 11: c += char2unsigned(s[10]) << 24; - case 10: c += char2unsigned(s[9]) << 16; - case 9 : c += char2unsigned(s[8]) << 8; - case 8 : b += Google1At(s+4); a += Google1At(s); break; - case 7 : b += char2unsigned(s[6]) << 16; - case 6 : b += char2unsigned(s[5]) << 8; - case 5 : b += char2unsigned(s[4]); - case 4 : a += Google1At(s); break; - case 3 : a += char2unsigned(s[2]) << 16; - case 2 : a += char2unsigned(s[1]) << 8; - case 1 : a += char2unsigned(s[0]); + switch (keylen) { // deal with rest. 
Cases fall through + case 11: + c += char2unsigned(s[10]) << 24; + case 10: + c += char2unsigned(s[9]) << 16; + case 9: + c += char2unsigned(s[8]) << 8; + // the first byte of c is reserved for the length + case 8: + b += Google1At(s + 4); + a += Google1At(s); + break; + case 7: + b += char2unsigned(s[6]) << 16; + case 6: + b += char2unsigned(s[5]) << 8; + case 5: + b += char2unsigned(s[4]); + case 4: + a += Google1At(s); + break; + case 3: + a += char2unsigned(s[2]) << 16; + case 2: + a += char2unsigned(s[1]) << 8; + case 1: + a += char2unsigned(s[0]); + // case 0 : nothing left to add } - } - mix(a, b, c); - return c; + mix(a, b, c); + return c; } -uint64 Hash64StringWithSeed(const char *s, uint32 len, uint64 c) { - uint64 a, b; - uint32 keylen; - - a = b = GG_ULONGLONG(0xe08c1d668b756f82); // the golden ratio; an arbitrary value - - for ( keylen = len; keylen >= 3 * sizeof(a); - keylen -= 3 * static_cast(sizeof(a)), s += 3 * sizeof(a) ) { - a += Word64At(s); - b += Word64At(s + sizeof(a)); - c += Word64At(s + sizeof(a) * 2); - mix(a,b,c); - } - - c += len; - switch ( keylen ) { // deal with rest. 
Cases fall through - case 23: c += char2unsigned64(s[22]) << 56; - case 22: c += char2unsigned64(s[21]) << 48; - case 21: c += char2unsigned64(s[20]) << 40; - case 20: c += char2unsigned64(s[19]) << 32; - case 19: c += char2unsigned64(s[18]) << 24; - case 18: c += char2unsigned64(s[17]) << 16; - case 17: c += char2unsigned64(s[16]) << 8; - // the first byte of c is reserved for the length - case 16: b += Word64At(s+8); a += Word64At(s); break; - case 15: b += char2unsigned64(s[14]) << 48; - case 14: b += char2unsigned64(s[13]) << 40; - case 13: b += char2unsigned64(s[12]) << 32; - case 12: b += char2unsigned64(s[11]) << 24; - case 11: b += char2unsigned64(s[10]) << 16; - case 10: b += char2unsigned64(s[ 9]) << 8; - case 9: b += char2unsigned64(s[ 8]) ; - case 8: a += Word64At(s); break; - case 7: a += char2unsigned64(s[ 6]) << 48; - case 6: a += char2unsigned64(s[ 5]) << 40; - case 5: a += char2unsigned64(s[ 4]) << 32; - case 4: a += char2unsigned64(s[ 3]) << 24; - case 3: a += char2unsigned64(s[ 2]) << 16; - case 2: a += char2unsigned64(s[ 1]) << 8; - case 1: a += char2unsigned64(s[ 0]) ; - // case 0: nothing left to add - } - mix(a,b,c); - return c; +uint32 Hash32StringWithSeed(const char* s, uint32 len, uint32 c) { + uint32 a, b; + uint32 keylen; + + a = b = 0x9e3779b9UL; // the golden ratio; an arbitrary value + + keylen = len; + if (keylen >= 4 * sizeof(a)) { + uint32 word32AtOffset0 = Google1At(s); + do { + a += word32AtOffset0; + b += Google1At(s + sizeof(a)); + c += Google1At(s + sizeof(a) * 2); + s += 3 * sizeof(a); + word32AtOffset0 = Google1At(s); + mix(a, b, c); + keylen -= 3 * static_cast(sizeof(a)); + } while (keylen >= 4 * sizeof(a)); + if (keylen >= 3 * sizeof(a)) { + a += word32AtOffset0; + b += Google1At(s + sizeof(a)); + c += Google1At(s + sizeof(a) * 2); + s += 3 * sizeof(a); + mix(a, b, c); + keylen -= 3 * static_cast(sizeof(a)); + DCHECK_LT(keylen, sizeof(a)); + c += len; + switch (keylen) { // deal with rest. 
Cases fall through + case 3: + a += char2unsigned(s[2]) << 16; + case 2: + a += char2unsigned(s[1]) << 8; + case 1: + a += char2unsigned(s[0]); + } + } else { + DCHECK(sizeof(a) <= keylen && keylen < 3 * sizeof(a)); + c += len; + switch (keylen) { // deal with rest. Cases fall through + case 11: + c += char2unsigned(s[10]) << 24; + case 10: + c += char2unsigned(s[9]) << 16; + case 9: + c += char2unsigned(s[8]) << 8; + case 8: + b += Google1At(s + 4); + a += word32AtOffset0; + break; + case 7: + b += char2unsigned(s[6]) << 16; + case 6: + b += char2unsigned(s[5]) << 8; + case 5: + b += char2unsigned(s[4]); + case 4: + a += word32AtOffset0; + break; + } + } + } else { + if (keylen >= 3 * sizeof(a)) { + a += Google1At(s); + b += Google1At(s + sizeof(a)); + c += Google1At(s + sizeof(a) * 2); + s += 3 * sizeof(a); + mix(a, b, c); + keylen -= 3 * static_cast(sizeof(a)); + } + c += len; + switch (keylen) { // deal with rest. Cases fall through + case 11: + c += char2unsigned(s[10]) << 24; + case 10: + c += char2unsigned(s[9]) << 16; + case 9: + c += char2unsigned(s[8]) << 8; + case 8: + b += Google1At(s + 4); + a += Google1At(s); + break; + case 7: + b += char2unsigned(s[6]) << 16; + case 6: + b += char2unsigned(s[5]) << 8; + case 5: + b += char2unsigned(s[4]); + case 4: + a += Google1At(s); + break; + case 3: + a += char2unsigned(s[2]) << 16; + case 2: + a += char2unsigned(s[1]) << 8; + case 1: + a += char2unsigned(s[0]); + } + } + mix(a, b, c); + return c; +} + +uint64 Hash64StringWithSeed(const char* s, uint32 len, uint64 c) { + uint64 a, b; + uint32 keylen; + + a = b = GG_ULONGLONG(0xe08c1d668b756f82); // the golden ratio; an arbitrary value + + for (keylen = len; keylen >= 3 * sizeof(a); + keylen -= 3 * static_cast(sizeof(a)), s += 3 * sizeof(a)) { + a += Word64At(s); + b += Word64At(s + sizeof(a)); + c += Word64At(s + sizeof(a) * 2); + mix(a, b, c); + } + + c += len; + switch (keylen) { // deal with rest. 
Cases fall through + case 23: + c += char2unsigned64(s[22]) << 56; + case 22: + c += char2unsigned64(s[21]) << 48; + case 21: + c += char2unsigned64(s[20]) << 40; + case 20: + c += char2unsigned64(s[19]) << 32; + case 19: + c += char2unsigned64(s[18]) << 24; + case 18: + c += char2unsigned64(s[17]) << 16; + case 17: + c += char2unsigned64(s[16]) << 8; + // the first byte of c is reserved for the length + case 16: + b += Word64At(s + 8); + a += Word64At(s); + break; + case 15: + b += char2unsigned64(s[14]) << 48; + case 14: + b += char2unsigned64(s[13]) << 40; + case 13: + b += char2unsigned64(s[12]) << 32; + case 12: + b += char2unsigned64(s[11]) << 24; + case 11: + b += char2unsigned64(s[10]) << 16; + case 10: + b += char2unsigned64(s[9]) << 8; + case 9: + b += char2unsigned64(s[8]); + case 8: + a += Word64At(s); + break; + case 7: + a += char2unsigned64(s[6]) << 48; + case 6: + a += char2unsigned64(s[5]) << 40; + case 5: + a += char2unsigned64(s[4]) << 32; + case 4: + a += char2unsigned64(s[3]) << 24; + case 3: + a += char2unsigned64(s[2]) << 16; + case 2: + a += char2unsigned64(s[1]) << 8; + case 1: + a += char2unsigned64(s[0]); + // case 0: nothing left to add + } + mix(a, b, c); + return c; } diff --git a/be/src/gutil/hash/jenkins.h b/be/src/gutil/hash/jenkins.h index 6837a669902587..789a6a861cb3af 100644 --- a/be/src/gutil/hash/jenkins.h +++ b/be/src/gutil/hash/jenkins.h @@ -29,12 +29,11 @@ // ---------------------------------------------------------------------- // These slow down a lot if inlined, so do not inline them --Sanjay -uint32 Hash32StringWithSeed(const char *s, uint32 len, uint32 c); -uint64 Hash64StringWithSeed(const char *s, uint32 len, uint64 c); +uint32 Hash32StringWithSeed(const char* s, uint32 len, uint32 c); +uint64 Hash64StringWithSeed(const char* s, uint32 len, uint64 c); // This is a reference implementation of the same fundamental algorithm as // Hash32StringWithSeed. It is used primarily as a performance metric. 
-uint32 Hash32StringWithSeedReferenceImplementation(const char *s, - uint32 len, uint32 c); +uint32 Hash32StringWithSeedReferenceImplementation(const char* s, uint32 len, uint32 c); -#endif // UTIL_HASH_JENKINS_H_ +#endif // UTIL_HASH_JENKINS_H_ diff --git a/be/src/gutil/hash/jenkins_lookup2.h b/be/src/gutil/hash/jenkins_lookup2.h index 209af9f3e0026a..7be68f510bd332 100644 --- a/be/src/gutil/hash/jenkins_lookup2.h +++ b/be/src/gutil/hash/jenkins_lookup2.h @@ -28,34 +28,75 @@ // if you're lucky. // ---------------------------------------------------------------------- -static inline void mix(uint32& a, uint32& b, uint32& c) { // 32bit version - a -= b; a -= c; a ^= (c>>13); - b -= c; b -= a; b ^= (a<<8); - c -= a; c -= b; c ^= (b>>13); - a -= b; a -= c; a ^= (c>>12); - b -= c; b -= a; b ^= (a<<16); - c -= a; c -= b; c ^= (b>>5); - a -= b; a -= c; a ^= (c>>3); - b -= c; b -= a; b ^= (a<<10); - c -= a; c -= b; c ^= (b>>15); +static inline void mix(uint32& a, uint32& b, uint32& c) { // 32bit version + a -= b; + a -= c; + a ^= (c >> 13); + b -= c; + b -= a; + b ^= (a << 8); + c -= a; + c -= b; + c ^= (b >> 13); + a -= b; + a -= c; + a ^= (c >> 12); + b -= c; + b -= a; + b ^= (a << 16); + c -= a; + c -= b; + c ^= (b >> 5); + a -= b; + a -= c; + a ^= (c >> 3); + b -= c; + b -= a; + b ^= (a << 10); + c -= a; + c -= b; + c ^= (b >> 15); } -static inline void mix(uint64& a, uint64& b, uint64& c) { // 64bit version - a -= b; a -= c; a ^= (c>>43); - b -= c; b -= a; b ^= (a<<9); - c -= a; c -= b; c ^= (b>>8); - a -= b; a -= c; a ^= (c>>38); - b -= c; b -= a; b ^= (a<<23); - c -= a; c -= b; c ^= (b>>5); - a -= b; a -= c; a ^= (c>>35); - b -= c; b -= a; b ^= (a<<49); - c -= a; c -= b; c ^= (b>>11); - a -= b; a -= c; a ^= (c>>12); - b -= c; b -= a; b ^= (a<<18); - c -= a; c -= b; c ^= (b>>22); +static inline void mix(uint64& a, uint64& b, uint64& c) { // 64bit version + a -= b; + a -= c; + a ^= (c >> 43); + b -= c; + b -= a; + b ^= (a << 9); + c -= a; + c -= b; + c ^= (b >> 8); + 
a -= b; + a -= c; + a ^= (c >> 38); + b -= c; + b -= a; + b ^= (a << 23); + c -= a; + c -= b; + c ^= (b >> 5); + a -= b; + a -= c; + a ^= (c >> 35); + b -= c; + b -= a; + b ^= (a << 49); + c -= a; + c -= b; + c ^= (b >> 11); + a -= b; + a -= c; + a ^= (c >> 12); + b -= c; + b -= a; + b ^= (a << 18); + c -= a; + c -= b; + c ^= (b >> 22); } - // Load an unaligned little endian word from memory. // // These routines are named Word32At(), Word64At() and Google1At(). @@ -74,12 +115,12 @@ static inline void mix(uint64& a, uint64& b, uint64& c) { // 64bit version // but that seems overly verbose.] #if !defined(NEED_ALIGNED_LOADS) && defined(IS_LITTLE_ENDIAN) -static inline uint64 Word64At(const char *ptr) { - return UNALIGNED_LOAD64(ptr); +static inline uint64 Word64At(const char* ptr) { + return UNALIGNED_LOAD64(ptr); } -static inline uint32 Word32At(const char *ptr) { - return UNALIGNED_LOAD32(ptr); +static inline uint32 Word32At(const char* ptr) { + return UNALIGNED_LOAD32(ptr); } // This produces the same results as the byte-by-byte version below. @@ -88,7 +129,7 @@ static inline uint32 Word32At(const char *ptr) { // start by considering the low-order byte. If we loaded an unsigned // word and wanted to sign extend it, we isolate the sign bit and subtract // that from zero which gives us a sequence of bits matching the sign bit -// at and above the sign bit. If we remove (subtract) the sign bit and +// at and above the sign bit. If we remove (subtract) the sign bit and // add in the low order byte, we now have a sign-extended byte as desired. // We can then operate on all four bytes in parallel because addition // is associative and commutative. 
@@ -108,40 +149,32 @@ static inline uint32 Word32At(const char *ptr) { // == 0x8281 - 0x8080 - 0x8000 - 0x80 // == 0x8281 - 0x8080 - 0x8080 -static inline uint32 Google1At(const char *ptr) { - uint32 t = UNALIGNED_LOAD32(ptr); - uint32 masked = t & 0x80808080; - return t - masked - masked; +static inline uint32 Google1At(const char* ptr) { + uint32 t = UNALIGNED_LOAD32(ptr); + uint32 masked = t & 0x80808080; + return t - masked - masked; } #else // NOTE: This code is not normally used or tested. -static inline uint64 Word64At(const char *ptr) { - return (static_cast(ptr[0]) + - (static_cast(ptr[1]) << 8) + - (static_cast(ptr[2]) << 16) + - (static_cast(ptr[3]) << 24) + - (static_cast(ptr[4]) << 32) + - (static_cast(ptr[5]) << 40) + - (static_cast(ptr[6]) << 48) + - (static_cast(ptr[7]) << 56)); +static inline uint64 Word64At(const char* ptr) { + return (static_cast(ptr[0]) + (static_cast(ptr[1]) << 8) + + (static_cast(ptr[2]) << 16) + (static_cast(ptr[3]) << 24) + + (static_cast(ptr[4]) << 32) + (static_cast(ptr[5]) << 40) + + (static_cast(ptr[6]) << 48) + (static_cast(ptr[7]) << 56)); } -static inline uint32 Word32At(const char *ptr) { - return (static_cast(ptr[0]) + - (static_cast(ptr[1]) << 8) + - (static_cast(ptr[2]) << 16) + - (static_cast(ptr[3]) << 24)); +static inline uint32 Word32At(const char* ptr) { + return (static_cast(ptr[0]) + (static_cast(ptr[1]) << 8) + + (static_cast(ptr[2]) << 16) + (static_cast(ptr[3]) << 24)); } -static inline uint32 Google1At(const char *ptr2) { - const schar * ptr = reinterpret_cast(ptr2); - return (static_cast(ptr[0]) + - (static_cast(ptr[1]) << 8) + - (static_cast(ptr[2]) << 16) + - (static_cast(ptr[3]) << 24)); +static inline uint32 Google1At(const char* ptr2) { + const schar* ptr = reinterpret_cast(ptr2); + return (static_cast(ptr[0]) + (static_cast(ptr[1]) << 8) + + (static_cast(ptr[2]) << 16) + (static_cast(ptr[3]) << 24)); } #endif /* !NEED_ALIGNED_LOADS && IS_LITTLE_ENDIAN */ @@ -151,6 +184,6 @@ static inline uint32 
Google1At(const char *ptr2) { // // TODO(user): find occurrences of WORD_HASH and adjust the code to // use more meaningful concepts. -# define WORD_HASH +#define WORD_HASH -#endif // UTIL_HASH_JENKINS_LOOKUP2_H_ +#endif // UTIL_HASH_JENKINS_LOOKUP2_H_ diff --git a/be/src/gutil/hash/legacy_hash.h b/be/src/gutil/hash/legacy_hash.h index 8872356be1d3bd..c84e4a218ae8bc 100644 --- a/be/src/gutil/hash/legacy_hash.h +++ b/be/src/gutil/hash/legacy_hash.h @@ -10,9 +10,9 @@ #ifndef UTIL_HASH_LEGACY_HASH_H_ #define UTIL_HASH_LEGACY_HASH_H_ -#include "gutil/integral_types.h" #include "gutil/hash/builtin_type_hash.h" #include "gutil/hash/string_hash.h" +#include "gutil/integral_types.h" // Hash8, Hash16 and Hash32 are for legacy use only. typedef uint32 Hash32; @@ -22,8 +22,8 @@ typedef uint8 Hash8; const Hash32 kIllegalHash32 = static_cast(0xffffffffUL); const Hash16 kIllegalHash16 = static_cast(0xffff); -static const uint32 MIX32 = 0x12b9b0a1UL; // pi; an arbitrary number -static const uint64 MIX64 = GG_ULONGLONG(0x2b992ddfa23249d6); // more of pi +static const uint32 MIX32 = 0x12b9b0a1UL; // pi; an arbitrary number +static const uint64 MIX64 = GG_ULONGLONG(0x2b992ddfa23249d6); // more of pi // ---------------------------------------------------------------------- // HashTo32() @@ -50,35 +50,34 @@ static const uint64 MIX64 = GG_ULONGLONG(0x2b992ddfa23249d6); // more of pi // #define HASH_TO(arglist, command) \ -inline uint32 HashTo32 arglist { \ - uint32 retval = command; \ - return retval == kIllegalHash32 ? retval-1 : retval; \ -} + inline uint32 HashTo32 arglist { \ + uint32 retval = command; \ + return retval == kIllegalHash32 ? 
retval - 1 : retval; \ + } // This defines: // HashToXX(char *s, int slen); // HashToXX(char c); // etc -HASH_TO((const char *s, uint32 slen), Hash32StringWithSeed(s, slen, MIX32)) -HASH_TO((const wchar_t *s, uint32 slen), +HASH_TO((const char* s, uint32 slen), Hash32StringWithSeed(s, slen, MIX32)) +HASH_TO((const wchar_t* s, uint32 slen), Hash32StringWithSeed(reinterpret_cast(s), - static_cast(sizeof(wchar_t) * slen), - MIX32)) -HASH_TO((char c), Hash32NumWithSeed(static_cast(c), MIX32)) -HASH_TO((schar c), Hash32NumWithSeed(static_cast(c), MIX32)) + static_cast(sizeof(wchar_t) * slen), MIX32)) +HASH_TO((char c), Hash32NumWithSeed(static_cast(c), MIX32)) +HASH_TO((schar c), Hash32NumWithSeed(static_cast(c), MIX32)) HASH_TO((uint16 c), Hash32NumWithSeed(static_cast(c), MIX32)) -HASH_TO((int16 c), Hash32NumWithSeed(static_cast(c), MIX32)) +HASH_TO((int16 c), Hash32NumWithSeed(static_cast(c), MIX32)) HASH_TO((uint32 c), Hash32NumWithSeed(static_cast(c), MIX32)) -HASH_TO((int32 c), Hash32NumWithSeed(static_cast(c), MIX32)) +HASH_TO((int32 c), Hash32NumWithSeed(static_cast(c), MIX32)) HASH_TO((uint64 c), static_cast(Hash64NumWithSeed(c, MIX64) >> 32)) -HASH_TO((int64 c), static_cast(Hash64NumWithSeed(c, MIX64) >> 32)) +HASH_TO((int64 c), static_cast(Hash64NumWithSeed(c, MIX64) >> 32)) -#undef HASH_TO // clean up the macro space +#undef HASH_TO // clean up the macro space -inline uint16 HashTo16(const char *s, uint32 slen) { - uint16 retval = Hash32StringWithSeed(s, slen, MIX32) >> 16; - return retval == kIllegalHash16 ? static_cast(retval-1) : retval; +inline uint16 HashTo16(const char* s, uint32 slen) { + uint16 retval = Hash32StringWithSeed(s, slen, MIX32) >> 16; + return retval == kIllegalHash16 ? 
static_cast(retval - 1) : retval; } -#endif // UTIL_HASH_LEGACY_HASH_H_ +#endif // UTIL_HASH_LEGACY_HASH_H_ diff --git a/be/src/gutil/hash/string_hash.h b/be/src/gutil/hash/string_hash.h index 4e399973e74f4d..2593e60ad58d9e 100644 --- a/be/src/gutil/hash/string_hash.h +++ b/be/src/gutil/hash/string_hash.h @@ -13,11 +13,11 @@ #include -#include "gutil/port.h" -#include "gutil/integral_types.h" #include "gutil/hash/city.h" #include "gutil/hash/jenkins.h" #include "gutil/hash/jenkins_lookup2.h" +#include "gutil/integral_types.h" +#include "gutil/port.h" namespace hash_internal { @@ -36,50 +36,45 @@ enum { x86_64 = false, sixty_four_bit = false }; static const uint32 kMix32 = 0x12b9b0a1UL; static const uint64 kMix64 = GG_ULONGLONG(0x2b992ddfa23249d6); -} // namespace hash_internal +} // namespace hash_internal -inline size_t HashStringThoroughlyWithSeed(const char* s, size_t len, - size_t seed) { - if (hash_internal::x86_64) - return static_cast(util_hash::CityHash64WithSeed(s, len, seed)); +inline size_t HashStringThoroughlyWithSeed(const char* s, size_t len, size_t seed) { + if (hash_internal::x86_64) + return static_cast(util_hash::CityHash64WithSeed(s, len, seed)); - if (hash_internal::sixty_four_bit) - return Hash64StringWithSeed(s, static_cast(len), seed); + if (hash_internal::sixty_four_bit) + return Hash64StringWithSeed(s, static_cast(len), seed); - return static_cast(Hash32StringWithSeed(s, static_cast(len), - static_cast(seed))); + return static_cast( + Hash32StringWithSeed(s, static_cast(len), static_cast(seed))); } inline size_t HashStringThoroughly(const char* s, size_t len) { - if (hash_internal::x86_64) - return static_cast(util_hash::CityHash64(s, len)); + if (hash_internal::x86_64) return static_cast(util_hash::CityHash64(s, len)); - if (hash_internal::sixty_four_bit) - return Hash64StringWithSeed(s, static_cast(len), - hash_internal::kMix64); + if (hash_internal::sixty_four_bit) + return Hash64StringWithSeed(s, static_cast(len), 
hash_internal::kMix64); - return static_cast(Hash32StringWithSeed(s, static_cast(len), - hash_internal::kMix32)); + return static_cast( + Hash32StringWithSeed(s, static_cast(len), hash_internal::kMix32)); } -inline size_t HashStringThoroughlyWithSeeds(const char* s, size_t len, - size_t seed0, size_t seed1) { - if (hash_internal::x86_64) - return util_hash::CityHash64WithSeeds(s, len, seed0, seed1); +inline size_t HashStringThoroughlyWithSeeds(const char* s, size_t len, size_t seed0, size_t seed1) { + if (hash_internal::x86_64) return util_hash::CityHash64WithSeeds(s, len, seed0, seed1); + + if (hash_internal::sixty_four_bit) { + uint64 a = seed0; + uint64 b = seed1; + uint64 c = HashStringThoroughly(s, len); + mix(a, b, c); + return c; + } - if (hash_internal::sixty_four_bit) { - uint64 a = seed0; - uint64 b = seed1; - uint64 c = HashStringThoroughly(s, len); + uint32 a = static_cast(seed0); + uint32 b = static_cast(seed1); + uint32 c = static_cast(HashStringThoroughly(s, len)); mix(a, b, c); return c; - } - - uint32 a = static_cast(seed0); - uint32 b = static_cast(seed1); - uint32 c = static_cast(HashStringThoroughly(s, len)); - mix(a, b, c); - return c; } -#endif // UTIL_HASH_STRING_HASH_H_ +#endif // UTIL_HASH_STRING_HASH_H_ diff --git a/be/src/gutil/int128.cc b/be/src/gutil/int128.cc index 9e2ab189d348c8..cd2964a08cab4e 100644 --- a/be/src/gutil/int128.cc +++ b/be/src/gutil/int128.cc @@ -9,11 +9,9 @@ using std::endl; #include "gutil/int128.h" #include "gutil/integral_types.h" -const uint128_pod kuint128max = { - static_cast(GG_LONGLONG(0xFFFFFFFFFFFFFFFF)), - static_cast(GG_LONGLONG(0xFFFFFFFFFFFFFFFF)) -}; +const uint128_pod kuint128max = {static_cast(GG_LONGLONG(0xFFFFFFFFFFFFFFFF)), + static_cast(GG_LONGLONG(0xFFFFFFFFFFFFFFFF))}; std::ostream& operator<<(std::ostream& o, const uint128& b) { - return (o << b.hi_ << "::" << b.lo_); + return (o << b.hi_ << "::" << b.lo_); } diff --git a/be/src/gutil/int128.h b/be/src/gutil/int128.h index 
72b87cd28d101a..4a80366d1cd0fd 100644 --- a/be/src/gutil/int128.h +++ b/be/src/gutil/int128.h @@ -13,66 +13,66 @@ struct uint128_pod; // An unsigned 128-bit integer type. Thread-compatible. class uint128 { - public: - uint128(); // Sets to 0, but don't trust on this behavior. - uint128(uint64 top, uint64 bottom); +public: + uint128(); // Sets to 0, but don't trust on this behavior. + uint128(uint64 top, uint64 bottom); #ifndef SWIG - uint128(int bottom); - uint128(uint32 bottom); // Top 96 bits = 0 + uint128(int bottom); + uint128(uint32 bottom); // Top 96 bits = 0 #endif - uint128(uint64 bottom); // hi_ = 0 - uint128(const uint128 &val); - uint128(const uint128_pod &val); - - void Initialize(uint64 top, uint64 bottom); - - uint128& operator=(const uint128& b); - - // Arithmetic operators. - // TODO: division, etc. - uint128& operator+=(const uint128& b); - uint128& operator-=(const uint128& b); - uint128& operator*=(const uint128& b); - uint128 operator++(int); - uint128 operator--(int); - uint128& operator<<=(int); - uint128& operator>>=(int); - uint128& operator&=(const uint128& b); - uint128& operator|=(const uint128& b); - uint128& operator^=(const uint128& b); - uint128& operator++(); - uint128& operator--(); - - friend uint64 Uint128Low64(const uint128& v); - friend uint64 Uint128High64(const uint128& v); - - // We add "std::" to avoid including all of port.h. - friend std::ostream& operator<<(std::ostream& o, const uint128& b); - - private: - // Little-endian memory order optimizations can benefit from - // having lo_ first, hi_ last. - // See util/endian/endian.h and Load128/Store128 for storing a uint128. - uint64 lo_; - uint64 hi_; - - // Not implemented, just declared for catching automatic type conversions. 
- uint128(uint8); - uint128(uint16); - uint128(float v); - uint128(double v); + uint128(uint64 bottom); // hi_ = 0 + uint128(const uint128& val); + uint128(const uint128_pod& val); + + void Initialize(uint64 top, uint64 bottom); + + uint128& operator=(const uint128& b); + + // Arithmetic operators. + // TODO: division, etc. + uint128& operator+=(const uint128& b); + uint128& operator-=(const uint128& b); + uint128& operator*=(const uint128& b); + uint128 operator++(int); + uint128 operator--(int); + uint128& operator<<=(int); + uint128& operator>>=(int); + uint128& operator&=(const uint128& b); + uint128& operator|=(const uint128& b); + uint128& operator^=(const uint128& b); + uint128& operator++(); + uint128& operator--(); + + friend uint64 Uint128Low64(const uint128& v); + friend uint64 Uint128High64(const uint128& v); + + // We add "std::" to avoid including all of port.h. + friend std::ostream& operator<<(std::ostream& o, const uint128& b); + +private: + // Little-endian memory order optimizations can benefit from + // having lo_ first, hi_ last. + // See util/endian/endian.h and Load128/Store128 for storing a uint128. + uint64 lo_; + uint64 hi_; + + // Not implemented, just declared for catching automatic type conversions. + uint128(uint8); + uint128(uint16); + uint128(float v); + uint128(double v); }; // This is a POD form of uint128 which can be used for static variables which // need to be operated on as uint128. struct uint128_pod { - // Note: The ordering of fields is different than 'class uint128' but the - // same as its 2-arg constructor. This enables more obvious initialization - // of static instances, which is the primary reason for this struct in the - // first place. This does not seem to defeat any optimizations wrt - // operations involving this struct. - uint64 hi; - uint64 lo; + // Note: The ordering of fields is different than 'class uint128' but the + // same as its 2-arg constructor. 
This enables more obvious initialization + // of static instances, which is the primary reason for this struct in the + // first place. This does not seem to defeat any optimizations wrt + // operations involving this struct. + uint64 hi; + uint64 lo; }; extern const uint128_pod kuint128max; @@ -83,8 +83,12 @@ extern std::ostream& operator<<(std::ostream& o, const uint128& b); // Methods to access low and high pieces of 128-bit value. // Defined externally from uint128 to facilitate conversion // to native 128-bit types when compilers support them. -inline uint64 Uint128Low64(const uint128& v) { return v.lo_; } -inline uint64 Uint128High64(const uint128& v) { return v.hi_; } +inline uint64 Uint128Low64(const uint128& v) { + return v.lo_; +} +inline uint64 Uint128High64(const uint128& v) { + return v.hi_; +} // TODO: perhaps it would be nice to have int128, a signed 128-bit type? @@ -92,44 +96,43 @@ inline uint64 Uint128High64(const uint128& v) { return v.hi_; } // Implementation details follow // -------------------------------------------------------------------------- inline bool operator==(const uint128& lhs, const uint128& rhs) { - return (Uint128Low64(lhs) == Uint128Low64(rhs) && - Uint128High64(lhs) == Uint128High64(rhs)); + return (Uint128Low64(lhs) == Uint128Low64(rhs) && Uint128High64(lhs) == Uint128High64(rhs)); } inline bool operator!=(const uint128& lhs, const uint128& rhs) { - return !(lhs == rhs); + return !(lhs == rhs); } inline uint128& uint128::operator=(const uint128& b) { - lo_ = b.lo_; - hi_ = b.hi_; - return *this; + lo_ = b.lo_; + hi_ = b.hi_; + return *this; } -inline uint128::uint128(): lo_(0), hi_(0) { } -inline uint128::uint128(uint64 top, uint64 bottom) : lo_(bottom), hi_(top) { } -inline uint128::uint128(const uint128 &v) : lo_(v.lo_), hi_(v.hi_) { } -inline uint128::uint128(const uint128_pod &v) : lo_(v.lo), hi_(v.hi) { } -inline uint128::uint128(uint64 bottom) : lo_(bottom), hi_(0) { } +inline uint128::uint128() : lo_(0), hi_(0) {} 
+inline uint128::uint128(uint64 top, uint64 bottom) : lo_(bottom), hi_(top) {} +inline uint128::uint128(const uint128& v) : lo_(v.lo_), hi_(v.hi_) {} +inline uint128::uint128(const uint128_pod& v) : lo_(v.lo), hi_(v.hi) {} +inline uint128::uint128(uint64 bottom) : lo_(bottom), hi_(0) {} #ifndef SWIG -inline uint128::uint128(uint32 bottom) : lo_(bottom), hi_(0) { } +inline uint128::uint128(uint32 bottom) : lo_(bottom), hi_(0) {} inline uint128::uint128(int bottom) : lo_(bottom), hi_(0) { - if (bottom < 0) { - --hi_; - } + if (bottom < 0) { + --hi_; + } } #endif inline void uint128::Initialize(uint64 top, uint64 bottom) { - hi_ = top; - lo_ = bottom; + hi_ = top; + lo_ = bottom; } // Comparison operators. -#define CMP128(op) \ -inline bool operator op(const uint128& lhs, const uint128& rhs) { \ - return (Uint128High64(lhs) == Uint128High64(rhs)) ? \ - (Uint128Low64(lhs) op Uint128Low64(rhs)) : \ - (Uint128High64(lhs) op Uint128High64(rhs)); \ -} +#define CMP128(op) \ + inline bool operator op(const uint128& lhs, const uint128& rhs) { \ + return (Uint128High64(lhs) == Uint128High64(rhs)) \ + ? (Uint128Low64(lhs) op Uint128Low64(rhs)) \ + : (Uint128High64(lhs) op Uint128High64(rhs)); \ + } CMP128(<) CMP128(>) @@ -141,30 +144,30 @@ CMP128(<=) // Unary operators inline uint128 operator-(const uint128& val) { - const uint64 hi_flip = ~Uint128High64(val); - const uint64 lo_flip = ~Uint128Low64(val); - const uint64 lo_add = lo_flip + 1; - if (lo_add < lo_flip) { - return uint128(hi_flip + 1, lo_add); - } - return uint128(hi_flip, lo_add); + const uint64 hi_flip = ~Uint128High64(val); + const uint64 lo_flip = ~Uint128Low64(val); + const uint64 lo_add = lo_flip + 1; + if (lo_add < lo_flip) { + return uint128(hi_flip + 1, lo_add); + } + return uint128(hi_flip, lo_add); } inline bool operator!(const uint128& val) { - return !Uint128High64(val) && !Uint128Low64(val); + return !Uint128High64(val) && !Uint128Low64(val); } // Logical operators. 
inline uint128 operator~(const uint128& val) { - return uint128(~Uint128High64(val), ~Uint128Low64(val)); + return uint128(~Uint128High64(val), ~Uint128Low64(val)); } -#define LOGIC128(op) \ -inline uint128 operator op(const uint128& lhs, const uint128& rhs) { \ - return uint128(Uint128High64(lhs) op Uint128High64(rhs), \ - Uint128Low64(lhs) op Uint128Low64(rhs)); \ -} +#define LOGIC128(op) \ + inline uint128 operator op(const uint128& lhs, const uint128& rhs) { \ + return uint128(Uint128High64(lhs) op Uint128High64(rhs), \ + Uint128Low64(lhs) op Uint128Low64(rhs)); \ + } LOGIC128(|) LOGIC128(&) @@ -172,12 +175,12 @@ LOGIC128(^) #undef LOGIC128 -#define LOGICASSIGN128(op) \ -inline uint128& uint128::operator op(const uint128& other) { \ - hi_ op other.hi_; \ - lo_ op other.lo_; \ - return *this; \ -} +#define LOGICASSIGN128(op) \ + inline uint128& uint128::operator op(const uint128& other) { \ + hi_ op other.hi_; \ + lo_ op other.lo_; \ + return *this; \ + } LOGICASSIGN128(|=) LOGICASSIGN128(&=) @@ -188,145 +191,141 @@ LOGICASSIGN128(^=) // Shift operators. inline uint128 operator<<(const uint128& val, int amount) { - // uint64 shifts of >= 64 are undefined, so we will need some special-casing. - if (amount < 64) { - if (amount == 0) { - return val; + // uint64 shifts of >= 64 are undefined, so we will need some special-casing. 
+ if (amount < 64) { + if (amount == 0) { + return val; + } + uint64 new_hi = (Uint128High64(val) << amount) | (Uint128Low64(val) >> (64 - amount)); + uint64 new_lo = Uint128Low64(val) << amount; + return uint128(new_hi, new_lo); + } else if (amount < 128) { + return uint128(Uint128Low64(val) << (amount - 64), 0); + } else { + return uint128(0, 0); } - uint64 new_hi = (Uint128High64(val) << amount) | - (Uint128Low64(val) >> (64 - amount)); - uint64 new_lo = Uint128Low64(val) << amount; - return uint128(new_hi, new_lo); - } else if (amount < 128) { - return uint128(Uint128Low64(val) << (amount - 64), 0); - } else { - return uint128(0, 0); - } } inline uint128 operator>>(const uint128& val, int amount) { - // uint64 shifts of >= 64 are undefined, so we will need some special-casing. - if (amount < 64) { - if (amount == 0) { - return val; + // uint64 shifts of >= 64 are undefined, so we will need some special-casing. + if (amount < 64) { + if (amount == 0) { + return val; + } + uint64 new_hi = Uint128High64(val) >> amount; + uint64 new_lo = (Uint128Low64(val) >> amount) | (Uint128High64(val) << (64 - amount)); + return uint128(new_hi, new_lo); + } else if (amount < 128) { + return uint128(0, Uint128High64(val) >> (amount - 64)); + } else { + return uint128(0, 0); } - uint64 new_hi = Uint128High64(val) >> amount; - uint64 new_lo = (Uint128Low64(val) >> amount) | - (Uint128High64(val) << (64 - amount)); - return uint128(new_hi, new_lo); - } else if (amount < 128) { - return uint128(0, Uint128High64(val) >> (amount - 64)); - } else { - return uint128(0, 0); - } } inline uint128& uint128::operator<<=(int amount) { - // uint64 shifts of >= 64 are undefined, so we will need some special-casing. - if (amount < 64) { - if (amount != 0) { - hi_ = (hi_ << amount) | (lo_ >> (64 - amount)); - lo_ = lo_ << amount; + // uint64 shifts of >= 64 are undefined, so we will need some special-casing. 
+ if (amount < 64) { + if (amount != 0) { + hi_ = (hi_ << amount) | (lo_ >> (64 - amount)); + lo_ = lo_ << amount; + } + } else if (amount < 128) { + hi_ = lo_ << (amount - 64); + lo_ = 0; + } else { + hi_ = 0; + lo_ = 0; } - } else if (amount < 128) { - hi_ = lo_ << (amount - 64); - lo_ = 0; - } else { - hi_ = 0; - lo_ = 0; - } - return *this; + return *this; } inline uint128& uint128::operator>>=(int amount) { - // uint64 shifts of >= 64 are undefined, so we will need some special-casing. - if (amount < 64) { - if (amount != 0) { - lo_ = (lo_ >> amount) | (hi_ << (64 - amount)); - hi_ = hi_ >> amount; + // uint64 shifts of >= 64 are undefined, so we will need some special-casing. + if (amount < 64) { + if (amount != 0) { + lo_ = (lo_ >> amount) | (hi_ << (64 - amount)); + hi_ = hi_ >> amount; + } + } else if (amount < 128) { + hi_ = 0; + lo_ = hi_ >> (amount - 64); + } else { + hi_ = 0; + lo_ = 0; } - } else if (amount < 128) { - hi_ = 0; - lo_ = hi_ >> (amount - 64); - } else { - hi_ = 0; - lo_ = 0; - } - return *this; + return *this; } inline uint128 operator+(const uint128& lhs, const uint128& rhs) { - return uint128(lhs) += rhs; + return uint128(lhs) += rhs; } inline uint128 operator-(const uint128& lhs, const uint128& rhs) { - return uint128(lhs) -= rhs; + return uint128(lhs) -= rhs; } inline uint128 operator*(const uint128& lhs, const uint128& rhs) { - return uint128(lhs) *= rhs; + return uint128(lhs) *= rhs; } inline uint128& uint128::operator+=(const uint128& b) { - hi_ += b.hi_; - uint64 lolo = lo_ + b.lo_; - if (lolo < lo_) - ++hi_; - lo_ = lolo; - return *this; + hi_ += b.hi_; + uint64 lolo = lo_ + b.lo_; + if (lolo < lo_) ++hi_; + lo_ = lolo; + return *this; } inline uint128& uint128::operator-=(const uint128& b) { - hi_ -= b.hi_; - if (b.lo_ > lo_) - --hi_; - lo_ -= b.lo_; - return *this; + hi_ -= b.hi_; + if (b.lo_ > lo_) --hi_; + lo_ -= b.lo_; + return *this; } inline uint128& uint128::operator*=(const uint128& b) { - uint64 a96 = hi_ >> 32; - 
uint64 a64 = hi_ & 0xffffffffu; - uint64 a32 = lo_ >> 32; - uint64 a00 = lo_ & 0xffffffffu; - uint64 b96 = b.hi_ >> 32; - uint64 b64 = b.hi_ & 0xffffffffu; - uint64 b32 = b.lo_ >> 32; - uint64 b00 = b.lo_ & 0xffffffffu; - // multiply [a96 .. a00] x [b96 .. b00] - // terms higher than c96 disappear off the high side - // terms c96 and c64 are safe to ignore carry bit - uint64 c96 = a96 * b00 + a64 * b32 + a32 * b64 + a00 * b96; - uint64 c64 = a64 * b00 + a32 * b32 + a00 * b64; - this->hi_ = (c96 << 32) + c64; - this->lo_ = 0; - // add terms after this one at a time to capture carry - *this += uint128(a32 * b00) << 32; - *this += uint128(a00 * b32) << 32; - *this += a00 * b00; - return *this; + uint64 a96 = hi_ >> 32; + uint64 a64 = hi_ & 0xffffffffu; + uint64 a32 = lo_ >> 32; + uint64 a00 = lo_ & 0xffffffffu; + uint64 b96 = b.hi_ >> 32; + uint64 b64 = b.hi_ & 0xffffffffu; + uint64 b32 = b.lo_ >> 32; + uint64 b00 = b.lo_ & 0xffffffffu; + // multiply [a96 .. a00] x [b96 .. b00] + // terms higher than c96 disappear off the high side + // terms c96 and c64 are safe to ignore carry bit + uint64 c96 = a96 * b00 + a64 * b32 + a32 * b64 + a00 * b96; + uint64 c64 = a64 * b00 + a32 * b32 + a00 * b64; + this->hi_ = (c96 << 32) + c64; + this->lo_ = 0; + // add terms after this one at a time to capture carry + *this += uint128(a32 * b00) << 32; + *this += uint128(a00 * b32) << 32; + *this += a00 * b00; + return *this; } inline uint128 uint128::operator++(int) { - uint128 tmp(*this); - *this += 1; - return tmp; + uint128 tmp(*this); + *this += 1; + return tmp; } inline uint128 uint128::operator--(int) { - uint128 tmp(*this); - *this -= 1; - return tmp; + uint128 tmp(*this); + *this -= 1; + return tmp; } inline uint128& uint128::operator++() { - *this += 1; - return *this; + *this += 1; + return *this; } inline uint128& uint128::operator--() { - *this -= 1; - return *this; + *this -= 1; + return *this; } -#endif // BASE_INT128_H_ +#endif // BASE_INT128_H_ diff --git 
a/be/src/gutil/integral_types.h b/be/src/gutil/integral_types.h index cbcf917ea5ab06..cf2ccdc8c4d575 100644 --- a/be/src/gutil/integral_types.h +++ b/be/src/gutil/integral_types.h @@ -20,14 +20,14 @@ // Standard typedefs // All Google2 code is compiled with -funsigned-char to make "char" // unsigned. Google2 code therefore doesn't need a "uchar" type. -typedef int8_t schar; -typedef int8_t int8; -typedef int16_t int16; -typedef int32_t int32; +typedef int8_t schar; +typedef int8_t int8; +typedef int16_t int16; +typedef int32_t int32; #ifdef _MSC_VER -typedef __int64 int64; +typedef __int64 int64; #else -typedef int64_t int64; +typedef int64_t int64; #endif /* _MSC_VER */ // NOTE: unsigned types are DANGEROUS in loops and other arithmetical @@ -36,11 +36,11 @@ typedef int64_t int64; // use 'unsigned' to express "this value should always be positive"; // use assertions for this. -typedef uint8_t uint8; -typedef uint16_t uint16; -typedef uint32_t uint32; +typedef uint8_t uint8; +typedef uint16_t uint16; +typedef uint32_t uint32; #ifdef _MSC_VER -typedef unsigned __int64 uint64; +typedef unsigned __int64 uint64; #else typedef uint64_t uint64; #endif /* _MSC_VER */ @@ -49,7 +49,7 @@ typedef uint64_t uint64; // such values require up to 21 bits. // (For type-checking on pointers, make this explicitly signed, // and it should always be the signed version of whatever int32 is.) -typedef signed int char32; +typedef signed int char32; // A type to represent a natural machine word (for e.g. efficiently // scanning through memory for checksums or index searching). Don't use @@ -58,7 +58,7 @@ typedef signed int char32; // (http://www.opengroup.org/public/tech/aspen/lp64_wp.htm), hence // their ints are only 32 bits. We want to use the same fundamental // type on all archs if possible to preserve *printf() compatability. 
-typedef unsigned long uword_t; +typedef unsigned long uword_t; #endif /* SWIG */ @@ -68,32 +68,31 @@ typedef unsigned long uword_t; #undef GG_ULONGLONG #undef GG_LL_FORMAT -#ifdef _MSC_VER /* if Visual C++ */ +#ifdef _MSC_VER /* if Visual C++ */ // VC++ long long suffixes #define GG_LONGLONG(x) x##I64 #define GG_ULONGLONG(x) x##UI64 -#else /* not Visual C++ */ +#else /* not Visual C++ */ #define GG_LONGLONG(x) x##LL #define GG_ULONGLONG(x) x##ULL -#endif // _MSC_VER - - -static const uint8 kuint8max = (( uint8) 0xFF); -static const uint16 kuint16max = ((uint16) 0xFFFF); -static const uint32 kuint32max = ((uint32) 0xFFFFFFFF); -static const uint64 kuint64max = ((uint64) GG_LONGLONG(0xFFFFFFFFFFFFFFFF)); -static const int8 kint8min = (( int8) ~0x7F); -static const int8 kint8max = (( int8) 0x7F); -static const int16 kint16min = (( int16) ~0x7FFF); -static const int16 kint16max = (( int16) 0x7FFF); -static const int32 kint32min = (( int32) ~0x7FFFFFFF); -static const int32 kint32max = (( int32) 0x7FFFFFFF); -static const int64 kint64min = (( int64) GG_LONGLONG(~0x7FFFFFFFFFFFFFFF)); -static const int64 kint64max = (( int64) GG_LONGLONG(0x7FFFFFFFFFFFFFFF)); +#endif // _MSC_VER + +static const uint8 kuint8max = ((uint8)0xFF); +static const uint16 kuint16max = ((uint16)0xFFFF); +static const uint32 kuint32max = ((uint32)0xFFFFFFFF); +static const uint64 kuint64max = ((uint64)GG_LONGLONG(0xFFFFFFFFFFFFFFFF)); +static const int8 kint8min = ((int8)~0x7F); +static const int8 kint8max = ((int8)0x7F); +static const int16 kint16min = ((int16)~0x7FFF); +static const int16 kint16max = ((int16)0x7FFF); +static const int32 kint32min = ((int32)~0x7FFFFFFF); +static const int32 kint32max = ((int32)0x7FFFFFFF); +static const int64 kint64min = ((int64)GG_LONGLONG(~0x7FFFFFFFFFFFFFFF)); +static const int64 kint64max = ((int64)GG_LONGLONG(0x7FFFFFFFFFFFFFFF)); // TODO(user): remove this eventually. // No object has kIllegalFprint as its Fingerprint. 
@@ -101,4 +100,4 @@ typedef uint64 Fprint; static const Fprint kIllegalFprint = 0; static const Fprint kMaxFprint = GG_ULONGLONG(0xFFFFFFFFFFFFFFFF); -#endif // BASE_INTEGRAL_TYPES_H_ +#endif // BASE_INTEGRAL_TYPES_H_ diff --git a/be/src/gutil/linux_syscall_support.h b/be/src/gutil/linux_syscall_support.h index 13aa415e2503eb..03ede26bb877fb 100644 --- a/be/src/gutil/linux_syscall_support.h +++ b/be/src/gutil/linux_syscall_support.h @@ -134,10 +134,9 @@ * on Linux. * Porting to other related platforms should not be difficult. */ -#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ - defined(__mips__) || defined(__PPC__) || \ - defined(__aarch64__) || defined(__s390__)) \ - && (defined(__linux)) +#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ + defined(__PPC__) || defined(__aarch64__) || defined(__s390__)) && \ + (defined(__linux)) #ifndef SYS_CPLUSPLUS #ifdef __cplusplus @@ -148,7 +147,10 @@ extern "C" { #endif +#include #include +#include +#include #include #include #include @@ -160,9 +162,6 @@ extern "C" { #include #include #include -#include -#include -#include #ifdef __mips__ /* Include definitions of the ABI currently in use. 
*/ @@ -202,70 +201,70 @@ extern "C" { /* include/linux/dirent.h */ struct kernel_dirent64 { - unsigned long long d_ino; - long long d_off; - unsigned short d_reclen; - unsigned char d_type; - char d_name[256]; + unsigned long long d_ino; + long long d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[256]; }; /* include/linux/dirent.h */ struct kernel_dirent { - long d_ino; - long d_off; - unsigned short d_reclen; - char d_name[256]; + long d_ino; + long d_off; + unsigned short d_reclen; + char d_name[256]; }; /* include/linux/time.h */ struct kernel_timespec { - long tv_sec; - long tv_nsec; + long tv_sec; + long tv_nsec; }; /* include/linux/time.h */ struct kernel_timeval { - long tv_sec; - long tv_usec; + long tv_sec; + long tv_usec; }; /* include/linux/resource.h */ struct kernel_rusage { - struct kernel_timeval ru_utime; - struct kernel_timeval ru_stime; - long ru_maxrss; - long ru_ixrss; - long ru_idrss; - long ru_isrss; - long ru_minflt; - long ru_majflt; - long ru_nswap; - long ru_inblock; - long ru_oublock; - long ru_msgsnd; - long ru_msgrcv; - long ru_nsignals; - long ru_nvcsw; - long ru_nivcsw; + struct kernel_timeval ru_utime; + struct kernel_timeval ru_stime; + long ru_maxrss; + long ru_ixrss; + long ru_idrss; + long ru_isrss; + long ru_minflt; + long ru_majflt; + long ru_nswap; + long ru_inblock; + long ru_oublock; + long ru_msgsnd; + long ru_msgrcv; + long ru_nsignals; + long ru_nvcsw; + long ru_nivcsw; }; -#if defined(__i386__) || defined(__arm__) \ - || defined(__PPC__) || (defined(__s390__) && !defined(__s390x__)) +#if defined(__i386__) || defined(__arm__) || defined(__PPC__) || \ + (defined(__s390__) && !defined(__s390x__)) /* include/asm-{arm,i386,mips,ppc}/signal.h */ struct kernel_old_sigaction { - union { - void (*sa_handler_)(int); - void (*sa_sigaction_)(int, siginfo_t *, void *); - }; - unsigned long sa_mask; - unsigned long sa_flags; - void (*sa_restorer)(void); -} __attribute__((packed,aligned(4))); + union { + void 
(*sa_handler_)(int); + void (*sa_sigaction_)(int, siginfo_t*, void*); + }; + unsigned long sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +} __attribute__((packed, aligned(4))); #elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) - #define kernel_old_sigaction kernel_sigaction +#define kernel_old_sigaction kernel_sigaction #elif defined(__aarch64__) - // No kernel_old_sigaction defined for arm64. +// No kernel_old_sigaction defined for arm64. #endif /* Some kernel functions (e.g. sigaction() in 2.6.23) require that the @@ -280,32 +279,31 @@ struct kernel_old_sigaction { #ifdef __mips__ #define KERNEL_NSIG 128 #else -#define KERNEL_NSIG 64 +#define KERNEL_NSIG 64 #endif /* include/asm-{arm,i386,mips,x86_64}/signal.h */ struct kernel_sigset_t { - unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/ - (8*sizeof(unsigned long))]; + unsigned long sig[(KERNEL_NSIG + 8 * sizeof(unsigned long) - 1) / (8 * sizeof(unsigned long))]; }; /* include/asm-{arm,generic,i386,mips,x86_64,ppc}/signal.h */ struct kernel_sigaction { #ifdef __mips__ - unsigned long sa_flags; - union { - void (*sa_handler_)(int); - void (*sa_sigaction_)(int, siginfo_t *, void *); - }; - struct kernel_sigset_t sa_mask; + unsigned long sa_flags; + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, siginfo_t*, void*); + }; + struct kernel_sigset_t sa_mask; #else - union { - void (*sa_handler_)(int); - void (*sa_sigaction_)(int, siginfo_t *, void *); - }; - unsigned long sa_flags; - void (*sa_restorer)(void); - struct kernel_sigset_t sa_mask; + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, siginfo_t*, void*); + }; + unsigned long sa_flags; + void (*sa_restorer)(void); + struct kernel_sigset_t sa_mask; #endif }; @@ -316,299 +314,297 @@ struct kernel_stat { #else struct kernel_stat64 { #endif - unsigned st_dev; - unsigned __pad0[3]; - unsigned long long st_ino; - unsigned st_mode; - unsigned st_nlink; - unsigned st_uid; - unsigned st_gid; - 
unsigned st_rdev; - unsigned __pad1[3]; - long long st_size; - unsigned st_atime_; - unsigned st_atime_nsec_; - unsigned st_mtime_; - unsigned st_mtime_nsec_; - unsigned st_ctime_; - unsigned st_ctime_nsec_; - unsigned st_blksize; - unsigned __pad2; - unsigned long long st_blocks; + unsigned st_dev; + unsigned __pad0[3]; + unsigned long long st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned st_rdev; + unsigned __pad1[3]; + long long st_size; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned st_blksize; + unsigned __pad2; + unsigned long long st_blocks; }; #elif defined __PPC__ struct kernel_stat64 { - unsigned long long st_dev; - unsigned long long st_ino; - unsigned st_nlink; - unsigned st_mode; - unsigned st_uid; - unsigned st_gid; - int __pad2; - unsigned long long st_rdev; - long long st_size; - long long st_blksize; - long long st_blocks; - kernel_timespec st_atim; - kernel_timespec st_mtim; - kernel_timespec st_ctim; - unsigned long __unused4; - unsigned long __unused5; - unsigned long __unused6; + unsigned long long st_dev; + unsigned long long st_ino; + unsigned st_nlink; + unsigned st_mode; + unsigned st_uid; + unsigned st_gid; + int __pad2; + unsigned long long st_rdev; + long long st_size; + long long st_blksize; + long long st_blocks; + kernel_timespec st_atim; + kernel_timespec st_mtim; + kernel_timespec st_ctim; + unsigned long __unused4; + unsigned long __unused5; + unsigned long __unused6; }; #else struct kernel_stat64 { - unsigned long long st_dev; - unsigned char __pad0[4]; - unsigned __st_ino; - unsigned st_mode; - unsigned st_nlink; - unsigned st_uid; - unsigned st_gid; - unsigned long long st_rdev; - unsigned char __pad3[4]; - long long st_size; - unsigned st_blksize; - unsigned long long st_blocks; - unsigned st_atime_; - unsigned st_atime_nsec_; - unsigned st_mtime_; - unsigned 
st_mtime_nsec_; - unsigned st_ctime_; - unsigned st_ctime_nsec_; - unsigned long long st_ino; + unsigned long long st_dev; + unsigned char __pad0[4]; + unsigned __st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned long long st_rdev; + unsigned char __pad3[4]; + long long st_size; + unsigned st_blksize; + unsigned long long st_blocks; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned long long st_ino; }; #endif /* include/asm-{arm,generic,i386,mips,x86_64,ppc,s390}/stat.h */ #if defined(__i386__) || defined(__arm__) struct kernel_stat { - /* The kernel headers suggest that st_dev and st_rdev should be 32bit + /* The kernel headers suggest that st_dev and st_rdev should be 32bit * quantities encoding 12bit major and 20bit minor numbers in an interleaved * format. In reality, we do not see useful data in the top bits. So, * we'll leave the padding in here, until we find a better solution. 
*/ - unsigned short st_dev; - short pad1; - unsigned st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - unsigned short st_rdev; - short pad2; - unsigned st_size; - unsigned st_blksize; - unsigned st_blocks; - unsigned st_atime_; - unsigned st_atime_nsec_; - unsigned st_mtime_; - unsigned st_mtime_nsec_; - unsigned st_ctime_; - unsigned st_ctime_nsec_; - unsigned __unused4; - unsigned __unused5; + unsigned short st_dev; + short pad1; + unsigned st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + short pad2; + unsigned st_size; + unsigned st_blksize; + unsigned st_blocks; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned __unused4; + unsigned __unused5; }; #elif defined(__x86_64__) struct kernel_stat { - uint64_t st_dev; - uint64_t st_ino; - uint64_t st_nlink; - unsigned st_mode; - unsigned st_uid; - unsigned st_gid; - unsigned __pad0; - uint64_t st_rdev; - int64_t st_size; - int64_t st_blksize; - int64_t st_blocks; - uint64_t st_atime_; - uint64_t st_atime_nsec_; - uint64_t st_mtime_; - uint64_t st_mtime_nsec_; - uint64_t st_ctime_; - uint64_t st_ctime_nsec_; - int64_t __unused[3]; + uint64_t st_dev; + uint64_t st_ino; + uint64_t st_nlink; + unsigned st_mode; + unsigned st_uid; + unsigned st_gid; + unsigned __pad0; + uint64_t st_rdev; + int64_t st_size; + int64_t st_blksize; + int64_t st_blocks; + uint64_t st_atime_; + uint64_t st_atime_nsec_; + uint64_t st_mtime_; + uint64_t st_mtime_nsec_; + uint64_t st_ctime_; + uint64_t st_ctime_nsec_; + int64_t __unused[3]; }; #elif defined(__PPC__) struct kernel_stat { - unsigned long long st_dev; - unsigned long st_ino; - unsigned long st_nlink; - unsigned long st_mode; - unsigned st_uid; - unsigned st_gid; - int __pad2; - unsigned long long st_rdev; - long st_size; - 
unsigned long st_blksize; - unsigned long st_blocks; - kernel_timespec st_atim; - kernel_timespec st_mtim; - kernel_timespec st_ctim; - unsigned long __unused4; - unsigned long __unused5; - unsigned long __unused6; + unsigned long long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned long st_mode; + unsigned st_uid; + unsigned st_gid; + int __pad2; + unsigned long long st_rdev; + long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + kernel_timespec st_atim; + kernel_timespec st_mtim; + kernel_timespec st_ctim; + unsigned long __unused4; + unsigned long __unused5; + unsigned long __unused6; }; -#elif defined(__mips__) \ - && !(_MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32) +#elif defined(__mips__) && !(_MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32) struct kernel_stat { - unsigned st_dev; - int st_pad1[3]; - unsigned st_ino; - unsigned st_mode; - unsigned st_nlink; - unsigned st_uid; - unsigned st_gid; - unsigned st_rdev; - int st_pad2[2]; - long st_size; - int st_pad3; - long st_atime_; - long st_atime_nsec_; - long st_mtime_; - long st_mtime_nsec_; - long st_ctime_; - long st_ctime_nsec_; - int st_blksize; - int st_blocks; - int st_pad4[14]; + unsigned st_dev; + int st_pad1[3]; + unsigned st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned st_rdev; + int st_pad2[2]; + long st_size; + int st_pad3; + long st_atime_; + long st_atime_nsec_; + long st_mtime_; + long st_mtime_nsec_; + long st_ctime_; + long st_ctime_nsec_; + int st_blksize; + int st_blocks; + int st_pad4[14]; }; #elif defined(__aarch64__) struct kernel_stat { - unsigned long st_dev; - unsigned long st_ino; - unsigned int st_mode; - unsigned int st_nlink; - unsigned int st_uid; - unsigned int st_gid; - unsigned long st_rdev; - unsigned long __pad1; - long st_size; - int st_blksize; - int __pad2; - long st_blocks; - long st_atime_; - unsigned long st_atime_nsec_; - long st_mtime_; - unsigned 
long st_mtime_nsec_; - long st_ctime_; - unsigned long st_ctime_nsec_; - unsigned int __unused4; - unsigned int __unused5; + unsigned long st_dev; + unsigned long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned long st_rdev; + unsigned long __pad1; + long st_size; + int st_blksize; + int __pad2; + long st_blocks; + long st_atime_; + unsigned long st_atime_nsec_; + long st_mtime_; + unsigned long st_mtime_nsec_; + long st_ctime_; + unsigned long st_ctime_nsec_; + unsigned int __unused4; + unsigned int __unused5; }; #elif defined(__s390x__) struct kernel_stat { - unsigned long st_dev; - unsigned long st_ino; - unsigned long st_nlink; - unsigned int st_mode; - unsigned int st_uid; - unsigned int st_gid; - unsigned int __pad1; - unsigned long st_rdev; - unsigned long st_size; - unsigned long st_atime_; - unsigned long st_atime_nsec_; - unsigned long st_mtime_; - unsigned long st_mtime_nsec_; - unsigned long st_ctime_; - unsigned long st_ctime_nsec_; - unsigned long st_blksize; - long st_blocks; - unsigned long __unused[3]; + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned int st_mode; + unsigned int st_uid; + unsigned int st_gid; + unsigned int __pad1; + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_atime_; + unsigned long st_atime_nsec_; + unsigned long st_mtime_; + unsigned long st_mtime_nsec_; + unsigned long st_ctime_; + unsigned long st_ctime_nsec_; + unsigned long st_blksize; + long st_blocks; + unsigned long __unused[3]; }; #elif defined(__s390__) struct kernel_stat { - unsigned short st_dev; - unsigned short __pad1; - unsigned long st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - unsigned short st_rdev; - unsigned short __pad2; - unsigned long st_size; - unsigned long st_blksize; - unsigned long st_blocks; - unsigned long st_atime_; - unsigned long st_atime_nsec_; - unsigned 
long st_mtime_; - unsigned long st_mtime_nsec_; - unsigned long st_ctime_; - unsigned long st_ctime_nsec_; - unsigned long __unused4; - unsigned long __unused5; + unsigned short st_dev; + unsigned short __pad1; + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + unsigned short __pad2; + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime_; + unsigned long st_atime_nsec_; + unsigned long st_mtime_; + unsigned long st_mtime_nsec_; + unsigned long st_ctime_; + unsigned long st_ctime_nsec_; + unsigned long __unused4; + unsigned long __unused5; }; #endif - /* Definitions missing from the standard header files */ #ifndef O_DIRECTORY #if defined(__arm__) -#define O_DIRECTORY 0040000 +#define O_DIRECTORY 0040000 #else -#define O_DIRECTORY 0200000 +#define O_DIRECTORY 0200000 #endif #endif #ifndef PR_GET_DUMPABLE -#define PR_GET_DUMPABLE 3 +#define PR_GET_DUMPABLE 3 #endif #ifndef PR_SET_DUMPABLE -#define PR_SET_DUMPABLE 4 +#define PR_SET_DUMPABLE 4 #endif #ifndef AT_FDCWD -#define AT_FDCWD (-100) +#define AT_FDCWD (-100) #endif #ifndef AT_SYMLINK_NOFOLLOW -#define AT_SYMLINK_NOFOLLOW 0x100 +#define AT_SYMLINK_NOFOLLOW 0x100 #endif #ifndef AT_REMOVEDIR -#define AT_REMOVEDIR 0x200 +#define AT_REMOVEDIR 0x200 #endif #ifndef MREMAP_FIXED -#define MREMAP_FIXED 2 +#define MREMAP_FIXED 2 #endif #ifndef SA_RESTORER -#define SA_RESTORER 0x04000000 +#define SA_RESTORER 0x04000000 #endif #if defined(__i386__) #ifndef __NR_rt_sigaction -#define __NR_rt_sigaction 174 -#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 #endif #ifndef __NR_stat64 -#define __NR_stat64 195 +#define __NR_stat64 195 #endif #ifndef __NR_fstat64 -#define __NR_fstat64 197 +#define __NR_fstat64 197 #endif #ifndef __NR_getdents64 -#define __NR_getdents64 220 +#define __NR_getdents64 220 #endif #ifndef __NR_gettid 
-#define __NR_gettid 224 +#define __NR_gettid 224 #endif #ifndef __NR_futex -#define __NR_futex 240 +#define __NR_futex 240 #endif #ifndef __NR_openat -#define __NR_openat 295 +#define __NR_openat 295 #endif #ifndef __NR_getcpu -#define __NR_getcpu 318 +#define __NR_getcpu 318 #endif /* End of i386 definitions */ #elif defined(__arm__) #ifndef __syscall #if defined(__thumb__) || defined(__ARM_EABI__) #define __SYS_REG(name) register long __sysreg __asm__("r6") = __NR_##name; -#define __SYS_REG_LIST(regs...) [sysreg] "r" (__sysreg) , ##regs +#define __SYS_REG_LIST(regs...) [sysreg] "r"(__sysreg), ##regs #define __syscall(name) "swi\t0" -#define __syscall_safe(name) \ - "push {r7}\n" \ +#define __syscall_safe(name) \ + "push {r7}\n" \ "mov r7,%[sysreg]\n" \ __syscall(name)"\n" \ "pop {r7}" @@ -620,435 +616,433 @@ struct kernel_stat { #endif #endif #ifndef __NR_rt_sigaction -#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) -#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) +#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) +#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) #endif #ifndef __NR_stat64 -#define __NR_stat64 (__NR_SYSCALL_BASE + 195) +#define __NR_stat64 (__NR_SYSCALL_BASE + 195) #endif #ifndef __NR_fstat64 -#define __NR_fstat64 (__NR_SYSCALL_BASE + 197) +#define __NR_fstat64 (__NR_SYSCALL_BASE + 197) #endif #ifndef __NR_getdents64 -#define __NR_getdents64 (__NR_SYSCALL_BASE + 217) +#define __NR_getdents64 (__NR_SYSCALL_BASE + 217) #endif #ifndef __NR_gettid -#define __NR_gettid (__NR_SYSCALL_BASE + 224) +#define __NR_gettid (__NR_SYSCALL_BASE + 224) #endif #ifndef __NR_futex -#define __NR_futex (__NR_SYSCALL_BASE + 240) +#define __NR_futex (__NR_SYSCALL_BASE + 240) #endif /* End of ARM definitions */ #elif defined(__x86_64__) #ifndef __NR_gettid -#define __NR_gettid 186 +#define __NR_gettid 186 #endif #ifndef __NR_futex -#define __NR_futex 202 +#define __NR_futex 202 #endif #ifndef __NR_getdents64 -#define __NR_getdents64 217 +#define 
__NR_getdents64 217 #endif #ifndef __NR_openat -#define __NR_openat 257 +#define __NR_openat 257 #endif /* End of x86-64 definitions */ #elif defined(__mips__) #if _MIPS_SIM == _MIPS_SIM_ABI32 #ifndef __NR_rt_sigaction -#define __NR_rt_sigaction (__NR_Linux + 194) -#define __NR_rt_sigprocmask (__NR_Linux + 195) +#define __NR_rt_sigaction (__NR_Linux + 194) +#define __NR_rt_sigprocmask (__NR_Linux + 195) #endif #ifndef __NR_stat64 -#define __NR_stat64 (__NR_Linux + 213) +#define __NR_stat64 (__NR_Linux + 213) #endif #ifndef __NR_fstat64 -#define __NR_fstat64 (__NR_Linux + 215) +#define __NR_fstat64 (__NR_Linux + 215) #endif #ifndef __NR_getdents64 -#define __NR_getdents64 (__NR_Linux + 219) +#define __NR_getdents64 (__NR_Linux + 219) #endif #ifndef __NR_gettid -#define __NR_gettid (__NR_Linux + 222) +#define __NR_gettid (__NR_Linux + 222) #endif #ifndef __NR_futex -#define __NR_futex (__NR_Linux + 238) +#define __NR_futex (__NR_Linux + 238) #endif #ifndef __NR_openat -#define __NR_openat (__NR_Linux + 288) +#define __NR_openat (__NR_Linux + 288) #endif #ifndef __NR_fstatat -#define __NR_fstatat (__NR_Linux + 293) +#define __NR_fstatat (__NR_Linux + 293) #endif #ifndef __NR_getcpu -#define __NR_getcpu (__NR_Linux + 312) +#define __NR_getcpu (__NR_Linux + 312) #endif /* End of MIPS (old 32bit API) definitions */ #elif (_MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32) #ifndef __NR_gettid -#define __NR_gettid (__NR_Linux + 178) +#define __NR_gettid (__NR_Linux + 178) #endif #ifndef __NR_futex -#define __NR_futex (__NR_Linux + 194) +#define __NR_futex (__NR_Linux + 194) #endif #ifndef __NR_openat -#define __NR_openat (__NR_Linux + 247) +#define __NR_openat (__NR_Linux + 247) #endif #ifndef __NR_fstatat -#define __NR_fstatat (__NR_Linux + 252) +#define __NR_fstatat (__NR_Linux + 252) #endif #ifndef __NR_getcpu -#define __NR_getcpu (__NR_Linux + 271) +#define __NR_getcpu (__NR_Linux + 271) #endif /* End of MIPS (64bit API) definitions */ #else #ifndef 
__NR_gettid -#define __NR_gettid (__NR_Linux + 178) +#define __NR_gettid (__NR_Linux + 178) #endif #ifndef __NR_futex -#define __NR_futex (__NR_Linux + 194) +#define __NR_futex (__NR_Linux + 194) #endif #ifndef __NR_openat -#define __NR_openat (__NR_Linux + 251) +#define __NR_openat (__NR_Linux + 251) #endif #ifndef __NR_fstatat -#define __NR_fstatat (__NR_Linux + 256) +#define __NR_fstatat (__NR_Linux + 256) #endif #ifndef __NR_getcpu -#define __NR_getcpu (__NR_Linux + 275) +#define __NR_getcpu (__NR_Linux + 275) #endif /* End of MIPS (new 32bit API) definitions */ #endif /* End of MIPS definitions */ #elif defined(__PPC__) #ifndef __NR_rt_sigaction -#define __NR_rt_sigaction 173 -#define __NR_rt_sigprocmask 174 +#define __NR_rt_sigaction 173 +#define __NR_rt_sigprocmask 174 #endif #ifndef __NR_stat64 -#define __NR_stat64 195 +#define __NR_stat64 195 #endif #ifndef __NR_fstat64 -#define __NR_fstat64 197 +#define __NR_fstat64 197 #endif #ifndef __NR_socket -#define __NR_socket 198 +#define __NR_socket 198 #endif #ifndef __NR_getdents64 -#define __NR_getdents64 202 +#define __NR_getdents64 202 #endif #ifndef __NR_gettid -#define __NR_gettid 207 +#define __NR_gettid 207 #endif #ifndef __NR_futex -#define __NR_futex 221 +#define __NR_futex 221 #endif #ifndef __NR_openat -#define __NR_openat 286 +#define __NR_openat 286 #endif #ifndef __NR_getcpu -#define __NR_getcpu 302 +#define __NR_getcpu 302 #endif /* End of powerpc defininitions */ #elif defined(__aarch64__) #ifndef __NR_fstatat -#define __NR_fstatat 79 +#define __NR_fstatat 79 #endif /* End of aarch64 defininitions */ #elif defined(__s390__) #ifndef __NR_quotactl -#define __NR_quotactl 131 +#define __NR_quotactl 131 #endif #ifndef __NR_rt_sigreturn -#define __NR_rt_sigreturn 173 +#define __NR_rt_sigreturn 173 #endif #ifndef __NR_rt_sigaction -#define __NR_rt_sigaction 174 +#define __NR_rt_sigaction 174 #endif #ifndef __NR_rt_sigprocmask -#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigprocmask 175 #endif 
#ifndef __NR_rt_sigpending -#define __NR_rt_sigpending 176 +#define __NR_rt_sigpending 176 #endif #ifndef __NR_rt_sigsuspend -#define __NR_rt_sigsuspend 179 +#define __NR_rt_sigsuspend 179 #endif #ifndef __NR_pread64 -#define __NR_pread64 180 +#define __NR_pread64 180 #endif #ifndef __NR_pwrite64 -#define __NR_pwrite64 181 +#define __NR_pwrite64 181 #endif #ifndef __NR_getdents64 -#define __NR_getdents64 220 +#define __NR_getdents64 220 #endif #ifndef __NR_readahead -#define __NR_readahead 222 +#define __NR_readahead 222 #endif #ifndef __NR_setxattr -#define __NR_setxattr 224 +#define __NR_setxattr 224 #endif #ifndef __NR_lsetxattr -#define __NR_lsetxattr 225 +#define __NR_lsetxattr 225 #endif #ifndef __NR_getxattr -#define __NR_getxattr 227 +#define __NR_getxattr 227 #endif #ifndef __NR_lgetxattr -#define __NR_lgetxattr 228 +#define __NR_lgetxattr 228 #endif #ifndef __NR_listxattr -#define __NR_listxattr 230 +#define __NR_listxattr 230 #endif #ifndef __NR_llistxattr -#define __NR_llistxattr 231 +#define __NR_llistxattr 231 #endif #ifndef __NR_gettid -#define __NR_gettid 236 +#define __NR_gettid 236 #endif #ifndef __NR_tkill -#define __NR_tkill 237 +#define __NR_tkill 237 #endif #ifndef __NR_futex -#define __NR_futex 238 +#define __NR_futex 238 #endif #ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity 239 +#define __NR_sched_setaffinity 239 #endif #ifndef __NR_sched_getaffinity -#define __NR_sched_getaffinity 240 +#define __NR_sched_getaffinity 240 #endif #ifndef __NR_set_tid_address -#define __NR_set_tid_address 252 +#define __NR_set_tid_address 252 #endif #ifndef __NR_clock_gettime -#define __NR_clock_gettime 260 +#define __NR_clock_gettime 260 #endif #ifndef __NR_clock_getres -#define __NR_clock_getres 261 +#define __NR_clock_getres 261 #endif #ifndef __NR_statfs64 -#define __NR_statfs64 265 +#define __NR_statfs64 265 #endif #ifndef __NR_fstatfs64 -#define __NR_fstatfs64 266 +#define __NR_fstatfs64 266 #endif #ifndef __NR_ioprio_set -#define 
__NR_ioprio_set 282 +#define __NR_ioprio_set 282 #endif #ifndef __NR_ioprio_get -#define __NR_ioprio_get 283 +#define __NR_ioprio_get 283 #endif #ifndef __NR_openat -#define __NR_openat 288 +#define __NR_openat 288 #endif #ifndef __NR_unlinkat -#define __NR_unlinkat 294 +#define __NR_unlinkat 294 #endif #ifndef __NR_move_pages -#define __NR_move_pages 310 +#define __NR_move_pages 310 #endif #ifndef __NR_getcpu -#define __NR_getcpu 311 +#define __NR_getcpu 311 #endif #ifndef __NR_fallocate -#define __NR_fallocate 314 +#define __NR_fallocate 314 #endif /* Some syscalls are named/numbered differently between s390 and s390x. */ #ifdef __s390x__ -# ifndef __NR_getrlimit -# define __NR_getrlimit 191 -# endif -# ifndef __NR_setresuid -# define __NR_setresuid 208 -# endif -# ifndef __NR_getresuid -# define __NR_getresuid 209 -# endif -# ifndef __NR_setresgid -# define __NR_setresgid 210 -# endif -# ifndef __NR_getresgid -# define __NR_getresgid 211 -# endif -# ifndef __NR_setfsuid -# define __NR_setfsuid 215 -# endif -# ifndef __NR_setfsgid -# define __NR_setfsgid 216 -# endif -# ifndef __NR_fadvise64 -# define __NR_fadvise64 253 -# endif -# ifndef __NR_newfstatat -# define __NR_newfstatat 293 -# endif +#ifndef __NR_getrlimit +#define __NR_getrlimit 191 +#endif +#ifndef __NR_setresuid +#define __NR_setresuid 208 +#endif +#ifndef __NR_getresuid +#define __NR_getresuid 209 +#endif +#ifndef __NR_setresgid +#define __NR_setresgid 210 +#endif +#ifndef __NR_getresgid +#define __NR_getresgid 211 +#endif +#ifndef __NR_setfsuid +#define __NR_setfsuid 215 +#endif +#ifndef __NR_setfsgid +#define __NR_setfsgid 216 +#endif +#ifndef __NR_fadvise64 +#define __NR_fadvise64 253 +#endif +#ifndef __NR_newfstatat +#define __NR_newfstatat 293 +#endif #else /* __s390x__ */ -# ifndef __NR_getrlimit -# define __NR_getrlimit 76 -# endif -# ifndef __NR_setfsuid -# define __NR_setfsuid 138 -# endif -# ifndef __NR_setfsgid -# define __NR_setfsgid 139 -# endif -# ifndef __NR_setresuid -# define 
__NR_setresuid 164 -# endif -# ifndef __NR_getresuid -# define __NR_getresuid 165 -# endif -# ifndef __NR_setresgid -# define __NR_setresgid 170 -# endif -# ifndef __NR_getresgid -# define __NR_getresgid 171 -# endif -# ifndef __NR_ugetrlimit -# define __NR_ugetrlimit 191 -# endif -# ifndef __NR_mmap2 -# define __NR_mmap2 192 -# endif -# ifndef __NR_setresuid32 -# define __NR_setresuid32 208 -# endif -# ifndef __NR_getresuid32 -# define __NR_getresuid32 209 -# endif -# ifndef __NR_setresgid32 -# define __NR_setresgid32 210 -# endif -# ifndef __NR_getresgid32 -# define __NR_getresgid32 211 -# endif -# ifndef __NR_setfsuid32 -# define __NR_setfsuid32 215 -# endif -# ifndef __NR_setfsgid32 -# define __NR_setfsgid32 216 -# endif -# ifndef __NR_fadvise64_64 -# define __NR_fadvise64_64 264 -# endif -# ifndef __NR_fstatat64 -# define __NR_fstatat64 293 -# endif +#ifndef __NR_getrlimit +#define __NR_getrlimit 76 +#endif +#ifndef __NR_setfsuid +#define __NR_setfsuid 138 +#endif +#ifndef __NR_setfsgid +#define __NR_setfsgid 139 +#endif +#ifndef __NR_setresuid +#define __NR_setresuid 164 +#endif +#ifndef __NR_getresuid +#define __NR_getresuid 165 +#endif +#ifndef __NR_setresgid +#define __NR_setresgid 170 +#endif +#ifndef __NR_getresgid +#define __NR_getresgid 171 +#endif +#ifndef __NR_ugetrlimit +#define __NR_ugetrlimit 191 +#endif +#ifndef __NR_mmap2 +#define __NR_mmap2 192 +#endif +#ifndef __NR_setresuid32 +#define __NR_setresuid32 208 +#endif +#ifndef __NR_getresuid32 +#define __NR_getresuid32 209 +#endif +#ifndef __NR_setresgid32 +#define __NR_setresgid32 210 +#endif +#ifndef __NR_getresgid32 +#define __NR_getresgid32 211 +#endif +#ifndef __NR_setfsuid32 +#define __NR_setfsuid32 215 +#endif +#ifndef __NR_setfsgid32 +#define __NR_setfsgid32 216 +#endif +#ifndef __NR_fadvise64_64 +#define __NR_fadvise64_64 264 +#endif +#ifndef __NR_fstatat64 +#define __NR_fstatat64 293 +#endif #endif /* __s390__ */ /* End of s390/s390x definitions */ #endif - /* After forking, we must make 
sure to only call system calls. */ #if __BOUNDED_POINTERS__ - #error "Need to port invocations of syscalls for bounded ptrs" +#error "Need to port invocations of syscalls for bounded ptrs" #else - /* The core dumper and the thread lister get executed after threads +/* The core dumper and the thread lister get executed after threads * have been suspended. As a consequence, we cannot call any functions * that acquire locks. Unfortunately, libc wraps most system calls * (e.g. in order to implement pthread_atfork, and to make calls * cancellable), which means we cannot call these functions. Instead, * we have to call syscall() directly. */ - #undef LSS_ERRNO - #ifdef SYS_ERRNO - /* Allow the including file to override the location of errno. This can +#undef LSS_ERRNO +#ifdef SYS_ERRNO +/* Allow the including file to override the location of errno. This can * be useful when using clone() with the CLONE_VM option. */ - #define LSS_ERRNO SYS_ERRNO - #else - #define LSS_ERRNO errno - #endif - - #undef LSS_INLINE - #ifdef SYS_INLINE - #define LSS_INLINE SYS_INLINE - #else - #define LSS_INLINE static inline - #endif - - /* Allow the including file to override the prefix used for all new +#define LSS_ERRNO SYS_ERRNO +#else +#define LSS_ERRNO errno +#endif + +#undef LSS_INLINE +#ifdef SYS_INLINE +#define LSS_INLINE SYS_INLINE +#else +#define LSS_INLINE static inline +#endif + +/* Allow the including file to override the prefix used for all new * system calls. By default, it will be set to "sys_". 
*/ - #undef LSS_NAME - #ifndef SYS_PREFIX - #define LSS_NAME(name) sys_##name - #elif SYS_PREFIX < 0 - #define LSS_NAME(name) name - #elif SYS_PREFIX == 0 - #define LSS_NAME(name) sys0_##name - #elif SYS_PREFIX == 1 - #define LSS_NAME(name) sys1_##name - #elif SYS_PREFIX == 2 - #define LSS_NAME(name) sys2_##name - #elif SYS_PREFIX == 3 - #define LSS_NAME(name) sys3_##name - #elif SYS_PREFIX == 4 - #define LSS_NAME(name) sys4_##name - #elif SYS_PREFIX == 5 - #define LSS_NAME(name) sys5_##name - #elif SYS_PREFIX == 6 - #define LSS_NAME(name) sys6_##name - #elif SYS_PREFIX == 7 - #define LSS_NAME(name) sys7_##name - #elif SYS_PREFIX == 8 - #define LSS_NAME(name) sys8_##name - #elif SYS_PREFIX == 9 - #define LSS_NAME(name) sys9_##name - #endif - - #undef LSS_RETURN - #if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ - defined(__aarch64__) || defined(__s390__)) - /* Failing system calls return a negative result in the range of +#undef LSS_NAME +#ifndef SYS_PREFIX +#define LSS_NAME(name) sys_##name +#elif SYS_PREFIX < 0 +#define LSS_NAME(name) name +#elif SYS_PREFIX == 0 +#define LSS_NAME(name) sys0_##name +#elif SYS_PREFIX == 1 +#define LSS_NAME(name) sys1_##name +#elif SYS_PREFIX == 2 +#define LSS_NAME(name) sys2_##name +#elif SYS_PREFIX == 3 +#define LSS_NAME(name) sys3_##name +#elif SYS_PREFIX == 4 +#define LSS_NAME(name) sys4_##name +#elif SYS_PREFIX == 5 +#define LSS_NAME(name) sys5_##name +#elif SYS_PREFIX == 6 +#define LSS_NAME(name) sys6_##name +#elif SYS_PREFIX == 7 +#define LSS_NAME(name) sys7_##name +#elif SYS_PREFIX == 8 +#define LSS_NAME(name) sys8_##name +#elif SYS_PREFIX == 9 +#define LSS_NAME(name) sys9_##name +#endif + +#undef LSS_RETURN +#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \ + defined(__s390__)) +/* Failing system calls return a negative result in the range of * -1..-4095. These are "errno" values with the sign inverted. 
*/ - #define LSS_RETURN(type, res) \ - do { \ - if ((unsigned long)(res) >= (unsigned long)(-4095)) { \ - LSS_ERRNO = -(res); \ - res = -1; \ - } \ - return (type) (res); \ +#define LSS_RETURN(type, res) \ + do { \ + if ((unsigned long)(res) >= (unsigned long)(-4095)) { \ + LSS_ERRNO = -(res); \ + res = -1; \ + } \ + return (type)(res); \ } while (0) - #elif defined(__mips__) - /* On MIPS, failing system calls return -1, and set errno in a +#elif defined(__mips__) +/* On MIPS, failing system calls return -1, and set errno in a * separate CPU register. */ - #define LSS_RETURN(type, res, err) \ - do { \ - if (err) { \ - LSS_ERRNO = (res); \ - res = -1; \ - } \ - return (type) (res); \ +#define LSS_RETURN(type, res, err) \ + do { \ + if (err) { \ + LSS_ERRNO = (res); \ + res = -1; \ + } \ + return (type)(res); \ } while (0) - #elif defined(__PPC__) - /* On PPC, failing system calls return -1, and set errno in a +#elif defined(__PPC__) +/* On PPC, failing system calls return -1, and set errno in a * separate CPU register. See linux/unistd.h. */ - #define LSS_RETURN(type, res, err) \ - do { \ - if (err & 0x10000000 ) { \ - LSS_ERRNO = (res); \ - res = -1; \ - } \ - return (type) (res); \ - } while (0) - #endif - #if defined(__i386__) - #if defined(NO_FRAME_POINTER) && (100 * __GNUC__ + __GNUC_MINOR__ >= 404) - /* This only works for GCC-4.4 and above -- the first version to use +#define LSS_RETURN(type, res, err) \ + do { \ + if (err & 0x10000000) { \ + LSS_ERRNO = (res); \ + res = -1; \ + } \ + return (type)(res); \ + } while (0) +#endif +#if defined(__i386__) +#if defined(NO_FRAME_POINTER) && (100 * __GNUC__ + __GNUC_MINOR__ >= 404) +/* This only works for GCC-4.4 and above -- the first version to use .cfi directives for dwarf unwind info. 
*/ - #define CFI_ADJUST_CFA_OFFSET(adjust) \ - ".cfi_adjust_cfa_offset " #adjust "\n" - #else - #define CFI_ADJUST_CFA_OFFSET(adjust) /**/ - #endif +#define CFI_ADJUST_CFA_OFFSET(adjust) ".cfi_adjust_cfa_offset " #adjust "\n" +#else +#define CFI_ADJUST_CFA_OFFSET(adjust) /**/ +#endif - /* In PIC mode (e.g. when building shared libraries), gcc for i386 +/* In PIC mode (e.g. when building shared libraries), gcc for i386 * reserves ebx. Unfortunately, most distribution ship with implementations * of _syscallX() which clobber ebx. * Also, most definitions of _syscallX() neglect to mark "memory" as being @@ -1056,129 +1050,120 @@ struct kernel_stat { * at optimizing across __asm__ calls. * So, we just have to redefine all of the _syscallX() macros. */ - #undef LSS_BODY - #define LSS_BODY(type,args...) \ - long __res; \ - __asm__ __volatile__("push %%ebx\n" \ +#undef LSS_BODY +#define LSS_BODY(type, args...) \ + long __res; \ + __asm__ __volatile__("push %%ebx\n" \ CFI_ADJUST_CFA_OFFSET(4) \ "movl %2,%%ebx\n" \ "int $0x80\n" \ "pop %%ebx\n" \ CFI_ADJUST_CFA_OFFSET(-4) \ args \ - : "esp", "memory"); \ - LSS_RETURN(type,__res) - #undef _syscall0 - #define _syscall0(type,name) \ - type LSS_NAME(name)(void) { \ - long __res; \ - __asm__ volatile("int $0x80" \ - : "=a" (__res) \ - : "0" (__NR_##name) \ - : "memory"); \ - LSS_RETURN(type,__res); \ - } - #undef _syscall1 - #define _syscall1(type,name,type1,arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name), "ri" ((long)(arg1))); \ - } - #undef _syscall2 - #define _syscall2(type,name,type1,arg1,type2,arg2) \ - type LSS_NAME(name)(type1 arg1,type2 arg2) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name),"ri" ((long)(arg1)), "c" ((long)(arg2))); \ - } - #undef _syscall3 - #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ - type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name), "ri" 
((long)(arg1)), "c" ((long)(arg2)), \ - "d" ((long)(arg3))); \ - } - #undef _syscall4 - #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ - "d" ((long)(arg3)),"S" ((long)(arg4))); \ - } - #undef _syscall5 - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - long __res; \ - __asm__ __volatile__("push %%ebx\n" \ - "movl %2,%%ebx\n" \ - "movl %1,%%eax\n" \ - "int $0x80\n" \ - "pop %%ebx" \ - : "=a" (__res) \ - : "i" (__NR_##name), "ri" ((long)(arg1)), \ - "c" ((long)(arg2)), "d" ((long)(arg3)), \ - "S" ((long)(arg4)), "D" ((long)(arg5)) \ - : "esp", "memory"); \ - LSS_RETURN(type,__res); \ - } - #undef _syscall6 - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - long __res; \ - struct { long __a1; long __a6; } __s = { (long)arg1, (long) arg6 }; \ - __asm__ __volatile__("push %%ebp\n" \ - "push %%ebx\n" \ - "movl 4(%2),%%ebp\n" \ - "movl 0(%2), %%ebx\n" \ - "movl %1,%%eax\n" \ - "int $0x80\n" \ - "pop %%ebx\n" \ - "pop %%ebp" \ - : "=a" (__res) \ - : "i" (__NR_##name), "0" ((long)(&__s)), \ - "c" ((long)(arg2)), "d" ((long)(arg3)), \ - "S" ((long)(arg4)), "D" ((long)(arg5)) \ - : "esp", "memory"); \ - LSS_RETURN(type,__res); \ - } - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long __res; - __asm__ __volatile__(/* if (fn == NULL) + : "esp", "memory"); \ + LSS_RETURN(type, __res) +#undef _syscall0 +#define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + long __res; \ + 
__asm__ volatile("int $0x80" : "=a"(__res) : "0"(__NR_##name) : "memory"); \ + LSS_RETURN(type, __res); \ + } +#undef _syscall1 +#define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(type, : "=a"(__res) : "0"(__NR_##name), "ri"((long)(arg1))); \ + } +#undef _syscall2 +#define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_BODY(type, : "=a"(__res) : "0"(__NR_##name), "ri"((long)(arg1)), "c"((long)(arg2))); \ + } +#undef _syscall3 +#define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_BODY(type, \ + : "=a"(__res) \ + : "0"(__NR_##name), "ri"((long)(arg1)), "c"((long)(arg2)), "d"((long)(arg3))); \ + } +#undef _syscall4 +#define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(type, \ + : "=a"(__res) \ + : "0"(__NR_##name), "ri"((long)(arg1)), "c"((long)(arg2)), "d"((long)(arg3)), \ + "S"((long)(arg4))); \ + } +#undef _syscall5 +#define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) { \ + long __res; \ + __asm__ __volatile__( \ + "push %%ebx\n" \ + "movl %2,%%ebx\n" \ + "movl %1,%%eax\n" \ + "int $0x80\n" \ + "pop %%ebx" \ + : "=a"(__res) \ + : "i"(__NR_##name), "ri"((long)(arg1)), "c"((long)(arg2)), "d"((long)(arg3)), \ + "S"((long)(arg4)), "D"((long)(arg5)) \ + : "esp", "memory"); \ + LSS_RETURN(type, __res); \ + } +#undef _syscall6 +#define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, \ + type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) { \ + long __res; \ + struct { \ + long __a1; \ + long __a6; \ + } __s = {(long)arg1, (long)arg6}; \ + __asm__ 
__volatile__( \ + "push %%ebp\n" \ + "push %%ebx\n" \ + "movl 4(%2),%%ebp\n" \ + "movl 0(%2), %%ebx\n" \ + "movl %1,%%eax\n" \ + "int $0x80\n" \ + "pop %%ebx\n" \ + "pop %%ebp" \ + : "=a"(__res) \ + : "i"(__NR_##name), "0"((long)(&__s)), "c"((long)(arg2)), "d"((long)(arg3)), \ + "S"((long)(arg4)), "D"((long)(arg5)) \ + : "esp", "memory"); \ + LSS_RETURN(type, __res); \ + } +LSS_INLINE int LSS_NAME(clone)(int (*fn)(void*), void* child_stack, int flags, void* arg, + int* parent_tidptr, void* newtls, int* child_tidptr) { + long __res; + __asm__ __volatile__(/* if (fn == NULL) * return -EINVAL; */ - "movl %3,%%ecx\n" - "jecxz 1f\n" + "movl %3,%%ecx\n" + "jecxz 1f\n" - /* if (child_stack == NULL) + /* if (child_stack == NULL) * return -EINVAL; */ - "movl %4,%%ecx\n" - "jecxz 1f\n" + "movl %4,%%ecx\n" + "jecxz 1f\n" - /* Set up alignment of the child stack: + /* Set up alignment of the child stack: * child_stack = (child_stack & ~0xF) - 20; */ - "andl $-16,%%ecx\n" - "subl $20,%%ecx\n" + "andl $-16,%%ecx\n" + "subl $20,%%ecx\n" - /* Push "arg" and "fn" onto the stack that will be + /* Push "arg" and "fn" onto the stack that will be * used by the child. */ - "movl %6,%%eax\n" - "movl %%eax,4(%%ecx)\n" - "movl %3,%%eax\n" - "movl %%eax,(%%ecx)\n" + "movl %6,%%eax\n" + "movl %%eax,4(%%ecx)\n" + "movl %3,%%eax\n" + "movl %%eax,(%%ecx)\n" - /* %eax = syscall(%eax = __NR_clone, + /* %eax = syscall(%eax = __NR_clone, * %ebx = flags, * %ecx = child_stack, * %edx = parent_tidptr, @@ -1187,234 +1172,220 @@ struct kernel_stat { * Also, make sure that %ebx gets preserved as it is * used in PIC mode. 
*/ - "movl %8,%%esi\n" - "movl %7,%%edx\n" - "movl %5,%%eax\n" - "movl %9,%%edi\n" - "pushl %%ebx\n" - "movl %%eax,%%ebx\n" - "movl %2,%%eax\n" - "int $0x80\n" - - /* In the parent: restore %ebx + "movl %8,%%esi\n" + "movl %7,%%edx\n" + "movl %5,%%eax\n" + "movl %9,%%edi\n" + "pushl %%ebx\n" + "movl %%eax,%%ebx\n" + "movl %2,%%eax\n" + "int $0x80\n" + + /* In the parent: restore %ebx * In the child: move "fn" into %ebx */ - "popl %%ebx\n" + "popl %%ebx\n" - /* if (%eax != 0) + /* if (%eax != 0) * return %eax; */ - "test %%eax,%%eax\n" - "jnz 1f\n" + "test %%eax,%%eax\n" + "jnz 1f\n" - /* In the child, now. Terminate frame pointer chain. + /* In the child, now. Terminate frame pointer chain. */ - "movl $0,%%ebp\n" + "movl $0,%%ebp\n" - /* Call "fn". "arg" is already on the stack. + /* Call "fn". "arg" is already on the stack. */ - "call *%%ebx\n" + "call *%%ebx\n" - /* Call _exit(%ebx). Unfortunately older versions + /* Call _exit(%ebx). Unfortunately older versions * of gcc restrict the number of arguments that can * be passed to asm(). So, we need to hard-code the * system call number. */ - "movl %%eax,%%ebx\n" - "movl $1,%%eax\n" - "int $0x80\n" + "movl %%eax,%%ebx\n" + "movl $1,%%eax\n" + "int $0x80\n" - /* Return to parent. + /* Return to parent. 
*/ "1:\n" - : "=a" (__res) - : "0"(-EINVAL), "i"(__NR_clone), - "m"(fn), "m"(child_stack), "m"(flags), "m"(arg), - "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr) - : "esp", "memory", "ecx", "edx", "esi", "edi"); - LSS_RETURN(int, __res); - } + : "=a"(__res) + : "0"(-EINVAL), "i"(__NR_clone), "m"(fn), "m"(child_stack), "m"(flags), + "m"(arg), "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr) + : "esp", "memory", "ecx", "edx", "esi", "edi"); + LSS_RETURN(int, __res); +} - LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { - /* On i386, the kernel does not know how to return from a signal +LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { + /* On i386, the kernel does not know how to return from a signal * handler. Instead, it relies on user space to provide a * restorer function that calls the {rt_,}sigreturn() system call. * Unfortunately, we cannot just reference the glibc version of this * function, as glibc goes out of its way to make it inaccessible. */ - void (*res)(void); - __asm__ __volatile__("call 2f\n" - "0:.align 16\n" - "1:movl %1,%%eax\n" - "int $0x80\n" - "2:popl %0\n" - "addl $(1b-0b),%0\n" - : "=a" (res) - : "i" (__NR_rt_sigreturn)); - return res; - } - LSS_INLINE void (*LSS_NAME(restore)(void))(void) { - /* On i386, the kernel does not know how to return from a signal + void (*res)(void); + __asm__ __volatile__( + "call 2f\n" + "0:.align 16\n" + "1:movl %1,%%eax\n" + "int $0x80\n" + "2:popl %0\n" + "addl $(1b-0b),%0\n" + : "=a"(res) + : "i"(__NR_rt_sigreturn)); + return res; +} +LSS_INLINE void (*LSS_NAME(restore)(void))(void) { + /* On i386, the kernel does not know how to return from a signal * handler. Instead, it relies on user space to provide a * restorer function that calls the {rt_,}sigreturn() system call. * Unfortunately, we cannot just reference the glibc version of this * function, as glibc goes out of its way to make it inaccessible. 
*/ - void (*res)(void); - __asm__ __volatile__("call 2f\n" - "0:.align 16\n" - "1:pop %%eax\n" - "movl %1,%%eax\n" - "int $0x80\n" - "2:popl %0\n" - "addl $(1b-0b),%0\n" - : "=a" (res) - : "i" (__NR_sigreturn)); - return res; - } - #elif defined(__x86_64__) - /* There are no known problems with any of the _syscallX() macros + void (*res)(void); + __asm__ __volatile__( + "call 2f\n" + "0:.align 16\n" + "1:pop %%eax\n" + "movl %1,%%eax\n" + "int $0x80\n" + "2:popl %0\n" + "addl $(1b-0b),%0\n" + : "=a"(res) + : "i"(__NR_sigreturn)); + return res; +} +#elif defined(__x86_64__) +/* There are no known problems with any of the _syscallX() macros * currently shipping for x86_64, but we still need to be able to define * our own version so that we can override the location of the errno * location (e.g. when using the clone() system call with the CLONE_VM * option). */ - #undef LSS_ENTRYPOINT - #define LSS_ENTRYPOINT "syscall\n" +#undef LSS_ENTRYPOINT +#define LSS_ENTRYPOINT "syscall\n" - /* The x32 ABI has 32 bit longs, but the syscall interface is 64 bit. +/* The x32 ABI has 32 bit longs, but the syscall interface is 64 bit. * We need to explicitly cast to an unsigned 64 bit type to avoid implicit * sign extension. We can't cast pointers directly because those are * 32 bits, and gcc will dump ugly warnings about casting from a pointer * to an integer of a different size. */ - #undef LSS_SYSCALL_ARG - #define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a)) - #undef _LSS_RETURN - #define _LSS_RETURN(type, res, cast) \ - do { \ - if ((uint64_t)(res) >= (uint64_t)(-4095)) { \ - LSS_ERRNO = -(res); \ - res = -1; \ - } \ - return (type)(cast)(res); \ - } while (0) - #undef LSS_RETURN - #define LSS_RETURN(type, res) _LSS_RETURN(type, res, uintptr_t) - - #undef _LSS_BODY - #define _LSS_BODY(nr, type, name, cast, ...) 
\ - long long __res; \ - __asm__ __volatile__(LSS_BODY_ASM##nr LSS_ENTRYPOINT \ - : "=a" (__res) \ - : "0" (__NR_##name) LSS_BODY_ARG##nr(__VA_ARGS__) \ - : LSS_BODY_CLOBBER##nr "r11", "rcx", "memory"); \ - _LSS_RETURN(type, __res, cast) - #undef LSS_BODY - #define LSS_BODY(nr, type, name, args...) \ - _LSS_BODY(nr, type, name, uintptr_t, ## args) - - #undef LSS_BODY_ASM0 - #undef LSS_BODY_ASM1 - #undef LSS_BODY_ASM2 - #undef LSS_BODY_ASM3 - #undef LSS_BODY_ASM4 - #undef LSS_BODY_ASM5 - #undef LSS_BODY_ASM6 - #define LSS_BODY_ASM0 - #define LSS_BODY_ASM1 LSS_BODY_ASM0 - #define LSS_BODY_ASM2 LSS_BODY_ASM1 - #define LSS_BODY_ASM3 LSS_BODY_ASM2 - #define LSS_BODY_ASM4 LSS_BODY_ASM3 "movq %5,%%r10;" - #define LSS_BODY_ASM5 LSS_BODY_ASM4 "movq %6,%%r8;" - #define LSS_BODY_ASM6 LSS_BODY_ASM5 "movq %7,%%r9;" - - #undef LSS_BODY_CLOBBER0 - #undef LSS_BODY_CLOBBER1 - #undef LSS_BODY_CLOBBER2 - #undef LSS_BODY_CLOBBER3 - #undef LSS_BODY_CLOBBER4 - #undef LSS_BODY_CLOBBER5 - #undef LSS_BODY_CLOBBER6 - #define LSS_BODY_CLOBBER0 - #define LSS_BODY_CLOBBER1 LSS_BODY_CLOBBER0 - #define LSS_BODY_CLOBBER2 LSS_BODY_CLOBBER1 - #define LSS_BODY_CLOBBER3 LSS_BODY_CLOBBER2 - #define LSS_BODY_CLOBBER4 LSS_BODY_CLOBBER3 "r10", - #define LSS_BODY_CLOBBER5 LSS_BODY_CLOBBER4 "r8", - #define LSS_BODY_CLOBBER6 LSS_BODY_CLOBBER5 "r9", - - #undef LSS_BODY_ARG0 - #undef LSS_BODY_ARG1 - #undef LSS_BODY_ARG2 - #undef LSS_BODY_ARG3 - #undef LSS_BODY_ARG4 - #undef LSS_BODY_ARG5 - #undef LSS_BODY_ARG6 - #define LSS_BODY_ARG0() - #define LSS_BODY_ARG1(arg1) \ - LSS_BODY_ARG0(), "D" (arg1) - #define LSS_BODY_ARG2(arg1, arg2) \ - LSS_BODY_ARG1(arg1), "S" (arg2) - #define LSS_BODY_ARG3(arg1, arg2, arg3) \ - LSS_BODY_ARG2(arg1, arg2), "d" (arg3) - #define LSS_BODY_ARG4(arg1, arg2, arg3, arg4) \ - LSS_BODY_ARG3(arg1, arg2, arg3), "r" (arg4) - #define LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5) \ - LSS_BODY_ARG4(arg1, arg2, arg3, arg4), "r" (arg5) - #define LSS_BODY_ARG6(arg1, arg2, arg3, arg4, arg5, arg6) 
\ - LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5), "r" (arg6) - - #undef _syscall0 - #define _syscall0(type,name) \ - type LSS_NAME(name)() { \ - LSS_BODY(0, type, name); \ - } - #undef _syscall1 - #define _syscall1(type,name,type1,arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_BODY(1, type, name, LSS_SYSCALL_ARG(arg1)); \ - } - #undef _syscall2 - #define _syscall2(type,name,type1,arg1,type2,arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_BODY(2, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2));\ - } - #undef _syscall3 - #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ +#undef LSS_SYSCALL_ARG +#define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a)) +#undef _LSS_RETURN +#define _LSS_RETURN(type, res, cast) \ + do { \ + if ((uint64_t)(res) >= (uint64_t)(-4095)) { \ + LSS_ERRNO = -(res); \ + res = -1; \ + } \ + return (type)(cast)(res); \ + } while (0) +#undef LSS_RETURN +#define LSS_RETURN(type, res) _LSS_RETURN(type, res, uintptr_t) + +#undef _LSS_BODY +#define _LSS_BODY(nr, type, name, cast, ...) \ + long long __res; \ + __asm__ __volatile__(LSS_BODY_ASM##nr LSS_ENTRYPOINT \ + : "=a"(__res) \ + : "0"(__NR_##name)LSS_BODY_ARG##nr(__VA_ARGS__) \ + : LSS_BODY_CLOBBER##nr "r11", "rcx", "memory"); \ + _LSS_RETURN(type, __res, cast) +#undef LSS_BODY +#define LSS_BODY(nr, type, name, args...) 
_LSS_BODY(nr, type, name, uintptr_t, ##args) + +#undef LSS_BODY_ASM0 +#undef LSS_BODY_ASM1 +#undef LSS_BODY_ASM2 +#undef LSS_BODY_ASM3 +#undef LSS_BODY_ASM4 +#undef LSS_BODY_ASM5 +#undef LSS_BODY_ASM6 +#define LSS_BODY_ASM0 +#define LSS_BODY_ASM1 LSS_BODY_ASM0 +#define LSS_BODY_ASM2 LSS_BODY_ASM1 +#define LSS_BODY_ASM3 LSS_BODY_ASM2 +#define LSS_BODY_ASM4 LSS_BODY_ASM3 "movq %5,%%r10;" +#define LSS_BODY_ASM5 LSS_BODY_ASM4 "movq %6,%%r8;" +#define LSS_BODY_ASM6 LSS_BODY_ASM5 "movq %7,%%r9;" + +#undef LSS_BODY_CLOBBER0 +#undef LSS_BODY_CLOBBER1 +#undef LSS_BODY_CLOBBER2 +#undef LSS_BODY_CLOBBER3 +#undef LSS_BODY_CLOBBER4 +#undef LSS_BODY_CLOBBER5 +#undef LSS_BODY_CLOBBER6 +#define LSS_BODY_CLOBBER0 +#define LSS_BODY_CLOBBER1 LSS_BODY_CLOBBER0 +#define LSS_BODY_CLOBBER2 LSS_BODY_CLOBBER1 +#define LSS_BODY_CLOBBER3 LSS_BODY_CLOBBER2 +#define LSS_BODY_CLOBBER4 LSS_BODY_CLOBBER3 "r10", +#define LSS_BODY_CLOBBER5 LSS_BODY_CLOBBER4 "r8", +#define LSS_BODY_CLOBBER6 LSS_BODY_CLOBBER5 "r9", + +#undef LSS_BODY_ARG0 +#undef LSS_BODY_ARG1 +#undef LSS_BODY_ARG2 +#undef LSS_BODY_ARG3 +#undef LSS_BODY_ARG4 +#undef LSS_BODY_ARG5 +#undef LSS_BODY_ARG6 +#define LSS_BODY_ARG0() +#define LSS_BODY_ARG1(arg1) LSS_BODY_ARG0(), "D"(arg1) +#define LSS_BODY_ARG2(arg1, arg2) LSS_BODY_ARG1(arg1), "S"(arg2) +#define LSS_BODY_ARG3(arg1, arg2, arg3) LSS_BODY_ARG2(arg1, arg2), "d"(arg3) +#define LSS_BODY_ARG4(arg1, arg2, arg3, arg4) LSS_BODY_ARG3(arg1, arg2, arg3), "r"(arg4) +#define LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5) LSS_BODY_ARG4(arg1, arg2, arg3, arg4), "r"(arg5) +#define LSS_BODY_ARG6(arg1, arg2, arg3, arg4, arg5, arg6) \ + LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5), "r"(arg6) + +#undef _syscall0 +#define _syscall0(type, name) \ + type LSS_NAME(name)() { LSS_BODY(0, type, name); } +#undef _syscall1 +#define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { LSS_BODY(1, type, name, LSS_SYSCALL_ARG(arg1)); } +#undef _syscall2 +#define _syscall2(type, name, type1, 
arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_BODY(2, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2)); \ + } +#undef _syscall3 +#define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ LSS_BODY(3, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ - LSS_SYSCALL_ARG(arg3)); \ - } - #undef _syscall4 - #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_BODY(4, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ - LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4));\ - } - #undef _syscall5 - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_BODY(5, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ - LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \ - LSS_SYSCALL_ARG(arg5)); \ - } - #undef _syscall6 - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - LSS_BODY(6, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ - LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \ - LSS_SYSCALL_ARG(arg5), LSS_SYSCALL_ARG(arg6));\ - } - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long long __res; - { + LSS_SYSCALL_ARG(arg3)); \ + } +#undef _syscall4 +#define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(4, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4)); \ + } +#undef _syscall5 
+#define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) { \ + LSS_BODY(5, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), LSS_SYSCALL_ARG(arg5)); \ + } +#undef _syscall6 +#define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, \ + type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) { \ + LSS_BODY(6, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), LSS_SYSCALL_ARG(arg5), \ + LSS_SYSCALL_ARG(arg6)); \ + } +LSS_INLINE int LSS_NAME(clone)(int (*fn)(void*), void* child_stack, int flags, void* arg, + int* parent_tidptr, void* newtls, int* child_tidptr) { + long long __res; + { __asm__ __volatile__(/* if (fn == NULL) * return -EINVAL; */ @@ -1475,155 +1446,144 @@ struct kernel_stat { /* Return to parent. */ - "1:\n" - : "=a" (__res) + "1:\n" + : "=a"(__res) : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), - "r"(LSS_SYSCALL_ARG(fn)), - "S"(LSS_SYSCALL_ARG(child_stack)), - "D"(LSS_SYSCALL_ARG(flags)), - "r"(LSS_SYSCALL_ARG(arg)), - "d"(LSS_SYSCALL_ARG(parent_tidptr)), - "r"(LSS_SYSCALL_ARG(newtls)), + "r"(LSS_SYSCALL_ARG(fn)), "S"(LSS_SYSCALL_ARG(child_stack)), + "D"(LSS_SYSCALL_ARG(flags)), "r"(LSS_SYSCALL_ARG(arg)), + "d"(LSS_SYSCALL_ARG(parent_tidptr)), "r"(LSS_SYSCALL_ARG(newtls)), "r"(LSS_SYSCALL_ARG(child_tidptr)) : "rsp", "memory", "r8", "r10", "r11", "rcx"); - } - LSS_RETURN(int, __res); } + LSS_RETURN(int, __res); +} - LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { - /* On x86-64, the kernel does not know how to return from +LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { + /* On x86-64, the kernel does not know how to return from * a signal handler. 
Instead, it relies on user space to provide a * restorer function that calls the rt_sigreturn() system call. * Unfortunately, we cannot just reference the glibc version of this * function, as glibc goes out of its way to make it inaccessible. */ - long long res; - __asm__ __volatile__("call 2f\n" - "0:.align 16\n" - "1:movq %1,%%rax\n" - "syscall\n" - "2:popq %0\n" - "addq $(1b-0b),%0\n" - : "=a" (res) - : "i" (__NR_rt_sigreturn)); - return (void (*)(void))(uintptr_t)res; - } - #elif defined(__arm__) - /* Most definitions of _syscallX() neglect to mark "memory" as being + long long res; + __asm__ __volatile__( + "call 2f\n" + "0:.align 16\n" + "1:movq %1,%%rax\n" + "syscall\n" + "2:popq %0\n" + "addq $(1b-0b),%0\n" + : "=a"(res) + : "i"(__NR_rt_sigreturn)); + return (void (*)(void))(uintptr_t)res; +} +#elif defined(__arm__) +/* Most definitions of _syscallX() neglect to mark "memory" as being * clobbered. This causes problems with compilers, that do a better job * at optimizing across __asm__ calls. * So, we just have to redefine all fo the _syscallX() macros. */ - #undef LSS_REG - #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a +#undef LSS_REG +#define LSS_REG(r, a) register long __r##r __asm__("r" #r) = (long)a - /* r0..r3 are scratch registers and not preserved across function +/* r0..r3 are scratch registers and not preserved across function * calls. We need to first evaluate the first 4 syscall arguments * and store them on stack. They must be loaded into r0..r3 after * all function calls to avoid r0..r3 being clobbered. */ - #undef LSS_SAVE_ARG - #define LSS_SAVE_ARG(r,a) long __tmp##r = (long)a - #undef LSS_LOAD_ARG - #define LSS_LOAD_ARG(r) register long __r##r __asm__("r"#r) = __tmp##r - - #undef LSS_BODY - #define LSS_BODY(type, name, args...) 
\ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __SYS_REG(name) \ - __asm__ __volatile__ (__syscall_safe(name) \ - : "=r"(__res_r0) \ - : __SYS_REG_LIST(args) \ - : "lr", "memory"); \ - __res = __res_r0; \ - LSS_RETURN(type, __res) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)() { \ - LSS_BODY(type, name); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - /* There is no need for using a volatile temp. */ \ - LSS_REG(0, arg1); \ - LSS_BODY(type, name, "r"(__r0)); \ - } - #undef _syscall2 - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_SAVE_ARG(0, arg1); \ - LSS_SAVE_ARG(1, arg2); \ - LSS_LOAD_ARG(0); \ - LSS_LOAD_ARG(1); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ - } - #undef _syscall3 - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_SAVE_ARG(0, arg1); \ - LSS_SAVE_ARG(1, arg2); \ - LSS_SAVE_ARG(2, arg3); \ - LSS_LOAD_ARG(0); \ - LSS_LOAD_ARG(1); \ - LSS_LOAD_ARG(2); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ - } - #undef _syscall4 - #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_SAVE_ARG(0, arg1); \ - LSS_SAVE_ARG(1, arg2); \ - LSS_SAVE_ARG(2, arg3); \ - LSS_SAVE_ARG(3, arg4); \ - LSS_LOAD_ARG(0); \ - LSS_LOAD_ARG(1); \ - LSS_LOAD_ARG(2); \ - LSS_LOAD_ARG(3); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ - } - #undef _syscall5 - #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_SAVE_ARG(0, arg1); \ - LSS_SAVE_ARG(1, arg2); \ - LSS_SAVE_ARG(2, arg3); \ - LSS_SAVE_ARG(3, arg4); \ - LSS_REG(4, 
arg5); \ - LSS_LOAD_ARG(0); \ - LSS_LOAD_ARG(1); \ - LSS_LOAD_ARG(2); \ - LSS_LOAD_ARG(3); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ - "r"(__r4)); \ - } - #undef _syscall6 - #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5, type6, arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - LSS_SAVE_ARG(0, arg1); \ - LSS_SAVE_ARG(1, arg2); \ - LSS_SAVE_ARG(2, arg3); \ - LSS_SAVE_ARG(3, arg4); \ - LSS_REG(4, arg5); \ - LSS_REG(5, arg6); \ - LSS_LOAD_ARG(0); \ - LSS_LOAD_ARG(1); \ - LSS_LOAD_ARG(2); \ - LSS_LOAD_ARG(3); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ - "r"(__r4), "r"(__r5)); \ - } - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - register long __res __asm__("r5"); - { +#undef LSS_SAVE_ARG +#define LSS_SAVE_ARG(r, a) long __tmp##r = (long)a +#undef LSS_LOAD_ARG +#define LSS_LOAD_ARG(r) register long __r##r __asm__("r" #r) = __tmp##r + +#undef LSS_BODY +#define LSS_BODY(type, name, args...) \ + register long __res_r0 __asm__("r0"); \ + long __res; \ + __SYS_REG(name) \ + __asm__ __volatile__(__syscall_safe(name) \ + : "=r"(__res_r0) \ + : __SYS_REG_LIST(args) \ + : "lr", "memory"); \ + __res = __res_r0; \ + LSS_RETURN(type, __res) +#undef _syscall0 +#define _syscall0(type, name) \ + type LSS_NAME(name)() { LSS_BODY(type, name); } +#undef _syscall1 +#define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + /* There is no need for using a volatile temp. 
*/ \ + LSS_REG(0, arg1); \ + LSS_BODY(type, name, "r"(__r0)); \ + } +#undef _syscall2 +#define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ + } +#undef _syscall3 +#define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ + } +#undef _syscall4 +#define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_SAVE_ARG(3, arg4); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_LOAD_ARG(3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ + } +#undef _syscall5 +#define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_SAVE_ARG(3, arg4); \ + LSS_REG(4, arg5); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_LOAD_ARG(3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), "r"(__r4)); \ + } +#undef _syscall6 +#define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, \ + type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_SAVE_ARG(3, arg4); \ + LSS_REG(4, 
arg5); \ + LSS_REG(5, arg6); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_LOAD_ARG(3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), "r"(__r4), "r"(__r5)); \ + } +LSS_INLINE int LSS_NAME(clone)(int (*fn)(void*), void* child_stack, int flags, void* arg, + int* parent_tidptr, void* newtls, int* child_tidptr) { + register long __res __asm__("r5"); + { if (fn == NULL || child_stack == NULL) { __res = -EINVAL; goto clone_exit; @@ -1632,24 +1592,24 @@ struct kernel_stat { /* stash first 4 arguments on stack first because we can only load * them after all function calls. */ - int tmp_flags = flags; - int * tmp_stack = (int*) child_stack; - void * tmp_ptid = parent_tidptr; - void * tmp_tls = newtls; + int tmp_flags = flags; + int* tmp_stack = (int*)child_stack; + void* tmp_ptid = parent_tidptr; + void* tmp_tls = newtls; - register int *__ctid __asm__("r4") = child_tidptr; + register int* __ctid __asm__("r4") = child_tidptr; /* Push "arg" and "fn" onto the stack that will be * used by the child. */ - *(--tmp_stack) = (int) arg; - *(--tmp_stack) = (int) fn; + *(--tmp_stack) = (int)arg; + *(--tmp_stack) = (int)fn; /* We must load r0..r3 last after all possible function calls. 
*/ - register int __flags __asm__("r0") = tmp_flags; - register void *__stack __asm__("r1") = tmp_stack; - register void *__ptid __asm__("r2") = tmp_ptid; - register void *__tls __asm__("r3") = tmp_tls; + register int __flags __asm__("r0") = tmp_flags; + register void* __stack __asm__("r1") = tmp_stack; + register void* __ptid __asm__("r2") = tmp_ptid; + register void* __tls __asm__("r3") = tmp_tls; /* %r0 = syscall(%r0 = flags, * %r1 = child_stack, @@ -1696,592 +1656,578 @@ struct kernel_stat { "i"(__NR_exit), "r"(__stack), "r"(__flags), "r"(__ptid), "r"(__tls), "r"(__ctid) : "cc", "lr", "memory"); - } - clone_exit: - LSS_RETURN(int, __res); } - #elif defined(__mips__) - #undef LSS_REG - #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \ - (unsigned long)(a) - - #if _MIPS_SIM == _MIPS_SIM_ABI32 - // See http://sources.redhat.com/ml/libc-alpha/2004-10/msg00050.html - // or http://www.linux-mips.org/archives/linux-mips/2004-10/msg00142.html - #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$8", "$9", "$10", "$11", "$12",\ - "$13", "$14", "$15", "$24", "$25", "memory" - #else - #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$10", "$11", "$12", "$13", \ - "$14", "$15", "$24", "$25", "memory" - #endif - - #undef LSS_BODY - #define LSS_BODY(type,name,r7,...) 
\ - register unsigned long __v0 __asm__("$2") = __NR_##name; \ - __asm__ __volatile__ ("syscall\n" \ - : "=&r"(__v0), r7 (__r7) \ - : "0"(__v0), ##__VA_ARGS__ \ - : MIPS_SYSCALL_CLOBBERS); \ - LSS_RETURN(type, __v0, __r7) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)() { \ - register unsigned long __r7 __asm__("$7"); \ - LSS_BODY(type, name, "=r"); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - register unsigned long __r7 __asm__("$7"); \ - LSS_REG(4, arg1); LSS_BODY(type, name, "=r", "r"(__r4)); \ - } - #undef _syscall2 - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - register unsigned long __r7 __asm__("$7"); \ - LSS_REG(4, arg1); LSS_REG(5, arg2); \ - LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5)); \ - } - #undef _syscall3 - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - register unsigned long __r7 __asm__("$7"); \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6)); \ - } - #undef _syscall4 - #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); \ - LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6)); \ - } - #undef _syscall5 - #if _MIPS_SIM == _MIPS_SIM_ABI32 - /* The old 32bit MIPS system call API passes the fifth and sixth argument +clone_exit: + LSS_RETURN(int, __res); +} +#elif defined(__mips__) +#undef LSS_REG +#define LSS_REG(r, a) register unsigned long __r##r __asm__("$" #r) = (unsigned long)(a) + +#if _MIPS_SIM == _MIPS_SIM_ABI32 +// See http://sources.redhat.com/ml/libc-alpha/2004-10/msg00050.html +// or 
http://www.linux-mips.org/archives/linux-mips/2004-10/msg00142.html +#define MIPS_SYSCALL_CLOBBERS \ + "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", "$25", "memory" +#else +#define MIPS_SYSCALL_CLOBBERS \ + "$1", "$3", "$10", "$11", "$12", "$13", "$14", "$15", "$24", "$25", "memory" +#endif + +#undef LSS_BODY +#define LSS_BODY(type, name, r7, ...) \ + register unsigned long __v0 __asm__("$2") = __NR_##name; \ + __asm__ __volatile__("syscall\n" \ + : "=&r"(__v0), r7(__r7) \ + : "0"(__v0), ##__VA_ARGS__ \ + : MIPS_SYSCALL_CLOBBERS); \ + LSS_RETURN(type, __v0, __r7) +#undef _syscall0 +#define _syscall0(type, name) \ + type LSS_NAME(name)() { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_BODY(type, name, "=r"); \ + } +#undef _syscall1 +#define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_REG(4, arg1); \ + LSS_BODY(type, name, "=r", "r"(__r4)); \ + } +#undef _syscall2 +#define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_REG(4, arg1); \ + LSS_REG(5, arg2); \ + LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5)); \ + } +#undef _syscall3 +#define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_REG(4, arg1); \ + LSS_REG(5, arg2); \ + LSS_REG(6, arg3); \ + LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6)); \ + } +#undef _syscall4 +#define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(4, arg1); \ + LSS_REG(5, arg2); \ + LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6)); \ + } +#undef _syscall5 +#if _MIPS_SIM == _MIPS_SIM_ABI32 +/* The old 32bit MIPS 
system call API passes the fifth and sixth argument * on the stack, whereas the new APIs use registers "r8" and "r9". */ - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); \ - register unsigned long __v0 __asm__("$2"); \ - __asm__ __volatile__ (".set noreorder\n" \ - "lw $2, %6\n" \ - "subu $29, 32\n" \ - "sw $2, 16($29)\n" \ - "li $2, %2\n" \ - "syscall\n" \ - "addiu $29, 32\n" \ - ".set reorder\n" \ - : "=&r"(__v0), "+r" (__r7) \ - : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ - "r"(__r6), "m" ((unsigned long)arg5) \ - : MIPS_SYSCALL_CLOBBERS); \ - LSS_RETURN(type, __v0, __r7); \ - } - #else - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); LSS_REG(8, arg5); \ - LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ - "r"(__r8)); \ - } - #endif - #undef _syscall6 - #if _MIPS_SIM == _MIPS_SIM_ABI32 - /* The old 32bit MIPS system call API passes the fifth and sixth argument +#define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) { \ + LSS_REG(4, arg1); \ + LSS_REG(5, arg2); \ + LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + register unsigned long __v0 __asm__("$2"); \ + __asm__ __volatile__( \ + ".set noreorder\n" \ + "lw $2, %6\n" \ + "subu $29, 32\n" \ + "sw $2, 16($29)\n" \ + "li $2, %2\n" \ + "syscall\n" \ + "addiu $29, 32\n" \ + ".set reorder\n" \ + : "=&r"(__v0), "+r"(__r7) \ + : "i"(__NR_##name), "r"(__r4), "r"(__r5), "r"(__r6), "m"((unsigned long)arg5) \ + : MIPS_SYSCALL_CLOBBERS); \ + LSS_RETURN(type, __v0, __r7); \ + 
} +#else +#define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) { \ + LSS_REG(4, arg1); \ + LSS_REG(5, arg2); \ + LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + LSS_REG(8, arg5); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), "r"(__r8)); \ + } +#endif +#undef _syscall6 +#if _MIPS_SIM == _MIPS_SIM_ABI32 +/* The old 32bit MIPS system call API passes the fifth and sixth argument * on the stack, whereas the new APIs use registers "r8" and "r9". */ - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); \ - register unsigned long __v0 __asm__("$2"); \ - __asm__ __volatile__ (".set noreorder\n" \ - "lw $2, %6\n" \ - "lw $8, %7\n" \ - "subu $29, 32\n" \ - "sw $2, 16($29)\n" \ - "sw $8, 20($29)\n" \ - "li $2, %2\n" \ - "syscall\n" \ - "addiu $29, 32\n" \ - ".set reorder\n" \ - : "=&r"(__v0), "+r" (__r7) \ - : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ - "r"(__r6), "m" ((unsigned long)arg5), \ - "m" ((unsigned long)arg6) \ - : MIPS_SYSCALL_CLOBBERS); \ - LSS_RETURN(type, __v0, __r7); \ - } - #else - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5,type6 arg6) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); LSS_REG(8, arg5); LSS_REG(9, arg6); \ - LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ - "r"(__r8), "r"(__r9)); \ - } - #endif - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - register unsigned long __v0 __asm__("$2"); - 
register unsigned long __r7 __asm__("$7") = (unsigned long)newtls; - { - register int __flags __asm__("$4") = flags; - register void *__stack __asm__("$5") = child_stack; - register void *__ptid __asm__("$6") = parent_tidptr; - register int *__ctid __asm__("$8") = child_tidptr; +#define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, \ + type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) { \ + LSS_REG(4, arg1); \ + LSS_REG(5, arg2); \ + LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + register unsigned long __v0 __asm__("$2"); \ + __asm__ __volatile__( \ + ".set noreorder\n" \ + "lw $2, %6\n" \ + "lw $8, %7\n" \ + "subu $29, 32\n" \ + "sw $2, 16($29)\n" \ + "sw $8, 20($29)\n" \ + "li $2, %2\n" \ + "syscall\n" \ + "addiu $29, 32\n" \ + ".set reorder\n" \ + : "=&r"(__v0), "+r"(__r7) \ + : "i"(__NR_##name), "r"(__r4), "r"(__r5), "r"(__r6), "m"((unsigned long)arg5), \ + "m"((unsigned long)arg6) \ + : MIPS_SYSCALL_CLOBBERS); \ + LSS_RETURN(type, __v0, __r7); \ + } +#else +#define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, \ + type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) { \ + LSS_REG(4, arg1); \ + LSS_REG(5, arg2); \ + LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + LSS_REG(8, arg5); \ + LSS_REG(9, arg6); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), "r"(__r8), "r"(__r9)); \ + } +#endif +LSS_INLINE int LSS_NAME(clone)(int (*fn)(void*), void* child_stack, int flags, void* arg, + int* parent_tidptr, void* newtls, int* child_tidptr) { + register unsigned long __v0 __asm__("$2"); + register unsigned long __r7 __asm__("$7") = (unsigned long)newtls; + { + register int __flags __asm__("$4") = flags; + register void* __stack __asm__("$5") = child_stack; + register void* __ptid __asm__("$6") = parent_tidptr; + register int* __ctid __asm__("$8") = child_tidptr; __asm__ 
__volatile__( - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "subu $29,24\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "sub $29,16\n" - #else - "dsubu $29,16\n" - #endif - - /* if (fn == NULL || child_stack == NULL) +#if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "subu $29,24\n" +#elif _MIPS_SIM == _MIPS_SIM_NABI32 + "sub $29,16\n" +#else + "dsubu $29,16\n" +#endif + + /* if (fn == NULL || child_stack == NULL) * return -EINVAL; */ - "li %0,%2\n" - "beqz %5,1f\n" - "beqz %6,1f\n" + "li %0,%2\n" + "beqz %5,1f\n" + "beqz %6,1f\n" - /* Push "arg" and "fn" onto the stack that will be + /* Push "arg" and "fn" onto the stack that will be * used by the child. */ - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "subu %6,32\n" - "sw %5,0(%6)\n" - "sw %8,4(%6)\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "sub %6,32\n" - "sw %5,0(%6)\n" - "sw %8,8(%6)\n" - #else - "dsubu %6,32\n" - "sd %5,0(%6)\n" - "sd %8,8(%6)\n" - #endif - - /* $7 = syscall($4 = flags, +#if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "subu %6,32\n" + "sw %5,0(%6)\n" + "sw %8,4(%6)\n" +#elif _MIPS_SIM == _MIPS_SIM_NABI32 + "sub %6,32\n" + "sw %5,0(%6)\n" + "sw %8,8(%6)\n" +#else + "dsubu %6,32\n" + "sd %5,0(%6)\n" + "sd %8,8(%6)\n" +#endif + + /* $7 = syscall($4 = flags, * $5 = child_stack, * $6 = parent_tidptr, * $7 = newtls, * $8 = child_tidptr) */ - "li $2,%3\n" - "syscall\n" + "li $2,%3\n" + "syscall\n" - /* if ($7 != 0) + /* if ($7 != 0) * return $2; */ - "bnez $7,1f\n" - "bnez $2,1f\n" + "bnez $7,1f\n" + "bnez $2,1f\n" - /* In the child, now. Call "fn(arg)". + /* In the child, now. Call "fn(arg)". 
*/ - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "lw $25,0($29)\n" - "lw $4,4($29)\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "lw $25,0($29)\n" - "lw $4,8($29)\n" - #else - "ld $25,0($29)\n" - "ld $4,8($29)\n" - #endif - "jalr $25\n" - - /* Call _exit($2) +#if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "lw $25,0($29)\n" + "lw $4,4($29)\n" +#elif _MIPS_SIM == _MIPS_SIM_NABI32 + "lw $25,0($29)\n" + "lw $4,8($29)\n" +#else + "ld $25,0($29)\n" + "ld $4,8($29)\n" +#endif + "jalr $25\n" + + /* Call _exit($2) */ - "move $4,$2\n" - "li $2,%4\n" - "syscall\n" - - "1:\n" - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "addu $29, 24\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "add $29, 16\n" - #else - "daddu $29,16\n" - #endif - : "=&r" (__v0), "=r" (__r7) - : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), - "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), - "r"(__ptid), "r"(__r7), "r"(__ctid) - : "$9", "$10", "$11", "$12", "$13", "$14", "$15", - "$24", "memory"); - } - LSS_RETURN(int, __v0, __r7); + "move $4,$2\n" + "li $2,%4\n" + "syscall\n" + + "1:\n" +#if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "addu $29, 24\n" +#elif _MIPS_SIM == _MIPS_SIM_NABI32 + "add $29, 16\n" +#else + "daddu $29,16\n" +#endif + : "=&r"(__v0), "=r"(__r7) + : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), "r"(fn), "r"(__stack), + "r"(__flags), "r"(arg), "r"(__ptid), "r"(__r7), "r"(__ctid) + : "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", "memory"); + } + LSS_RETURN(int, __v0, __r7); +} +#elif defined(__PPC__) +#undef LSS_LOADARGS_0 +#define LSS_LOADARGS_0(name, dummy...) 
__sc_0 = __NR_##name +#undef LSS_LOADARGS_1 +#define LSS_LOADARGS_1(name, arg1) \ + LSS_LOADARGS_0(name); \ + __sc_3 = (unsigned long)(arg1) +#undef LSS_LOADARGS_2 +#define LSS_LOADARGS_2(name, arg1, arg2) \ + LSS_LOADARGS_1(name, arg1); \ + __sc_4 = (unsigned long)(arg2) +#undef LSS_LOADARGS_3 +#define LSS_LOADARGS_3(name, arg1, arg2, arg3) \ + LSS_LOADARGS_2(name, arg1, arg2); \ + __sc_5 = (unsigned long)(arg3) +#undef LSS_LOADARGS_4 +#define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4) \ + LSS_LOADARGS_3(name, arg1, arg2, arg3); \ + __sc_6 = (unsigned long)(arg4) +#undef LSS_LOADARGS_5 +#define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5) \ + LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4); \ + __sc_7 = (unsigned long)(arg5) +#undef LSS_LOADARGS_6 +#define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \ + LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5); \ + __sc_8 = (unsigned long)(arg6) +#undef LSS_ASMINPUT_0 +#define LSS_ASMINPUT_0 "0"(__sc_0) +#undef LSS_ASMINPUT_1 +#define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1"(__sc_3) +#undef LSS_ASMINPUT_2 +#define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2"(__sc_4) +#undef LSS_ASMINPUT_3 +#define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3"(__sc_5) +#undef LSS_ASMINPUT_4 +#define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4"(__sc_6) +#undef LSS_ASMINPUT_5 +#define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5"(__sc_7) +#undef LSS_ASMINPUT_6 +#define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6"(__sc_8) +#undef LSS_BODY +#define LSS_BODY(nr, type, name, args...) 
\ + long __sc_ret, __sc_err; \ + { \ + register unsigned long __sc_0 __asm__("r0"); \ + register unsigned long __sc_3 __asm__("r3"); \ + register unsigned long __sc_4 __asm__("r4"); \ + register unsigned long __sc_5 __asm__("r5"); \ + register unsigned long __sc_6 __asm__("r6"); \ + register unsigned long __sc_7 __asm__("r7"); \ + register unsigned long __sc_8 __asm__("r8"); \ + \ + LSS_LOADARGS_##nr(name, args); \ + __asm__ __volatile__( \ + "sc\n\t" \ + "mfcr %0" \ + : "=&r"(__sc_0), "=&r"(__sc_3), "=&r"(__sc_4), "=&r"(__sc_5), "=&r"(__sc_6), \ + "=&r"(__sc_7), "=&r"(__sc_8) \ + : LSS_ASMINPUT_##nr \ + : "cr0", "ctr", "memory", "r9", "r10", "r11", "r12"); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + LSS_RETURN(type, __sc_ret, __sc_err) +#undef _syscall0 +#define _syscall0(type, name) \ + type LSS_NAME(name)(void) { LSS_BODY(0, type, name); } +#undef _syscall1 +#define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { LSS_BODY(1, type, name, arg1); } +#undef _syscall2 +#define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { LSS_BODY(2, type, name, arg1, arg2); } +#undef _syscall3 +#define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_BODY(3, type, name, arg1, arg2, arg3); \ + } +#undef _syscall4 +#define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(4, type, name, arg1, arg2, arg3, arg4); \ + } +#undef _syscall5 +#define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) { \ + LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5); \ } - #elif defined (__PPC__) - #undef LSS_LOADARGS_0 - #define LSS_LOADARGS_0(name, dummy...) 
\ - __sc_0 = __NR_##name - #undef LSS_LOADARGS_1 - #define LSS_LOADARGS_1(name, arg1) \ - LSS_LOADARGS_0(name); \ - __sc_3 = (unsigned long) (arg1) - #undef LSS_LOADARGS_2 - #define LSS_LOADARGS_2(name, arg1, arg2) \ - LSS_LOADARGS_1(name, arg1); \ - __sc_4 = (unsigned long) (arg2) - #undef LSS_LOADARGS_3 - #define LSS_LOADARGS_3(name, arg1, arg2, arg3) \ - LSS_LOADARGS_2(name, arg1, arg2); \ - __sc_5 = (unsigned long) (arg3) - #undef LSS_LOADARGS_4 - #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4) \ - LSS_LOADARGS_3(name, arg1, arg2, arg3); \ - __sc_6 = (unsigned long) (arg4) - #undef LSS_LOADARGS_5 - #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5) \ - LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4); \ - __sc_7 = (unsigned long) (arg5) - #undef LSS_LOADARGS_6 - #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \ - LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5); \ - __sc_8 = (unsigned long) (arg6) - #undef LSS_ASMINPUT_0 - #define LSS_ASMINPUT_0 "0" (__sc_0) - #undef LSS_ASMINPUT_1 - #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3) - #undef LSS_ASMINPUT_2 - #define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4) - #undef LSS_ASMINPUT_3 - #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5) - #undef LSS_ASMINPUT_4 - #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6) - #undef LSS_ASMINPUT_5 - #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7) - #undef LSS_ASMINPUT_6 - #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8) - #undef LSS_BODY - #define LSS_BODY(nr, type, name, args...) 
\ - long __sc_ret, __sc_err; \ - { \ - register unsigned long __sc_0 __asm__ ("r0"); \ - register unsigned long __sc_3 __asm__ ("r3"); \ - register unsigned long __sc_4 __asm__ ("r4"); \ - register unsigned long __sc_5 __asm__ ("r5"); \ - register unsigned long __sc_6 __asm__ ("r6"); \ - register unsigned long __sc_7 __asm__ ("r7"); \ - register unsigned long __sc_8 __asm__ ("r8"); \ - \ - LSS_LOADARGS_##nr(name, args); \ - __asm__ __volatile__ \ - ("sc\n\t" \ - "mfcr %0" \ - : "=&r" (__sc_0), \ - "=&r" (__sc_3), "=&r" (__sc_4), \ - "=&r" (__sc_5), "=&r" (__sc_6), \ - "=&r" (__sc_7), "=&r" (__sc_8) \ - : LSS_ASMINPUT_##nr \ - : "cr0", "ctr", "memory", \ - "r9", "r10", "r11", "r12"); \ - __sc_ret = __sc_3; \ - __sc_err = __sc_0; \ - } \ - LSS_RETURN(type, __sc_ret, __sc_err) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)(void) { \ - LSS_BODY(0, type, name); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_BODY(1, type, name, arg1); \ - } - #undef _syscall2 - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_BODY(2, type, name, arg1, arg2); \ - } - #undef _syscall3 - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_BODY(3, type, name, arg1, arg2, arg3); \ - } - #undef _syscall4 - #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_BODY(4, type, name, arg1, arg2, arg3, arg4); \ - } - #undef _syscall5 - #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5); \ - } - #undef _syscall6 - #define _syscall6(type, name, type1, 
arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5, type6, arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \ - } - /* clone function adapted from glibc 2.18 clone.S */ - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long __ret, __err; - { +#undef _syscall6 +#define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, \ + type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) { \ + LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \ + } +/* clone function adapted from glibc 2.18 clone.S */ +LSS_INLINE int LSS_NAME(clone)(int (*fn)(void*), void* child_stack, int flags, void* arg, + int* parent_tidptr, void* newtls, int* child_tidptr) { + long __ret, __err; + { #if defined(__PPC64__) /* Stack frame offsets. 
*/ #if _CALL_ELF != 2 -#define FRAME_MIN_SIZE 112 -#define FRAME_TOC_SAVE 40 +#define FRAME_MIN_SIZE 112 +#define FRAME_TOC_SAVE 40 #else -#define FRAME_MIN_SIZE 32 -#define FRAME_TOC_SAVE 24 +#define FRAME_MIN_SIZE 32 +#define FRAME_TOC_SAVE 24 #endif - - register int (*__fn)(void *) __asm__ ("r3") = fn; - register void *__cstack __asm__ ("r4") = child_stack; - register int __flags __asm__ ("r5") = flags; - register void * __arg __asm__ ("r6") = arg; - register int * __ptidptr __asm__ ("r7") = parent_tidptr; - register void * __newtls __asm__ ("r8") = newtls; - register int * __ctidptr __asm__ ("r9") = child_tidptr; + register int (*__fn)(void*) __asm__("r3") = fn; + register void* __cstack __asm__("r4") = child_stack; + register int __flags __asm__("r5") = flags; + register void* __arg __asm__("r6") = arg; + register int* __ptidptr __asm__("r7") = parent_tidptr; + register void* __newtls __asm__("r8") = newtls; + register int* __ctidptr __asm__("r9") = child_tidptr; __asm__ __volatile__( - /* check for fn == NULL + /* check for fn == NULL * and child_stack == NULL */ - "cmpdi cr0, %6, 0\n\t" - "cmpdi cr1, %7, 0\n\t" - "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t" - "beq- cr0, 1f\n\t" - - /* set up stack frame for child */ - "clrrdi %7, %7, 4\n\t" - "li 0, 0\n\t" - "stdu 0, -%13(%7)\n\t" - - /* fn, arg, child_stack are saved acrVoss the syscall */ - "mr 28, %6\n\t" - "mr 29, %7\n\t" - "mr 27, %9\n\t" - - /* syscall + "cmpdi cr0, %6, 0\n\t" + "cmpdi cr1, %7, 0\n\t" + "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t" + "beq- cr0, 1f\n\t" + + /* set up stack frame for child */ + "clrrdi %7, %7, 4\n\t" + "li 0, 0\n\t" + "stdu 0, -%13(%7)\n\t" + + /* fn, arg, child_stack are saved acrVoss the syscall */ + "mr 28, %6\n\t" + "mr 29, %7\n\t" + "mr 27, %9\n\t" + + /* syscall r3 == flags r4 == child_stack r5 == parent_tidptr r6 == newtls r7 == child_tidptr */ - "mr 3, %8\n\t" - "mr 5, %10\n\t" - "mr 6, %11\n\t" - "mr 7, %12\n\t" - "li 0, %4\n\t" - "sc\n\t" - - /* Test if syscall was 
successful */ - "cmpdi cr1, 3, 0\n\t" - "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" - "bne- cr1, 1f\n\t" - - /* Do the function call */ - "std 2, %14(1)\n\t" + "mr 3, %8\n\t" + "mr 5, %10\n\t" + "mr 6, %11\n\t" + "mr 7, %12\n\t" + "li 0, %4\n\t" + "sc\n\t" + + /* Test if syscall was successful */ + "cmpdi cr1, 3, 0\n\t" + "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" + "bne- cr1, 1f\n\t" + + /* Do the function call */ + "std 2, %14(1)\n\t" #if _CALL_ELF != 2 - "ld 0, 0(28)\n\t" - "ld 2, 8(28)\n\t" - "mtctr 0\n\t" + "ld 0, 0(28)\n\t" + "ld 2, 8(28)\n\t" + "mtctr 0\n\t" #else - "mr 12, 28\n\t" - "mtctr 12\n\t" -#endif - "mr 3, 27\n\t" - "bctrl\n\t" - "ld 2, %14(1)\n\t" - - /* Call _exit(r3) */ - "li 0, %5\n\t" - "sc\n\t" - - /* Return to parent */ - "1:\n\t" - "mr %0, 3\n\t" - : "=r" (__ret), "=r" (__err) - : "0" (-1), "i" (EINVAL), - "i" (__NR_clone), "i" (__NR_exit), - "r" (__fn), "r" (__cstack), "r" (__flags), - "r" (__arg), "r" (__ptidptr), "r" (__newtls), - "r" (__ctidptr), "i" (FRAME_MIN_SIZE), "i" (FRAME_TOC_SAVE) - : "cr0", "cr1", "memory", "ctr", - "r0", "r29", "r27", "r28"); + "mr 12, 28\n\t" + "mtctr 12\n\t" +#endif + "mr 3, 27\n\t" + "bctrl\n\t" + "ld 2, %14(1)\n\t" + + /* Call _exit(r3) */ + "li 0, %5\n\t" + "sc\n\t" + + /* Return to parent */ + "1:\n\t" + "mr %0, 3\n\t" + : "=r"(__ret), "=r"(__err) + : "0"(-1), "i"(EINVAL), "i"(__NR_clone), "i"(__NR_exit), "r"(__fn), "r"(__cstack), + "r"(__flags), "r"(__arg), "r"(__ptidptr), "r"(__newtls), "r"(__ctidptr), + "i"(FRAME_MIN_SIZE), "i"(FRAME_TOC_SAVE) + : "cr0", "cr1", "memory", "ctr", "r0", "r29", "r27", "r28"); #else - register int (*__fn)(void *) __asm__ ("r8") = fn; - register void *__cstack __asm__ ("r4") = child_stack; - register int __flags __asm__ ("r3") = flags; - register void * __arg __asm__ ("r9") = arg; - register int * __ptidptr __asm__ ("r5") = parent_tidptr; - register void * __newtls __asm__ ("r6") = newtls; - register int * __ctidptr __asm__ ("r7") = child_tidptr; + register int (*__fn)(void*) 
__asm__("r8") = fn; + register void* __cstack __asm__("r4") = child_stack; + register int __flags __asm__("r3") = flags; + register void* __arg __asm__("r9") = arg; + register int* __ptidptr __asm__("r5") = parent_tidptr; + register void* __newtls __asm__("r6") = newtls; + register int* __ctidptr __asm__("r7") = child_tidptr; __asm__ __volatile__( - /* check for fn == NULL + /* check for fn == NULL * and child_stack == NULL */ - "cmpwi cr0, %6, 0\n\t" - "cmpwi cr1, %7, 0\n\t" - "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t" - "beq- cr0, 1f\n\t" - - /* set up stack frame for child */ - "clrrwi %7, %7, 4\n\t" - "li 0, 0\n\t" - "stwu 0, -16(%7)\n\t" - - /* fn, arg, child_stack are saved across the syscall: r28-30 */ - "mr 28, %6\n\t" - "mr 29, %7\n\t" - "mr 27, %9\n\t" - - /* syscall */ - "li 0, %4\n\t" - /* flags already in r3 + "cmpwi cr0, %6, 0\n\t" + "cmpwi cr1, %7, 0\n\t" + "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t" + "beq- cr0, 1f\n\t" + + /* set up stack frame for child */ + "clrrwi %7, %7, 4\n\t" + "li 0, 0\n\t" + "stwu 0, -16(%7)\n\t" + + /* fn, arg, child_stack are saved across the syscall: r28-30 */ + "mr 28, %6\n\t" + "mr 29, %7\n\t" + "mr 27, %9\n\t" + + /* syscall */ + "li 0, %4\n\t" + /* flags already in r3 * child_stack already in r4 * ptidptr already in r5 * newtls already in r6 * ctidptr already in r7 */ - "sc\n\t" - - /* Test if syscall was successful */ - "cmpwi cr1, 3, 0\n\t" - "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" - "bne- cr1, 1f\n\t" - - /* Do the function call */ - "mtctr 28\n\t" - "mr 3, 27\n\t" - "bctrl\n\t" - - /* Call _exit(r3) */ - "li 0, %5\n\t" - "sc\n\t" - - /* Return to parent */ - "1:\n" - "mfcr %1\n\t" - "mr %0, 3\n\t" - : "=r" (__ret), "=r" (__err) - : "0" (-1), "1" (EINVAL), - "i" (__NR_clone), "i" (__NR_exit), - "r" (__fn), "r" (__cstack), "r" (__flags), - "r" (__arg), "r" (__ptidptr), "r" (__newtls), - "r" (__ctidptr) - : "cr0", "cr1", "memory", "ctr", - "r0", "r29", "r27", "r28"); - -#endif - } - LSS_RETURN(int, __ret, __err); + 
"sc\n\t" + + /* Test if syscall was successful */ + "cmpwi cr1, 3, 0\n\t" + "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" + "bne- cr1, 1f\n\t" + + /* Do the function call */ + "mtctr 28\n\t" + "mr 3, 27\n\t" + "bctrl\n\t" + + /* Call _exit(r3) */ + "li 0, %5\n\t" + "sc\n\t" + + /* Return to parent */ + "1:\n" + "mfcr %1\n\t" + "mr %0, 3\n\t" + : "=r"(__ret), "=r"(__err) + : "0"(-1), "1"(EINVAL), "i"(__NR_clone), "i"(__NR_exit), "r"(__fn), "r"(__cstack), + "r"(__flags), "r"(__arg), "r"(__ptidptr), "r"(__newtls), "r"(__ctidptr) + : "cr0", "cr1", "memory", "ctr", "r0", "r29", "r27", "r28"); + +#endif + } + LSS_RETURN(int, __ret, __err); +} +#elif defined(__aarch64__) +#undef LSS_REG +#define LSS_REG(r, a) register long __x##r __asm__("x" #r) = (long)a +#undef LSS_BODY +#define LSS_BODY(type, name, args...) \ + register long __res_x0 __asm__("x0"); \ + long __res; \ + __asm__ __volatile__( \ + "mov x8, %1\n" \ + "svc 0x0\n" \ + : "=r"(__res_x0) \ + : "i"(__NR_##name), ##args \ + : "memory"); \ + __res = __res_x0; \ + LSS_RETURN(type, __res) +#undef _syscall0 +#define _syscall0(type, name) \ + type LSS_NAME(name)(void) { LSS_BODY(type, name); } +#undef _syscall1 +#define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_REG(0, arg1); \ + LSS_BODY(type, name, "r"(__x0)); \ + } +#undef _syscall2 +#define _syscall2_long(type, name, svc, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_REG(0, arg1); \ + LSS_REG(1, arg2); \ + LSS_BODY(type, svc, "r"(__x0), "r"(__x1)); \ + } +#define _syscall2(type, name, type1, arg1, type2, arg2) \ + _syscall2_long(type, name, name, type1, arg1, type2, arg2) +#undef _syscall3 +#define _syscall3_long(type, name, svc, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_REG(0, arg1); \ + LSS_REG(1, arg2); \ + LSS_REG(2, arg3); \ + LSS_BODY(type, svc, "r"(__x0), "r"(__x1), "r"(__x2)); \ + } +#define _syscall3(type, name, type1, 
arg1, type2, arg2, type3, arg3) \ + _syscall3_long(type, name, name, type1, arg1, type2, arg2, type3, arg3) +#undef _syscall4 +#define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(0, arg1); \ + LSS_REG(1, arg2); \ + LSS_REG(2, arg3); \ + LSS_REG(3, arg4); \ + LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3)); \ } - #elif defined(__aarch64__) - #undef LSS_REG - #define LSS_REG(r,a) register long __x##r __asm__("x"#r) = (long)a - #undef LSS_BODY - #define LSS_BODY(type,name,args...) \ - register long __res_x0 __asm__("x0"); \ - long __res; \ - __asm__ __volatile__ ("mov x8, %1\n" \ - "svc 0x0\n" \ - : "=r"(__res_x0) \ - : "i"(__NR_##name) , ## args \ - : "memory"); \ - __res = __res_x0; \ - LSS_RETURN(type, __res) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)(void) { \ - LSS_BODY(type, name); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__x0)); \ - } - #undef _syscall2 - #define _syscall2_long(type, name, svc, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); \ - LSS_BODY(type, svc, "r"(__x0), "r"(__x1)); \ - } - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - _syscall2_long(type, name, name, type1, arg1, type2, arg2) - #undef _syscall3 - #define _syscall3_long(type, name, svc, type1, arg1, type2, arg2, \ - type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ - LSS_BODY(type, svc, "r"(__x0), "r"(__x1), "r"(__x2)); \ - } - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - _syscall3_long(type, name, name, type1, arg1, type2, arg2, \ - type3, arg3) - #undef _syscall4 - #define 
_syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ - LSS_REG(3, arg4); \ - LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3)); \ - } - #undef _syscall5 - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ - LSS_REG(3, arg4); LSS_REG(4, arg5); \ - LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3), \ - "r"(__x4)); \ - } - #undef _syscall6 - #define _syscall6_long(type,name,svc,type1,arg1,type2,arg2,type3,arg3, \ - type4,arg4,type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ - LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \ - LSS_BODY(type, svc, "r"(__x0), "r"(__x1), "x"(__x2), "r"(__x3), \ - "r"(__x4), "r"(__x5)); \ - } - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - _syscall6_long(type,name,name,type1,arg1,type2,arg2,type3,arg3, \ - type4,arg4,type5,arg5,type6,arg6) - /* clone function adapted from glibc 2.18 clone.S */ - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long __res; - { - register int (*__fn)(void *) __asm__("x0") = fn; - register void *__stack __asm__("x1") = child_stack; - register int __flags __asm__("x2") = flags; - register void *__arg __asm__("x3") = arg; - register int *__ptid __asm__("x4") = parent_tidptr; - register void *__tls __asm__("x5") = newtls; - register int *__ctid __asm__("x6") = child_tidptr; +#undef _syscall5 +#define _syscall5(type, name, type1, arg1, type2, arg2, type3, 
arg3, type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) { \ + LSS_REG(0, arg1); \ + LSS_REG(1, arg2); \ + LSS_REG(2, arg3); \ + LSS_REG(3, arg4); \ + LSS_REG(4, arg5); \ + LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3), "r"(__x4)); \ + } +#undef _syscall6 +#define _syscall6_long(type, name, svc, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, \ + arg5, type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) { \ + LSS_REG(0, arg1); \ + LSS_REG(1, arg2); \ + LSS_REG(2, arg3); \ + LSS_REG(3, arg4); \ + LSS_REG(4, arg5); \ + LSS_REG(5, arg6); \ + LSS_BODY(type, svc, "r"(__x0), "r"(__x1), "x"(__x2), "r"(__x3), "r"(__x4), "r"(__x5)); \ + } +#define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, \ + type6, arg6) \ + _syscall6_long(type, name, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, \ + arg5, type6, arg6) +/* clone function adapted from glibc 2.18 clone.S */ +LSS_INLINE int LSS_NAME(clone)(int (*fn)(void*), void* child_stack, int flags, void* arg, + int* parent_tidptr, void* newtls, int* child_tidptr) { + long __res; + { + register int (*__fn)(void*) __asm__("x0") = fn; + register void* __stack __asm__("x1") = child_stack; + register int __flags __asm__("x2") = flags; + register void* __arg __asm__("x3") = arg; + register int* __ptid __asm__("x4") = parent_tidptr; + register void* __tls __asm__("x5") = newtls; + register int* __ctid __asm__("x6") = child_tidptr; __asm__ __volatile__(/* if (fn == NULL || child_stack == NULL) * return -EINVAL; */ @@ -2316,391 +2262,331 @@ struct kernel_stat { */ "mov x8, %10\n" "svc 0x0\n" - "1:\n" + "1:\n" "mov x8, %1\n" - "2:\n" - : "=r" (__res) - : "i"(-EINVAL), - "r"(__fn), "r"(__stack), "r"(__flags), "r"(__arg), - "r"(__ptid), "r"(__tls), "r"(__ctid), - "i"(__NR_clone), "i"(__NR_exit) + "2:\n" + : "=r"(__res) + : "i"(-EINVAL), 
"r"(__fn), "r"(__stack), "r"(__flags), "r"(__arg), + "r"(__ptid), "r"(__tls), "r"(__ctid), "i"(__NR_clone), "i"(__NR_exit) : "x30", "memory"); - } - LSS_RETURN(int, __res); } - #elif defined(__s390__) - #undef LSS_REG - #define LSS_REG(r, a) register unsigned long __r##r __asm__("r"#r) = (unsigned long) a - #undef LSS_BODY - #define LSS_BODY(type, name, args...) \ - register unsigned long __nr __asm__("r1") \ - = (unsigned long)(__NR_##name); \ - register long __res_r2 __asm__("r2"); \ - long __res; \ - __asm__ __volatile__ \ - ("svc 0\n\t" \ - : "=d"(__res_r2) \ - : "d"(__nr), ## args \ - : "memory"); \ - __res = __res_r2; \ - LSS_RETURN(type, __res) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)(void) { \ - LSS_BODY(type, name); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_REG(2, arg1); \ - LSS_BODY(type, name, "0"(__r2)); \ - } - #undef _syscall2 - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_REG(2, arg1); LSS_REG(3, arg2); \ - LSS_BODY(type, name, "0"(__r2), "d"(__r3)); \ - } - #undef _syscall3 - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ - LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4)); \ - } - #undef _syscall4 - #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, \ - type4 arg4) { \ - LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ - LSS_REG(5, arg4); \ - LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4), \ - "d"(__r5)); \ - } - #undef _syscall5 - #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, \ - type4 arg4, type5 arg5) { \ - 
LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ - LSS_REG(5, arg4); LSS_REG(6, arg5); \ - LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4), \ - "d"(__r5), "d"(__r6)); \ - } - #undef _syscall6 - #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5, type6, arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, \ - type4 arg4, type5 arg5, type6 arg6) { \ - LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ - LSS_REG(5, arg4); LSS_REG(6, arg5); LSS_REG(7, arg6); \ - LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4), \ - "d"(__r5), "d"(__r6), "d"(__r7)); \ - } - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long __ret; - { - register int (*__fn)(void *) __asm__ ("r1") = fn; - register void *__cstack __asm__ ("r2") = child_stack; - register int __flags __asm__ ("r3") = flags; - register void *__arg __asm__ ("r0") = arg; - register int *__ptidptr __asm__ ("r4") = parent_tidptr; - register void *__newtls __asm__ ("r6") = newtls; - register int *__ctidptr __asm__ ("r5") = child_tidptr; - __asm__ __volatile__ ( - #ifndef __s390x__ - /* arg already in r0 */ - "ltr %4, %4\n\t" /* check fn, which is already in r1 */ - "jz 1f\n\t" /* NULL function pointer, return -EINVAL */ - "ltr %5, %5\n\t" /* check child_stack, which is already in r2 */ - "jz 1f\n\t" /* NULL stack pointer, return -EINVAL */ - /* flags already in r3 */ - /* parent_tidptr already in r4 */ - /* child_tidptr already in r5 */ - /* newtls already in r6 */ - "svc %2\n\t" /* invoke clone syscall */ - "ltr %0,%%r2\n\t" /* load return code into __ret and test */ - "jnz 1f\n\t" /* return to parent if non-zero */ - /* start child thread */ - "lr %%r2, %7\n\t" /* set first parameter to void *arg */ - "ahi %%r15, -96\n\t" /* make room on the stack for the save area */ - "xc 0(4,%%r15), 0(%%r15)\n\t" - "basr %%r14, %4\n\t" /* jump to fn */ - 
"svc %3\n" /* invoke exit syscall */ - "1:\n" - #else - /* arg already in r0 */ - "ltgr %4, %4\n\t" /* check fn, which is already in r1 */ - "jz 1f\n\t" /* NULL function pointer, return -EINVAL */ - "ltgr %5, %5\n\t" /* check child_stack, which is already in r2 */ - "jz 1f\n\t" /* NULL stack pointer, return -EINVAL */ - /* flags already in r3 */ - /* parent_tidptr already in r4 */ - /* child_tidptr already in r5 */ - /* newtls already in r6 */ - "svc %2\n\t" /* invoke clone syscall */ - "ltgr %0, %%r2\n\t" /* load return code into __ret and test */ - "jnz 1f\n\t" /* return to parent if non-zero */ - /* start child thread */ - "lgr %%r2, %7\n\t" /* set first parameter to void *arg */ - "aghi %%r15, -160\n\t" /* make room on the stack for the save area */ - "xc 0(8,%%r15), 0(%%r15)\n\t" - "basr %%r14, %4\n\t" /* jump to fn */ - "svc %3\n" /* invoke exit syscall */ - "1:\n" - #endif - : "=r" (__ret) - : "0" (-EINVAL), "i" (__NR_clone), "i" (__NR_exit), - "d" (__fn), "d" (__cstack), "d" (__flags), "d" (__arg), - "d" (__ptidptr), "d" (__newtls), "d" (__ctidptr) - : "cc", "r14", "memory" - ); - } - LSS_RETURN(int, __ret); + LSS_RETURN(int, __res); +} +#elif defined(__s390__) +#undef LSS_REG +#define LSS_REG(r, a) register unsigned long __r##r __asm__("r" #r) = (unsigned long)a +#undef LSS_BODY +#define LSS_BODY(type, name, args...) 
\ + register unsigned long __nr __asm__("r1") = (unsigned long)(__NR_##name); \ + register long __res_r2 __asm__("r2"); \ + long __res; \ + __asm__ __volatile__("svc 0\n\t" : "=d"(__res_r2) : "d"(__nr), ##args : "memory"); \ + __res = __res_r2; \ + LSS_RETURN(type, __res) +#undef _syscall0 +#define _syscall0(type, name) \ + type LSS_NAME(name)(void) { LSS_BODY(type, name); } +#undef _syscall1 +#define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_REG(2, arg1); \ + LSS_BODY(type, name, "0"(__r2)); \ + } +#undef _syscall2 +#define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_REG(2, arg1); \ + LSS_REG(3, arg2); \ + LSS_BODY(type, name, "0"(__r2), "d"(__r3)); \ + } +#undef _syscall3 +#define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_REG(2, arg1); \ + LSS_REG(3, arg2); \ + LSS_REG(4, arg3); \ + LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4)); \ } - #endif - #define __NR__exit __NR_exit - #define __NR__gettid __NR_gettid - #define __NR__mremap __NR_mremap - LSS_INLINE _syscall1(int, close, int, f) - LSS_INLINE _syscall1(int, _exit, int, e) -#if defined(__aarch64__) && defined (__ILP32__) - /* aarch64_ilp32 uses fcntl64 for sys_fcntl() */ - LSS_INLINE _syscall3_long(int, fcntl, fcntl64, int, f, - int, c, long, a) +#undef _syscall4 +#define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(2, arg1); \ + LSS_REG(3, arg2); \ + LSS_REG(4, arg3); \ + LSS_REG(5, arg4); \ + LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4), "d"(__r5)); \ + } +#undef _syscall5 +#define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) { \ + LSS_REG(2, arg1); \ + LSS_REG(3, arg2); 
\ + LSS_REG(4, arg3); \ + LSS_REG(5, arg4); \ + LSS_REG(6, arg5); \ + LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4), "d"(__r5), "d"(__r6)); \ + } +#undef _syscall6 +#define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, \ + type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) { \ + LSS_REG(2, arg1); \ + LSS_REG(3, arg2); \ + LSS_REG(4, arg3); \ + LSS_REG(5, arg4); \ + LSS_REG(6, arg5); \ + LSS_REG(7, arg6); \ + LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4), "d"(__r5), "d"(__r6), "d"(__r7)); \ + } +LSS_INLINE int LSS_NAME(clone)(int (*fn)(void*), void* child_stack, int flags, void* arg, + int* parent_tidptr, void* newtls, int* child_tidptr) { + long __ret; + { + register int (*__fn)(void*) __asm__("r1") = fn; + register void* __cstack __asm__("r2") = child_stack; + register int __flags __asm__("r3") = flags; + register void* __arg __asm__("r0") = arg; + register int* __ptidptr __asm__("r4") = parent_tidptr; + register void* __newtls __asm__("r6") = newtls; + register int* __ctidptr __asm__("r5") = child_tidptr; + __asm__ __volatile__( +#ifndef __s390x__ + /* arg already in r0 */ + "ltr %4, %4\n\t" /* check fn, which is already in r1 */ + "jz 1f\n\t" /* NULL function pointer, return -EINVAL */ + "ltr %5, %5\n\t" /* check child_stack, which is already in r2 */ + "jz 1f\n\t" /* NULL stack pointer, return -EINVAL */ + /* flags already in r3 */ + /* parent_tidptr already in r4 */ + /* child_tidptr already in r5 */ + /* newtls already in r6 */ + "svc %2\n\t" /* invoke clone syscall */ + "ltr %0,%%r2\n\t" /* load return code into __ret and test */ + "jnz 1f\n\t" /* return to parent if non-zero */ + /* start child thread */ + "lr %%r2, %7\n\t" /* set first parameter to void *arg */ + "ahi %%r15, -96\n\t" /* make room on the stack for the save area */ + "xc 0(4,%%r15), 0(%%r15)\n\t" + "basr %%r14, %4\n\t" /* jump to fn */ + "svc %3\n" /* invoke exit syscall */ + 
"1:\n" +#else + /* arg already in r0 */ + "ltgr %4, %4\n\t" /* check fn, which is already in r1 */ + "jz 1f\n\t" /* NULL function pointer, return -EINVAL */ + "ltgr %5, %5\n\t" /* check child_stack, which is already in r2 */ + "jz 1f\n\t" /* NULL stack pointer, return -EINVAL */ + /* flags already in r3 */ + /* parent_tidptr already in r4 */ + /* child_tidptr already in r5 */ + /* newtls already in r6 */ + "svc %2\n\t" /* invoke clone syscall */ + "ltgr %0, %%r2\n\t" /* load return code into __ret and test */ + "jnz 1f\n\t" /* return to parent if non-zero */ + /* start child thread */ + "lgr %%r2, %7\n\t" /* set first parameter to void *arg */ + "aghi %%r15, -160\n\t" /* make room on the stack for the save area */ + "xc 0(8,%%r15), 0(%%r15)\n\t" + "basr %%r14, %4\n\t" /* jump to fn */ + "svc %3\n" /* invoke exit syscall */ + "1:\n" +#endif + : "=r"(__ret) + : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), "d"(__fn), "d"(__cstack), + "d"(__flags), "d"(__arg), "d"(__ptidptr), "d"(__newtls), "d"(__ctidptr) + : "cc", "r14", "memory"); + } + LSS_RETURN(int, __ret); +} +#endif +#define __NR__exit __NR_exit +#define __NR__gettid __NR_gettid +#define __NR__mremap __NR_mremap +LSS_INLINE _syscall1(int, close, int, f) LSS_INLINE _syscall1(int, _exit, int, e) +#if defined(__aarch64__) && defined(__ILP32__) + /* aarch64_ilp32 uses fcntl64 for sys_fcntl() */ + LSS_INLINE _syscall3_long(int, fcntl, fcntl64, int, f, int, c, long, a) #else - LSS_INLINE _syscall3(int, fcntl, int, f, - int, c, long, a) + LSS_INLINE _syscall3(int, fcntl, int, f, int, c, long, a) #endif -#if defined(__aarch64__) && defined (__ILP32__) - /* aarch64_ilp32 uses fstat64 for sys_fstat() */ - LSS_INLINE _syscall2_long(int, fstat, fstat64, int, f, - struct kernel_stat*, b) +#if defined(__aarch64__) && defined(__ILP32__) + /* aarch64_ilp32 uses fstat64 for sys_fstat() */ + LSS_INLINE _syscall2_long(int, fstat, fstat64, int, f, struct kernel_stat*, b) #else - LSS_INLINE _syscall2(int, fstat, int, f, - struct 
kernel_stat*, b) -#endif - LSS_INLINE _syscall6(int, futex, int*, a, - int, o, int, v, - struct kernel_timespec*, t, - int*, a2, - int, v3) + LSS_INLINE + _syscall2(int, fstat, int, f, struct kernel_stat*, b) +#endif + LSS_INLINE _syscall6(int, futex, int*, a, int, o, int, v, struct kernel_timespec*, + t, int*, a2, int, v3) #ifdef __NR_getdents64 - LSS_INLINE _syscall3(int, getdents64, int, f, - struct kernel_dirent64*, d, int, c) + LSS_INLINE + _syscall3(int, getdents64, int, f, struct kernel_dirent64*, d, int, c) #define KERNEL_DIRENT kernel_dirent64 #define GETDENTS sys_getdents64 #else - LSS_INLINE _syscall3(int, getdents, int, f, - struct kernel_dirent*, d, int, c) + LSS_INLINE + _syscall3(int, getdents, int, f, struct kernel_dirent*, d, int, c) #define KERNEL_DIRENT kernel_dirent #define GETDENTS sys_getdents #endif - LSS_INLINE _syscall0(pid_t, getpid) - LSS_INLINE _syscall0(pid_t, getppid) - LSS_INLINE _syscall0(pid_t, _gettid) - LSS_INLINE _syscall2(int, kill, pid_t, p, - int, s) - #if defined(__x86_64__) - /* Need to make sure off_t isn't truncated to 32-bits under x32. 
*/ - LSS_INLINE off_t LSS_NAME(lseek)(int f, off_t o, int w) { - _LSS_BODY(3, off_t, lseek, off_t, LSS_SYSCALL_ARG(f), (uint64_t)(o), - LSS_SYSCALL_ARG(w)); - } - #elif defined(__aarch64__) && defined (__ILP32__) - /* aarch64_ilp32 uses llseek for sys_lseek() */ - LSS_INLINE _syscall3_long(off_t, lseek, llseek, int, f, - off_t, o, int, w) - #else - LSS_INLINE _syscall3(off_t, lseek, int, f, - off_t, o, int, w) - #endif - LSS_INLINE _syscall2(int, munmap, void*, s, - size_t, l) - LSS_INLINE _syscall5(void*, _mremap, void*, o, - size_t, os, size_t, ns, - unsigned long, f, void *, a) - LSS_INLINE _syscall2(int, prctl, int, o, - long, a) - LSS_INLINE _syscall4(long, ptrace, int, r, - pid_t, p, void *, a, void *, d) - LSS_INLINE _syscall3(ssize_t, read, int, f, - void *, b, size_t, c) - LSS_INLINE _syscall4(int, rt_sigaction, int, s, - const struct kernel_sigaction*, a, - struct kernel_sigaction*, o, size_t, c) - LSS_INLINE _syscall4(int, rt_sigprocmask, int, h, - const struct kernel_sigset_t*, s, - struct kernel_sigset_t*, o, size_t, c); - LSS_INLINE _syscall0(int, sched_yield) - LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s, - const stack_t*, o) - #if defined(__NR_fstatat) - LSS_INLINE _syscall4(int, fstatat, int, d, const char *, p, - struct kernel_stat*, b, int, flags) - LSS_INLINE int LSS_NAME(stat)(const char* p, struct kernel_stat* b) { - return LSS_NAME(fstatat)(AT_FDCWD,p,b,0); - } - #else - LSS_INLINE _syscall2(int, stat, const char*, f, - struct kernel_stat*, b) - #endif - LSS_INLINE _syscall3(ssize_t, write, int, f, - const void *, b, size_t, c) - #if defined(__NR_getcpu) - LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu, - unsigned *, node, void *, unused); - #endif - #if defined(__x86_64__) || defined(__aarch64__) || \ - (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) - LSS_INLINE _syscall3(int, socket, int, d, - int, t, int, p) - #endif - #if defined(__x86_64__) || defined(__s390x__) - LSS_INLINE int LSS_NAME(sigaction)(int signum, - 
const struct kernel_sigaction *act, - struct kernel_sigaction *oldact) { - #if defined(__x86_64__) - /* On x86_64, the kernel requires us to always set our own + LSS_INLINE _syscall0(pid_t, getpid) LSS_INLINE _syscall0(pid_t, getppid) LSS_INLINE + _syscall0(pid_t, _gettid) LSS_INLINE _syscall2(int, kill, pid_t, p, int, s) +#if defined(__x86_64__) + /* Need to make sure off_t isn't truncated to 32-bits under x32. */ + LSS_INLINE off_t LSS_NAME(lseek)(int f, off_t o, int w) { + _LSS_BODY(3, off_t, lseek, off_t, LSS_SYSCALL_ARG(f), (uint64_t)(o), LSS_SYSCALL_ARG(w)); +} +#elif defined(__aarch64__) && defined(__ILP32__) + /* aarch64_ilp32 uses llseek for sys_lseek() */ + LSS_INLINE _syscall3_long(off_t, lseek, llseek, int, f, off_t, o, int, w) +#else + LSS_INLINE _syscall3(off_t, lseek, int, f, off_t, o, int, w) +#endif +LSS_INLINE _syscall2(int, munmap, void*, s, size_t, l) LSS_INLINE + _syscall5(void*, _mremap, void*, o, size_t, os, size_t, ns, unsigned long, f, void*, + a) LSS_INLINE _syscall2(int, prctl, int, o, long, a) LSS_INLINE + _syscall4(long, ptrace, int, r, pid_t, p, void*, a, void*, d) LSS_INLINE + _syscall3(ssize_t, read, int, f, void*, b, size_t, c) LSS_INLINE + _syscall4(int, rt_sigaction, int, s, const struct kernel_sigaction*, a, + struct kernel_sigaction*, o, size_t, c) LSS_INLINE + _syscall4(int, rt_sigprocmask, int, h, const struct kernel_sigset_t*, s, + struct kernel_sigset_t*, o, size_t, c); +LSS_INLINE _syscall0(int, sched_yield) LSS_INLINE + _syscall2(int, sigaltstack, const stack_t*, s, const stack_t*, o) +#if defined(__NR_fstatat) + LSS_INLINE _syscall4(int, fstatat, int, d, const char*, p, struct kernel_stat*, b, + int, flags) LSS_INLINE + int LSS_NAME(stat)(const char* p, struct kernel_stat* b) { + return LSS_NAME(fstatat)(AT_FDCWD, p, b, 0); +} +#else + LSS_INLINE _syscall2(int, stat, const char*, f, struct kernel_stat*, b) +#endif +LSS_INLINE _syscall3(ssize_t, write, int, f, const void*, b, size_t, c) +#if defined(__NR_getcpu) + 
LSS_INLINE _syscall3(long, getcpu, unsigned*, cpu, unsigned*, node, void*, unused); +#endif +#if defined(__x86_64__) || defined(__aarch64__) || \ + (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) +LSS_INLINE _syscall3(int, socket, int, d, int, t, int, p) +#endif +#if defined(__x86_64__) || defined(__s390x__) + LSS_INLINE int LSS_NAME(sigaction)(int signum, const struct kernel_sigaction* act, + struct kernel_sigaction* oldact) { +#if defined(__x86_64__) + /* On x86_64, the kernel requires us to always set our own * SA_RESTORER in order to be able to return from a signal handler. * This function must have a "magic" signature that the "gdb" * (and maybe the kernel?) can recognize. */ - if (act != NULL && !(act->sa_flags & SA_RESTORER)) { + if (act != NULL && !(act->sa_flags & SA_RESTORER)) { struct kernel_sigaction a = *act; - a.sa_flags |= SA_RESTORER; + a.sa_flags |= SA_RESTORER; a.sa_restorer = LSS_NAME(restore_rt)(); - return LSS_NAME(rt_sigaction)(signum, &a, oldact, - (KERNEL_NSIG+7)/8); - } else - #endif - return LSS_NAME(rt_sigaction)(signum, act, oldact, - (KERNEL_NSIG+7)/8); - } - - LSS_INLINE int LSS_NAME(sigprocmask)(int how, - const struct kernel_sigset_t *set, - struct kernel_sigset_t *oldset) { - return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); - } - #endif - #if (defined(__aarch64__)) || \ - (defined(__mips__) \ - && (_MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32)) - LSS_INLINE int LSS_NAME(sigaction)(int signum, - const struct kernel_sigaction *act, - struct kernel_sigaction *oldact) { - return LSS_NAME(rt_sigaction)(signum, act, oldact, (KERNEL_NSIG+7)/8); + return LSS_NAME(rt_sigaction)(signum, &a, oldact, (KERNEL_NSIG + 7) / 8); + } else +#endif + return LSS_NAME(rt_sigaction)(signum, act, oldact, (KERNEL_NSIG + 7) / 8); +} - } - LSS_INLINE int LSS_NAME(sigprocmask)(int how, - const struct kernel_sigset_t *set, - struct kernel_sigset_t *oldset) { - return LSS_NAME(rt_sigprocmask)(how, set, oldset, 
(KERNEL_NSIG+7)/8); - } - #endif - #ifdef __NR_wait4 - LSS_INLINE _syscall4(pid_t, wait4, pid_t, p, - int*, s, int, o, - struct kernel_rusage*, r) - LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){ - return LSS_NAME(wait4)(pid, status, options, 0); - } - #else - LSS_INLINE _syscall3(pid_t, waitpid, pid_t, p, - int*, s, int, o) - #endif - #ifdef __NR_openat - LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m) - LSS_INLINE int LSS_NAME(open)(const char* p, int f, int m) { - return LSS_NAME(openat)(AT_FDCWD,p,f,m ); - } - #else - LSS_INLINE _syscall3(int, open, const char*, p, - int, f, int, m) - #endif - LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) { +LSS_INLINE int LSS_NAME(sigprocmask)(int how, const struct kernel_sigset_t* set, + struct kernel_sigset_t* oldset) { + return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG + 7) / 8); +} +#endif +#if (defined(__aarch64__)) || \ + (defined(__mips__) && (_MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32)) +LSS_INLINE int LSS_NAME(sigaction)(int signum, const struct kernel_sigaction* act, + struct kernel_sigaction* oldact) { + return LSS_NAME(rt_sigaction)(signum, act, oldact, (KERNEL_NSIG + 7) / 8); +} +LSS_INLINE int LSS_NAME(sigprocmask)(int how, const struct kernel_sigset_t* set, + struct kernel_sigset_t* oldset) { + return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG + 7) / 8); +} +#endif +#ifdef __NR_wait4 +LSS_INLINE _syscall4(pid_t, wait4, pid_t, p, int*, s, int, o, struct kernel_rusage*, + r) LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int* status, int options) { + return LSS_NAME(wait4)(pid, status, options, 0); +} +#else + LSS_INLINE _syscall3(pid_t, waitpid, pid_t, p, int*, s, int, o) +#endif +#ifdef __NR_openat +LSS_INLINE _syscall4(int, openat, int, d, const char*, p, int, f, int, m) LSS_INLINE + int LSS_NAME(open)(const char* p, int f, int m) { + return LSS_NAME(openat)(AT_FDCWD, p, f, m); +} +#else + 
LSS_INLINE + _syscall3(int, open, const char*, p, int, f, int, m) +#endif +LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t* set) { memset(&set->sig, 0, sizeof(set->sig)); return 0; - } +} - LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) { +LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t* set) { memset(&set->sig, -1, sizeof(set->sig)); return 0; - } +} - LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set, - int signum) { - if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { - LSS_ERRNO = EINVAL; - return -1; +LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t* set, int signum) { + if (signum < 1 || signum > (int)(8 * sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; } else { - set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] - |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0]))); - return 0; + set->sig[(signum - 1) / (8 * sizeof(set->sig[0]))] |= + 1UL << ((signum - 1) % (8 * sizeof(set->sig[0]))); + return 0; } - } +} - LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set, - int signum) { - if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { - LSS_ERRNO = EINVAL; - return -1; +LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t* set, int signum) { + if (signum < 1 || signum > (int)(8 * sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; } else { - set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] - &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0])))); - return 0; + set->sig[(signum - 1) / (8 * sizeof(set->sig[0]))] &= + ~(1UL << ((signum - 1) % (8 * sizeof(set->sig[0])))); + return 0; } - } - - #if defined(__i386__) || \ - defined(__arm__) || \ - (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \ - defined(__PPC__) || \ - (defined(__s390__) && !defined(__s390x__)) - #define __NR__sigaction __NR_sigaction - #define __NR__sigprocmask __NR_sigprocmask - LSS_INLINE _syscall2(int, fstat64, int, f, - struct kernel_stat64 *, b) - LSS_INLINE _syscall5(int, _llseek, uint, fd, 
ulong, hi, ulong, lo, - loff_t *, res, uint, wh) +} + +#if defined(__i386__) || defined(__arm__) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || defined(__PPC__) || \ + (defined(__s390__) && !defined(__s390x__)) +#define __NR__sigaction __NR_sigaction +#define __NR__sigprocmask __NR_sigprocmask +LSS_INLINE _syscall2(int, fstat64, int, f, struct kernel_stat64*, b) LSS_INLINE + _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo, loff_t*, res, uint, wh) #if defined(__s390__) && !defined(__s390x__) - /* On s390, mmap2() arguments are passed in memory. */ - LSS_INLINE void* LSS_NAME(_mmap2)(void *s, size_t l, int p, int f, int d, - off_t o) { - unsigned long buf[6] = { (unsigned long) s, (unsigned long) l, - (unsigned long) p, (unsigned long) f, - (unsigned long) d, (unsigned long) o }; - LSS_REG(2, buf); - LSS_BODY(void*, mmap2, "0"(__r2)); - } + /* On s390, mmap2() arguments are passed in memory. */ + LSS_INLINE void* LSS_NAME(_mmap2)(void* s, size_t l, int p, int f, int d, off_t o) { + unsigned long buf[6] = {(unsigned long)s, (unsigned long)l, (unsigned long)p, + (unsigned long)f, (unsigned long)d, (unsigned long)o}; + LSS_REG(2, buf); + LSS_BODY(void*, mmap2, "0"(__r2)); +} #elif !defined(__PPC64__) - #define __NR__mmap2 __NR_mmap2 - LSS_INLINE _syscall6(void*, _mmap2, void*, s, - size_t, l, int, p, - int, f, int, d, - off_t, o) -#endif - LSS_INLINE _syscall3(int, _sigaction, int, s, - const struct kernel_old_sigaction*, a, - struct kernel_old_sigaction*, o) - LSS_INLINE _syscall3(int, _sigprocmask, int, h, - const unsigned long*, s, - unsigned long*, o) - LSS_INLINE _syscall2(int, stat64, const char *, p, - struct kernel_stat64 *, b) - - LSS_INLINE int LSS_NAME(sigaction)(int signum, - const struct kernel_sigaction *act, - struct kernel_sigaction *oldact) { - int old_errno = LSS_ERRNO; - int rc; - struct kernel_sigaction a; - if (act != NULL) { - a = *act; - #ifdef __i386__ +#define __NR__mmap2 __NR_mmap2 + LSS_INLINE + _syscall6(void*, _mmap2, 
void*, s, size_t, l, int, p, int, f, int, d, off_t, o) +#endif +LSS_INLINE _syscall3(int, _sigaction, int, s, const struct kernel_old_sigaction*, a, + struct kernel_old_sigaction*, o) LSS_INLINE + _syscall3(int, _sigprocmask, int, h, const unsigned long*, s, unsigned long*, o) LSS_INLINE + _syscall2(int, stat64, const char*, p, struct kernel_stat64*, b) + + LSS_INLINE int LSS_NAME(sigaction)(int signum, const struct kernel_sigaction* act, + struct kernel_sigaction* oldact) { + int old_errno = LSS_ERRNO; + int rc; + struct kernel_sigaction a; + if (act != NULL) { + a = *act; +#ifdef __i386__ /* On i386, the kernel requires us to always set our own * SA_RESTORER when using realtime signals. Otherwise, it does not * know how to return from a signal handler. This function must have @@ -2712,183 +2598,161 @@ struct kernel_stat { * TODO: Test whether ARM needs a restorer */ if (!(a.sa_flags & SA_RESTORER)) { - a.sa_flags |= SA_RESTORER; - a.sa_restorer = (a.sa_flags & SA_SIGINFO) - ? LSS_NAME(restore_rt)() : LSS_NAME(restore)(); + a.sa_flags |= SA_RESTORER; + a.sa_restorer = + (a.sa_flags & SA_SIGINFO) ? LSS_NAME(restore_rt)() : LSS_NAME(restore)(); } - #endif - } - rc = LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact, - (KERNEL_NSIG+7)/8); - if (rc < 0 && LSS_ERRNO == ENOSYS) { +#endif + } + rc = LSS_NAME(rt_sigaction)(signum, act ? 
&a : act, oldact, (KERNEL_NSIG + 7) / 8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa; if (!act) { - ptr_a = NULL; + ptr_a = NULL; } else { - oa.sa_handler_ = act->sa_handler_; - memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask)); - #ifndef __mips__ - oa.sa_restorer = act->sa_restorer; - #endif - oa.sa_flags = act->sa_flags; + oa.sa_handler_ = act->sa_handler_; + memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask)); +#ifndef __mips__ + oa.sa_restorer = act->sa_restorer; +#endif + oa.sa_flags = act->sa_flags; } if (!oldact) { - ptr_oa = NULL; + ptr_oa = NULL; } LSS_ERRNO = old_errno; rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa); if (rc == 0 && oldact) { - if (act) { - memcpy(oldact, act, sizeof(*act)); - } else { - memset(oldact, 0, sizeof(*oldact)); - } - oldact->sa_handler_ = ptr_oa->sa_handler_; - oldact->sa_flags = ptr_oa->sa_flags; - memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask)); - #ifndef __mips__ - oldact->sa_restorer = ptr_oa->sa_restorer; - #endif + if (act) { + memcpy(oldact, act, sizeof(*act)); + } else { + memset(oldact, 0, sizeof(*oldact)); + } + oldact->sa_handler_ = ptr_oa->sa_handler_; + oldact->sa_flags = ptr_oa->sa_flags; + memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask)); +#ifndef __mips__ + oldact->sa_restorer = ptr_oa->sa_restorer; +#endif } - } - return rc; } + return rc; +} - LSS_INLINE int LSS_NAME(sigprocmask)(int how, - const struct kernel_sigset_t *set, - struct kernel_sigset_t *oldset) { - int olderrno = LSS_ERRNO; - int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); - if (rc < 0 && LSS_ERRNO == ENOSYS) { +LSS_INLINE int LSS_NAME(sigprocmask)(int how, const struct kernel_sigset_t* set, + struct kernel_sigset_t* oldset) { + int olderrno = LSS_ERRNO; + int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG + 7) / 8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { LSS_ERRNO = olderrno; if (oldset) { - 
LSS_NAME(sigemptyset)(oldset); + LSS_NAME(sigemptyset)(oldset); } - rc = LSS_NAME(_sigprocmask)(how, - set ? &set->sig[0] : NULL, + rc = LSS_NAME(_sigprocmask)(how, set ? &set->sig[0] : NULL, oldset ? &oldset->sig[0] : NULL); - } - return rc; } - #endif - #if defined(__i386__) || \ - defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) || \ - (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \ - (defined(__PPC__) && !defined(__PPC64__)) || \ - (defined(__s390__) && !defined(__s390x__)) - /* On these architectures, implement mmap() with mmap2(). */ - LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d, - int64_t o) { - if (o % 4096) { + return rc; +} +#endif +#if defined(__i386__) || defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \ + (defined(__PPC__) && !defined(__PPC64__)) || (defined(__s390__) && !defined(__s390x__)) +/* On these architectures, implement mmap() with mmap2(). */ +LSS_INLINE void* LSS_NAME(mmap)(void* s, size_t l, int p, int f, int d, int64_t o) { + if (o % 4096) { LSS_ERRNO = EINVAL; - return (void *) -1; - } - return LSS_NAME(_mmap2)(s, l, p, f, d, (o / 4096)); + return (void*)-1; } - #elif defined(__s390x__) - /* On s390x, mmap() arguments are passed in memory. */ - LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d, - int64_t o) { - unsigned long buf[6] = { (unsigned long) s, (unsigned long) l, - (unsigned long) p, (unsigned long) f, - (unsigned long) d, (unsigned long) o }; - LSS_REG(2, buf); - LSS_BODY(void*, mmap, "0"(__r2)); - } - #elif defined(__x86_64__) - /* Need to make sure __off64_t isn't truncated to 32-bits under x32. 
*/ - LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d, - int64_t o) { - LSS_BODY(6, void*, mmap, LSS_SYSCALL_ARG(s), LSS_SYSCALL_ARG(l), - LSS_SYSCALL_ARG(p), LSS_SYSCALL_ARG(f), - LSS_SYSCALL_ARG(d), (uint64_t)(o)); - } - #elif defined(__aarch64__) && defined (__ILP32__) - /* aarch64_ilp32 uses mmap2 for sys_mmap() */ - LSS_INLINE _syscall6_long(void*, mmap, mmap2, void*, addr, size_t, length, - int, prot, int, flags, int, fd, int64_t, offset) - #else - /* Remaining 64-bit architectures. */ - LSS_INLINE _syscall6(void*, mmap, void*, addr, size_t, length, int, prot, - int, flags, int, fd, int64_t, offset) - #endif - #if defined(__i386__) || \ - defined(__PPC__) || \ - (defined(__arm__) && !defined(__ARM_EABI__)) || \ - (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \ - defined(__s390__) - - /* See sys_socketcall in net/socket.c in kernel source. + return LSS_NAME(_mmap2)(s, l, p, f, d, (o / 4096)); +} +#elif defined(__s390x__) +/* On s390x, mmap() arguments are passed in memory. */ +LSS_INLINE void* LSS_NAME(mmap)(void* s, size_t l, int p, int f, int d, int64_t o) { + unsigned long buf[6] = {(unsigned long)s, (unsigned long)l, (unsigned long)p, + (unsigned long)f, (unsigned long)d, (unsigned long)o}; + LSS_REG(2, buf); + LSS_BODY(void*, mmap, "0"(__r2)); +} +#elif defined(__x86_64__) +/* Need to make sure __off64_t isn't truncated to 32-bits under x32. */ +LSS_INLINE void* LSS_NAME(mmap)(void* s, size_t l, int p, int f, int d, int64_t o) { + LSS_BODY(6, void*, mmap, LSS_SYSCALL_ARG(s), LSS_SYSCALL_ARG(l), LSS_SYSCALL_ARG(p), + LSS_SYSCALL_ARG(f), LSS_SYSCALL_ARG(d), (uint64_t)(o)); +} +#elif defined(__aarch64__) && defined(__ILP32__) +/* aarch64_ilp32 uses mmap2 for sys_mmap() */ +LSS_INLINE _syscall6_long(void*, mmap, mmap2, void*, addr, size_t, length, int, prot, int, flags, + int, fd, int64_t, offset) +#else +/* Remaining 64-bit architectures. 
*/ +LSS_INLINE _syscall6(void*, mmap, void*, addr, size_t, length, int, prot, int, flags, int, fd, + int64_t, offset) +#endif +#if defined(__i386__) || defined(__PPC__) || (defined(__arm__) && !defined(__ARM_EABI__)) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || defined(__s390__) + +/* See sys_socketcall in net/socket.c in kernel source. * It de-multiplexes on its first arg and unpacks the arglist * array in its second arg. */ - LSS_INLINE _syscall2(int, socketcall, int, c, unsigned long*, a) - - LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { - unsigned long args[3] = { - (unsigned long) domain, - (unsigned long) type, - (unsigned long) protocol - }; - return LSS_NAME(socketcall)(1, args); - } - #elif defined(__ARM_EABI__) - LSS_INLINE _syscall3(int, socket, int, d, - int, t, int, p) - #endif - #if defined(__mips__) - /* sys_pipe() on MIPS has non-standard calling conventions, as it returns +LSS_INLINE _syscall2(int, socketcall, int, c, unsigned long*, a) + + LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { + unsigned long args[3] = {(unsigned long)domain, (unsigned long)type, (unsigned long)protocol}; + return LSS_NAME(socketcall)(1, args); +} +#elif defined(__ARM_EABI__) +LSS_INLINE _syscall3(int, socket, int, d, int, t, int, p) +#endif +#if defined(__mips__) +/* sys_pipe() on MIPS has non-standard calling conventions, as it returns * both file handles through CPU registers. 
*/ - LSS_INLINE int LSS_NAME(pipe)(int *p) { - register unsigned long __v0 __asm__("$2") = __NR_pipe; - register unsigned long __v1 __asm__("$3"); - register unsigned long __r7 __asm__("$7"); - __asm__ __volatile__ ("syscall\n" - : "=&r"(__v0), "=&r"(__v1), "+r" (__r7) - : "0"(__v0) - : "$8", "$9", "$10", "$11", "$12", - "$13", "$14", "$15", "$24", "memory"); - if (__r7) { +LSS_INLINE int LSS_NAME(pipe)(int* p) { + register unsigned long __v0 __asm__("$2") = __NR_pipe; + register unsigned long __v1 __asm__("$3"); + register unsigned long __r7 __asm__("$7"); + __asm__ __volatile__("syscall\n" + : "=&r"(__v0), "=&r"(__v1), "+r"(__r7) + : "0"(__v0) + : "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", "memory"); + if (__r7) { LSS_ERRNO = __v0; return -1; - } else { + } else { p[0] = __v0; p[1] = __v1; return 0; - } - } - #elif defined(__NR_pipe2) - LSS_INLINE _syscall2(int, pipe2, int *, p, - int, f ) - LSS_INLINE int LSS_NAME(pipe)( int * p) { - return LSS_NAME(pipe2)(p, 0); } - #else - LSS_INLINE _syscall1(int, pipe, int *, p) - #endif +} +#elif defined(__NR_pipe2) + LSS_INLINE _syscall2(int, pipe2, int*, p, int, f) LSS_INLINE int LSS_NAME(pipe)(int* p) { + return LSS_NAME(pipe2)(p, 0); +} +#else +LSS_INLINE _syscall1(int, pipe, int*, p) +#endif - LSS_INLINE pid_t LSS_NAME(gettid)() { +LSS_INLINE pid_t LSS_NAME(gettid)() { pid_t tid = LSS_NAME(_gettid)(); if (tid != -1) { - return tid; + return tid; } return LSS_NAME(getpid)(); - } +} - LSS_INLINE void *LSS_NAME(mremap)(void *old_address, size_t old_size, - size_t new_size, int flags, ...) { +LSS_INLINE void* LSS_NAME(mremap)(void* old_address, size_t old_size, size_t new_size, int flags, + ...) 
{ va_list ap; void *new_address, *rc; va_start(ap, flags); - new_address = va_arg(ap, void *); - rc = LSS_NAME(_mremap)(old_address, old_size, new_size, - flags, new_address); + new_address = va_arg(ap, void*); + rc = LSS_NAME(_mremap)(old_address, old_size, new_size, flags, new_address); va_end(ap); return rc; - } +} - LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) { +LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) { /* PTRACE_DETACH can sometimes forget to wake up the tracee and it * then sends job control signals to the real parent, rather than to * the tracer. We reduce the risk of this happening by starting a @@ -2897,12 +2761,12 @@ struct kernel_stat { */ int rc, err; LSS_NAME(sched_yield)(); - rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0); + rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void*)0, (void*)0); err = LSS_ERRNO; LSS_NAME(kill)(pid, SIGCONT); LSS_ERRNO = err; return rc; - } +} #endif #if defined(__cplusplus) && !defined(SYS_CPLUSPLUS) diff --git a/be/src/gutil/logging-inl.h b/be/src/gutil/logging-inl.h index 0dbd35984531d9..8f6d46ac403e5a 100644 --- a/be/src/gutil/logging-inl.h +++ b/be/src/gutil/logging-inl.h @@ -47,4 +47,4 @@ const bool GUTIL_DEBUG_MODE = false; const bool GUTIL_DEBUG_MODE = true; #endif -#endif // _LOGGING_IN_H_ +#endif // _LOGGING_IN_H_ diff --git a/be/src/gutil/macros.h b/be/src/gutil/macros.h index da7ea13fd790cb..dfebbf0b124d47 100644 --- a/be/src/gutil/macros.h +++ b/be/src/gutil/macros.h @@ -10,7 +10,8 @@ #ifndef BASE_MACROS_H_ #define BASE_MACROS_H_ -#include // For size_t +#include // For size_t + #include "gutil/port.h" // The swigged version of an abstract class must be concrete if any methods @@ -36,13 +37,12 @@ // containing the name of the variable. template -struct CompileAssert { -}; +struct CompileAssert {}; #ifndef COMPILE_ASSERT #define COMPILE_ASSERT(expr, msg) \ - typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 
1 : -1] ATTRIBUTE_UNUSED + typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1] ATTRIBUTE_UNUSED // Implementation details of COMPILE_ASSERT: // @@ -86,7 +86,6 @@ struct CompileAssert { // causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1. #endif // COMPILE_ASSERT - // A macro to disallow the copy constructor and operator= functions // This should be used in the private: declarations for a class // @@ -98,8 +97,8 @@ struct CompileAssert { // avoid these in new code. #ifndef DISALLOW_COPY_AND_ASSIGN #define DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&) = delete; \ - void operator=(const TypeName&) = delete + TypeName(const TypeName&) = delete; \ + void operator=(const TypeName&) = delete #endif // An older, politically incorrect name for the above. @@ -113,8 +112,8 @@ struct CompileAssert { // that wants to prevent anyone from instantiating it. This is // especially useful for classes containing only static methods. #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ - TypeName() = delete; \ - DISALLOW_COPY_AND_ASSIGN(TypeName) + TypeName() = delete; \ + DISALLOW_COPY_AND_ASSIGN(TypeName) // The arraysize(arr) macro returns the # of elements in an array arr. // The expression is a compile-time constant, and therefore can be @@ -186,22 +185,20 @@ char (&ArraySizeHelper(const T (&array)[N]))[N]; // // Starting with Visual C++ 2005, WinNT.h includes ARRAYSIZE. #if !defined(_MSC_VER) || (defined(_MSC_VER) && _MSC_VER < 1400) -#define ARRAYSIZE(a) \ - ((sizeof(a) / sizeof(*(a))) / \ - static_cast(!(sizeof(a) % sizeof(*(a))))) +#define ARRAYSIZE(a) ((sizeof(a) / sizeof(*(a))) / static_cast(!(sizeof(a) % sizeof(*(a))))) #endif // A macro to turn a symbol into a string -#define AS_STRING(x) AS_STRING_INTERNAL(x) -#define AS_STRING_INTERNAL(x) #x +#define AS_STRING(x) AS_STRING_INTERNAL(x) +#define AS_STRING_INTERNAL(x) #x // Macro that allows definition of a variable appended with the current line // number in the source file. 
Typically for use by other macros to allow the // user to declare multiple variables with the same "base" name inside the same // lexical block. -#define VARNAME_LINENUM(varname) VARNAME_LINENUM_INTERNAL(varname ## _L, __LINE__) +#define VARNAME_LINENUM(varname) VARNAME_LINENUM_INTERNAL(varname##_L, __LINE__) #define VARNAME_LINENUM_INTERNAL(v, line) VARNAME_LINENUM_INTERNAL2(v, line) -#define VARNAME_LINENUM_INTERNAL2(v, line) v ## line +#define VARNAME_LINENUM_INTERNAL2(v, line) v##line // The following enum should be used only as a constructor argument to indicate // that the variable has static storage class, and that the constructor should @@ -254,34 +251,37 @@ enum LinkerInitialized { LINKER_INITIALIZED }; // of code. #if defined(__clang__) && defined(LANG_CXX11) && defined(__has_warning) #if __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough") -#define FALLTHROUGH_INTENDED [[clang::fallthrough]] // NOLINT +#define FALLTHROUGH_INTENDED [[clang::fallthrough]] // NOLINT #endif #endif #ifndef FALLTHROUGH_INTENDED -#define FALLTHROUGH_INTENDED do { } while (0) +#define FALLTHROUGH_INTENDED \ + do { \ + } while (0) #endif // Retry on EINTR for functions like read() that return -1 on error. -#define RETRY_ON_EINTR(err, expr) do { \ - static_assert(std::is_signed::value, \ - #err " must be a signed integer"); \ - (err) = (expr); \ -} while ((err) == -1 && errno == EINTR) +#define RETRY_ON_EINTR(err, expr) \ + do { \ + static_assert(std::is_signed::value, #err " must be a signed integer"); \ + (err) = (expr); \ + } while ((err) == -1 && errno == EINTR) // Same as above but for stream API calls like fread() and fwrite(). 
-#define STREAM_RETRY_ON_EINTR(nread, stream, expr) do { \ - static_assert(std::is_unsigned::value == true, \ - #nread " must be an unsigned integer"); \ - (nread) = (expr); \ -} while ((nread) == 0 && ferror(stream) == EINTR) +#define STREAM_RETRY_ON_EINTR(nread, stream, expr) \ + do { \ + static_assert(std::is_unsigned::value == true, \ + #nread " must be an unsigned integer"); \ + (nread) = (expr); \ + } while ((nread) == 0 && ferror(stream) == EINTR) // Same as above but for functions that return pointer types (like // fopen() and freopen()). -#define POINTER_RETRY_ON_EINTR(ptr, expr) do { \ - static_assert(std::is_pointer::value == true, \ - #ptr " must be a pointer"); \ - (ptr) = (expr); \ -} while ((ptr) == nullptr && errno == EINTR) +#define POINTER_RETRY_ON_EINTR(ptr, expr) \ + do { \ + static_assert(std::is_pointer::value == true, #ptr " must be a pointer"); \ + (ptr) = (expr); \ + } while ((ptr) == nullptr && errno == EINTR) -#endif // BASE_MACROS_H_ +#endif // BASE_MACROS_H_ diff --git a/be/src/gutil/map-util.h b/be/src/gutil/map-util.h index c42672fd5f6787..12b8205fecf834 100644 --- a/be/src/gutil/map-util.h +++ b/be/src/gutil/map-util.h @@ -62,14 +62,13 @@ #ifndef UTIL_GTL_MAP_UTIL_H_ #define UTIL_GTL_MAP_UTIL_H_ +#include #include #include #include #include -#include - #include "gutil/logging-inl.h" // @@ -92,42 +91,38 @@ // This version assumes the key is printable, and includes it in the fatal log // message. 
template -const typename Collection::mapped_type& -FindOrDie(const Collection& collection, - const typename Collection::key_type& key) { - auto it = collection.find(key); - CHECK(it != collection.end()) << "Map key not found: " << key; - return it->second; +const typename Collection::mapped_type& FindOrDie(const Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.find(key); + CHECK(it != collection.end()) << "Map key not found: " << key; + return it->second; } // Same as above, but returns a non-const reference. template -typename Collection::mapped_type& -FindOrDie(Collection& collection, // NOLINT - const typename Collection::key_type& key) { - auto it = collection.find(key); - CHECK(it != collection.end()) << "Map key not found: " << key; - return it->second; +typename Collection::mapped_type& FindOrDie(Collection& collection, // NOLINT + const typename Collection::key_type& key) { + auto it = collection.find(key); + CHECK(it != collection.end()) << "Map key not found: " << key; + return it->second; } // Same as FindOrDie above, but doesn't log the key on failure. template -const typename Collection::mapped_type& -FindOrDieNoPrint(const Collection& collection, - const typename Collection::key_type& key) { - typename Collection::const_iterator it = collection.find(key); - CHECK(it != collection.end()) << "Map key not found"; - return it->second; +const typename Collection::mapped_type& FindOrDieNoPrint(const Collection& collection, + const typename Collection::key_type& key) { + typename Collection::const_iterator it = collection.find(key); + CHECK(it != collection.end()) << "Map key not found"; + return it->second; } // Same as above, but returns a non-const reference. 
template -typename Collection::mapped_type& -FindOrDieNoPrint(Collection& collection, // NOLINT - const typename Collection::key_type& key) { - typename Collection::iterator it = collection.find(key); - CHECK(it != collection.end()) << "Map key not found"; - return it->second; +typename Collection::mapped_type& FindOrDieNoPrint(Collection& collection, // NOLINT + const typename Collection::key_type& key) { + typename Collection::iterator it = collection.find(key); + CHECK(it != collection.end()) << "Map key not found"; + return it->second; } // Returns a const reference to the value associated with the given key if it @@ -140,86 +135,79 @@ FindOrDieNoPrint(Collection& collection, // NOLINT // string values, and you pass a char* as the default "value," either use the // returned value immediately or store it in a string (not string&). Details: template -const typename Collection::mapped_type& -FindWithDefault(const Collection& collection, - const typename Collection::key_type& key, - const typename Collection::mapped_type& value) { - auto it = collection.find(key); - if (it == collection.end()) { - return value; - } - return it->second; +const typename Collection::mapped_type& FindWithDefault( + const Collection& collection, const typename Collection::key_type& key, + const typename Collection::mapped_type& value) { + auto it = collection.find(key); + if (it == collection.end()) { + return value; + } + return it->second; } // Returns a pointer to the const value associated with the given key if it // exists, or NULL otherwise. 
template -const typename Collection::mapped_type* -FindOrNull(const Collection& collection, - const typename Collection::key_type& key) { - auto it = collection.find(key); - if (it == collection.end()) { - return 0; - } - return &it->second; +const typename Collection::mapped_type* FindOrNull(const Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.find(key); + if (it == collection.end()) { + return 0; + } + return &it->second; } // Same as above but returns a pointer to the non-const value. template -typename Collection::mapped_type* -FindOrNull(Collection& collection, // NOLINT - const typename Collection::key_type& key) { - auto it = collection.find(key); - if (it == collection.end()) { - return 0; - } - return &it->second; +typename Collection::mapped_type* FindOrNull(Collection& collection, // NOLINT + const typename Collection::key_type& key) { + auto it = collection.find(key); + if (it == collection.end()) { + return 0; + } + return &it->second; } // Returns a pointer to the const value associated with the greatest key // that's less than or equal to the given key, or NULL if no such key exists. template -const typename Collection::mapped_type* -FindFloorOrNull(const Collection& collection, - const typename Collection::key_type& key) { - auto it = collection.upper_bound(key); - if (it == collection.begin()) { - return 0; - } - return &(--it)->second; +const typename Collection::mapped_type* FindFloorOrNull(const Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.upper_bound(key); + if (it == collection.begin()) { + return 0; + } + return &(--it)->second; } // Same as above but returns a pointer to the non-const value. 
template -typename Collection::mapped_type* -FindFloorOrNull(Collection& collection, // NOLINT - const typename Collection::key_type& key) { - auto it = collection.upper_bound(key); - if (it == collection.begin()) { - return 0; - } - return &(--it)->second; +typename Collection::mapped_type* FindFloorOrNull(Collection& collection, // NOLINT + const typename Collection::key_type& key) { + auto it = collection.upper_bound(key); + if (it == collection.begin()) { + return 0; + } + return &(--it)->second; } // Returns a const-reference to the value associated with the greatest key // that's less than or equal to the given key, or crashes if it does not exist. template -const typename Collection::mapped_type& -FindFloorOrDie(const Collection& collection, - const typename Collection::key_type& key) { - auto it = collection.upper_bound(key); - CHECK(it != collection.begin()); - return (--it)->second; +const typename Collection::mapped_type& FindFloorOrDie(const Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.upper_bound(key); + CHECK(it != collection.begin()); + return (--it)->second; } // Same as above, but returns a non-const reference. template -typename Collection::mapped_type& -FindFloorOrDie(Collection& collection, - const typename Collection::key_type& key) { - auto it = collection.upper_bound(key); - CHECK(it != collection.begin()); - return (--it)->second; +typename Collection::mapped_type& FindFloorOrDie(Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.upper_bound(key); + CHECK(it != collection.begin()); + return (--it)->second; } // Returns the pointer value associated with the given key. If none is found, @@ -229,14 +217,13 @@ FindFloorOrDie(Collection& collection, // This function does not distinguish between a missing key and a key mapped // to a NULL value. 
template -typename Collection::mapped_type -FindPtrOrNull(const Collection& collection, - const typename Collection::key_type& key) { - auto it = collection.find(key); - if (it == collection.end()) { - return typename Collection::mapped_type(0); - } - return it->second; +typename Collection::mapped_type FindPtrOrNull(const Collection& collection, + const typename Collection::key_type& key) { + auto it = collection.find(key); + if (it == collection.end()) { + return typename Collection::mapped_type(0); + } + return it->second; } // Same as above, except takes non-const reference to collection. @@ -244,14 +231,13 @@ FindPtrOrNull(const Collection& collection, // This function is needed for containers that propagate constness to the // pointee, such as boost::ptr_map. template -typename Collection::mapped_type -FindPtrOrNull(Collection& collection, // NOLINT - const typename Collection::key_type& key) { - auto it = collection.find(key); - if (it == collection.end()) { - return typename Collection::mapped_type(0); - } - return it->second; +typename Collection::mapped_type FindPtrOrNull(Collection& collection, // NOLINT + const typename Collection::key_type& key) { + auto it = collection.find(key); + if (it == collection.end()) { + return typename Collection::mapped_type(0); + } + return it->second; } // FindPtrOrNull like function for maps whose value is a smart pointer like shared_ptr or @@ -259,30 +245,28 @@ FindPtrOrNull(Collection& collection, // NOLINT // Returns the raw pointer contained in the smart pointer for the first found key, if it exists, // or null if it doesn't. 
template -typename Collection::mapped_type::element_type* -FindPointeeOrNull(const Collection& collection, // NOLINT, - const typename Collection::key_type& key) { - auto it = collection.find(key); - if (it == collection.end()) { - return nullptr; - } - return it->second.get(); +typename Collection::mapped_type::element_type* FindPointeeOrNull( + const Collection& collection, // NOLINT, + const typename Collection::key_type& key) { + auto it = collection.find(key); + if (it == collection.end()) { + return nullptr; + } + return it->second.get(); } // Finds the value associated with the given key and copies it to *value (if not // NULL). Returns false if the key was not found, true otherwise. template -bool FindCopy(const Collection& collection, - const Key& key, - Value* const value) { - auto it = collection.find(key); - if (it == collection.end()) { - return false; - } - if (value) { - *value = it->second; - } - return true; +bool FindCopy(const Collection& collection, const Key& key, Value* const value) { + auto it = collection.find(key); + if (it == collection.end()) { + return false; + } + if (value) { + *value = it->second; + } + return true; } // @@ -292,22 +276,20 @@ bool FindCopy(const Collection& collection, // Returns true iff the given collection contains the given key. template bool ContainsKey(const Collection& collection, const Key& key) { - return collection.find(key) != collection.end(); + return collection.find(key) != collection.end(); } // Returns true iff the given collection contains the given key-value pair. 
template -bool ContainsKeyValuePair(const Collection& collection, - const Key& key, - const Value& value) { - typedef typename Collection::const_iterator const_iterator; - std::pair range = collection.equal_range(key); - for (const_iterator it = range.first; it != range.second; ++it) { - if (it->second == value) { - return true; +bool ContainsKeyValuePair(const Collection& collection, const Key& key, const Value& value) { + typedef typename Collection::const_iterator const_iterator; + std::pair range = collection.equal_range(key); + for (const_iterator it = range.first; it != range.second; ++it) { + if (it->second == value) { + return true; + } } - } - return false; + return false; } // @@ -318,34 +300,30 @@ bool ContainsKeyValuePair(const Collection& collection, // given key didn't previously exist. If the given key already existed in the // map, its value is changed to the given "value" and false is returned. template -bool InsertOrUpdate(Collection* const collection, - const typename Collection::value_type& vt) { - std::pair ret = collection->insert(vt); - if (!ret.second) { - // update - ret.first->second = vt.second; - return false; - } - return true; +bool InsertOrUpdate(Collection* const collection, const typename Collection::value_type& vt) { + std::pair ret = collection->insert(vt); + if (!ret.second) { + // update + ret.first->second = vt.second; + return false; + } + return true; } // Same as above, except that the key and value are passed separately. 
template -bool InsertOrUpdate(Collection* const collection, - const typename Collection::key_type& key, +bool InsertOrUpdate(Collection* const collection, const typename Collection::key_type& key, const typename Collection::mapped_type& value) { - return InsertOrUpdate( - collection, typename Collection::value_type(key, value)); + return InsertOrUpdate(collection, typename Collection::value_type(key, value)); } // Inserts/updates all the key-value pairs from the range defined by the // iterators "first" and "last" into the given collection. template -void InsertOrUpdateMany(Collection* const collection, - InputIterator first, InputIterator last) { - for (; first != last; ++first) { - InsertOrUpdate(collection, *first); - } +void InsertOrUpdateMany(Collection* const collection, InputIterator first, InputIterator last) { + for (; first != last; ++first) { + InsertOrUpdate(collection, *first); + } } // Change the value associated with a particular key in a map or hash_map @@ -353,18 +331,16 @@ void InsertOrUpdateMany(Collection* const collection, // value pointers. If there was an existing value for the key, it is deleted. // True indicates an insert took place, false indicates an update + delete. 
template -bool InsertAndDeleteExisting( - Collection* const collection, - const typename Collection::key_type& key, - const typename Collection::mapped_type& value) { - std::pair ret = - collection->insert(typename Collection::value_type(key, value)); - if (!ret.second) { - delete ret.first->second; - ret.first->second = value; - return false; - } - return true; +bool InsertAndDeleteExisting(Collection* const collection, const typename Collection::key_type& key, + const typename Collection::mapped_type& value) { + std::pair ret = + collection->insert(typename Collection::value_type(key, value)); + if (!ret.second) { + delete ret.first->second; + ret.first->second = value; + return false; + } + return true; } // Inserts the given key and value into the given collection iff the given key @@ -372,52 +348,43 @@ bool InsertAndDeleteExisting( // collection, the value is not changed. Returns true if the key-value pair was // inserted; returns false if the key was already present. template -bool InsertIfNotPresent(Collection* const collection, - const typename Collection::value_type& vt) { - return collection->insert(vt).second; +bool InsertIfNotPresent(Collection* const collection, const typename Collection::value_type& vt) { + return collection->insert(vt).second; } // Same as above except the key and value are passed separately. template -bool InsertIfNotPresent( - Collection* const collection, - const typename Collection::key_type& key, - const typename Collection::mapped_type& value) { - return InsertIfNotPresent( - collection, typename Collection::value_type(key, value)); +bool InsertIfNotPresent(Collection* const collection, const typename Collection::key_type& key, + const typename Collection::mapped_type& value) { + return InsertIfNotPresent(collection, typename Collection::value_type(key, value)); } // Same as above except dies if the key already exists in the collection. 
template -void InsertOrDie(Collection* const collection, - const typename Collection::value_type& value) { - CHECK(InsertIfNotPresent(collection, value)) << "duplicate value: " << value; +void InsertOrDie(Collection* const collection, const typename Collection::value_type& value) { + CHECK(InsertIfNotPresent(collection, value)) << "duplicate value: " << value; } // Same as above except doesn't log the value on error. template void InsertOrDieNoPrint(Collection* const collection, const typename Collection::value_type& value) { - CHECK(InsertIfNotPresent(collection, value)) << "duplicate value."; + CHECK(InsertIfNotPresent(collection, value)) << "duplicate value."; } // Inserts the key-value pair into the collection. Dies if key was already // present. template -void InsertOrDie(Collection* const collection, - const typename Collection::key_type& key, +void InsertOrDie(Collection* const collection, const typename Collection::key_type& key, const typename Collection::mapped_type& data) { - CHECK(InsertIfNotPresent(collection, key, data)) - << "duplicate key: " << key; + CHECK(InsertIfNotPresent(collection, key, data)) << "duplicate key: " << key; } // Same as above except deson't log the key on error. template -void InsertOrDieNoPrint( - Collection* const collection, - const typename Collection::key_type& key, - const typename Collection::mapped_type& data) { - CHECK(InsertIfNotPresent(collection, key, data)) << "duplicate key."; +void InsertOrDieNoPrint(Collection* const collection, const typename Collection::key_type& key, + const typename Collection::mapped_type& data) { + CHECK(InsertIfNotPresent(collection, key, data)) << "duplicate key."; } // Inserts a new key and default-initialized value. 
Dies if the key was already @@ -427,47 +394,42 @@ void InsertOrDieNoPrint( // SomeProto& proto = InsertKeyOrDie(&m, 3); // proto.set_field("foo"); template -typename Collection::mapped_type& InsertKeyOrDie( - Collection* const collection, - const typename Collection::key_type& key) { - typedef typename Collection::value_type value_type; - std::pair res = - collection->insert(value_type(key, typename Collection::mapped_type())); - CHECK(res.second) << "duplicate key: " << key; - return res.first->second; +typename Collection::mapped_type& InsertKeyOrDie(Collection* const collection, + const typename Collection::key_type& key) { + typedef typename Collection::value_type value_type; + std::pair res = + collection->insert(value_type(key, typename Collection::mapped_type())); + CHECK(res.second) << "duplicate key: " << key; + return res.first->second; } // // Emplace*() // template -bool EmplaceIfNotPresent(Collection* const collection, - Args&&... args) { - return collection->emplace(std::forward(args)...).second; +bool EmplaceIfNotPresent(Collection* const collection, Args&&... args) { + return collection->emplace(std::forward(args)...).second; } // Emplaces the given key-value pair into the collection. Returns true if the // given key didn't previously exist. If the given key already existed in the // map, its value is changed to the given "value" and false is returned. 
template -bool EmplaceOrUpdate(Collection* const collection, - const typename Collection::key_type& key, +bool EmplaceOrUpdate(Collection* const collection, const typename Collection::key_type& key, typename Collection::mapped_type&& value) { - typedef typename Collection::mapped_type mapped_type; - auto it = collection->find(key); - if (it == collection->end()) { - collection->emplace(key, std::forward(value)); - return true; - } - it->second = std::forward(value); - return false; + typedef typename Collection::mapped_type mapped_type; + auto it = collection->find(key); + if (it == collection->end()) { + collection->emplace(key, std::forward(value)); + return true; + } + it->second = std::forward(value); + return false; } template -void EmplaceOrDie(Collection* const collection, - Args&&... args) { - CHECK(EmplaceIfNotPresent(collection, std::forward(args)...)) - << "duplicate value"; +void EmplaceOrDie(Collection* const collection, Args&&... args) { + CHECK(EmplaceIfNotPresent(collection, std::forward(args)...)) << "duplicate value"; } // @@ -478,20 +440,17 @@ void EmplaceOrDie(Collection* const collection, // pair if it's not already present. Returns a reference to the value associated // with the key. template -typename Collection::mapped_type& -LookupOrInsert(Collection* const collection, - const typename Collection::value_type& vt) { - return collection->insert(vt).first->second; +typename Collection::mapped_type& LookupOrInsert(Collection* const collection, + const typename Collection::value_type& vt) { + return collection->insert(vt).first->second; } // Same as above except the key-value are passed separately. 
template -typename Collection::mapped_type& -LookupOrInsert(Collection* const collection, - const typename Collection::key_type& key, - const typename Collection::mapped_type& value) { - return LookupOrInsert( - collection, typename Collection::value_type(key, value)); +typename Collection::mapped_type& LookupOrInsert(Collection* const collection, + const typename Collection::key_type& key, + const typename Collection::mapped_type& value) { + return LookupOrInsert(collection, typename Collection::value_type(key, value)); } // It's similar to LookupOrInsert() but uses the emplace and r-value mechanics @@ -504,9 +463,8 @@ LookupOrInsert(Collection* const collection, // https://en.cppreference.com/w/cpp/container/map/emplace // https://en.cppreference.com/w/cpp/container/unordered_map/emplace template -typename Collection::mapped_type& -LookupOrEmplace(Collection* const collection, Args&&... args) { - return collection->emplace(std::forward(args)...).first->second; +typename Collection::mapped_type& LookupOrEmplace(Collection* const collection, Args&&... args) { + return collection->emplace(std::forward(args)...).first->second; } // Counts the number of equivalent elements in the given "sequence", and stores @@ -520,29 +478,25 @@ LookupOrEmplace(Collection* const collection, Args&&... 
args) { // assert(m["b"] == 2); // assert(m["c"] == 1); template -void AddTokenCounts( - const Sequence& sequence, - const typename Collection::mapped_type& increment, - Collection* const count_map) { - for (typename Sequence::const_iterator it = sequence.begin(); - it != sequence.end(); ++it) { - typename Collection::mapped_type& value = - LookupOrInsert(count_map, *it, - typename Collection::mapped_type()); - value += increment; - } +void AddTokenCounts(const Sequence& sequence, const typename Collection::mapped_type& increment, + Collection* const count_map) { + for (typename Sequence::const_iterator it = sequence.begin(); it != sequence.end(); ++it) { + typename Collection::mapped_type& value = + LookupOrInsert(count_map, *it, typename Collection::mapped_type()); + value += increment; + } } // Helpers for LookupOrInsertNew(), needed to create a new value type when the // type itself is a pointer, i.e., these extract the actual type from a pointer. template void MapUtilAssignNewDefaultInstance(T** location) { - *location = new T(); + *location = new T(); } template -void MapUtilAssignNewInstance(T** location, const Arg &arg) { - *location = new T(arg); +void MapUtilAssignNewInstance(T** location, const Arg& arg) { + *location = new T(arg); } // Returns a reference to the value associated with key. If not found, a value @@ -552,39 +506,34 @@ void MapUtilAssignNewInstance(T** location, const Arg &arg) { // inserting a new key, value pair involves constructing a new heap-allocated // Value, and storing a pointer to that in the collection. template -typename Collection::mapped_type& -LookupOrInsertNew(Collection* const collection, - const typename Collection::key_type& key) { - std::pair ret = - collection->insert( - typename Collection::value_type(key, - static_cast(NULL))); - if (ret.second) { - // This helper is needed to 'extract' the Value type from the type of the - // container value, which is (Value*). 
- MapUtilAssignNewDefaultInstance(&(ret.first->second)); - } - return ret.first->second; +typename Collection::mapped_type& LookupOrInsertNew(Collection* const collection, + const typename Collection::key_type& key) { + std::pair ret = + collection->insert(typename Collection::value_type( + key, static_cast(NULL))); + if (ret.second) { + // This helper is needed to 'extract' the Value type from the type of the + // container value, which is (Value*). + MapUtilAssignNewDefaultInstance(&(ret.first->second)); + } + return ret.first->second; } // Same as above but constructs the value using the single-argument constructor // and the given "arg". template -typename Collection::mapped_type& -LookupOrInsertNew(Collection* const collection, - const typename Collection::key_type& key, - const Arg& arg) { - std::pair ret = - collection->insert( - typename Collection::value_type( - key, - static_cast(NULL))); - if (ret.second) { - // This helper is needed to 'extract' the Value type from the type of the - // container value, which is (Value*). - MapUtilAssignNewInstance(&(ret.first->second), arg); - } - return ret.first->second; +typename Collection::mapped_type& LookupOrInsertNew(Collection* const collection, + const typename Collection::key_type& key, + const Arg& arg) { + std::pair ret = + collection->insert(typename Collection::value_type( + key, static_cast(NULL))); + if (ret.second) { + // This helper is needed to 'extract' the Value type from the type of the + // container value, which is (Value*). + MapUtilAssignNewInstance(&(ret.first->second), arg); + } + return ret.first->second; } // Lookup of linked/shared pointers is used in two scenarios: @@ -601,18 +550,16 @@ LookupOrInsertNew(Collection* const collection, // LookupOrInsertNewLinkedPtr, this function returns the shared_ptr instead of // the raw pointer. Value::element_type must be default constructable. 
template -typename Collection::mapped_type& -LookupOrInsertNewSharedPtr( - Collection* const collection, - const typename Collection::key_type& key) { - typedef typename Collection::mapped_type SharedPtr; - typedef typename Collection::mapped_type::element_type Element; - std::pair ret = - collection->insert(typename Collection::value_type(key, SharedPtr())); - if (ret.second) { - ret.first->second.reset(new Element()); - } - return ret.first->second; +typename Collection::mapped_type& LookupOrInsertNewSharedPtr( + Collection* const collection, const typename Collection::key_type& key) { + typedef typename Collection::mapped_type SharedPtr; + typedef typename Collection::mapped_type::element_type Element; + std::pair ret = + collection->insert(typename Collection::value_type(key, SharedPtr())); + if (ret.second) { + ret.first->second.reset(new Element()); + } + return ret.first->second; } // A variant of LookupOrInsertNewSharedPtr where the value is constructed using @@ -621,19 +568,16 @@ LookupOrInsertNewSharedPtr( // here. On the other hand it does not matter how expensive the construction of // the actual stored value is, as that only occurs if necessary. 
template -typename Collection::mapped_type& -LookupOrInsertNewSharedPtr( - Collection* const collection, - const typename Collection::key_type& key, - const Arg& arg) { - typedef typename Collection::mapped_type SharedPtr; - typedef typename Collection::mapped_type::element_type Element; - std::pair ret = - collection->insert(typename Collection::value_type(key, SharedPtr())); - if (ret.second) { - ret.first->second.reset(new Element(arg)); - } - return ret.first->second; +typename Collection::mapped_type& LookupOrInsertNewSharedPtr( + Collection* const collection, const typename Collection::key_type& key, const Arg& arg) { + typedef typename Collection::mapped_type SharedPtr; + typedef typename Collection::mapped_type::element_type Element; + std::pair ret = + collection->insert(typename Collection::value_type(key, SharedPtr())); + if (ret.second) { + ret.first->second.reset(new Element(arg)); + } + return ret.first->second; } // @@ -648,39 +592,36 @@ LookupOrInsertNewSharedPtr( // InsertOrReturnExisting has complementary behavior that returns the // address of an already existing value, rather than updating it. template -bool UpdateReturnCopy(Collection* const collection, - const typename Collection::key_type& key, +bool UpdateReturnCopy(Collection* const collection, const typename Collection::key_type& key, const typename Collection::mapped_type& value, typename Collection::mapped_type* previous) { - std::pair ret = - collection->insert(typename Collection::value_type(key, value)); - if (!ret.second) { - // update - if (previous) { - *previous = ret.first->second; - } - ret.first->second = value; - return true; - } - return false; + std::pair ret = + collection->insert(typename Collection::value_type(key, value)); + if (!ret.second) { + // update + if (previous) { + *previous = ret.first->second; + } + ret.first->second = value; + return true; + } + return false; } // Same as above except that the key and value are passed as a pair. 
template -bool UpdateReturnCopy(Collection* const collection, - const typename Collection::value_type& vt, +bool UpdateReturnCopy(Collection* const collection, const typename Collection::value_type& vt, typename Collection::mapped_type* previous) { - std::pair ret = - collection->insert(vt); - if (!ret.second) { - // update - if (previous) { - *previous = ret.first->second; - } - ret.first->second = vt.second; - return true; - } - return false; + std::pair ret = collection->insert(vt); + if (!ret.second) { + // update + if (previous) { + *previous = ret.first->second; + } + ret.first->second = vt.second; + return true; + } + return false; } // Tries to insert the given key-value pair into the collection. Returns NULL if @@ -691,39 +632,33 @@ bool UpdateReturnCopy(Collection* const collection, // twice. Unlike UpdateReturnCopy this also does not come with the issue of an // undefined previous* in case new data was inserted. template -typename Collection::mapped_type* const -InsertOrReturnExisting(Collection* const collection, - const typename Collection::value_type& vt) { - std::pair ret = collection->insert(vt); - if (ret.second) { - return NULL; // Inserted, no existing previous value. - } else { - return &ret.first->second; // Return address of already existing value. - } +typename Collection::mapped_type* const InsertOrReturnExisting( + Collection* const collection, const typename Collection::value_type& vt) { + std::pair ret = collection->insert(vt); + if (ret.second) { + return NULL; // Inserted, no existing previous value. + } else { + return &ret.first->second; // Return address of already existing value. + } } // Same as above, except for explicit key and data. 
template -typename Collection::mapped_type* const -InsertOrReturnExisting( - Collection* const collection, - const typename Collection::key_type& key, - const typename Collection::mapped_type& data) { - return InsertOrReturnExisting(collection, - typename Collection::value_type(key, data)); +typename Collection::mapped_type* const InsertOrReturnExisting( + Collection* const collection, const typename Collection::key_type& key, + const typename Collection::mapped_type& data) { + return InsertOrReturnExisting(collection, typename Collection::value_type(key, data)); } // Saves the reverse mapping into reverse. Key/value pairs are inserted in the // order the iterator returns them. template -void ReverseMap(const Collection& collection, - ReverseCollection* const reverse) { - CHECK(reverse != NULL); - for (typename Collection::const_iterator it = collection.begin(); - it != collection.end(); - ++it) { - InsertOrUpdate(reverse, it->second, it->first); - } +void ReverseMap(const Collection& collection, ReverseCollection* const reverse) { + CHECK(reverse != NULL); + for (typename Collection::const_iterator it = collection.begin(); it != collection.end(); + ++it) { + InsertOrUpdate(reverse, it->second, it->first); + } } // Erases the collection item identified by the given key, and returns the value @@ -745,16 +680,15 @@ void ReverseMap(const Collection& collection, // Note: if 'collection' is a multimap, this will only erase and return the // first value. 
template -typename Collection::mapped_type EraseKeyReturnValuePtr( - Collection* const collection, - const typename Collection::key_type& key) { - auto it = collection->find(key); - if (it == collection->end()) { - return typename Collection::mapped_type(); - } - typename Collection::mapped_type v = std::move(it->second); - collection->erase(it); - return v; +typename Collection::mapped_type EraseKeyReturnValuePtr(Collection* const collection, + const typename Collection::key_type& key) { + auto it = collection->find(key); + if (it == collection->end()) { + return typename Collection::mapped_type(); + } + typename Collection::mapped_type v = std::move(it->second); + collection->erase(it); + return v; } // Inserts all the keys from map_container into key_container, which must @@ -762,13 +696,12 @@ typename Collection::mapped_type EraseKeyReturnValuePtr( // // Note: any initial contents of the key_container are not cleared. template -void InsertKeysFromMap(const MapContainer& map_container, - KeyContainer* key_container) { - CHECK(key_container != NULL); - for (typename MapContainer::const_iterator it = map_container.begin(); - it != map_container.end(); ++it) { - key_container->insert(it->first); - } +void InsertKeysFromMap(const MapContainer& map_container, KeyContainer* key_container) { + CHECK(key_container != NULL); + for (typename MapContainer::const_iterator it = map_container.begin(); + it != map_container.end(); ++it) { + key_container->insert(it->first); + } } // Appends all the keys from map_container into key_container, which must @@ -776,13 +709,12 @@ void InsertKeysFromMap(const MapContainer& map_container, // // Note: any initial contents of the key_container are not cleared. 
template -void AppendKeysFromMap(const MapContainer& map_container, - KeyContainer* key_container) { - CHECK(key_container != NULL); - for (typename MapContainer::const_iterator it = map_container.begin(); - it != map_container.end(); ++it) { - key_container->push_back(it->first); - } +void AppendKeysFromMap(const MapContainer& map_container, KeyContainer* key_container) { + CHECK(key_container != NULL); + for (typename MapContainer::const_iterator it = map_container.begin(); + it != map_container.end(); ++it) { + key_container->push_back(it->first); + } } // A more specialized overload of AppendKeysFromMap to optimize reallocations @@ -793,26 +725,25 @@ void AppendKeysFromMap(const MapContainer& map_container, // container that supports it, but this seems to get us 99% of what we need // without the complexity of a SFINAE-based solution.) template -void AppendKeysFromMap(const MapContainer& map_container, - std::vector* key_container) { - CHECK(key_container != NULL); - // We now have the opportunity to call reserve(). Calling reserve() every - // time is a bad idea for some use cases: libstdc++'s implementation of - // vector<>::reserve() resizes the vector's backing store to exactly the - // given size (unless it's already at least that big). Because of this, - // the use case that involves appending a lot of small maps (total size - // N) one by one to a vector would be O(N^2). But never calling reserve() - // loses the opportunity to improve the use case of adding from a large - // map to an empty vector (this improves performance by up to 33%). A - // number of heuristics are possible; see the discussion in - // cl/34081696. Here we use the simplest one. 
- if (key_container->empty()) { - key_container->reserve(map_container.size()); - } - for (typename MapContainer::const_iterator it = map_container.begin(); - it != map_container.end(); ++it) { - key_container->push_back(it->first); - } +void AppendKeysFromMap(const MapContainer& map_container, std::vector* key_container) { + CHECK(key_container != NULL); + // We now have the opportunity to call reserve(). Calling reserve() every + // time is a bad idea for some use cases: libstdc++'s implementation of + // vector<>::reserve() resizes the vector's backing store to exactly the + // given size (unless it's already at least that big). Because of this, + // the use case that involves appending a lot of small maps (total size + // N) one by one to a vector would be O(N^2). But never calling reserve() + // loses the opportunity to improve the use case of adding from a large + // map to an empty vector (this improves performance by up to 33%). A + // number of heuristics are possible; see the discussion in + // cl/34081696. Here we use the simplest one. + if (key_container->empty()) { + key_container->reserve(map_container.size()); + } + for (typename MapContainer::const_iterator it = map_container.begin(); + it != map_container.end(); ++it) { + key_container->push_back(it->first); + } } // Inserts all the values from map_container into value_container, which must @@ -820,26 +751,24 @@ void AppendKeysFromMap(const MapContainer& map_container, // // Note: any initial contents of the value_container are not cleared. 
template -void AppendValuesFromMap(const MapContainer& map_container, - ValueContainer* value_container) { - CHECK(value_container != NULL); - for (typename MapContainer::const_iterator it = map_container.begin(); - it != map_container.end(); ++it) { - value_container->push_back(it->second); - } +void AppendValuesFromMap(const MapContainer& map_container, ValueContainer* value_container) { + CHECK(value_container != NULL); + for (typename MapContainer::const_iterator it = map_container.begin(); + it != map_container.end(); ++it) { + value_container->push_back(it->second); + } } template -void EmplaceValuesFromMap(MapContainer&& map_container, - ValueContainer* value_container) { - CHECK(value_container != nullptr); - // See AppendKeysFromMap for why this is done. - if (value_container->empty()) { - value_container->reserve(map_container.size()); - } - for (auto&& entry : map_container) { - value_container->emplace_back(std::move(entry.second)); - } +void EmplaceValuesFromMap(MapContainer&& map_container, ValueContainer* value_container) { + CHECK(value_container != nullptr); + // See AppendKeysFromMap for why this is done. + if (value_container->empty()) { + value_container->reserve(map_container.size()); + } + for (auto&& entry : map_container) { + value_container->emplace_back(std::move(entry.second)); + } } // A more specialized overload of AppendValuesFromMap to optimize reallocations @@ -852,7 +781,7 @@ void EmplaceValuesFromMap(MapContainer&& map_container, template void AppendValuesFromMap(const MapContainer& map_container, std::vector* value_container) { - EmplaceValuesFromMap(map_container, value_container); + EmplaceValuesFromMap(map_container, value_container); } // Compute and insert new value if it's absent from the map. 
Return a pair with a reference to the @@ -893,29 +822,26 @@ void AppendValuesFromMap(const MapContainer& map_container, // return make_pair(StringPiece(pbs.back()->string()), idx); // }); template -std::pair -ComputePairIfAbsentReturnAbsense(MapContainer* container, - const typename MapContainer::key_type& key, - Function compute_pair_func) { - typename MapContainer::iterator iter = container->find(key); - bool new_value = iter == container->end(); - if (new_value) { - auto p = compute_pair_func(); - std::pair result = - container->emplace(std::move(p.first), std::move(p.second)); - DCHECK(result.second) << "duplicate key: " << key; - iter = result.first; - } - return std::make_pair(&iter->second, new_value); +std::pair ComputePairIfAbsentReturnAbsense( + MapContainer* container, const typename MapContainer::key_type& key, + Function compute_pair_func) { + typename MapContainer::iterator iter = container->find(key); + bool new_value = iter == container->end(); + if (new_value) { + auto p = compute_pair_func(); + std::pair result = + container->emplace(std::move(p.first), std::move(p.second)); + DCHECK(result.second) << "duplicate key: " << key; + iter = result.first; + } + return std::make_pair(&iter->second, new_value); } template -std::pair -ComputeIfAbsentReturnAbsense(MapContainer* container, - const typename MapContainer::key_type& key, - Function compute_func) { - return ComputePairIfAbsentReturnAbsense(container, key, [&key, &compute_func] { - return std::make_pair(key, compute_func()); - }); +std::pair ComputeIfAbsentReturnAbsense( + MapContainer* container, const typename MapContainer::key_type& key, + Function compute_func) { + return ComputePairIfAbsentReturnAbsense( + container, key, [&key, &compute_func] { return std::make_pair(key, compute_func()); }); }; // Like the above but doesn't return a pair, just returns a pointer to the value. 
@@ -926,19 +852,19 @@ ComputeIfAbsentReturnAbsense(MapContainer* container, // [] { return new_value; }); // template -typename MapContainer::mapped_type* const -ComputeIfAbsent(MapContainer* container, - const typename MapContainer::key_type& key, - Function compute_func) { - return ComputeIfAbsentReturnAbsense(container, key, compute_func).first; +typename MapContainer::mapped_type* const ComputeIfAbsent( + MapContainer* container, const typename MapContainer::key_type& key, + Function compute_func) { + return ComputeIfAbsentReturnAbsense(container, key, compute_func).first; }; template -typename MapContainer::mapped_type* const -ComputePairIfAbsent(MapContainer* container, - const typename MapContainer::key_type& key, - Function compute_pair_func) { - return ComputePairIfAbsentReturnAbsense(container, key, compute_pair_func).first; +typename MapContainer::mapped_type* const ComputePairIfAbsent( + MapContainer* container, const typename MapContainer::key_type& key, + Function compute_pair_func) { + return ComputePairIfAbsentReturnAbsense(container, key, + compute_pair_func) + .first; }; -#endif // UTIL_GTL_MAP_UTIL_H_ +#endif // UTIL_GTL_MAP_UTIL_H_ diff --git a/be/src/gutil/move.h b/be/src/gutil/move.h index d94ebf6fddf74c..129c3f4e2e2c75 100644 --- a/be/src/gutil/move.h +++ b/be/src/gutil/move.h @@ -201,18 +201,20 @@ // // The workaround is to explicitly declare your copy constructor. 
// -#define MOVE_ONLY_TYPE_FOR_CPP_03(type, rvalue_type) \ - private: \ - struct rvalue_type { \ - explicit rvalue_type(type* object) : object(object) {} \ - type* object; \ - }; \ - type(type&); \ - void operator=(type&); \ - public: \ - operator rvalue_type() { return rvalue_type(this); } \ - type Pass() { return type(rvalue_type(this)); } \ - typedef void MoveOnlyTypeForCPP03; \ - private: +#define MOVE_ONLY_TYPE_FOR_CPP_03(type, rvalue_type) \ +private: \ + struct rvalue_type { \ + explicit rvalue_type(type* object) : object(object) {} \ + type* object; \ + }; \ + type(type&); \ + void operator=(type&); \ + \ +public: \ + operator rvalue_type() { return rvalue_type(this); } \ + type Pass() { return type(rvalue_type(this)); } \ + typedef void MoveOnlyTypeForCPP03; \ + \ +private: -#endif // BASE_MOVE_H_ +#endif // BASE_MOVE_H_ diff --git a/be/src/gutil/once.cc b/be/src/gutil/once.cc index dfc4c20361deda..2c65a12a0155fa 100644 --- a/be/src/gutil/once.cc +++ b/be/src/gutil/once.cc @@ -1,7 +1,9 @@ // Copyright 2008 Google Inc. All Rights Reserved. -#include #include "gutil/once.h" + +#include + #include "gutil/dynamic_annotations.h" #include "gutil/spinlock_internal.h" @@ -10,39 +12,39 @@ // This is safe provided we always perform a memory barrier // immediately before setting the value to GOOGLE_ONCE_INTERNAL_DONE. 
-void GoogleOnceInternalInit(Atomic32 *control, void (*func)(), - void (*func_with_arg)(void*), void* arg) { -// if (DEBUG_MODE) { -// int32 old_control = base::subtle::Acquire_Load(control); -// if (old_control != GOOGLE_ONCE_INTERNAL_INIT && -// old_control != GOOGLE_ONCE_INTERNAL_RUNNING && -// old_control != GOOGLE_ONCE_INTERNAL_WAITER && -// old_control != GOOGLE_ONCE_INTERNAL_DONE) { -// LOG(FATAL) << "Either GoogleOnceType is used in non-static storage " -// "(where GoogleOnceDynamic might be appropriate), " -// "or there's a memory corruption."; -// } -// } - static const base::internal::SpinLockWaitTransition trans[] = { - { GOOGLE_ONCE_INTERNAL_INIT, GOOGLE_ONCE_INTERNAL_RUNNING, true }, - { GOOGLE_ONCE_INTERNAL_RUNNING, GOOGLE_ONCE_INTERNAL_WAITER, false }, - { GOOGLE_ONCE_INTERNAL_DONE, GOOGLE_ONCE_INTERNAL_DONE, true } - }; - // Short circuit the simplest case to avoid procedure call overhead. - if (base::subtle::Acquire_CompareAndSwap(control, GOOGLE_ONCE_INTERNAL_INIT, - GOOGLE_ONCE_INTERNAL_RUNNING) == GOOGLE_ONCE_INTERNAL_INIT || - base::internal::SpinLockWait(control, ARRAYSIZE(trans), trans) == - GOOGLE_ONCE_INTERNAL_INIT) { - if (func != nullptr) { - (*func)(); - } else { - (*func_with_arg)(arg); - } - ANNOTATE_HAPPENS_BEFORE(control); - int32 old_control = base::subtle::NoBarrier_Load(control); - base::subtle::Release_Store(control, GOOGLE_ONCE_INTERNAL_DONE); - if (old_control == GOOGLE_ONCE_INTERNAL_WAITER) { - base::internal::SpinLockWake(control, true); - } - } // else *control is already GOOGLE_ONCE_INTERNAL_DONE +void GoogleOnceInternalInit(Atomic32* control, void (*func)(), void (*func_with_arg)(void*), + void* arg) { + // if (DEBUG_MODE) { + // int32 old_control = base::subtle::Acquire_Load(control); + // if (old_control != GOOGLE_ONCE_INTERNAL_INIT && + // old_control != GOOGLE_ONCE_INTERNAL_RUNNING && + // old_control != GOOGLE_ONCE_INTERNAL_WAITER && + // old_control != GOOGLE_ONCE_INTERNAL_DONE) { + // LOG(FATAL) << "Either 
GoogleOnceType is used in non-static storage " + // "(where GoogleOnceDynamic might be appropriate), " + // "or there's a memory corruption."; + // } + // } + static const base::internal::SpinLockWaitTransition trans[] = { + {GOOGLE_ONCE_INTERNAL_INIT, GOOGLE_ONCE_INTERNAL_RUNNING, true}, + {GOOGLE_ONCE_INTERNAL_RUNNING, GOOGLE_ONCE_INTERNAL_WAITER, false}, + {GOOGLE_ONCE_INTERNAL_DONE, GOOGLE_ONCE_INTERNAL_DONE, true}}; + // Short circuit the simplest case to avoid procedure call overhead. + if (base::subtle::Acquire_CompareAndSwap(control, GOOGLE_ONCE_INTERNAL_INIT, + GOOGLE_ONCE_INTERNAL_RUNNING) == + GOOGLE_ONCE_INTERNAL_INIT || + base::internal::SpinLockWait(control, ARRAYSIZE(trans), trans) == + GOOGLE_ONCE_INTERNAL_INIT) { + if (func != nullptr) { + (*func)(); + } else { + (*func_with_arg)(arg); + } + ANNOTATE_HAPPENS_BEFORE(control); + int32 old_control = base::subtle::NoBarrier_Load(control); + base::subtle::Release_Store(control, GOOGLE_ONCE_INTERNAL_DONE); + if (old_control == GOOGLE_ONCE_INTERNAL_WAITER) { + base::internal::SpinLockWake(control, true); + } + } // else *control is already GOOGLE_ONCE_INTERNAL_DONE } diff --git a/be/src/gutil/once.h b/be/src/gutil/once.h index c81e87131f7222..31eed46d64ce64 100644 --- a/be/src/gutil/once.h +++ b/be/src/gutil/once.h @@ -25,52 +25,51 @@ #define BASE_ONCE_H_ #include "gutil/atomicops.h" -#include "gutil/integral_types.h" #include "gutil/dynamic_annotations.h" +#include "gutil/integral_types.h" #include "gutil/macros.h" #include "gutil/port.h" #include "gutil/type_traits.h" // The following enum values are not for use by clients enum { - GOOGLE_ONCE_INTERNAL_INIT = 0, - GOOGLE_ONCE_INTERNAL_RUNNING = 0x65C2937B, // an improbable 32-bit value - GOOGLE_ONCE_INTERNAL_WAITER = 0x05A308D2, // a different improbable value - GOOGLE_ONCE_INTERNAL_DONE = 0x3F2D8AB0, // yet another improbable value + GOOGLE_ONCE_INTERNAL_INIT = 0, + GOOGLE_ONCE_INTERNAL_RUNNING = 0x65C2937B, // an improbable 32-bit value + 
GOOGLE_ONCE_INTERNAL_WAITER = 0x05A308D2, // a different improbable value + GOOGLE_ONCE_INTERNAL_DONE = 0x3F2D8AB0, // yet another improbable value }; struct GoogleOnceType { - Atomic32 state; + Atomic32 state; }; -#define GOOGLE_ONCE_INIT { GOOGLE_ONCE_INTERNAL_INIT } +#define GOOGLE_ONCE_INIT \ + { GOOGLE_ONCE_INTERNAL_INIT } // For internal use only. -extern void GoogleOnceInternalInit(Atomic32* state, void (*func)(), - void (*func_with_arg)(void*), void* arg); +extern void GoogleOnceInternalInit(Atomic32* state, void (*func)(), void (*func_with_arg)(void*), + void* arg); inline void GoogleOnceInit(GoogleOnceType* state, void (*func)()) { - Atomic32 s = Acquire_Load(&state->state); - if (PREDICT_FALSE(s != GOOGLE_ONCE_INTERNAL_DONE)) { - GoogleOnceInternalInit(&state->state, func, 0, 0); - } - ANNOTATE_HAPPENS_AFTER(&state->state); + Atomic32 s = Acquire_Load(&state->state); + if (PREDICT_FALSE(s != GOOGLE_ONCE_INTERNAL_DONE)) { + GoogleOnceInternalInit(&state->state, func, 0, 0); + } + ANNOTATE_HAPPENS_AFTER(&state->state); } // A version of GoogleOnceInit where the function argument takes a pointer // of arbitrary type. -template -inline void GoogleOnceInitArg(GoogleOnceType* state, - void (*func_with_arg)(T*), T* arg) { - Atomic32 s = Acquire_Load(&state->state); - if (PREDICT_FALSE(s != GOOGLE_ONCE_INTERNAL_DONE)) { - // Deal with const T as well as non-const T. - typedef typename base::remove_const::type mutable_T; - GoogleOnceInternalInit(&state->state, 0, - reinterpret_cast(func_with_arg), - const_cast(arg)); - } - ANNOTATE_HAPPENS_AFTER(&state->state); +template +inline void GoogleOnceInitArg(GoogleOnceType* state, void (*func_with_arg)(T*), T* arg) { + Atomic32 s = Acquire_Load(&state->state); + if (PREDICT_FALSE(s != GOOGLE_ONCE_INTERNAL_DONE)) { + // Deal with const T as well as non-const T. 
+ typedef typename base::remove_const::type mutable_T; + GoogleOnceInternalInit(&state->state, 0, reinterpret_cast(func_with_arg), + const_cast(arg)); + } + ANNOTATE_HAPPENS_AFTER(&state->state); } // GoogleOnceDynamic is like GoogleOnceType, but is dynamically @@ -92,28 +91,29 @@ inline void GoogleOnceInitArg(GoogleOnceType* state, // } // } class GoogleOnceDynamic { - public: - GoogleOnceDynamic() : state_(GOOGLE_ONCE_INTERNAL_INIT) { } +public: + GoogleOnceDynamic() : state_(GOOGLE_ONCE_INTERNAL_INIT) {} - // If this->Init() has not been called before by any thread, - // execute (*func_with_arg)(arg) then return. - // Otherwise, wait until that prior invocation has finished - // executing its function, then return. - template - void Init(void (*func_with_arg)(T*), T* arg) { - Atomic32 s = Acquire_Load(&this->state_); - if (PREDICT_FALSE(s != GOOGLE_ONCE_INTERNAL_DONE)) { - // Deal with const T as well as non-const T. - typedef typename base::remove_const::type mutable_T; - GoogleOnceInternalInit(&this->state_, 0, - reinterpret_cast(func_with_arg), - const_cast(arg)); + // If this->Init() has not been called before by any thread, + // execute (*func_with_arg)(arg) then return. + // Otherwise, wait until that prior invocation has finished + // executing its function, then return. + template + void Init(void (*func_with_arg)(T*), T* arg) { + Atomic32 s = Acquire_Load(&this->state_); + if (PREDICT_FALSE(s != GOOGLE_ONCE_INTERNAL_DONE)) { + // Deal with const T as well as non-const T. 
+ typedef typename base::remove_const::type mutable_T; + GoogleOnceInternalInit(&this->state_, 0, + reinterpret_cast(func_with_arg), + const_cast(arg)); + } + ANNOTATE_HAPPENS_AFTER(&this->state_); } - ANNOTATE_HAPPENS_AFTER(&this->state_); - } - private: - Atomic32 state_; - DISALLOW_COPY_AND_ASSIGN(GoogleOnceDynamic); + +private: + Atomic32 state_; + DISALLOW_COPY_AND_ASSIGN(GoogleOnceDynamic); }; -#endif // BASE_ONCE_H_ +#endif // BASE_ONCE_H_ diff --git a/be/src/gutil/port.h b/be/src/gutil/port.h index a09b1aa1eec666..0750d4bf589550 100644 --- a/be/src/gutil/port.h +++ b/be/src/gutil/port.h @@ -8,14 +8,14 @@ #ifndef BASE_PORT_H_ #define BASE_PORT_H_ -#include // So we can set the bounds of our types -#include // for memcpy() -#include // for free() +#include // So we can set the bounds of our types +#include // for free() +#include // for memcpy() #if defined(__APPLE__) -#include // for getpagesize() on mac +#include // for getpagesize() on mac #elif defined(OS_CYGWIN) -#include // for memalign() +#include // for memalign() #endif #include "gutil/integral_types.h" @@ -27,12 +27,12 @@ * __STDC_FORMAT_MACROS is defined before is included." */ #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS -#endif /* __STDC_FORMAT_MACROS */ -#endif /* __APPLE__ */ +#endif /* __STDC_FORMAT_MACROS */ +#endif /* __APPLE__ */ /* Default for most OSes */ /* We use SIGPWR since that seems unlikely to be used for other reasons. 
*/ -#define GOOGLE_OBSCURE_SIGNAL SIGPWR +#define GOOGLE_OBSCURE_SIGNAL SIGPWR #if defined OS_LINUX || defined OS_CYGWIN @@ -61,7 +61,7 @@ typedef unsigned long ulong; #endif #if defined(__cplusplus) -#include // For _GLIBCXX macros +#include // For _GLIBCXX macros #endif #if !defined(HAVE_TLS) && defined(_GLIBCXX_HAVE_TLS) && defined(__x86_64__) @@ -82,19 +82,19 @@ typedef unsigned long ulong; typedef void (*sig_t)(int); // Solaris only defines strtoll, not strtoq -#define strtoq strtoll +#define strtoq strtoll #define strtouq strtoull // It doesn't define the posix-standard(?) u_int_16 -#include // NOLINT(build/include) +#include // NOLINT(build/include) typedef uint16_t u_int16_t; #elif defined __APPLE__ // BIG_ENDIAN -#include // NOLINT(build/include) +#include // NOLINT(build/include) /* Let's try and follow the Linux convention */ -#define __BYTE_ORDER BYTE_ORDER +#define __BYTE_ORDER BYTE_ORDER #define __LITTLE_ENDIAN LITTLE_ENDIAN #define __BIG_ENDIAN BIG_ENDIAN @@ -103,7 +103,7 @@ typedef uint16_t u_int16_t; // The following guarenty declaration of the byte swap functions, and // define __BYTE_ORDER for MSVC #ifdef _MSC_VER -#include // NOLINT(build/include) +#include // NOLINT(build/include) #define __BYTE_ORDER __LITTLE_ENDIAN #define bswap_16(x) _byteswap_ushort(x) #define bswap_32(x) _byteswap_ulong(x) @@ -117,36 +117,30 @@ typedef uint16_t u_int16_t; #define bswap_64(x) OSSwapInt64(x) #elif defined(__GLIBC__) -#include // IWYU pragma: export +#include // IWYU pragma: export #else static inline uint16 bswap_16(uint16 x) { - return ((x & 0xFF) << 8) | ((x & 0xFF00) >> 8); + return ((x & 0xFF) << 8) | ((x & 0xFF00) >> 8); } #define bswap_16(x) bswap_16(x) static inline uint32 bswap_32(uint32 x) { - return (((x & 0xFF) << 24) | - ((x & 0xFF00) << 8) | - ((x & 0xFF0000) >> 8) | - ((x & 0xFF000000) >> 24)); + return (((x & 0xFF) << 24) | ((x & 0xFF00) << 8) | ((x & 0xFF0000) >> 8) | + ((x & 0xFF000000) >> 24)); } #define bswap_32(x) bswap_32(x) static 
inline uint64 bswap_64(uint64 x) { - return (((x & GG_ULONGLONG(0xFF)) << 56) | - ((x & GG_ULONGLONG(0xFF00)) << 40) | - ((x & GG_ULONGLONG(0xFF0000)) << 24) | - ((x & GG_ULONGLONG(0xFF000000)) << 8) | - ((x & GG_ULONGLONG(0xFF00000000)) >> 8) | - ((x & GG_ULONGLONG(0xFF0000000000)) >> 24) | - ((x & GG_ULONGLONG(0xFF000000000000)) >> 40) | - ((x & GG_ULONGLONG(0xFF00000000000000)) >> 56)); + return (((x & GG_ULONGLONG(0xFF)) << 56) | ((x & GG_ULONGLONG(0xFF00)) << 40) | + ((x & GG_ULONGLONG(0xFF0000)) << 24) | ((x & GG_ULONGLONG(0xFF000000)) << 8) | + ((x & GG_ULONGLONG(0xFF00000000)) >> 8) | ((x & GG_ULONGLONG(0xFF0000000000)) >> 24) | + ((x & GG_ULONGLONG(0xFF000000000000)) >> 40) | + ((x & GG_ULONGLONG(0xFF00000000000000)) >> 56)); } #define bswap_64(x) bswap_64(x) #endif - // define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN // using the above endian defintions from endian.h if // endian.h was included @@ -169,10 +163,10 @@ static inline uint64 bswap_64(uint64 x) { // there is also PDP endian ... -#endif // __BYTE_ORDER +#endif // __BYTE_ORDER // Define the OS's path separator -#ifdef __cplusplus // C won't merge duplicate const variables at link time +#ifdef __cplusplus // C won't merge duplicate const variables at link time // Some headers provide a macro for this (GCC's system.h), remove it so that we // can use our own. #undef PATH_SEPARATOR @@ -199,7 +193,7 @@ const char PATH_SEPARATOR = '/'; // #include inline void va_copy(va_list& a, va_list& b) { - a = b; + a = b; } // Nor does it have uid_t @@ -218,30 +212,29 @@ typedef int uid_t; #endif // Linux has this in -#define __ptr_t void * +#define __ptr_t void* // Linux has this in -#define EXFULL ENOMEM // not really that great a translation... +#define EXFULL ENOMEM // not really that great a translation... // Darwin doesn't have strnlen. No comment. 
-inline size_t strnlen(const char *s, size_t maxlen) { - const char* end = (const char *)memchr(s, '\0', maxlen); - if (end) - return end - s; - return maxlen; +inline size_t strnlen(const char* s, size_t maxlen) { + const char* end = (const char*)memchr(s, '\0', maxlen); + if (end) return end - s; + return maxlen; } -namespace std {} // Avoid error if we didn't see std. -using namespace std; // Just like VC++, we need a using here. +namespace std {} // namespace std +using namespace std; // Just like VC++, we need a using here. // Doesn't exist on OSX; used in google.cc for send() to mean "no flags". #define MSG_NOSIGNAL 0 // No SIGPWR on MacOSX. SIGINFO seems suitably obscure. #undef GOOGLE_OBSCURE_SIGNAL -#define GOOGLE_OBSCURE_SIGNAL SIGINFO +#define GOOGLE_OBSCURE_SIGNAL SIGINFO -#elif defined(OS_CYGWIN) // Cygwin-specific behavior. +#elif defined(OS_CYGWIN) // Cygwin-specific behavior. #if defined(__CYGWIN32__) #define __WORDSIZE 32 @@ -255,26 +248,27 @@ using namespace std; // Just like VC++, we need a using here. #define GOOGLE_OBSCURE_SIGNAL 0 struct stack_t { - void* ss_sp; - int ss_flags; - size_t ss_size; + void* ss_sp; + int ss_flags; + size_t ss_size; }; -inline int sigaltstack(stack_t* ss, stack_t* oss) { return 0; } +inline int sigaltstack(stack_t* ss, stack_t* oss) { + return 0; +} -#define PTHREAD_STACK_MIN 0 // Not provided by cygwin +#define PTHREAD_STACK_MIN 0 // Not provided by cygwin // Scans memory for a character. // memrchr is used in a few places, but it's linux-specific. 
inline void* memrchr(const void* bytes, int find_char, size_t len) { - const unsigned char* cursor = - reinterpret_cast(bytes) + len - 1; - unsigned char actual_char = find_char; - for (; cursor >= bytes; --cursor) { - if (*cursor == actual_char) { - return const_cast(reinterpret_cast(cursor)); + const unsigned char* cursor = reinterpret_cast(bytes) + len - 1; + unsigned char actual_char = find_char; + for (; cursor >= bytes; --cursor) { + if (*cursor == actual_char) { + return const_cast(reinterpret_cast(cursor)); + } } - } - return NULL; + return NULL; } #endif @@ -284,7 +278,6 @@ inline void* memrchr(const void* bytes, int find_char, size_t len) { #define STATIC_ANALYSIS #endif // __KLOCWORK__ - // Annotate a function indicating the caller must examine the return value. // Use like: // int foo() WARN_UNUSED_RESULT; @@ -309,14 +302,14 @@ inline void* memrchr(const void* bytes, int find_char, size_t len) { // should be counted from two, not one." // #define PRINTF_ATTRIBUTE(string_index, first_to_check) \ - __attribute__((__format__ (__printf__, string_index, first_to_check))) + __attribute__((__format__(__printf__, string_index, first_to_check))) #define SCANF_ATTRIBUTE(string_index, first_to_check) \ - __attribute__((__format__ (__scanf__, string_index, first_to_check))) + __attribute__((__format__(__scanf__, string_index, first_to_check))) // // Prevent the compiler from padding a structure to natural alignment // -#define PACKED __attribute__ ((packed)) +#define PACKED __attribute__((packed)) // Cache line alignment #if defined(__i386__) || defined(__x86_64__) @@ -349,7 +342,7 @@ inline void* memrchr(const void* bytes, int find_char, size_t len) { // that appear unused // (careful, others e.g. third_party/libxml/xmlversion.h also define this) #undef ATTRIBUTE_UNUSED -#define ATTRIBUTE_UNUSED __attribute__ ((unused)) +#define ATTRIBUTE_UNUSED __attribute__((unused)) // Same as above, but for class members. 
// As of 10/2013 this appears to only be supported in Clang/LLVM. @@ -364,30 +357,29 @@ inline void* memrchr(const void* bytes, int find_char, size_t len) { // // For functions we want to force inline or not inline. // Introduced in gcc 3.1. -#define ATTRIBUTE_ALWAYS_INLINE __attribute__ ((always_inline)) +#define ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline)) #define HAVE_ATTRIBUTE_ALWAYS_INLINE 1 -#define ATTRIBUTE_NOINLINE __attribute__ ((noinline)) +#define ATTRIBUTE_NOINLINE __attribute__((noinline)) #define HAVE_ATTRIBUTE_NOINLINE 1 // For weak functions #undef ATTRIBUTE_WEAK -#define ATTRIBUTE_WEAK __attribute__ ((weak)) +#define ATTRIBUTE_WEAK __attribute__((weak)) #define HAVE_ATTRIBUTE_WEAK 1 // For deprecated functions or variables, generate a warning at usage sites. // Verified to work as early as GCC 3.1.1 and clang 3.2 (so we'll assume any // clang is new enough). #if defined(__clang__) || \ - (defined(COMPILER_GCC) && \ - (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 30200) -#define ATTRIBUTE_DEPRECATED(msg) __attribute__ ((deprecated (msg) )) + (defined(COMPILER_GCC) && (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 30200) +#define ATTRIBUTE_DEPRECATED(msg) __attribute__((deprecated(msg))) #else #define ATTRIBUTE_DEPRECATED(msg) #endif // Tell the compiler to use "initial-exec" mode for a thread-local variable. // See http://people.redhat.com/drepper/tls.pdf for the gory details. -#define ATTRIBUTE_INITIAL_EXEC __attribute__ ((tls_model ("initial-exec"))) +#define ATTRIBUTE_INITIAL_EXEC __attribute__((tls_model("initial-exec"))) // // Tell the compiler that some function parameters should be non-null pointers. @@ -407,8 +399,7 @@ inline void* memrchr(const void* bytes, int find_char, size_t len) { // calls _exit from a cloned subprocess, deliberately accesses buffer // out of bounds or does other scary things with memory. 
#ifdef ADDRESS_SANITIZER -#define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \ - __attribute__((no_address_safety_analysis)) +#define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS __attribute__((no_address_safety_analysis)) #else #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS #endif @@ -417,13 +408,12 @@ inline void* memrchr(const void* bytes, int find_char, size_t len) { // the running time and memory requirements for racy code when TSAN is active. // GCC does not support this attribute at the time of this writing (GCC 4.8). #if defined(__llvm__) -#define ATTRIBUTE_NO_SANITIZE_THREAD \ - __attribute__((no_sanitize_thread)) +#define ATTRIBUTE_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread)) #else #define ATTRIBUTE_NO_SANITIZE_THREAD #endif -#ifndef HAVE_ATTRIBUTE_SECTION // may have been pre-set to 0, e.g. for Darwin +#ifndef HAVE_ATTRIBUTE_SECTION // may have been pre-set to 0, e.g. for Darwin #define HAVE_ATTRIBUTE_SECTION 1 #endif @@ -452,7 +442,6 @@ inline void* memrchr(const void* bytes, int find_char, size_t len) { #define ATTRIBUTE_STACK_ALIGN_FOR_OLD_LIBC #endif - // // Tell the compiler to warn about unused return values for functions declared // with this macro. The macro should be used on function declarations @@ -463,7 +452,7 @@ inline void* memrchr(const void* bytes, int find_char, size_t len) { #if defined(SWIG) #define MUST_USE_RESULT #elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) -#define MUST_USE_RESULT __attribute__ ((warn_unused_result)) +#define MUST_USE_RESULT __attribute__((warn_unused_result)) #else #define MUST_USE_RESULT #endif @@ -477,7 +466,7 @@ inline void* memrchr(const void* bytes, int find_char, size_t len) { #elif defined(__clang__) #define OVERRIDE override #elif defined(COMPILER_GCC) && __cplusplus >= 201103 && \ - (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40700 + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40700 // GCC 4.7 supports explicit virtual overrides when C++11 support is enabled. 
#define OVERRIDE override #else @@ -495,7 +484,7 @@ inline void* memrchr(const void* bytes, int find_char, size_t len) { #elif defined(__clang__) #define FINAL final #elif defined(COMPILER_GCC) && __cplusplus >= 201103 && \ - (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40700 + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40700 // GCC 4.7 supports explicit virtual overrides when C++11 support is enabled. #define FINAL final #else @@ -511,78 +500,77 @@ inline void* memrchr(const void* bytes, int find_char, size_t len) { // core: skip L2, go directly to L1 // k8 rev E and later: skip L2, can go to either of the 2-ways in L1 enum PrefetchHint { - PREFETCH_HINT_T0 = 3, // More temporal locality - PREFETCH_HINT_T1 = 2, - PREFETCH_HINT_T2 = 1, // Less temporal locality - PREFETCH_HINT_NTA = 0 // No temporal locality + PREFETCH_HINT_T0 = 3, // More temporal locality + PREFETCH_HINT_T1 = 2, + PREFETCH_HINT_T2 = 1, // Less temporal locality + PREFETCH_HINT_NTA = 0 // No temporal locality }; #else // prefetch is a no-op for this target. Feel free to add more sections above. #endif -extern inline void prefetch(const char *x, int hint) { +extern inline void prefetch(const char* x, int hint) { #if defined(__llvm__) - // In the gcc version of prefetch(), hint is only a constant _after_ inlining - // (assumed to have been successful). llvm views things differently, and - // checks constant-ness _before_ inlining. This leads to compilation errors - // with using the other version of this code with llvm. - // - // One way round this is to use a switch statement to explicitly match - // prefetch hint enumerations, and invoke __builtin_prefetch for each valid - // value. llvm's optimization removes the switch and unused case statements - // after inlining, so that this boils down in the end to the same as for gcc; - // that is, a single inlined prefetchX instruction. - // - // Note that this version of prefetch() cannot verify constant-ness of hint. 
- // If client code calls prefetch() with a variable value for hint, it will - // receive the full expansion of the switch below, perhaps also not inlined. - // This should however not be a problem in the general case of well behaved - // caller code that uses the supplied prefetch hint enumerations. - switch (hint) { + // In the gcc version of prefetch(), hint is only a constant _after_ inlining + // (assumed to have been successful). llvm views things differently, and + // checks constant-ness _before_ inlining. This leads to compilation errors + // with using the other version of this code with llvm. + // + // One way round this is to use a switch statement to explicitly match + // prefetch hint enumerations, and invoke __builtin_prefetch for each valid + // value. llvm's optimization removes the switch and unused case statements + // after inlining, so that this boils down in the end to the same as for gcc; + // that is, a single inlined prefetchX instruction. + // + // Note that this version of prefetch() cannot verify constant-ness of hint. + // If client code calls prefetch() with a variable value for hint, it will + // receive the full expansion of the switch below, perhaps also not inlined. + // This should however not be a problem in the general case of well behaved + // caller code that uses the supplied prefetch hint enumerations. 
+ switch (hint) { case PREFETCH_HINT_T0: - __builtin_prefetch(x, 0, PREFETCH_HINT_T0); - break; + __builtin_prefetch(x, 0, PREFETCH_HINT_T0); + break; case PREFETCH_HINT_T1: - __builtin_prefetch(x, 0, PREFETCH_HINT_T1); - break; + __builtin_prefetch(x, 0, PREFETCH_HINT_T1); + break; case PREFETCH_HINT_T2: - __builtin_prefetch(x, 0, PREFETCH_HINT_T2); - break; + __builtin_prefetch(x, 0, PREFETCH_HINT_T2); + break; case PREFETCH_HINT_NTA: - __builtin_prefetch(x, 0, PREFETCH_HINT_NTA); - break; + __builtin_prefetch(x, 0, PREFETCH_HINT_NTA); + break; default: - __builtin_prefetch(x); - break; - } + __builtin_prefetch(x); + break; + } #elif defined(__GNUC__) - #if !defined(__i386) || defined(__SSE__) - if (__builtin_constant_p(hint)) { - __builtin_prefetch(x, 0, hint); - } else { - // Defaults to PREFETCH_HINT_T0 - __builtin_prefetch(x); - } +#if !defined(__i386) || defined(__SSE__) + if (__builtin_constant_p(hint)) { + __builtin_prefetch(x, 0, hint); + } else { + // Defaults to PREFETCH_HINT_T0 + __builtin_prefetch(x); + } #else - // We want a __builtin_prefetch, but we build with the default -march=i386 - // where __builtin_prefetch quietly turns into nothing. - // Once we crank up to -march=pentium3 or higher the __SSE__ - // clause above will kick in with the builtin. - // -- mec 2006-06-06 - if (hint == PREFETCH_HINT_NTA) - __asm__ __volatile__("prefetchnta (%0)" : : "r"(x)); - #endif + // We want a __builtin_prefetch, but we build with the default -march=i386 + // where __builtin_prefetch quietly turns into nothing. + // Once we crank up to -march=pentium3 or higher the __SSE__ + // clause above will kick in with the builtin. + // -- mec 2006-06-06 + if (hint == PREFETCH_HINT_NTA) __asm__ __volatile__("prefetchnta (%0)" : : "r"(x)); +#endif #else - // You get no effect. Feel free to add more sections above. + // You get no effect. Feel free to add more sections above. 
#endif } #ifdef __cplusplus // prefetch intrinsic (bring data to L1 without polluting L2 cache) -extern inline void prefetch(const char *x) { - return prefetch(x, 0); +extern inline void prefetch(const char* x) { + return prefetch(x, 0); } -#endif // ifdef __cplusplus +#endif // ifdef __cplusplus // // GCC can be told that a certain branch is not likely to be taken (for @@ -606,8 +594,8 @@ extern inline void prefetch(const char *x) { // int foo() ATTRIBUTE_HOT; // #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) -#define ATTRIBUTE_HOT __attribute__ ((hot)) -#define ATTRIBUTE_COLD __attribute__ ((cold)) +#define ATTRIBUTE_HOT __attribute__((hot)) +#define ATTRIBUTE_COLD __attribute__((cold)) #else #define ATTRIBUTE_HOT #define ATTRIBUTE_COLD @@ -619,33 +607,31 @@ extern inline void prefetch(const char *x) { #if !defined(__cplusplus) && !defined(__APPLE__) && !defined(OS_CYGWIN) // stdlib.h only declares this in C++, not in C, so we declare it here. // Also make sure to avoid declaring it on platforms which don't support it. -extern int posix_memalign(void **memptr, size_t alignment, size_t size); +extern int posix_memalign(void** memptr, size_t alignment, size_t size); #endif -inline void *aligned_malloc(size_t size, int minimum_alignment) { +inline void* aligned_malloc(size_t size, int minimum_alignment) { #if defined(__APPLE__) - // mac lacks memalign(), posix_memalign(), however, according to - // http://stackoverflow.com/questions/196329/osx-lacks-memalign - // mac allocs are already 16-byte aligned. - if (minimum_alignment <= 16) - return malloc(size); - // next, try to return page-aligned memory. 
perhaps overkill - if (minimum_alignment <= getpagesize()) - return valloc(size); - // give up - return NULL; -#elif defined(OS_CYGWIN) - return memalign(minimum_alignment, size); -#else // !__APPLE__ && !OS_CYGWIN - void *ptr = NULL; - if (posix_memalign(&ptr, minimum_alignment, size) != 0) + // mac lacks memalign(), posix_memalign(), however, according to + // http://stackoverflow.com/questions/196329/osx-lacks-memalign + // mac allocs are already 16-byte aligned. + if (minimum_alignment <= 16) return malloc(size); + // next, try to return page-aligned memory. perhaps overkill + if (minimum_alignment <= getpagesize()) return valloc(size); + // give up return NULL; - else - return ptr; +#elif defined(OS_CYGWIN) + return memalign(minimum_alignment, size); +#else // !__APPLE__ && !OS_CYGWIN + void* ptr = NULL; + if (posix_memalign(&ptr, minimum_alignment, size) != 0) + return NULL; + else + return ptr; #endif } -#else // not GCC +#else // not GCC #define PRINTF_ATTRIBUTE(string_index, first_to_check) #define SCANF_ATTRIBUTE(string_index, first_to_check) @@ -664,7 +650,7 @@ inline void *aligned_malloc(size_t size, int minimum_alignment) { #define ATTRIBUTE_STACK_ALIGN_FOR_OLD_LIBC #define REQUIRE_STACK_ALIGN_TRAMPOLINE (0) #define MUST_USE_RESULT -extern inline void prefetch(const char *x) {} +extern inline void prefetch(const char* x) {} #define PREDICT_FALSE(x) x #define PREDICT_TRUE(x) x @@ -673,7 +659,7 @@ extern inline void prefetch(const char *x) {} #define FTELLO ftell #define FSEEKO fseek -#endif // GCC +#endif // GCC // // Provides a char array with the exact same alignment as another type. The @@ -688,8 +674,12 @@ extern inline void prefetch(const char *x) {} // construct to be a literal constant integer, we use a template instantiated // at all the possible powers of two. 
#ifndef SWIG -template struct AlignType { }; -template struct AlignType<0, size> { typedef char result[size]; }; +template +struct AlignType {}; +template +struct AlignType<0, size> { + typedef char result[size]; +}; #if defined(_MSC_VER) #define BASE_PORT_H_ALIGN_ATTRIBUTE(X) __declspec(align(X)) #define BASE_PORT_H_ALIGN_OF(T) __alignof(T) @@ -700,10 +690,11 @@ template struct AlignType<0, size> { typedef char result[size]; }; #if defined(BASE_PORT_H_ALIGN_ATTRIBUTE) -#define BASE_PORT_H_ALIGNTYPE_TEMPLATE(X) \ - template struct AlignType { \ - typedef BASE_PORT_H_ALIGN_ATTRIBUTE(X) char result[size]; \ - } +#define BASE_PORT_H_ALIGNTYPE_TEMPLATE(X) \ + template \ + struct AlignType { \ + typedef BASE_PORT_H_ALIGN_ATTRIBUTE(X) char result[size]; \ + } BASE_PORT_H_ALIGNTYPE_TEMPLATE(1); BASE_PORT_H_ALIGNTYPE_TEMPLATE(2); @@ -722,29 +713,31 @@ BASE_PORT_H_ALIGNTYPE_TEMPLATE(8192); // Any larger and MSVC++ will complain. #define ALIGNED_CHAR_ARRAY(T, Size) \ - typename AlignType::result + typename AlignType::result #undef BASE_PORT_H_ALIGNTYPE_TEMPLATE #undef BASE_PORT_H_ALIGN_ATTRIBUTE -#else // defined(BASE_PORT_H_ALIGN_ATTRIBUTE) +#else // defined(BASE_PORT_H_ALIGN_ATTRIBUTE) #define ALIGNED_CHAR_ARRAY you_must_define_ALIGNED_CHAR_ARRAY_for_your_compiler_in_base_port_h #endif // defined(BASE_PORT_H_ALIGN_ATTRIBUTE) -#else // !SWIG +#else // !SWIG // SWIG can't represent alignment and doesn't care about alignment on data // members (it works fine without it). -template -struct AlignType { typedef char result[Size]; }; +template +struct AlignType { + typedef char result[Size]; +}; #define ALIGNED_CHAR_ARRAY(T, Size) AlignType::result #endif // !SWIG -#else // __cpluscplus +#else // __cpluscplus #define ALIGNED_CHAR_ARRAY ALIGNED_CHAR_ARRAY_is_not_available_without_Cplusplus #endif // __cplusplus -#ifdef _MSC_VER /* if Visual C++ */ +#ifdef _MSC_VER /* if Visual C++ */ // This compiler flag can be easily overlooked on MSVC. 
// _CHAR_UNSIGNED gets set with the /J flag. @@ -754,38 +747,38 @@ struct AlignType { typedef char result[Size]; }; // MSVC is a little hyper-active in its warnings // Signed vs. unsigned comparison is ok. -#pragma warning(disable : 4018 ) +#pragma warning(disable : 4018) // We know casting from a long to a char may lose data -#pragma warning(disable : 4244 ) +#pragma warning(disable : 4244) // Don't need performance warnings about converting ints to bools -#pragma warning(disable : 4800 ) +#pragma warning(disable : 4800) // Integral constant overflow is apparently ok too // for example: // short k; int n; // k = k + n; -#pragma warning(disable : 4307 ) +#pragma warning(disable : 4307) // It's ok to use this* in constructor // Example: // class C { // Container cont_; // C() : cont_(this) { ... -#pragma warning(disable : 4355 ) +#pragma warning(disable : 4355) // Truncating from double to float is ok -#pragma warning(disable : 4305 ) +#pragma warning(disable : 4305) -#include #include #include +#include #undef ERROR -#include // for nextafter functionality on windows -#include // for HUGE_VAL +#include // for nextafter functionality on windows +#include // for HUGE_VAL #ifndef HUGE_VALF #define HUGE_VALF (static_cast(HUGE_VAL)) #endif -namespace std {} // Avoid error if we didn't see std. +namespace std {} // namespace std using namespace std; // VC++ doesn't understand "uint" @@ -807,21 +800,20 @@ typedef unsigned int uint; typedef int ssize_t; #endif -#define strtoq _strtoi64 -#define strtouq _strtoui64 -#define strtoll _strtoi64 +#define strtoq _strtoi64 +#define strtouq _strtoui64 +#define strtoll _strtoi64 #define strtoull _strtoui64 -#define atoll _atoi64 - +#define atoll _atoi64 // VC++ 6 and before ship without an ostream << operator for 64-bit ints #if (_MSC_VER <= 1200) #include using std::ostream; -inline ostream& operator<< (ostream& os, const unsigned __int64& num ) { - // Fake operator; doesn't actually do anything. 
- LOG(FATAL) << "64-bit ostream operator << not supported in VC++ 6"; - return os; +inline ostream& operator<<(ostream& os, const unsigned __int64& num) { + // Fake operator; doesn't actually do anything. + LOG(FATAL) << "64-bit ostream operator << not supported in VC++ 6"; + return os; } #endif @@ -841,10 +833,9 @@ inline ostream& operator<< (ostream& os, const unsigned __int64& num ) { #define strdup _strdup #define tempnam _tempnam -#define chdir _chdir +#define chdir _chdir #define getcwd _getcwd -#define putenv _putenv - +#define putenv _putenv // You say tomato, I say toma #define random() rand() @@ -853,8 +844,8 @@ inline ostream& operator<< (ostream& os, const unsigned __int64& num ) { // You say juxtapose, I say transpose #define bcopy(s, d, n) memcpy(d, s, n) -inline void *aligned_malloc(size_t size, int minimum_alignment) { - return _aligned_malloc(size, minimum_alignment); +inline void* aligned_malloc(size_t size, int minimum_alignment) { + return _aligned_malloc(size, minimum_alignment); } // ----- BEGIN VC++ STUBS & FAKE DEFINITIONS --------------------------------- @@ -863,43 +854,43 @@ inline void *aligned_malloc(size_t size, int minimum_alignment) { // floating point format. enum { - FP_NAN, // is "Not a Number" - FP_INFINITE, // is either plus or minus infinity. - FP_ZERO, - FP_SUBNORMAL, // is too small to be represented in normalized format. - FP_NORMAL // if nothing of the above is correct that it must be a - // normal floating-point number. + FP_NAN, // is "Not a Number" + FP_INFINITE, // is either plus or minus infinity. + FP_ZERO, + FP_SUBNORMAL, // is too small to be represented in normalized format. + FP_NORMAL // if nothing of the above is correct that it must be a + // normal floating-point number. 
}; inline int fpclassify_double(double x) { - const int float_point_class =_fpclass(x); - int c99_class; - switch (float_point_class) { - case _FPCLASS_SNAN: // Signaling NaN - case _FPCLASS_QNAN: // Quiet NaN - c99_class = FP_NAN; - break; - case _FPCLASS_NZ: // Negative zero ( -0) - case _FPCLASS_PZ: // Positive 0 (+0) - c99_class = FP_ZERO; - break; - case _FPCLASS_NINF: // Negative infinity ( -INF) - case _FPCLASS_PINF: // Positive infinity (+INF) - c99_class = FP_INFINITE; - break; - case _FPCLASS_ND: // Negative denormalized - case _FPCLASS_PD: // Positive denormalized - c99_class = FP_SUBNORMAL; - break; - case _FPCLASS_NN: // Negative normalized non-zero - case _FPCLASS_PN: // Positive normalized non-zero - c99_class = FP_NORMAL; - break; - default: - c99_class = FP_NAN; // Should never happen - break; - } - return c99_class; + const int float_point_class = _fpclass(x); + int c99_class; + switch (float_point_class) { + case _FPCLASS_SNAN: // Signaling NaN + case _FPCLASS_QNAN: // Quiet NaN + c99_class = FP_NAN; + break; + case _FPCLASS_NZ: // Negative zero ( -0) + case _FPCLASS_PZ: // Positive 0 (+0) + c99_class = FP_ZERO; + break; + case _FPCLASS_NINF: // Negative infinity ( -INF) + case _FPCLASS_PINF: // Positive infinity (+INF) + c99_class = FP_INFINITE; + break; + case _FPCLASS_ND: // Negative denormalized + case _FPCLASS_PD: // Positive denormalized + c99_class = FP_SUBNORMAL; + break; + case _FPCLASS_NN: // Negative normalized non-zero + case _FPCLASS_PN: // Positive normalized non-zero + c99_class = FP_NORMAL; + break; + default: + c99_class = FP_NAN; // Should never happen + break; + } + return c99_class; } // This function handle the special subnormal case for float; it will @@ -907,12 +898,11 @@ inline int fpclassify_double(double x) { // bit_cast is avoided to simplify dependency and to create a code that is // easy to deploy in C code inline int fpclassify_float(float x) { - uint32 bitwise_representation; - memcpy(&bitwise_representation, &x, 
4); - if ((bitwise_representation & 0x7f800000) == 0 && - (bitwise_representation & 0x007fffff) != 0) - return FP_SUBNORMAL; - return fpclassify_double(x); + uint32 bitwise_representation; + memcpy(&bitwise_representation, &x, 4); + if ((bitwise_representation & 0x7f800000) == 0 && (bitwise_representation & 0x007fffff) != 0) + return FP_SUBNORMAL; + return fpclassify_double(x); } // // This define takes care of the denormalized float; the casting to @@ -922,10 +912,10 @@ inline int fpclassify_float(float x) { #define isnan _isnan inline int isinf(double x) { - const int float_point_class =_fpclass(x); - if (float_point_class == _FPCLASS_PINF) return 1; - if (float_point_class == _FPCLASS_NINF) return -1; - return 0; + const int float_point_class = _fpclass(x); + if (float_point_class == _FPCLASS_PINF) return 1; + if (float_point_class == _FPCLASS_NINF) return -1; + return 0; } // #include "conflict-signal.h" @@ -934,28 +924,28 @@ typedef void (*sig_t)(int); // These actually belong in errno.h but there's a name confilict in errno // on WinNT. They (and a ton more) are also found in Winsock2.h, but // if'd out under NT. We need this subset at minimum. -#define EXFULL ENOMEM // not really that great a translation... +#define EXFULL ENOMEM // not really that great a translation... // The following are already defined in VS2010. #if (_MSC_VER < 1600) #define EWOULDBLOCK WSAEWOULDBLOCK #ifndef PTHREADS_REDHAT_WIN32 -#define ETIMEDOUT WSAETIMEDOUT +#define ETIMEDOUT WSAETIMEDOUT #endif -#define ENOTSOCK WSAENOTSOCK +#define ENOTSOCK WSAENOTSOCK #define EINPROGRESS WSAEINPROGRESS -#define ECONNRESET WSAECONNRESET +#define ECONNRESET WSAECONNRESET #endif // // Really from // -inline void bzero(void *s, int n) { - memset(s, 0, n); +inline void bzero(void* s, int n) { + memset(s, 0, n); } // From glob.h -#define __ptr_t void * +#define __ptr_t void* // Defined all over the place. 
typedef int pid_t; @@ -969,12 +959,12 @@ typedef short int16_t; // ----- END VC++ STUBS & FAKE DEFINITIONS ---------------------------------- -#endif // _MSC_VER +#endif // _MSC_VER -#ifdef STL_MSVC // not always the same as _MSC_VER +#ifdef STL_MSVC // not always the same as _MSC_VER #include "base/port_hash.h" #else -struct PortableHashBase { }; +struct PortableHashBase {}; #endif #if defined(OS_WINDOWS) || defined(__APPLE__) @@ -997,11 +987,11 @@ struct PortableHashBase { }; // gcc2: empty #ifndef HASH_NAMESPACE -# define HASH_NAMESPACE_DECLARATION_START -# define HASH_NAMESPACE_DECLARATION_END +#define HASH_NAMESPACE_DECLARATION_START +#define HASH_NAMESPACE_DECLARATION_END #else -# define HASH_NAMESPACE_DECLARATION_START namespace HASH_NAMESPACE { -# define HASH_NAMESPACE_DECLARATION_END } +#define HASH_NAMESPACE_DECLARATION_START namespace HASH_NAMESPACE { +#define HASH_NAMESPACE_DECLARATION_END } #endif // Our STL-like classes use __STD. @@ -1027,25 +1017,18 @@ struct PortableHashBase { }; // modern PowerPC hardware can also do unaligned integer loads and stores; // but note: the FPU still sends unaligned loads and stores to a trap handler! 
-#define UNALIGNED_LOAD16(_p) (*reinterpret_cast(_p)) -#define UNALIGNED_LOAD32(_p) (*reinterpret_cast(_p)) -#define UNALIGNED_LOAD64(_p) (*reinterpret_cast(_p)) - -#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast(_p) = (_val)) -#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast(_p) = (_val)) -#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast(_p) = (_val)) - -#elif defined(__arm__) && \ - !defined(__ARM_ARCH_5__) && \ - !defined(__ARM_ARCH_5T__) && \ - !defined(__ARM_ARCH_5TE__) && \ - !defined(__ARM_ARCH_5TEJ__) && \ - !defined(__ARM_ARCH_6__) && \ - !defined(__ARM_ARCH_6J__) && \ - !defined(__ARM_ARCH_6K__) && \ - !defined(__ARM_ARCH_6Z__) && \ - !defined(__ARM_ARCH_6ZK__) && \ - !defined(__ARM_ARCH_6T2__) +#define UNALIGNED_LOAD16(_p) (*reinterpret_cast(_p)) +#define UNALIGNED_LOAD32(_p) (*reinterpret_cast(_p)) +#define UNALIGNED_LOAD64(_p) (*reinterpret_cast(_p)) + +#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast(_p) = (_val)) +#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast(_p) = (_val)) +#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast(_p) = (_val)) + +#elif defined(__arm__) && !defined(__ARM_ARCH_5__) && !defined(__ARM_ARCH_5T__) && \ + !defined(__ARM_ARCH_5TE__) && !defined(__ARM_ARCH_5TEJ__) && !defined(__ARM_ARCH_6__) && \ + !defined(__ARM_ARCH_6J__) && !defined(__ARM_ARCH_6K__) && !defined(__ARM_ARCH_6Z__) && \ + !defined(__ARM_ARCH_6ZK__) && !defined(__ARM_ARCH_6T2__) // ARMv7 and newer support native unaligned accesses, but only of 16-bit // and 32-bit values (not 64-bit); older versions either raise a fatal signal, @@ -1057,24 +1040,24 @@ struct PortableHashBase { }; // // This is a mess, but there's not much we can do about it. 
-#define UNALIGNED_LOAD16(_p) (*reinterpret_cast(_p)) -#define UNALIGNED_LOAD32(_p) (*reinterpret_cast(_p)) +#define UNALIGNED_LOAD16(_p) (*reinterpret_cast(_p)) +#define UNALIGNED_LOAD32(_p) (*reinterpret_cast(_p)) -#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast(_p) = (_val)) -#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast(_p) = (_val)) +#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast(_p) = (_val)) +#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast(_p) = (_val)) // TODO(user): NEON supports unaligned 64-bit loads and stores. // See if that would be more efficient on platforms supporting it, // at least for copies. -inline uint64 UNALIGNED_LOAD64(const void *p) { - uint64 t; - memcpy(&t, p, sizeof t); - return t; +inline uint64 UNALIGNED_LOAD64(const void* p) { + uint64 t; + memcpy(&t, p, sizeof t); + return t; } -inline void UNALIGNED_STORE64(void *p, uint64 v) { - memcpy(p, &v, sizeof v); +inline void UNALIGNED_STORE64(void* p, uint64 v) { + memcpy(p, &v, sizeof v); } #else @@ -1084,34 +1067,34 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) { // These functions are provided for architectures that don't support // unaligned loads and stores. 
-inline uint16 UNALIGNED_LOAD16(const void *p) { - uint16 t; - memcpy(&t, p, sizeof t); - return t; +inline uint16 UNALIGNED_LOAD16(const void* p) { + uint16 t; + memcpy(&t, p, sizeof t); + return t; } -inline uint32 UNALIGNED_LOAD32(const void *p) { - uint32 t; - memcpy(&t, p, sizeof t); - return t; +inline uint32 UNALIGNED_LOAD32(const void* p) { + uint32 t; + memcpy(&t, p, sizeof t); + return t; } -inline uint64 UNALIGNED_LOAD64(const void *p) { - uint64 t; - memcpy(&t, p, sizeof t); - return t; +inline uint64 UNALIGNED_LOAD64(const void* p) { + uint64 t; + memcpy(&t, p, sizeof t); + return t; } -inline void UNALIGNED_STORE16(void *p, uint16 v) { - memcpy(p, &v, sizeof v); +inline void UNALIGNED_STORE16(void* p, uint16 v) { + memcpy(p, &v, sizeof v); } -inline void UNALIGNED_STORE32(void *p, uint32 v) { - memcpy(p, &v, sizeof v); +inline void UNALIGNED_STORE32(void* p, uint32 v) { + memcpy(p, &v, sizeof v); } -inline void UNALIGNED_STORE64(void *p, uint64 v) { - memcpy(p, &v, sizeof v); +inline void UNALIGNED_STORE64(void* p, uint64 v) { + memcpy(p, &v, sizeof v); } #endif @@ -1129,27 +1112,27 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) { #if defined(__cplusplus) -inline void UnalignedCopy16(const void *src, void *dst) { - UNALIGNED_STORE16(dst, UNALIGNED_LOAD16(src)); +inline void UnalignedCopy16(const void* src, void* dst) { + UNALIGNED_STORE16(dst, UNALIGNED_LOAD16(src)); } -inline void UnalignedCopy32(const void *src, void *dst) { - UNALIGNED_STORE32(dst, UNALIGNED_LOAD32(src)); +inline void UnalignedCopy32(const void* src, void* dst) { + UNALIGNED_STORE32(dst, UNALIGNED_LOAD32(src)); } -inline void UnalignedCopy64(const void *src, void *dst) { - if (sizeof(void *) == 8) { - UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src)); - } else { - const char *src_char = reinterpret_cast(src); - char *dst_char = reinterpret_cast(dst); +inline void UnalignedCopy64(const void* src, void* dst) { + if (sizeof(void*) == 8) { + UNALIGNED_STORE64(dst, 
UNALIGNED_LOAD64(src)); + } else { + const char* src_char = reinterpret_cast(src); + char* dst_char = reinterpret_cast(dst); - UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char)); - UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4)); - } + UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char)); + UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4)); + } } -#endif // defined(__cpluscplus) +#endif // defined(__cpluscplus) // printf macros for size_t, in the style of inttypes.h #ifdef _LP64 @@ -1177,19 +1160,17 @@ inline void UnalignedCopy64(const void *src, void *dst) { #define PRINTABLE_PTHREAD(pthreadt) pthreadt #endif -#define SIZEOF_MEMBER(t, f) sizeof(((t*) 4096)->f) +#define SIZEOF_MEMBER(t, f) sizeof(((t*)4096)->f) -#define OFFSETOF_MEMBER(t, f) \ - (reinterpret_cast( \ - &reinterpret_cast(16)->f) - \ - reinterpret_cast(16)) +#define OFFSETOF_MEMBER(t, f) \ + (reinterpret_cast(&reinterpret_cast(16)->f) - reinterpret_cast(16)) #ifdef PTHREADS_REDHAT_WIN32 #include -using std::ostream; // NOLINT(build/include) -#include // NOLINT(build/include) +using std::ostream; // NOLINT(build/include) +#include // NOLINT(build/include) // pthread_t is not a simple integer or pointer on Win32 -std::ostream& operator << (std::ostream& out, const pthread_t& thread_id); +std::ostream& operator<<(std::ostream& out, const pthread_t& thread_id); #endif // GXX_EXPERIMENTAL_CXX0X is defined by gcc and clang up to at least @@ -1215,7 +1196,7 @@ enum { kPlatformUsesOPDSections = 1 }; #define FUNC_PTR_TO_CHAR_PTR(func) (reinterpret_cast(func)[0]) #else enum { kPlatformUsesOPDSections = 0 }; -#define FUNC_PTR_TO_CHAR_PTR(func) (reinterpret_cast(func)) +#define FUNC_PTR_TO_CHAR_PTR(func) (reinterpret_cast(func)) #endif -#endif // BASE_PORT_H_ +#endif // BASE_PORT_H_ diff --git a/be/src/gutil/ref_counted.cc b/be/src/gutil/ref_counted.cc index 280d9df25e25ca..db17bd2502063e 100644 --- a/be/src/gutil/ref_counted.cc +++ b/be/src/gutil/ref_counted.cc @@ -5,6 +5,7 
@@ #include "gutil/ref_counted.h" #include + #include "gutil/atomic_refcount.h" namespace doris { @@ -12,84 +13,84 @@ namespace doris { namespace subtle { RefCountedBase::RefCountedBase() - : ref_count_(0) + : ref_count_(0) #ifndef NDEBUG - , in_dtor_(false) + , + in_dtor_(false) #endif - { +{ } RefCountedBase::~RefCountedBase() { #ifndef NDEBUG - DCHECK(in_dtor_) << "RefCounted object deleted without calling Release()"; + DCHECK(in_dtor_) << "RefCounted object deleted without calling Release()"; #endif } void RefCountedBase::AddRef() const { - // TODO(maruel): Add back once it doesn't assert 500 times/sec. - // Current thread books the critical section "AddRelease" without release it. - // DFAKE_SCOPED_LOCK_THREAD_LOCKED(add_release_); + // TODO(maruel): Add back once it doesn't assert 500 times/sec. + // Current thread books the critical section "AddRelease" without release it. + // DFAKE_SCOPED_LOCK_THREAD_LOCKED(add_release_); #ifndef NDEBUG - DCHECK(!in_dtor_); + DCHECK(!in_dtor_); #endif - ++ref_count_; + ++ref_count_; } bool RefCountedBase::Release() const { - // TODO(maruel): Add back once it doesn't assert 500 times/sec. - // Current thread books the critical section "AddRelease" without release it. - // DFAKE_SCOPED_LOCK_THREAD_LOCKED(add_release_); + // TODO(maruel): Add back once it doesn't assert 500 times/sec. + // Current thread books the critical section "AddRelease" without release it. 
+ // DFAKE_SCOPED_LOCK_THREAD_LOCKED(add_release_); #ifndef NDEBUG - DCHECK(!in_dtor_); + DCHECK(!in_dtor_); #endif - if (--ref_count_ == 0) { + if (--ref_count_ == 0) { #ifndef NDEBUG - in_dtor_ = true; + in_dtor_ = true; #endif - return true; - } - return false; + return true; + } + return false; } bool RefCountedThreadSafeBase::HasOneRef() const { - return base::RefCountIsOne( - &const_cast(this)->ref_count_); + return base::RefCountIsOne(&const_cast(this)->ref_count_); } RefCountedThreadSafeBase::RefCountedThreadSafeBase() : ref_count_(0) { #ifndef NDEBUG - in_dtor_ = false; + in_dtor_ = false; #endif } RefCountedThreadSafeBase::~RefCountedThreadSafeBase() { #ifndef NDEBUG - DCHECK(in_dtor_) << "RefCountedThreadSafe object deleted without " - "calling Release()"; + DCHECK(in_dtor_) << "RefCountedThreadSafe object deleted without " + "calling Release()"; #endif } void RefCountedThreadSafeBase::AddRef() const { #ifndef NDEBUG - DCHECK(!in_dtor_); + DCHECK(!in_dtor_); #endif - base::RefCountInc(&ref_count_); + base::RefCountInc(&ref_count_); } bool RefCountedThreadSafeBase::Release() const { #ifndef NDEBUG - DCHECK(!in_dtor_); - DCHECK(!base::RefCountIsZero(&ref_count_)); + DCHECK(!in_dtor_); + DCHECK(!base::RefCountIsZero(&ref_count_)); #endif - if (!base::RefCountDec(&ref_count_)) { + if (!base::RefCountDec(&ref_count_)) { #ifndef NDEBUG - in_dtor_ = true; + in_dtor_ = true; #endif - return true; - } - return false; + return true; + } + return false; } -} // namespace subtle +} // namespace subtle -} // namespace doris +} // namespace doris diff --git a/be/src/gutil/ref_counted.h b/be/src/gutil/ref_counted.h index ca993813968120..b7d82cc60231e7 100644 --- a/be/src/gutil/ref_counted.h +++ b/be/src/gutil/ref_counted.h @@ -7,7 +7,7 @@ #include #include -#include // IWYU pragma: keep +#include // IWYU pragma: keep #include "gutil/atomicops.h" #include "gutil/macros.h" @@ -19,52 +19,52 @@ namespace subtle { typedef Atomic32 AtomicRefCount; class RefCountedBase { - 
public: - bool HasOneRef() const { return ref_count_ == 1; } +public: + bool HasOneRef() const { return ref_count_ == 1; } - protected: - RefCountedBase(); - ~RefCountedBase(); +protected: + RefCountedBase(); + ~RefCountedBase(); - void AddRef() const; + void AddRef() const; - // Returns true if the object should self-delete. - bool Release() const; + // Returns true if the object should self-delete. + bool Release() const; - private: - mutable int ref_count_; +private: + mutable int ref_count_; #ifndef NDEBUG - mutable bool in_dtor_; + mutable bool in_dtor_; #endif - DFAKE_MUTEX(add_release_); + DFAKE_MUTEX(add_release_); - DISALLOW_COPY_AND_ASSIGN(RefCountedBase); + DISALLOW_COPY_AND_ASSIGN(RefCountedBase); }; class RefCountedThreadSafeBase { - public: - bool HasOneRef() const; +public: + bool HasOneRef() const; - protected: - RefCountedThreadSafeBase(); - ~RefCountedThreadSafeBase(); +protected: + RefCountedThreadSafeBase(); + ~RefCountedThreadSafeBase(); - void AddRef() const; + void AddRef() const; - // Returns true if the object should self-delete. - bool Release() const; + // Returns true if the object should self-delete. + bool Release() const; - private: - mutable AtomicRefCount ref_count_; +private: + mutable AtomicRefCount ref_count_; #ifndef NDEBUG - mutable bool in_dtor_; + mutable bool in_dtor_; #endif - DISALLOW_COPY_AND_ASSIGN(RefCountedThreadSafeBase); + DISALLOW_COPY_AND_ASSIGN(RefCountedThreadSafeBase); }; -} // namespace subtle +} // namespace subtle // // A base class for reference counted classes. Otherwise, known as a cheap @@ -82,40 +82,38 @@ class RefCountedThreadSafeBase { // the object accidently while there are references to it. 
template class RefCounted : public subtle::RefCountedBase { - public: - RefCounted() {} +public: + RefCounted() {} - void AddRef() const { - subtle::RefCountedBase::AddRef(); - } + void AddRef() const { subtle::RefCountedBase::AddRef(); } - void Release() const { - if (subtle::RefCountedBase::Release()) { - delete static_cast(this); + void Release() const { + if (subtle::RefCountedBase::Release()) { + delete static_cast(this); + } } - } - protected: - ~RefCounted() {} +protected: + ~RefCounted() {} - private: - DISALLOW_COPY_AND_ASSIGN(RefCounted); +private: + DISALLOW_COPY_AND_ASSIGN(RefCounted); }; // Forward declaration. -template class RefCountedThreadSafe; +template +class RefCountedThreadSafe; // Default traits for RefCountedThreadSafe. Deletes the object when its ref // count reaches 0. Overload to delete it on a different thread etc. -template +template struct DefaultRefCountedThreadSafeTraits { - static void Destruct(const T* x) { - // Delete through RefCountedThreadSafe to make child classes only need to be - // friend with RefCountedThreadSafe instead of this struct, which is an - // implementation detail. - RefCountedThreadSafe::DeleteInternal(x); - } + static void Destruct(const T* x) { + // Delete through RefCountedThreadSafe to make child classes only need to be + // friend with RefCountedThreadSafe instead of this struct, which is an + // implementation detail. 
+ RefCountedThreadSafe::DeleteInternal(x); + } }; // @@ -132,48 +130,45 @@ struct DefaultRefCountedThreadSafeTraits { // ~MyFoo(); template > class RefCountedThreadSafe : public subtle::RefCountedThreadSafeBase { - public: - RefCountedThreadSafe() {} +public: + RefCountedThreadSafe() {} - void AddRef() const { - subtle::RefCountedThreadSafeBase::AddRef(); - } + void AddRef() const { subtle::RefCountedThreadSafeBase::AddRef(); } - void Release() const { - if (subtle::RefCountedThreadSafeBase::Release()) { - Traits::Destruct(static_cast(this)); + void Release() const { + if (subtle::RefCountedThreadSafeBase::Release()) { + Traits::Destruct(static_cast(this)); + } } - } - protected: - ~RefCountedThreadSafe() {} +protected: + ~RefCountedThreadSafe() {} - private: - friend struct DefaultRefCountedThreadSafeTraits; - static void DeleteInternal(const T* x) { delete x; } +private: + friend struct DefaultRefCountedThreadSafeTraits; + static void DeleteInternal(const T* x) { delete x; } - DISALLOW_COPY_AND_ASSIGN(RefCountedThreadSafe); + DISALLOW_COPY_AND_ASSIGN(RefCountedThreadSafe); }; // // A thread-safe wrapper for some piece of data so we can place other // things in scoped_refptrs<>. // -template -class RefCountedData - : public doris::RefCountedThreadSafe< doris::RefCountedData> { - public: - RefCountedData() : data() {} - RefCountedData(const T& in_value) : data(in_value) {} - - T data; - - private: - friend class doris::RefCountedThreadSafe>; - ~RefCountedData() {} +template +class RefCountedData : public doris::RefCountedThreadSafe> { +public: + RefCountedData() : data() {} + RefCountedData(const T& in_value) : data(in_value) {} + + T data; + +private: + friend class doris::RefCountedThreadSafe>; + ~RefCountedData() {} }; -} // namespace doris +} // namespace doris // // A smart pointer class for reference counted objects. 
Use this class instead @@ -225,141 +220,127 @@ class RefCountedData // template class scoped_refptr { - public: - typedef T element_type; - - scoped_refptr() : ptr_(NULL) { - } - - scoped_refptr(T* p) : ptr_(p) { - if (ptr_) - ptr_->AddRef(); - } - - // Copy constructor. - scoped_refptr(const scoped_refptr& r) : ptr_(r.ptr_) { - if (ptr_) - ptr_->AddRef(); - } - - // Copy conversion constructor. - template - scoped_refptr(const scoped_refptr& r) : ptr_(r.get()) { - if (ptr_) - ptr_->AddRef(); - } - - // Move constructor. This is required in addition to the conversion - // constructor below in order for clang to warn about pessimizing moves. - scoped_refptr(scoped_refptr&& r) noexcept : ptr_(r.get()) { // NOLINT - r.ptr_ = nullptr; - } - - // Move conversion constructor. - template - scoped_refptr(scoped_refptr&& r) noexcept : ptr_(r.get()) { // NOLINT - r.ptr_ = nullptr; - } - - ~scoped_refptr() { - if (ptr_) - ptr_->Release(); - } - - T* get() const { return ptr_; } +public: + typedef T element_type; + + scoped_refptr() : ptr_(NULL) {} + + scoped_refptr(T* p) : ptr_(p) { + if (ptr_) ptr_->AddRef(); + } + + // Copy constructor. + scoped_refptr(const scoped_refptr& r) : ptr_(r.ptr_) { + if (ptr_) ptr_->AddRef(); + } + + // Copy conversion constructor. + template + scoped_refptr(const scoped_refptr& r) : ptr_(r.get()) { + if (ptr_) ptr_->AddRef(); + } + + // Move constructor. This is required in addition to the conversion + // constructor below in order for clang to warn about pessimizing moves. + scoped_refptr(scoped_refptr&& r) noexcept : ptr_(r.get()) { // NOLINT + r.ptr_ = nullptr; + } + + // Move conversion constructor. + template + scoped_refptr(scoped_refptr&& r) noexcept : ptr_(r.get()) { // NOLINT + r.ptr_ = nullptr; + } + + ~scoped_refptr() { + if (ptr_) ptr_->Release(); + } + + T* get() const { return ptr_; } // The following is disabled in Kudu's version of this file since it's // relatively dangerous. 
Chromium is planning on doing the same in their // tree, but hasn't done so yet. See http://code.google.com/p/chromium/issues/detail?id=110610 #if SCOPED_REFPTR_ALLOW_IMPLICIT_CONVERSION_TO_PTR - // Allow scoped_refptr to be used in boolean expression - // and comparison operations. - operator T*() const { return ptr_; } + // Allow scoped_refptr to be used in boolean expression + // and comparison operations. + operator T*() const { return ptr_; } #else - typedef T* scoped_refptr::*Testable; - operator Testable() const { return ptr_ ? &scoped_refptr::ptr_ : NULL; } + typedef T* scoped_refptr::*Testable; + operator Testable() const { return ptr_ ? &scoped_refptr::ptr_ : NULL; } #endif - T* operator->() const { - assert(ptr_ != NULL); - return ptr_; - } - - scoped_refptr& operator=(T* p) { - // AddRef first so that self assignment should work - if (p) - p->AddRef(); - T* old_ptr = ptr_; - ptr_ = p; - if (old_ptr) - old_ptr->Release(); - return *this; - } - - scoped_refptr& operator=(const scoped_refptr& r) { - return *this = r.ptr_; - } - - template - scoped_refptr& operator=(const scoped_refptr& r) { - return *this = r.get(); - } - - scoped_refptr& operator=(scoped_refptr&& r) { - scoped_refptr(std::move(r)).swap(*this); - return *this; - } - - template - scoped_refptr& operator=(scoped_refptr&& r) { - scoped_refptr(std::move(r)).swap(*this); - return *this; - } - - void swap(T** pp) { - T* p = ptr_; - ptr_ = *pp; - *pp = p; - } - - void swap(scoped_refptr& r) { - swap(&r.ptr_); - } - - // Like gscoped_ptr::reset(), drops a reference on the currently held object - // (if any), and adds a reference to the passed-in object (if not NULL). 
- void reset(T* p = NULL) { - *this = p; - } - - protected: - T* ptr_; - - private: - template friend class scoped_refptr; + T* operator->() const { + assert(ptr_ != NULL); + return ptr_; + } + + scoped_refptr& operator=(T* p) { + // AddRef first so that self assignment should work + if (p) p->AddRef(); + T* old_ptr = ptr_; + ptr_ = p; + if (old_ptr) old_ptr->Release(); + return *this; + } + + scoped_refptr& operator=(const scoped_refptr& r) { return *this = r.ptr_; } + + template + scoped_refptr& operator=(const scoped_refptr& r) { + return *this = r.get(); + } + + scoped_refptr& operator=(scoped_refptr&& r) { + scoped_refptr(std::move(r)).swap(*this); + return *this; + } + + template + scoped_refptr& operator=(scoped_refptr&& r) { + scoped_refptr(std::move(r)).swap(*this); + return *this; + } + + void swap(T** pp) { + T* p = ptr_; + ptr_ = *pp; + *pp = p; + } + + void swap(scoped_refptr& r) { swap(&r.ptr_); } + + // Like gscoped_ptr::reset(), drops a reference on the currently held object + // (if any), and adds a reference to the passed-in object (if not NULL). + void reset(T* p = NULL) { *this = p; } + +protected: + T* ptr_; + +private: + template + friend class scoped_refptr; }; // Handy utility for creating a scoped_refptr out of a T* explicitly without // having to retype all the template arguments template scoped_refptr make_scoped_refptr(T* t) { - return scoped_refptr(t); + return scoped_refptr(t); } // equal_to and hash implementations for templated scoped_refptrs suitable for // use with STL unordered_* containers. 
template struct ScopedRefPtrEqualToFunctor { - bool operator()(const scoped_refptr& x, const scoped_refptr& y) const { - return x.get() == y.get(); - } + bool operator()(const scoped_refptr& x, const scoped_refptr& y) const { + return x.get() == y.get(); + } }; template struct ScopedRefPtrHashFunctor { - size_t operator()(const scoped_refptr& p) const { - return reinterpret_cast(p.get()); - } + size_t operator()(const scoped_refptr& p) const { return reinterpret_cast(p.get()); } }; -#endif // BASE_MEMORY_REF_COUNTED_H_ +#endif // BASE_MEMORY_REF_COUNTED_H_ diff --git a/be/src/gutil/spinlock_internal.cc b/be/src/gutil/spinlock_internal.cc index 958bc4df4a8832..8dd0c416f04faa 100644 --- a/be/src/gutil/spinlock_internal.cc +++ b/be/src/gutil/spinlock_internal.cc @@ -44,7 +44,11 @@ #include "gutil/spinlock_internal.h" // forward declaration for use by spinlock_*-inl.h -namespace base { namespace internal { static int SuggestedDelayNS(int loop); }} +namespace base { +namespace internal { +static int SuggestedDelayNS(int loop); +} +} // namespace base #if defined(_WIN32) #include "gutil/spinlock_win32-inl.h" @@ -58,63 +62,62 @@ namespace base { namespace internal { // See spinlock_internal.h for spec. 
-int32 SpinLockWait(volatile Atomic32 *w, int n, - const SpinLockWaitTransition trans[]) { - int32 v; - bool done = false; - for (int loop = 0; !done; loop++) { - v = base::subtle::Acquire_Load(w); - int i; - for (i = 0; i != n && v != trans[i].from; i++) { +int32 SpinLockWait(volatile Atomic32* w, int n, const SpinLockWaitTransition trans[]) { + int32 v; + bool done = false; + for (int loop = 0; !done; loop++) { + v = base::subtle::Acquire_Load(w); + int i; + for (i = 0; i != n && v != trans[i].from; i++) { + } + if (i == n) { + SpinLockDelay(w, v, loop); // no matching transition + } else if (trans[i].to == v || // null transition + base::subtle::Acquire_CompareAndSwap(w, v, trans[i].to) == v) { + done = trans[i].done; + } } - if (i == n) { - SpinLockDelay(w, v, loop); // no matching transition - } else if (trans[i].to == v || // null transition - base::subtle::Acquire_CompareAndSwap(w, v, trans[i].to) == v) { - done = trans[i].done; - } - } - return v; + return v; } // Return a suggested delay in nanoseconds for iteration number "loop" static int SuggestedDelayNS(int loop) { - // Weak pseudo-random number generator to get some spread between threads - // when many are spinning. + // Weak pseudo-random number generator to get some spread between threads + // when many are spinning. #ifdef BASE_HAS_ATOMIC64 - static base::subtle::Atomic64 rand; - uint64 r = base::subtle::NoBarrier_Load(&rand); - r = 0x5deece66dLL * r + 0xb; // numbers from nrand48() - base::subtle::NoBarrier_Store(&rand, r); + static base::subtle::Atomic64 rand; + uint64 r = base::subtle::NoBarrier_Load(&rand); + r = 0x5deece66dLL * r + 0xb; // numbers from nrand48() + base::subtle::NoBarrier_Store(&rand, r); - r <<= 16; // 48-bit random number now in top 48-bits. - if (loop < 0 || loop > 32) { // limit loop to 0..32 - loop = 32; - } - // loop>>3 cannot exceed 4 because loop cannot exceed 32. - // Select top 20..24 bits of lower 48 bits, - // giving approximately 0ms to 16ms. 
- // Mean is exponential in loop for first 32 iterations, then 8ms. - // The futex path multiplies this by 16, since we expect explicit wakeups - // almost always on that path. - return r >> (44 - (loop >> 3)); + r <<= 16; // 48-bit random number now in top 48-bits. + if (loop < 0 || loop > 32) { // limit loop to 0..32 + loop = 32; + } + // loop>>3 cannot exceed 4 because loop cannot exceed 32. + // Select top 20..24 bits of lower 48 bits, + // giving approximately 0ms to 16ms. + // Mean is exponential in loop for first 32 iterations, then 8ms. + // The futex path multiplies this by 16, since we expect explicit wakeups + // almost always on that path. + return r >> (44 - (loop >> 3)); #else - static Atomic32 rand; - uint32 r = base::subtle::NoBarrier_Load(&rand); - r = 0x343fd * r + 0x269ec3; // numbers from MSVC++ - base::subtle::NoBarrier_Store(&rand, r); + static Atomic32 rand; + uint32 r = base::subtle::NoBarrier_Load(&rand); + r = 0x343fd * r + 0x269ec3; // numbers from MSVC++ + base::subtle::NoBarrier_Store(&rand, r); - r <<= 1; // 31-bit random number now in top 31-bits. - if (loop < 0 || loop > 32) { // limit loop to 0..32 - loop = 32; - } - // loop>>3 cannot exceed 4 because loop cannot exceed 32. - // Select top 20..24 bits of lower 31 bits, - // giving approximately 0ms to 16ms. - // Mean is exponential in loop for first 32 iterations, then 8ms. - // The futex path multiplies this by 16, since we expect explicit wakeups - // almost always on that path. - return r >> (12 - (loop >> 3)); + r <<= 1; // 31-bit random number now in top 31-bits. + if (loop < 0 || loop > 32) { // limit loop to 0..32 + loop = 32; + } + // loop>>3 cannot exceed 4 because loop cannot exceed 32. + // Select top 20..24 bits of lower 31 bits, + // giving approximately 0ms to 16ms. + // Mean is exponential in loop for first 32 iterations, then 8ms. + // The futex path multiplies this by 16, since we expect explicit wakeups + // almost always on that path. 
+ return r >> (12 - (loop >> 3)); #endif } diff --git a/be/src/gutil/spinlock_internal.h b/be/src/gutil/spinlock_internal.h index 1af38abf292604..78fa4968711e29 100644 --- a/be/src/gutil/spinlock_internal.h +++ b/be/src/gutil/spinlock_internal.h @@ -36,8 +36,8 @@ #ifndef BASE_SPINLOCK_INTERNAL_H_ #define BASE_SPINLOCK_INTERNAL_H_ -#include "gutil/basictypes.h" #include "gutil/atomicops.h" +#include "gutil/basictypes.h" namespace base { namespace internal { @@ -45,19 +45,18 @@ namespace internal { // SpinLockWait() waits until it can perform one of several transitions from // "from" to "to". It returns when it performs a transition where done==true. struct SpinLockWaitTransition { - int32 from; - int32 to; - bool done; + int32 from; + int32 to; + bool done; }; // Wait until *w can transition from trans[i].from to trans[i].to for some i // satisfying 0<=i +#include #include #include -#include + #include "gutil/linux_syscall_support.h" #define FUTEX_WAIT 0 @@ -47,54 +48,50 @@ static int futex_private_flag = FUTEX_PRIVATE_FLAG; namespace { static struct InitModule { - InitModule() { - int x = 0; - // futexes are ints, so we can use them only when - // that's the same size as the lockword_ in SpinLock. - have_futex = (sizeof (Atomic32) == sizeof (int) && - sys_futex(&x, FUTEX_WAKE, 1, NULL, NULL, 0) >= 0); - if (have_futex && - sys_futex(&x, FUTEX_WAKE | futex_private_flag, 1, NULL, NULL, 0) < 0) { - futex_private_flag = 0; + InitModule() { + int x = 0; + // futexes are ints, so we can use them only when + // that's the same size as the lockword_ in SpinLock. 
+ have_futex = (sizeof(Atomic32) == sizeof(int) && + sys_futex(&x, FUTEX_WAKE, 1, NULL, NULL, 0) >= 0); + if (have_futex && sys_futex(&x, FUTEX_WAKE | futex_private_flag, 1, NULL, NULL, 0) < 0) { + futex_private_flag = 0; + } } - } } init_module; -} // anonymous namespace - +} // anonymous namespace namespace base { namespace internal { -void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) { - if (loop != 0) { - int save_errno = errno; - struct timespec tm; - tm.tv_sec = 0; - if (have_futex) { - tm.tv_nsec = base::internal::SuggestedDelayNS(loop); - } else { - tm.tv_nsec = 2000001; // above 2ms so linux 2.4 doesn't spin +void SpinLockDelay(volatile Atomic32* w, int32 value, int loop) { + if (loop != 0) { + int save_errno = errno; + struct timespec tm; + tm.tv_sec = 0; + if (have_futex) { + tm.tv_nsec = base::internal::SuggestedDelayNS(loop); + } else { + tm.tv_nsec = 2000001; // above 2ms so linux 2.4 doesn't spin + } + if (have_futex) { + tm.tv_nsec *= 16; // increase the delay; we expect explicit wakeups + sys_futex(reinterpret_cast(const_cast(w)), + FUTEX_WAIT | futex_private_flag, value, + reinterpret_cast(&tm), NULL, 0); + } else { + nanosleep(&tm, NULL); + } + errno = save_errno; } - if (have_futex) { - tm.tv_nsec *= 16; // increase the delay; we expect explicit wakeups - sys_futex(reinterpret_cast(const_cast(w)), - FUTEX_WAIT | futex_private_flag, - value, reinterpret_cast(&tm), - NULL, 0); - } else { - nanosleep(&tm, NULL); - } - errno = save_errno; - } } -void SpinLockWake(volatile Atomic32 *w, bool all) { - if (have_futex) { - sys_futex(reinterpret_cast(const_cast(w)), - FUTEX_WAKE | futex_private_flag, all? INT_MAX : 1, - NULL, NULL, 0); - } +void SpinLockWake(volatile Atomic32* w, bool all) { + if (have_futex) { + sys_futex(reinterpret_cast(const_cast(w)), FUTEX_WAKE | futex_private_flag, + all ? 
INT_MAX : 1, NULL, NULL, 0); + } } } // namespace internal diff --git a/be/src/gutil/spinlock_posix-inl.h b/be/src/gutil/spinlock_posix-inl.h index b34c9912b39368..9fc372fa1e5413 100644 --- a/be/src/gutil/spinlock_posix-inl.h +++ b/be/src/gutil/spinlock_posix-inl.h @@ -34,29 +34,28 @@ #include #if defined(HAVE_SCHED_H) || defined(__APPLE__) -#include /* For sched_yield() */ +#include /* For sched_yield() */ #endif -#include /* For nanosleep() */ +#include /* For nanosleep() */ namespace base { namespace internal { -void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) { - int save_errno = errno; - if (loop == 0) { - } else if (loop == 1) { - sched_yield(); - } else { - struct timespec tm; - tm.tv_sec = 0; - tm.tv_nsec = base::internal::SuggestedDelayNS(loop); - nanosleep(&tm, NULL); - } - errno = save_errno; +void SpinLockDelay(volatile Atomic32* w, int32 value, int loop) { + int save_errno = errno; + if (loop == 0) { + } else if (loop == 1) { + sched_yield(); + } else { + struct timespec tm; + tm.tv_sec = 0; + tm.tv_nsec = base::internal::SuggestedDelayNS(loop); + nanosleep(&tm, NULL); + } + errno = save_errno; } -void SpinLockWake(volatile Atomic32 *w, bool all) { -} +void SpinLockWake(volatile Atomic32* w, bool all) {} } // namespace internal } // namespace base diff --git a/be/src/gutil/spinlock_win32-inl.h b/be/src/gutil/spinlock_win32-inl.h index 956b9653e6db34..26375252f0b416 100644 --- a/be/src/gutil/spinlock_win32-inl.h +++ b/be/src/gutil/spinlock_win32-inl.h @@ -32,23 +32,21 @@ * This file is a Win32-specific part of spinlock_internal.cc */ - #include namespace base { namespace internal { -void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) { - if (loop == 0) { - } else if (loop == 1) { - Sleep(0); - } else { - Sleep(base::internal::SuggestedDelayNS(loop) / 1000000); - } +void SpinLockDelay(volatile Atomic32* w, int32 value, int loop) { + if (loop == 0) { + } else if (loop == 1) { + Sleep(0); + } else { + 
Sleep(base::internal::SuggestedDelayNS(loop) / 1000000); + } } -void SpinLockWake(volatile Atomic32 *w, bool all) { -} +void SpinLockWake(volatile Atomic32* w, bool all) {} } // namespace internal } // namespace base diff --git a/be/src/gutil/stl_util.h b/be/src/gutil/stl_util.h index d841ca8a4dc24a..7e007fe08163d5 100644 --- a/be/src/gutil/stl_util.h +++ b/be/src/gutil/stl_util.h @@ -29,7 +29,8 @@ #define UTIL_GTL_STL_UTIL_H_ #include -#include // for memcpy +#include // for memcpy + #include using std::copy; using std::max; @@ -57,49 +58,51 @@ using std::vector; #include "gutil/port.h" // Sort and remove duplicates of an STL vector or deque. -template -void STLSortAndRemoveDuplicates(T *v) { - sort(v->begin(), v->end()); - v->erase(unique(v->begin(), v->end()), v->end()); +template +void STLSortAndRemoveDuplicates(T* v) { + sort(v->begin(), v->end()); + v->erase(unique(v->begin(), v->end()), v->end()); } // Clear internal memory of an STL object. // STL clear()/reserve(0) does not always free internal memory allocated // This function uses swap/destructor to ensure the internal memory is freed. -template void STLClearObject(T* obj) { - T tmp; - tmp.swap(*obj); - obj->reserve(0); // this is because sometimes "T tmp" allocates objects with - // memory (arena implementation?). use reserve() - // to clear() even if it doesn't always work +template +void STLClearObject(T* obj) { + T tmp; + tmp.swap(*obj); + obj->reserve(0); // this is because sometimes "T tmp" allocates objects with + // memory (arena implementation?). use reserve() + // to clear() even if it doesn't always work } // Specialization for deque. Same as STLClearObject but doesn't call reserve // since deque doesn't have reserve. template void STLClearObject(deque* obj) { - deque tmp; - tmp.swap(*obj); + deque tmp; + tmp.swap(*obj); } // Reduce memory usage on behalf of object if its capacity is greater // than or equal to "limit", which defaults to 2^20. 
-template inline void STLClearIfBig(T* obj, size_t limit = 1<<20) { - if (obj->capacity() >= limit) { - STLClearObject(obj); - } else { - obj->clear(); - } +template +inline void STLClearIfBig(T* obj, size_t limit = 1 << 20) { + if (obj->capacity() >= limit) { + STLClearObject(obj); + } else { + obj->clear(); + } } // Specialization for deque, which doesn't implement capacity(). template -inline void STLClearIfBig(deque* obj, size_t limit = 1<<20) { - if (obj->size() >= limit) { - STLClearObject(obj); - } else { - obj->clear(); - } +inline void STLClearIfBig(deque* obj, size_t limit = 1 << 20) { + if (obj->size() >= limit) { + STLClearObject(obj); + } else { + obj->clear(); + } } // Reduce the number of buckets in a hash_set or hash_map back to the @@ -121,23 +124,25 @@ inline void STLClearIfBig(deque* obj, size_t limit = 1<<20) { // number of buckets is reset to the default to keep subsequent clear // operations cheap. Note that the default number of buckets is 193 // in the Gnu library implementation as of Jan '08. -template inline void STLClearHashIfBig(T *obj, size_t limit) { - if (obj->bucket_count() >= limit) { - T tmp; - tmp.swap(*obj); - } else { - obj->clear(); - } +template +inline void STLClearHashIfBig(T* obj, size_t limit) { + if (obj->bucket_count() >= limit) { + T tmp; + tmp.swap(*obj); + } else { + obj->clear(); + } } // Reserve space for STL object. // STL's reserve() will always copy. 
// This function avoid the copy if we already have capacity -template void STLReserveIfNeeded(T* obj, int new_size) { - if (obj->capacity() < new_size) // increase capacity - obj->reserve(new_size); - else if (obj->size() > new_size) // reduce size - obj->resize(new_size); +template +void STLReserveIfNeeded(T* obj, int new_size) { + if (obj->capacity() < new_size) // increase capacity + obj->reserve(new_size); + else if (obj->size() > new_size) // reduce size + obj->resize(new_size); } // STLDeleteContainerPointers() @@ -153,13 +158,12 @@ template void STLReserveIfNeeded(T* obj, int new_size) { // NOTE: If you're calling this on an entire container, you probably want // to call STLDeleteElements(&container) instead, or use an ElementDeleter. template -void STLDeleteContainerPointers(ForwardIterator begin, - ForwardIterator end) { - while (begin != end) { - ForwardIterator temp = begin; - ++begin; - delete *temp; - } +void STLDeleteContainerPointers(ForwardIterator begin, ForwardIterator end) { + while (begin != end) { + ForwardIterator temp = begin; + ++begin; + delete *temp; + } } // STLDeleteContainerPairPointers() @@ -171,14 +175,13 @@ void STLDeleteContainerPointers(ForwardIterator begin, // which could result in the hash function trying to dereference a stale // pointer. template -void STLDeleteContainerPairPointers(ForwardIterator begin, - ForwardIterator end) { - while (begin != end) { - ForwardIterator temp = begin; - ++begin; - delete temp->first; - delete temp->second; - } +void STLDeleteContainerPairPointers(ForwardIterator begin, ForwardIterator end) { + while (begin != end) { + ForwardIterator temp = begin; + ++begin; + delete temp->first; + delete temp->second; + } } // STLDeleteContainerPairFirstPointers() @@ -186,13 +189,12 @@ void STLDeleteContainerPairPointers(ForwardIterator begin, // on the FIRST item in the pairs. // NOTE: Like STLDeleteContainerPointers, deleting behind the iterator. 
template -void STLDeleteContainerPairFirstPointers(ForwardIterator begin, - ForwardIterator end) { - while (begin != end) { - ForwardIterator temp = begin; - ++begin; - delete temp->first; - } +void STLDeleteContainerPairFirstPointers(ForwardIterator begin, ForwardIterator end) { + while (begin != end) { + ForwardIterator temp = begin; + ++begin; + delete temp->first; + } } // STLDeleteContainerPairSecondPointers() @@ -204,33 +206,28 @@ void STLDeleteContainerPairFirstPointers(ForwardIterator begin, // NOTE: If you're calling this on an entire container, you probably want // to call STLDeleteValues(&container) instead, or use ValueDeleter. template -void STLDeleteContainerPairSecondPointers(ForwardIterator begin, - ForwardIterator end) { - while (begin != end) { - ForwardIterator temp = begin; - ++begin; - delete temp->second; - } +void STLDeleteContainerPairSecondPointers(ForwardIterator begin, ForwardIterator end) { + while (begin != end) { + ForwardIterator temp = begin; + ++begin; + delete temp->second; + } } -template -inline void STLAssignToVector(vector* vec, - const T* ptr, - size_t n) { - vec->resize(n); - if (n == 0) return; - memcpy(&vec->front(), ptr, n*sizeof(T)); +template +inline void STLAssignToVector(vector* vec, const T* ptr, size_t n) { + vec->resize(n); + if (n == 0) return; + memcpy(&vec->front(), ptr, n * sizeof(T)); } // Not faster; but we need the specialization so the function works at all // on the vector specialization. 
-template<> -inline void STLAssignToVector(vector* vec, - const bool* ptr, - size_t n) { - vec->clear(); - if (n == 0) return; - vec->insert(vec->begin(), ptr, ptr + n); +template <> +inline void STLAssignToVector(vector* vec, const bool* ptr, size_t n) { + vec->clear(); + if (n == 0) return; + vec->insert(vec->begin(), ptr, ptr + n); } /***** Hack to allow faster assignment to a vector *****/ @@ -242,24 +239,22 @@ inline void STLAssignToVector(vector* vec, // STLAssignToVectorChar(&vec, ptr, size); // STLAssignToString(&str, ptr, size); -inline void STLAssignToVectorChar(vector* vec, - const char* ptr, - size_t n) { - STLAssignToVector(vec, ptr, n); +inline void STLAssignToVectorChar(vector* vec, const char* ptr, size_t n) { + STLAssignToVector(vec, ptr, n); } // A struct that mirrors the GCC4 implementation of a string. See: // /usr/crosstool/v8/gcc-4.1.0-glibc-2.2.2/i686-unknown-linux-gnu/include/c++/4.1.0/ext/sso_string_base.h struct InternalStringRepGCC4 { - char* _M_data; - size_t _M_string_length; + char* _M_data; + size_t _M_string_length; - enum { _S_local_capacity = 15 }; + enum { _S_local_capacity = 15 }; - union { - char _M_local_data[_S_local_capacity + 1]; - size_t _M_allocated_capacity; - }; + union { + char _M_local_data[_S_local_capacity + 1]; + size_t _M_allocated_capacity; + }; }; // Like str->resize(new_size), except any new characters added to @@ -267,45 +262,45 @@ struct InternalStringRepGCC4 { // than being filled with '0' bytes. Typically used when code is then // going to overwrite the backing store of the string with known data. inline void STLStringResizeUninitialized(string* s, size_t new_size) { - if (sizeof(*s) == sizeof(InternalStringRepGCC4)) { - if (new_size > s->capacity()) { - s->reserve(new_size); + if (sizeof(*s) == sizeof(InternalStringRepGCC4)) { + if (new_size > s->capacity()) { + s->reserve(new_size); + } + // The line below depends on the layout of 'string'. THIS IS + // NON-PORTABLE CODE. 
If our STL implementation changes, we will + // need to change this as well. + InternalStringRepGCC4* rep = reinterpret_cast(s); + assert(rep->_M_data == s->data()); + assert(rep->_M_string_length == s->size()); + + // We have to null-terminate the string for c_str() to work properly. + // So we leave the actual contents of the string uninitialized, but + // we set the byte one past the new end of the string to '\0' + const_cast(s->data())[new_size] = '\0'; + rep->_M_string_length = new_size; + } else { + // Slow path: have to reallocate stuff, or an unknown string rep + s->resize(new_size); } - // The line below depends on the layout of 'string'. THIS IS - // NON-PORTABLE CODE. If our STL implementation changes, we will - // need to change this as well. - InternalStringRepGCC4* rep = reinterpret_cast(s); - assert(rep->_M_data == s->data()); - assert(rep->_M_string_length == s->size()); - - // We have to null-terminate the string for c_str() to work properly. - // So we leave the actual contents of the string uninitialized, but - // we set the byte one past the new end of the string to '\0' - const_cast(s->data())[new_size] = '\0'; - rep->_M_string_length = new_size; - } else { - // Slow path: have to reallocate stuff, or an unknown string rep - s->resize(new_size); - } } // Returns true if the string implementation supports a resize where // the new characters added to the string are left untouched. 
inline bool STLStringSupportsNontrashingResize(const string& s) { - return (sizeof(s) == sizeof(InternalStringRepGCC4)); + return (sizeof(s) == sizeof(InternalStringRepGCC4)); } inline void STLAssignToString(string* str, const char* ptr, size_t n) { - STLStringResizeUninitialized(str, n); - if (n == 0) return; - memcpy(&*str->begin(), ptr, n); + STLStringResizeUninitialized(str, n); + if (n == 0) return; + memcpy(&*str->begin(), ptr, n); } inline void STLAppendToString(string* str, const char* ptr, size_t n) { - if (n == 0) return; - size_t old_size = str->size(); - STLStringResizeUninitialized(str, old_size + n); - memcpy(&*str->begin() + old_size, ptr, n); + if (n == 0) return; + size_t old_size = str->size(); + STLStringResizeUninitialized(str, old_size + n); + memcpy(&*str->begin() + old_size, ptr, n); } // To treat a possibly-empty vector as an array, use these functions. @@ -317,22 +312,22 @@ inline void STLAppendToString(string* str, const char* ptr, size_t n) { // everywhere. If our STL implementation changes, we will need to // change this as well. -template +template inline T* vector_as_array(vector* v) { -# ifdef NDEBUG - return &*v->begin(); -# else - return v->empty() ? NULL : &*v->begin(); -# endif +#ifdef NDEBUG + return &*v->begin(); +#else + return v->empty() ? NULL : &*v->begin(); +#endif } -template +template inline const T* vector_as_array(const vector* v) { -# ifdef NDEBUG - return &*v->begin(); -# else - return v->empty() ? NULL : &*v->begin(); -# endif +#ifdef NDEBUG + return &*v->begin(); +#else + return v->empty() ? NULL : &*v->begin(); +#endif } // Return a mutable char* pointing to a string's internal buffer, @@ -348,8 +343,8 @@ inline const T* vector_as_array(const vector* v) { // According to Matt Austern, this should already work on all current C++98 // implementations. inline char* string_as_array(string* str) { - // DO NOT USE const_cast(str->data())! See the unittest for why. - return str->empty() ? 
NULL : &*str->begin(); + // DO NOT USE const_cast(str->data())! See the unittest for why. + return str->empty() ? NULL : &*str->begin(); } // These are methods that test two hash maps/sets for equality. These exist @@ -359,29 +354,22 @@ inline char* string_as_array(string* str) { // differed. template -inline bool -HashSetEquality(const HashSet& set_a, - const HashSet& set_b) { - if (set_a.size() != set_b.size()) return false; - for (typename HashSet::const_iterator i = set_a.begin(); - i != set_a.end(); - ++i) - if (set_b.find(*i) == set_b.end()) return false; - return true; +inline bool HashSetEquality(const HashSet& set_a, const HashSet& set_b) { + if (set_a.size() != set_b.size()) return false; + for (typename HashSet::const_iterator i = set_a.begin(); i != set_a.end(); ++i) + if (set_b.find(*i) == set_b.end()) return false; + return true; } template -inline bool -HashMapEquality(const HashMap& map_a, - const HashMap& map_b) { - if (map_a.size() != map_b.size()) return false; - for (typename HashMap::const_iterator i = map_a.begin(); - i != map_a.end(); ++i) { - typename HashMap::const_iterator j = map_b.find(i->first); - if (j == map_b.end()) return false; - if (i->second != j->second) return false; - } - return true; +inline bool HashMapEquality(const HashMap& map_a, const HashMap& map_b) { + if (map_a.size() != map_b.size()) return false; + for (typename HashMap::const_iterator i = map_a.begin(); i != map_a.end(); ++i) { + typename HashMap::const_iterator j = map_b.find(i->first); + if (j == map_b.end()) return false; + if (i->second != j->second) return false; + } + return true; } // The following functions are useful for cleaning up STL containers @@ -398,23 +386,22 @@ HashMapEquality(const HashMap& map_a, // ElementDeleter (defined below), which ensures that your container's elements // are deleted when the ElementDeleter goes out of scope. 
template -void STLDeleteElements(T *container) { - if (!container) return; - STLDeleteContainerPointers(container->begin(), container->end()); - container->clear(); +void STLDeleteElements(T* container) { + if (!container) return; + STLDeleteContainerPointers(container->begin(), container->end()); + container->clear(); } // Given an STL container consisting of (key, value) pairs, STLDeleteValues // deletes all the "value" components and clears the container. Does nothing // in the case it's given a NULL pointer. template -void STLDeleteValues(T *v) { - if (!v) return; - STLDeleteContainerPairSecondPointers(v->begin(), v->end()); - v->clear(); +void STLDeleteValues(T* v) { + if (!v) return; + STLDeleteContainerPairSecondPointers(v->begin(), v->end()); + v->clear(); } - // ElementDeleter and ValueDeleter provide a convenient way to delete all // elements or values from STL containers when they go out of scope. This // greatly simplifies code that creates temporary objects and has multiple @@ -431,96 +418,81 @@ void STLDeleteValues(T *v) { // TemplatedValueDeleter classes. Clients should not typically use this class // directly. class BaseDeleter { - public: - virtual ~BaseDeleter() {} +public: + virtual ~BaseDeleter() {} - protected: - BaseDeleter() {} +protected: + BaseDeleter() {} - private: - DISALLOW_EVIL_CONSTRUCTORS(BaseDeleter); +private: + DISALLOW_EVIL_CONSTRUCTORS(BaseDeleter); }; // Given a pointer to an STL container, this class will delete all the element // pointers when it goes out of scope. Clients should typically use // ElementDeleter rather than invoking this class directly. 
-template +template class TemplatedElementDeleter : public BaseDeleter { - public: - explicit TemplatedElementDeleter(STLContainer *ptr) - : container_ptr_(ptr) { - } +public: + explicit TemplatedElementDeleter(STLContainer* ptr) : container_ptr_(ptr) {} - virtual ~TemplatedElementDeleter() { - STLDeleteElements(container_ptr_); - } + virtual ~TemplatedElementDeleter() { STLDeleteElements(container_ptr_); } - private: - STLContainer *container_ptr_; +private: + STLContainer* container_ptr_; - DISALLOW_EVIL_CONSTRUCTORS(TemplatedElementDeleter); + DISALLOW_EVIL_CONSTRUCTORS(TemplatedElementDeleter); }; // Like TemplatedElementDeleter, this class will delete element pointers from a // container when it goes out of scope. However, it is much nicer to use, // since the class itself is not templated. class ElementDeleter { - public: - template - explicit ElementDeleter(STLContainer *ptr) - : deleter_(new TemplatedElementDeleter(ptr)) { - } +public: + template + explicit ElementDeleter(STLContainer* ptr) + : deleter_(new TemplatedElementDeleter(ptr)) {} - ~ElementDeleter() { - delete deleter_; - } + ~ElementDeleter() { delete deleter_; } - private: - BaseDeleter *deleter_; +private: + BaseDeleter* deleter_; - DISALLOW_EVIL_CONSTRUCTORS(ElementDeleter); + DISALLOW_EVIL_CONSTRUCTORS(ElementDeleter); }; // Given a pointer to an STL container this class will delete all the value // pointers when it goes out of scope. Clients should typically use // ValueDeleter rather than invoking this class directly. 
-template +template class TemplatedValueDeleter : public BaseDeleter { - public: - explicit TemplatedValueDeleter(STLContainer *ptr) - : container_ptr_(ptr) { - } +public: + explicit TemplatedValueDeleter(STLContainer* ptr) : container_ptr_(ptr) {} - virtual ~TemplatedValueDeleter() { - STLDeleteValues(container_ptr_); - } + virtual ~TemplatedValueDeleter() { STLDeleteValues(container_ptr_); } - private: - STLContainer *container_ptr_; +private: + STLContainer* container_ptr_; - DISALLOW_EVIL_CONSTRUCTORS(TemplatedValueDeleter); + DISALLOW_EVIL_CONSTRUCTORS(TemplatedValueDeleter); }; // Similar to ElementDeleter, but wraps a TemplatedValueDeleter rather than an // TemplatedElementDeleter. class ValueDeleter { - public: - template - explicit ValueDeleter(STLContainer *ptr) - : deleter_(new TemplatedValueDeleter(ptr)) { - } +public: + template + explicit ValueDeleter(STLContainer* ptr) + : deleter_(new TemplatedValueDeleter(ptr)) {} - ~ValueDeleter() { - delete deleter_; - } + ~ValueDeleter() { delete deleter_; } - private: - BaseDeleter *deleter_; +private: + BaseDeleter* deleter_; - DISALLOW_EVIL_CONSTRUCTORS(ValueDeleter); + DISALLOW_EVIL_CONSTRUCTORS(ValueDeleter); }; - // STLElementDeleter and STLValueDeleter are similar to ElementDeleter and // ValueDeleter, except that: // - The classes are templated, making them less convenient to use. @@ -528,22 +500,25 @@ class ValueDeleter { // New code should typically use ElementDeleter and ValueDeleter unless // efficiency is a large concern. 
-template class STLElementDeleter { - public: - STLElementDeleter(STLContainer *ptr) : container_ptr_(ptr) {} - ~STLElementDeleter() { STLDeleteElements(container_ptr_); } - private: - STLContainer *container_ptr_; -}; +template +class STLElementDeleter { +public: + STLElementDeleter(STLContainer* ptr) : container_ptr_(ptr) {} + ~STLElementDeleter() { STLDeleteElements(container_ptr_); } -template class STLValueDeleter { - public: - STLValueDeleter(STLContainer *ptr) : container_ptr_(ptr) {} - ~STLValueDeleter() { STLDeleteValues(container_ptr_); } - private: - STLContainer *container_ptr_; +private: + STLContainer* container_ptr_; }; +template +class STLValueDeleter { +public: + STLValueDeleter(STLContainer* ptr) : container_ptr_(ptr) {} + ~STLValueDeleter() { STLDeleteValues(container_ptr_); } + +private: + STLContainer* container_ptr_; +}; // STLSet{Difference,SymmetricDifference,Union,Intersection}(A a, B b, C *c) // *APPEND* the set {difference, symmetric difference, union, intersection} of @@ -572,113 +547,84 @@ template class STLValueDeleter { // set c = STLSetDifference(a, b); // is an example of where RVO comes into play. 
-template -void STLSetDifference(const SortedSTLContainerA &a, - const SortedSTLContainerB &b, - SortedSTLContainerC *c) { - // The qualified name avoids an ambiguity error, particularly with C++11: - assert(std::is_sorted(a.begin(), a.end())); - assert(std::is_sorted(b.begin(), b.end())); - assert(static_cast(&a) != - static_cast(c)); - assert(static_cast(&b) != - static_cast(c)); - std::set_difference(a.begin(), a.end(), b.begin(), b.end(), - std::inserter(*c, c->end())); -} - -template -SortedSTLContainer STLSetDifference(const SortedSTLContainer &a, - const SortedSTLContainer &b) { - SortedSTLContainer c; - STLSetDifference(a, b, &c); - return c; -} - -template -void STLSetUnion(const SortedSTLContainerA &a, - const SortedSTLContainerB &b, - SortedSTLContainerC *c) { - assert(std::is_sorted(a.begin(), a.end())); - assert(std::is_sorted(b.begin(), b.end())); - assert(static_cast(&a) != - static_cast(c)); - assert(static_cast(&b) != - static_cast(c)); - std::set_union(a.begin(), a.end(), b.begin(), b.end(), - std::inserter(*c, c->end())); -} - -template -void STLSetSymmetricDifference(const SortedSTLContainerA &a, - const SortedSTLContainerB &b, - SortedSTLContainerC *c) { - assert(std::is_sorted(a.begin(), a.end())); - assert(std::is_sorted(b.begin(), b.end())); - assert(static_cast(&a) != - static_cast(c)); - assert(static_cast(&b) != - static_cast(c)); - std::set_symmetric_difference(a.begin(), a.end(), b.begin(), b.end(), - std::inserter(*c, c->end())); -} - -template -SortedSTLContainer STLSetSymmetricDifference(const SortedSTLContainer &a, - const SortedSTLContainer &b) { - SortedSTLContainer c; - STLSetSymmetricDifference(a, b, &c); - return c; -} - -template -SortedSTLContainer STLSetUnion(const SortedSTLContainer &a, - const SortedSTLContainer &b) { - SortedSTLContainer c; - STLSetUnion(a, b, &c); - return c; -} - -template -void STLSetIntersection(const SortedSTLContainerA &a, - const SortedSTLContainerB &b, - SortedSTLContainerC *c) { - 
assert(std::is_sorted(a.begin(), a.end())); - assert(std::is_sorted(b.begin(), b.end())); - assert(static_cast(&a) != - static_cast(c)); - assert(static_cast(&b) != - static_cast(c)); - std::set_intersection(a.begin(), a.end(), b.begin(), b.end(), - std::inserter(*c, c->end())); -} - -template -SortedSTLContainer STLSetIntersection(const SortedSTLContainer &a, - const SortedSTLContainer &b) { - SortedSTLContainer c; - STLSetIntersection(a, b, &c); - return c; +template +void STLSetDifference(const SortedSTLContainerA& a, const SortedSTLContainerB& b, + SortedSTLContainerC* c) { + // The qualified name avoids an ambiguity error, particularly with C++11: + assert(std::is_sorted(a.begin(), a.end())); + assert(std::is_sorted(b.begin(), b.end())); + assert(static_cast(&a) != static_cast(c)); + assert(static_cast(&b) != static_cast(c)); + std::set_difference(a.begin(), a.end(), b.begin(), b.end(), std::inserter(*c, c->end())); +} + +template +SortedSTLContainer STLSetDifference(const SortedSTLContainer& a, const SortedSTLContainer& b) { + SortedSTLContainer c; + STLSetDifference(a, b, &c); + return c; +} + +template +void STLSetUnion(const SortedSTLContainerA& a, const SortedSTLContainerB& b, + SortedSTLContainerC* c) { + assert(std::is_sorted(a.begin(), a.end())); + assert(std::is_sorted(b.begin(), b.end())); + assert(static_cast(&a) != static_cast(c)); + assert(static_cast(&b) != static_cast(c)); + std::set_union(a.begin(), a.end(), b.begin(), b.end(), std::inserter(*c, c->end())); +} + +template +void STLSetSymmetricDifference(const SortedSTLContainerA& a, const SortedSTLContainerB& b, + SortedSTLContainerC* c) { + assert(std::is_sorted(a.begin(), a.end())); + assert(std::is_sorted(b.begin(), b.end())); + assert(static_cast(&a) != static_cast(c)); + assert(static_cast(&b) != static_cast(c)); + std::set_symmetric_difference(a.begin(), a.end(), b.begin(), b.end(), + std::inserter(*c, c->end())); +} + +template +SortedSTLContainer STLSetSymmetricDifference(const 
SortedSTLContainer& a, + const SortedSTLContainer& b) { + SortedSTLContainer c; + STLSetSymmetricDifference(a, b, &c); + return c; +} + +template +SortedSTLContainer STLSetUnion(const SortedSTLContainer& a, const SortedSTLContainer& b) { + SortedSTLContainer c; + STLSetUnion(a, b, &c); + return c; +} + +template +void STLSetIntersection(const SortedSTLContainerA& a, const SortedSTLContainerB& b, + SortedSTLContainerC* c) { + assert(std::is_sorted(a.begin(), a.end())); + assert(std::is_sorted(b.begin(), b.end())); + assert(static_cast(&a) != static_cast(c)); + assert(static_cast(&b) != static_cast(c)); + std::set_intersection(a.begin(), a.end(), b.begin(), b.end(), std::inserter(*c, c->end())); +} + +template +SortedSTLContainer STLSetIntersection(const SortedSTLContainer& a, const SortedSTLContainer& b) { + SortedSTLContainer c; + STLSetIntersection(a, b, &c); + return c; } // Similar to STLSet{Union,Intesection,etc}, but simpler because the result is // always bool. -template -bool STLIncludes(const SortedSTLContainerA &a, - const SortedSTLContainerB &b) { - assert(std::is_sorted(a.begin(), a.end())); - assert(std::is_sorted(b.begin(), b.end())); - return std::includes(a.begin(), a.end(), - b.begin(), b.end()); +template +bool STLIncludes(const SortedSTLContainerA& a, const SortedSTLContainerB& b) { + assert(std::is_sorted(a.begin(), a.end())); + assert(std::is_sorted(b.begin(), b.end())); + return std::includes(a.begin(), a.end(), b.begin(), b.end()); } // Functors that compose arbitrary unary and binary functions with a @@ -695,99 +641,86 @@ bool STLIncludes(const SortedSTLContainerA &a, // A typical usage for these functions would be when iterating over // the contents of an STL map. For other sample usage, see the unittest. 
-template -class UnaryOperateOnFirst - : public std::unary_function { - public: - UnaryOperateOnFirst() { - } +template +class UnaryOperateOnFirst : public std::unary_function { +public: + UnaryOperateOnFirst() {} - UnaryOperateOnFirst(const UnaryOp& f) : f_(f) { // TODO(user): explicit? - } + UnaryOperateOnFirst(const UnaryOp& f) : f_(f) { // TODO(user): explicit? + } - typename UnaryOp::result_type operator()(const Pair& p) const { - return f_(p.first); - } + typename UnaryOp::result_type operator()(const Pair& p) const { return f_(p.first); } - private: - UnaryOp f_; +private: + UnaryOp f_; }; -template +template UnaryOperateOnFirst UnaryOperate1st(const UnaryOp& f) { - return UnaryOperateOnFirst(f); + return UnaryOperateOnFirst(f); } -template -class UnaryOperateOnSecond - : public std::unary_function { - public: - UnaryOperateOnSecond() { - } +template +class UnaryOperateOnSecond : public std::unary_function { +public: + UnaryOperateOnSecond() {} - UnaryOperateOnSecond(const UnaryOp& f) : f_(f) { // TODO(user): explicit? - } + UnaryOperateOnSecond(const UnaryOp& f) : f_(f) { // TODO(user): explicit? + } - typename UnaryOp::result_type operator()(const Pair& p) const { - return f_(p.second); - } + typename UnaryOp::result_type operator()(const Pair& p) const { return f_(p.second); } - private: - UnaryOp f_; +private: + UnaryOp f_; }; -template +template UnaryOperateOnSecond UnaryOperate2nd(const UnaryOp& f) { - return UnaryOperateOnSecond(f); + return UnaryOperateOnSecond(f); } -template +template class BinaryOperateOnFirst - : public std::binary_function { - public: - BinaryOperateOnFirst() { - } + : public std::binary_function { +public: + BinaryOperateOnFirst() {} - BinaryOperateOnFirst(const BinaryOp& f) : f_(f) { // TODO(user): explicit? - } + BinaryOperateOnFirst(const BinaryOp& f) : f_(f) { // TODO(user): explicit? 
+ } - typename BinaryOp::result_type operator()(const Pair& p1, - const Pair& p2) const { - return f_(p1.first, p2.first); - } + typename BinaryOp::result_type operator()(const Pair& p1, const Pair& p2) const { + return f_(p1.first, p2.first); + } - private: - BinaryOp f_; +private: + BinaryOp f_; }; // TODO(user): explicit? -template +template BinaryOperateOnFirst BinaryOperate1st(const BinaryOp& f) { - return BinaryOperateOnFirst(f); + return BinaryOperateOnFirst(f); } -template +template class BinaryOperateOnSecond - : public std::binary_function { - public: - BinaryOperateOnSecond() { - } + : public std::binary_function { +public: + BinaryOperateOnSecond() {} - BinaryOperateOnSecond(const BinaryOp& f) : f_(f) { - } + BinaryOperateOnSecond(const BinaryOp& f) : f_(f) {} - typename BinaryOp::result_type operator()(const Pair& p1, - const Pair& p2) const { - return f_(p1.second, p2.second); - } + typename BinaryOp::result_type operator()(const Pair& p1, const Pair& p2) const { + return f_(p1.second, p2.second); + } - private: - BinaryOp f_; +private: + BinaryOp f_; }; -template +template BinaryOperateOnSecond BinaryOperate2nd(const BinaryOp& f) { - return BinaryOperateOnSecond(f); + return BinaryOperateOnSecond(f); } // Functor that composes a binary functor h from an arbitrary binary functor @@ -803,32 +736,32 @@ BinaryOperateOnSecond BinaryOperate2nd(const BinaryOp& f) { // // F has to be a model of AdaptableBinaryFunction. // G1 and G2 have to be models of AdabtableUnaryFunction. 
-template -class BinaryComposeBinary : public binary_function { - public: - BinaryComposeBinary(F f, G1 g1, G2 g2) : f_(f), g1_(g1), g2_(g2) { } - - typename F::result_type operator()(typename G1::argument_type x, - typename G2::argument_type y) const { - return f_(g1_(x), g2_(y)); - } - - private: - F f_; - G1 g1_; - G2 g2_; +template +class BinaryComposeBinary + : public binary_function { +public: + BinaryComposeBinary(F f, G1 g1, G2 g2) : f_(f), g1_(g1), g2_(g2) {} + + typename F::result_type operator()(typename G1::argument_type x, + typename G2::argument_type y) const { + return f_(g1_(x), g2_(y)); + } + +private: + F f_; + G1 g1_; + G2 g2_; }; -template +template BinaryComposeBinary BinaryCompose1(F f, G g) { - return BinaryComposeBinary(f, g, g); + return BinaryComposeBinary(f, g, g); } -template +template BinaryComposeBinary BinaryCompose2(F f, G1 g1, G2 g2) { - return BinaryComposeBinary(f, g1, g2); + return BinaryComposeBinary(f, g1, g2); } // This is a wrapper for an STL allocator which keeps a count of the @@ -838,43 +771,40 @@ BinaryComposeBinary BinaryCompose2(F f, G1 g1, G2 g2) { // deallocation. template > class STLCountingAllocator : public Alloc { - public: - typedef typename Alloc::pointer pointer; - typedef typename Alloc::size_type size_type; - - STLCountingAllocator() : bytes_used_(NULL) { } - STLCountingAllocator(int64* b) : bytes_used_(b) {} // TODO(user): explicit? 
- - // Constructor used for rebinding - template - STLCountingAllocator(const STLCountingAllocator& x) - : Alloc(x), - bytes_used_(x.bytes_used()) { - } - - pointer allocate(size_type n, std::allocator::const_pointer hint = 0) { - assert(bytes_used_ != NULL); - *bytes_used_ += n * sizeof(T); - return Alloc::allocate(n, hint); - } - - void deallocate(pointer p, size_type n) { - Alloc::deallocate(p, n); - assert(bytes_used_ != NULL); - *bytes_used_ -= n * sizeof(T); - } - - // Rebind allows an allocator to be used for a different type - template struct rebind { - typedef STLCountingAllocator::other> other; - }; - - int64* bytes_used() const { return bytes_used_; } - - private: - int64* bytes_used_; +public: + typedef typename Alloc::pointer pointer; + typedef typename Alloc::size_type size_type; + + STLCountingAllocator() : bytes_used_(NULL) {} + STLCountingAllocator(int64* b) : bytes_used_(b) {} // TODO(user): explicit? + + // Constructor used for rebinding + template + STLCountingAllocator(const STLCountingAllocator& x) + : Alloc(x), bytes_used_(x.bytes_used()) {} + + pointer allocate(size_type n, std::allocator::const_pointer hint = 0) { + assert(bytes_used_ != NULL); + *bytes_used_ += n * sizeof(T); + return Alloc::allocate(n, hint); + } + + void deallocate(pointer p, size_type n) { + Alloc::deallocate(p, n); + assert(bytes_used_ != NULL); + *bytes_used_ -= n * sizeof(T); + } + + // Rebind allows an allocator to be used for a different type + template + struct rebind { + typedef STLCountingAllocator::other> other; + }; + + int64* bytes_used() const { return bytes_used_; } + +private: + int64* bytes_used_; }; // Even though a struct has no data members, it cannot have zero size @@ -897,12 +827,9 @@ class STLCountingAllocator : public Alloc { // despite the fact that sizeof(Empty) > 0. 
template struct STLEmptyBaseHandle : public Base { - template - STLEmptyBaseHandle(const U &b, const Data &d) - : Base(b), - data(d) { - } - Data data; + template + STLEmptyBaseHandle(const U& b, const Data& d) : Base(b), data(d) {} + Data data; }; // These functions return true if there is some element in the sorted range @@ -910,41 +837,40 @@ struct STLEmptyBaseHandle : public Base { // end2). The iterators do not have to be of the same type, but the value types // must be less-than comparable. (Two elements a,b are considered equal if // !(a < b) && !(b < a). -template -bool SortedRangesHaveIntersection(InputIterator1 begin1, InputIterator1 end1, - InputIterator2 begin2, InputIterator2 end2) { - assert(std::is_sorted(begin1, end1)); - assert(std::is_sorted(begin2, end2)); - while (begin1 != end1 && begin2 != end2) { - if (*begin1 < *begin2) { - ++begin1; - } else if (*begin2 < *begin1) { - ++begin2; - } else { - return true; +template +bool SortedRangesHaveIntersection(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + InputIterator2 end2) { + assert(std::is_sorted(begin1, end1)); + assert(std::is_sorted(begin2, end2)); + while (begin1 != end1 && begin2 != end2) { + if (*begin1 < *begin2) { + ++begin1; + } else if (*begin2 < *begin1) { + ++begin2; + } else { + return true; + } } - } - return false; + return false; } // This is equivalent to the function above, but using a custom comparison // function. 
-template -bool SortedRangesHaveIntersection(InputIterator1 begin1, InputIterator1 end1, - InputIterator2 begin2, InputIterator2 end2, - Comp comparator) { - assert(std::is_sorted(begin1, end1, comparator)); - assert(std::is_sorted(begin2, end2, comparator)); - while (begin1 != end1 && begin2 != end2) { - if (comparator(*begin1, *begin2)) { - ++begin1; - } else if (comparator(*begin2, *begin1)) { - ++begin2; - } else { - return true; +template +bool SortedRangesHaveIntersection(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + InputIterator2 end2, Comp comparator) { + assert(std::is_sorted(begin1, end1, comparator)); + assert(std::is_sorted(begin2, end2, comparator)); + while (begin1 != end1 && begin2 != end2) { + if (comparator(*begin1, *begin2)) { + ++begin1; + } else if (comparator(*begin2, *begin1)) { + ++begin2; + } else { + return true; + } } - } - return false; + return false; } // release_ptr is intended to help remove systematic use of gscoped_ptr @@ -964,13 +890,14 @@ bool SortedRangesHaveIntersection(InputIterator1 begin1, InputIterator1 end1, // int remove_idx = f(v); // return release_ptr(&v[remove_idx]); // } -template T* release_ptr(T **ptr) MUST_USE_RESULT; -template T* release_ptr(T **ptr) { - assert(ptr); - T *tmp = *ptr; - *ptr = NULL; - return tmp; +template +T* release_ptr(T** ptr) MUST_USE_RESULT; +template +T* release_ptr(T** ptr) { + assert(ptr); + T* tmp = *ptr; + *ptr = NULL; + return tmp; } - -#endif // UTIL_GTL_STL_UTIL_H_ +#endif // UTIL_GTL_STL_UTIL_H_ diff --git a/be/src/gutil/stringprintf.cc b/be/src/gutil/stringprintf.cc index 8f164a138d036a..8b0991a166328a 100644 --- a/be/src/gutil/stringprintf.cc +++ b/be/src/gutil/stringprintf.cc @@ -4,10 +4,12 @@ #include #include // For va_list and related operations -#include // MSVC requires this for _vsnprintf +#include // MSVC requires this for _vsnprintf + #include using std::vector; #include + #include "gutil/macros.h" #ifdef _MSC_VER @@ -17,80 +19,79 @@ enum { 
IS__MSC_VER = 0 }; #endif void StringAppendV(string* dst, const char* format, va_list ap) { - // First try with a small fixed size buffer - static const int kSpaceLength = 1024; - char space[kSpaceLength]; - - // It's possible for methods that use a va_list to invalidate - // the data in it upon use. The fix is to make a copy - // of the structure before using it and use that copy instead. - va_list backup_ap; - va_copy(backup_ap, ap); - int result = vsnprintf(space, kSpaceLength, format, backup_ap); - va_end(backup_ap); - - if (result < kSpaceLength) { - if (result >= 0) { - // Normal case -- everything fit. - dst->append(space, result); - return; + // First try with a small fixed size buffer + static const int kSpaceLength = 1024; + char space[kSpaceLength]; + + // It's possible for methods that use a va_list to invalidate + // the data in it upon use. The fix is to make a copy + // of the structure before using it and use that copy instead. + va_list backup_ap; + va_copy(backup_ap, ap); + int result = vsnprintf(space, kSpaceLength, format, backup_ap); + va_end(backup_ap); + + if (result < kSpaceLength) { + if (result >= 0) { + // Normal case -- everything fit. + dst->append(space, result); + return; + } + + if (IS__MSC_VER) { + // Error or MSVC running out of space. MSVC 8.0 and higher + // can be asked about space needed with the special idiom below: + va_copy(backup_ap, ap); + result = vsnprintf(nullptr, 0, format, backup_ap); + va_end(backup_ap); + } + + if (result < 0) { + // Just an error. + return; + } } - if (IS__MSC_VER) { - // Error or MSVC running out of space. MSVC 8.0 and higher - // can be asked about space needed with the special idiom below: - va_copy(backup_ap, ap); - result = vsnprintf(nullptr, 0, format, backup_ap); - va_end(backup_ap); - } + // Increase the buffer size to the size requested by vsnprintf, + // plus one for the closing \0. 
+ int length = result + 1; + auto buf = new char[length]; + + // Restore the va_list before we use it again + va_copy(backup_ap, ap); + result = vsnprintf(buf, length, format, backup_ap); + va_end(backup_ap); - if (result < 0) { - // Just an error. - return; + if (result >= 0 && result < length) { + // It fit + dst->append(buf, result); } - } - - // Increase the buffer size to the size requested by vsnprintf, - // plus one for the closing \0. - int length = result+1; - auto buf = new char[length]; - - // Restore the va_list before we use it again - va_copy(backup_ap, ap); - result = vsnprintf(buf, length, format, backup_ap); - va_end(backup_ap); - - if (result >= 0 && result < length) { - // It fit - dst->append(buf, result); - } - delete[] buf; + delete[] buf; } - string StringPrintf(const char* format, ...) { - va_list ap; - va_start(ap, format); - string result; - StringAppendV(&result, format, ap); - va_end(ap); - return result; + va_list ap; + va_start(ap, format); + string result; + StringAppendV(&result, format, ap); + va_end(ap); + return result; } const string& SStringPrintf(string* dst, const char* format, ...) { - va_list ap; - va_start(ap, format); - dst->clear(); - StringAppendV(dst, format, ap); - va_end(ap); - return *dst; + va_list ap; + va_start(ap, format); + dst->clear(); + StringAppendV(dst, format, ap); + va_end(ap); + return *dst; } void StringAppendF(string* dst, const char* format, ...) { - va_list ap; - va_start(ap, format); - StringAppendV(dst, format, ap); - va_end(ap); + va_list ap; + va_start(ap, format); + StringAppendV(dst, format, ap); + va_end(ap); } // Max arguments supported by StringPrintVector @@ -99,38 +100,35 @@ const int kStringPrintfVectorMaxArgs = 32; // An empty block of zero for filler arguments. This is const so that if // printf tries to write to it (via %n) then the program gets a SIGSEGV // and we can fix the problem or protect against an attack. 
-static const char string_printf_empty_block[256] = { '\0' }; +static const char string_printf_empty_block[256] = {'\0'}; string StringPrintfVector(const char* format, const vector& v) { - CHECK_LE(v.size(), kStringPrintfVectorMaxArgs) - << "StringPrintfVector currently only supports up to " - << kStringPrintfVectorMaxArgs << " arguments. " - << "Feel free to add support for more if you need it."; - - // Add filler arguments so that bogus format+args have a harder time - // crashing the program, corrupting the program (%n), - // or displaying random chunks of memory to users. - - const char* cstr[kStringPrintfVectorMaxArgs]; - for (int i = 0; i < v.size(); ++i) { - cstr[i] = v[i].c_str(); - } - for (int i = v.size(); i < arraysize(cstr); ++i) { - cstr[i] = &string_printf_empty_block[0]; - } - - // I do not know any way to pass kStringPrintfVectorMaxArgs arguments, - // or any way to build a va_list by hand, or any API for printf - // that accepts an array of arguments. The best I can do is stick - // this COMPILE_ASSERT right next to the actual statement. - - COMPILE_ASSERT(kStringPrintfVectorMaxArgs == 32, arg_count_mismatch); - return StringPrintf(format, - cstr[0], cstr[1], cstr[2], cstr[3], cstr[4], - cstr[5], cstr[6], cstr[7], cstr[8], cstr[9], - cstr[10], cstr[11], cstr[12], cstr[13], cstr[14], - cstr[15], cstr[16], cstr[17], cstr[18], cstr[19], - cstr[20], cstr[21], cstr[22], cstr[23], cstr[24], - cstr[25], cstr[26], cstr[27], cstr[28], cstr[29], - cstr[30], cstr[31]); + CHECK_LE(v.size(), kStringPrintfVectorMaxArgs) + << "StringPrintfVector currently only supports up to " << kStringPrintfVectorMaxArgs + << " arguments. " + << "Feel free to add support for more if you need it."; + + // Add filler arguments so that bogus format+args have a harder time + // crashing the program, corrupting the program (%n), + // or displaying random chunks of memory to users. 
+ + const char* cstr[kStringPrintfVectorMaxArgs]; + for (int i = 0; i < v.size(); ++i) { + cstr[i] = v[i].c_str(); + } + for (int i = v.size(); i < arraysize(cstr); ++i) { + cstr[i] = &string_printf_empty_block[0]; + } + + // I do not know any way to pass kStringPrintfVectorMaxArgs arguments, + // or any way to build a va_list by hand, or any API for printf + // that accepts an array of arguments. The best I can do is stick + // this COMPILE_ASSERT right next to the actual statement. + + COMPILE_ASSERT(kStringPrintfVectorMaxArgs == 32, arg_count_mismatch); + return StringPrintf(format, cstr[0], cstr[1], cstr[2], cstr[3], cstr[4], cstr[5], cstr[6], + cstr[7], cstr[8], cstr[9], cstr[10], cstr[11], cstr[12], cstr[13], cstr[14], + cstr[15], cstr[16], cstr[17], cstr[18], cstr[19], cstr[20], cstr[21], + cstr[22], cstr[23], cstr[24], cstr[25], cstr[26], cstr[27], cstr[28], + cstr[29], cstr[30], cstr[31]); } diff --git a/be/src/gutil/stringprintf.h b/be/src/gutil/stringprintf.h index e486e7277a4fe4..ccbfbf4f3f9e62 100644 --- a/be/src/gutil/stringprintf.h +++ b/be/src/gutil/stringprintf.h @@ -11,6 +11,7 @@ #define _BASE_STRINGPRINTF_H #include + #include using std::string; #include @@ -20,18 +21,18 @@ using std::vector; // Return a C++ string extern string StringPrintf(const char* format, ...) - // Tell the compiler to do printf format string checking. - PRINTF_ATTRIBUTE(1,2); + // Tell the compiler to do printf format string checking. + PRINTF_ATTRIBUTE(1, 2); // Store result into a supplied string and return it extern const string& SStringPrintf(string* dst, const char* format, ...) - // Tell the compiler to do printf format string checking. - PRINTF_ATTRIBUTE(2,3); + // Tell the compiler to do printf format string checking. + PRINTF_ATTRIBUTE(2, 3); // Append result to a supplied string extern void StringAppendF(string* dst, const char* format, ...) - // Tell the compiler to do printf format string checking. 
- PRINTF_ATTRIBUTE(2,3); + // Tell the compiler to do printf format string checking. + PRINTF_ATTRIBUTE(2, 3); // Lower-level routine that takes a va_list and appends to a specified // string. All other routines are just convenience wrappers around it. diff --git a/be/src/gutil/strings/ascii_ctype.cc b/be/src/gutil/strings/ascii_ctype.cc index 44608f8f8811a0..f2028a63244ede 100644 --- a/be/src/gutil/strings/ascii_ctype.cc +++ b/be/src/gutil/strings/ascii_ctype.cc @@ -33,78 +33,54 @@ // elif i % 16 == 15: // print const unsigned char kAsciiPropertyBits[256] = { - 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x00 - 0x40, 0x68, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x10 - 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - 0x28, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, // 0x20 - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, // 0x30 - 0x84, 0x84, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x40 - 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, - 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x50 - 0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x60 - 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, - 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x70 - 0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x00 + 0x40, 0x68, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x10 + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x28, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, // 0x20 + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, // 0x30 + 0x84, 0x84, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x40 + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 
0x05, 0x05, 0x05, 0x05, // 0x50 + 0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x60 + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x70 + 0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x40, }; const unsigned char kAsciiToLower[256] = { - 00, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, - 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 'a', 'b', 'c', 'd', 'e', - 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', - 'z', 91, 92, 93, 94, 95, 96, 97, 98, 99, - 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, - 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, - 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, - 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, - 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, - 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, - 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, - 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, - 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, - 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, - 250, 251, 252, 253, 254, 255 -}; + 00, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', + 'z', 91, 92, 
93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, + 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, + 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, + 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, + 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, + 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, + 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, + 252, 253, 254, 255}; const unsigned char kAsciiToUpper[256] = { - 00, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, - 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 'A', 'B', 'C', - 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', - 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', - 'X', 'Y', 'Z', 123, 124, 125, 126, 127, 128, 129, - 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, - 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, - 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, - 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, - 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, - 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, - 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, - 230, 231, 232, 233, 234, 235, 236, 
237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, - 250, 251, 252, 253, 254, 255 -}; + 00, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 123, 124, 125, + 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, + 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, + 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, + 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, + 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, + 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, + 252, 253, 254, 255}; diff --git a/be/src/gutil/strings/ascii_ctype.h b/be/src/gutil/strings/ascii_ctype.h index aeaa94371b427f..62f0f28b332d25 100644 --- a/be/src/gutil/strings/ascii_ctype.h +++ b/be/src/gutil/strings/ascii_ctype.h @@ -34,42 +34,60 @@ extern const unsigned char kAsciiPropertyBits[256]; // Public functions. 
-static inline bool ascii_isalpha(unsigned char c) { return kApb[c] & 0x01; } -static inline bool ascii_isalnum(unsigned char c) { return kApb[c] & 0x04; } -static inline bool ascii_isspace(unsigned char c) { return kApb[c] & 0x08; } -static inline bool ascii_ispunct(unsigned char c) { return kApb[c] & 0x10; } -static inline bool ascii_isblank(unsigned char c) { return kApb[c] & 0x20; } -static inline bool ascii_iscntrl(unsigned char c) { return kApb[c] & 0x40; } -static inline bool ascii_isxdigit(unsigned char c) { return kApb[c] & 0x80; } +static inline bool ascii_isalpha(unsigned char c) { + return kApb[c] & 0x01; +} +static inline bool ascii_isalnum(unsigned char c) { + return kApb[c] & 0x04; +} +static inline bool ascii_isspace(unsigned char c) { + return kApb[c] & 0x08; +} +static inline bool ascii_ispunct(unsigned char c) { + return kApb[c] & 0x10; +} +static inline bool ascii_isblank(unsigned char c) { + return kApb[c] & 0x20; +} +static inline bool ascii_iscntrl(unsigned char c) { + return kApb[c] & 0x40; +} +static inline bool ascii_isxdigit(unsigned char c) { + return kApb[c] & 0x80; +} static inline bool ascii_isdigit(unsigned char c) { - return c >= '0' && c <= '9'; + return c >= '0' && c <= '9'; } static inline bool ascii_isprint(unsigned char c) { - return c >= 32 && c < 127; + return c >= 32 && c < 127; } static inline bool ascii_isgraph(unsigned char c) { - return c > 32 && c < 127; + return c > 32 && c < 127; } static inline bool ascii_isupper(unsigned char c) { - return c >= 'A' && c <= 'Z'; + return c >= 'A' && c <= 'Z'; } static inline bool ascii_islower(unsigned char c) { - return c >= 'a' && c <= 'z'; + return c >= 'a' && c <= 'z'; } static inline bool ascii_isascii(unsigned char c) { - return c < 128; + return c < 128; } #undef kApb extern const unsigned char kAsciiToLower[256]; -static inline char ascii_tolower(unsigned char c) { return kAsciiToLower[c]; } +static inline char ascii_tolower(unsigned char c) { + return kAsciiToLower[c]; +} 
extern const unsigned char kAsciiToUpper[256]; -static inline char ascii_toupper(unsigned char c) { return kAsciiToUpper[c]; } +static inline char ascii_toupper(unsigned char c) { + return kAsciiToUpper[c]; +} -#endif // STRINGS_ASCII_CTYPE_H_ +#endif // STRINGS_ASCII_CTYPE_H_ diff --git a/be/src/gutil/strings/charset.cc b/be/src/gutil/strings/charset.cc index 7725b0f4108551..1d62e0535f6d40 100644 --- a/be/src/gutil/strings/charset.cc +++ b/be/src/gutil/strings/charset.cc @@ -7,18 +7,18 @@ namespace strings { CharSet::CharSet() { - memset(this, 0, sizeof(*this)); + memset(this, 0, sizeof(*this)); } CharSet::CharSet(const char* characters) { - memset(this, 0, sizeof(*this)); - for (; *characters != '\0'; ++characters) { - Add(*characters); - } + memset(this, 0, sizeof(*this)); + for (; *characters != '\0'; ++characters) { + Add(*characters); + } } CharSet::CharSet(const CharSet& other) { - memcpy(this, &other, sizeof(*this)); + memcpy(this, &other, sizeof(*this)); } -} // namespace strings +} // namespace strings diff --git a/be/src/gutil/strings/charset.h b/be/src/gutil/strings/charset.h index b6b70134f4ada8..727c2400d30690 100644 --- a/be/src/gutil/strings/charset.h +++ b/be/src/gutil/strings/charset.h @@ -29,43 +29,43 @@ namespace strings { // http://goto/style-guide-exception-20978288 class CharSet { - public: - // Initialize a CharSet containing no characters or the given set of - // characters, respectively. - CharSet(); - // Deliberately an implicit constructor, so anything that takes a CharSet - // can also take an explicit list of characters. - CharSet(const char* characters); // NOLINT(runtime/explicit) - explicit CharSet(const CharSet& other); +public: + // Initialize a CharSet containing no characters or the given set of + // characters, respectively. + CharSet(); + // Deliberately an implicit constructor, so anything that takes a CharSet + // can also take an explicit list of characters. 
+ CharSet(const char* characters); // NOLINT(runtime/explicit) + explicit CharSet(const CharSet& other); - // Add or remove a character from the set. - void Add(unsigned char c) { bits_[Word(c)] |= BitMask(c); } - void Remove(unsigned char c) { bits_[Word(c)] &= ~BitMask(c); } + // Add or remove a character from the set. + void Add(unsigned char c) { bits_[Word(c)] |= BitMask(c); } + void Remove(unsigned char c) { bits_[Word(c)] &= ~BitMask(c); } - // Return true if this character is in the set - bool Test(unsigned char c) const { return bits_[Word(c)] & BitMask(c); } + // Return true if this character is in the set + bool Test(unsigned char c) const { return bits_[Word(c)] & BitMask(c); } - private: - // The numbers below are optimized for 64-bit hardware. TODO(user): In the - // future, we should change this to use uword_t and do various bits of magic - // to calculate the numbers at compile time. +private: + // The numbers below are optimized for 64-bit hardware. TODO(user): In the + // future, we should change this to use uword_t and do various bits of magic + // to calculate the numbers at compile time. - // In general, - // static const int kNumWords = max(32 / sizeof(uword_t), 1); - uint64 bits_[4]; + // In general, + // static const int kNumWords = max(32 / sizeof(uword_t), 1); + uint64 bits_[4]; - // 4 words => the high 2 bits of c are the word number. In general, - // kShiftValue = 8 - log2(kNumWords) - static int Word(unsigned char c) { return c >> 6; } + // 4 words => the high 2 bits of c are the word number. 
In general, + // kShiftValue = 8 - log2(kNumWords) + static int Word(unsigned char c) { return c >> 6; } - // And the value we AND with c is ((1 << shift value) - 1) - // static const int kLowBitsMask = (256 / kNumWords) - 1; - static uint64 BitMask(unsigned char c) { - uint64 mask = 1; - return mask << (c & 0x3f); - } + // And the value we AND with c is ((1 << shift value) - 1) + // static const int kLowBitsMask = (256 / kNumWords) - 1; + static uint64 BitMask(unsigned char c) { + uint64 mask = 1; + return mask << (c & 0x3f); + } }; -} // namespace strings +} // namespace strings -#endif // STRINGS_CHARSET_H_ +#endif // STRINGS_CHARSET_H_ diff --git a/be/src/gutil/strings/escaping.cc b/be/src/gutil/strings/escaping.cc index adb80938e11880..68996f5a4087c7 100644 --- a/be/src/gutil/strings/escaping.cc +++ b/be/src/gutil/strings/escaping.cc @@ -12,13 +12,13 @@ using std::numeric_limits; #include using std::vector; +#include "gutil/charmap.h" +#include "gutil/gscoped_ptr.h" #include "gutil/integral_types.h" #include "gutil/port.h" -#include "gutil/gscoped_ptr.h" -#include "gutil/strings/join.h" -#include "gutil/utf/utf.h" // for runetochar -#include "gutil/charmap.h" #include "gutil/stl_util.h" +#include "gutil/strings/join.h" +#include "gutil/utf/utf.h" // for runetochar namespace strings { @@ -37,22 +37,21 @@ static bool kLeaveNullsEscaped = true; // Example: [some "string" to test] --> [some ""string"" to test] // ---------------------------------------------------------------------- int EscapeStrForCSV(const char* src, char* dest, int dest_len) { - int used = 0; + int used = 0; - while (true) { - if (*src == '\0' && used < dest_len) { - dest[used] = '\0'; - return used; - } + while (true) { + if (*src == '\0' && used < dest_len) { + dest[used] = '\0'; + return used; + } - if (used + 1 >= dest_len) // +1 because we might require two characters - return -1; + if (used + 1 >= dest_len) // +1 because we might require two characters + return -1; - if (*src == '"') - 
dest[used++] = '"'; + if (*src == '"') dest[used++] = '"'; - dest[used++] = *src++; - } + dest[used++] = *src++; + } } // ---------------------------------------------------------------------- @@ -75,126 +74,150 @@ int EscapeStrForCSV(const char* src, char* dest, int dest_len) { #define IS_OCTAL_DIGIT(c) (((c) >= '0') && ((c) <= '7')) int UnescapeCEscapeSequences(const char* source, char* dest) { - return UnescapeCEscapeSequences(source, dest, nullptr); + return UnescapeCEscapeSequences(source, dest, nullptr); } -int UnescapeCEscapeSequences(const char* source, char* dest, - vector *errors) { - char* d = dest; - const char* p = source; +int UnescapeCEscapeSequences(const char* source, char* dest, vector* errors) { + char* d = dest; + const char* p = source; - // Small optimization for case where source = dest and there's no escaping - while ( p == d && *p != '\0' && *p != '\\' ) - p++, d++; + // Small optimization for case where source = dest and there's no escaping + while (p == d && *p != '\0' && *p != '\\') p++, d++; - while (*p != '\0') { - if (*p != '\\') { - *d++ = *p++; - } else { - switch ( *++p ) { // skip past the '\\' - case '\0': - LOG_STRING(ERROR, errors) << "String cannot end with \\"; - *d = '\0'; - return d - dest; // we're done with p - case 'a': *d++ = '\a'; break; - case 'b': *d++ = '\b'; break; - case 'f': *d++ = '\f'; break; - case 'n': *d++ = '\n'; break; - case 'r': *d++ = '\r'; break; - case 't': *d++ = '\t'; break; - case 'v': *d++ = '\v'; break; - case '\\': *d++ = '\\'; break; - case '?': *d++ = '\?'; break; // \? Who knew? 
- case '\'': *d++ = '\''; break; - case '"': *d++ = '\"'; break; - case '0': case '1': case '2': case '3': // octal digit: 1 to 3 digits - case '4': case '5': case '6': case '7': { - const char *octal_start = p; - unsigned int ch = *p - '0'; - if ( IS_OCTAL_DIGIT(p[1]) ) - ch = ch * 8 + *++p - '0'; - if ( IS_OCTAL_DIGIT(p[1]) ) // safe (and easy) to do this twice - ch = ch * 8 + *++p - '0'; // now points at last digit - if (ch > 0xFF) - LOG_STRING(ERROR, errors) << "Value of " << - "\\" << string(octal_start, p+1-octal_start) << - " exceeds 8 bits"; - *d++ = ch; - break; - } - case 'x': case 'X': { - if (!ascii_isxdigit(p[1])) { - if (p[1] == '\0') { - LOG_STRING(ERROR, errors) << "String cannot end with \\x"; - } else { - LOG_STRING(ERROR, errors) << - "\\x cannot be followed by a non-hex digit: \\" << *p << p[1]; + while (*p != '\0') { + if (*p != '\\') { + *d++ = *p++; + } else { + switch (*++p) { // skip past the '\\' + case '\0': + LOG_STRING(ERROR, errors) << "String cannot end with \\"; + *d = '\0'; + return d - dest; // we're done with p + case 'a': + *d++ = '\a'; + break; + case 'b': + *d++ = '\b'; + break; + case 'f': + *d++ = '\f'; + break; + case 'n': + *d++ = '\n'; + break; + case 'r': + *d++ = '\r'; + break; + case 't': + *d++ = '\t'; + break; + case 'v': + *d++ = '\v'; + break; + case '\\': + *d++ = '\\'; + break; + case '?': + *d++ = '\?'; + break; // \? Who knew? 
+ case '\'': + *d++ = '\''; + break; + case '"': + *d++ = '\"'; + break; + case '0': + case '1': + case '2': + case '3': // octal digit: 1 to 3 digits + case '4': + case '5': + case '6': + case '7': { + const char* octal_start = p; + unsigned int ch = *p - '0'; + if (IS_OCTAL_DIGIT(p[1])) ch = ch * 8 + *++p - '0'; + if (IS_OCTAL_DIGIT(p[1])) // safe (and easy) to do this twice + ch = ch * 8 + *++p - '0'; // now points at last digit + if (ch > 0xFF) + LOG_STRING(ERROR, errors) << "Value of " + << "\\" << string(octal_start, p + 1 - octal_start) + << " exceeds 8 bits"; + *d++ = ch; + break; } - break; - } - unsigned int ch = 0; - const char *hex_start = p; - while (ascii_isxdigit(p[1])) // arbitrarily many hex digits - ch = (ch << 4) + hex_digit_to_int(*++p); - if (ch > 0xFF) - LOG_STRING(ERROR, errors) << "Value of " << - "\\" << string(hex_start, p+1-hex_start) << " exceeds 8 bits"; - *d++ = ch; - break; - } - case 'u': { - // \uhhhh => convert 4 hex digits to UTF-8 - char32 rune = 0; - const char *hex_start = p; - for (int i = 0; i < 4; ++i) { - if (ascii_isxdigit(p[1])) { // Look one char ahead. - rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p. 
- } else { - LOG_STRING(ERROR, errors) - << "\\u must be followed by 4 hex digits: \\" - << string(hex_start, p+1-hex_start); - break; + case 'x': + case 'X': { + if (!ascii_isxdigit(p[1])) { + if (p[1] == '\0') { + LOG_STRING(ERROR, errors) << "String cannot end with \\x"; + } else { + LOG_STRING(ERROR, errors) + << "\\x cannot be followed by a non-hex digit: \\" << *p << p[1]; + } + break; + } + unsigned int ch = 0; + const char* hex_start = p; + while (ascii_isxdigit(p[1])) // arbitrarily many hex digits + ch = (ch << 4) + hex_digit_to_int(*++p); + if (ch > 0xFF) + LOG_STRING(ERROR, errors) + << "Value of " + << "\\" << string(hex_start, p + 1 - hex_start) << " exceeds 8 bits"; + *d++ = ch; + break; } - } - d += runetochar(d, &rune); - break; - } - case 'U': { - // \Uhhhhhhhh => convert 8 hex digits to UTF-8 - char32 rune = 0; - const char *hex_start = p; - for (int i = 0; i < 8; ++i) { - if (ascii_isxdigit(p[1])) { // Look one char ahead. - // Don't change rune until we're sure this - // is within the Unicode limit, but do advance p. - char32 newrune = (rune << 4) + hex_digit_to_int(*++p); - if (newrune > 0x10FFFF) { - LOG_STRING(ERROR, errors) - << "Value of \\" - << string(hex_start, p + 1 - hex_start) - << " exceeds Unicode limit (0x10FFFF)"; + case 'u': { + // \uhhhh => convert 4 hex digits to UTF-8 + char32 rune = 0; + const char* hex_start = p; + for (int i = 0; i < 4; ++i) { + if (ascii_isxdigit(p[1])) { // Look one char ahead. + rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p. + } else { + LOG_STRING(ERROR, errors) << "\\u must be followed by 4 hex digits: \\" + << string(hex_start, p + 1 - hex_start); + break; + } + } + d += runetochar(d, &rune); + break; + } + case 'U': { + // \Uhhhhhhhh => convert 8 hex digits to UTF-8 + char32 rune = 0; + const char* hex_start = p; + for (int i = 0; i < 8; ++i) { + if (ascii_isxdigit(p[1])) { // Look one char ahead. 
+ // Don't change rune until we're sure this + // is within the Unicode limit, but do advance p. + char32 newrune = (rune << 4) + hex_digit_to_int(*++p); + if (newrune > 0x10FFFF) { + LOG_STRING(ERROR, errors) + << "Value of \\" << string(hex_start, p + 1 - hex_start) + << " exceeds Unicode limit (0x10FFFF)"; + break; + } else { + rune = newrune; + } + } else { + LOG_STRING(ERROR, errors) << "\\U must be followed by 8 hex digits: \\" + << string(hex_start, p + 1 - hex_start); + break; + } + } + d += runetochar(d, &rune); break; - } else { - rune = newrune; - } - } else { - LOG_STRING(ERROR, errors) - << "\\U must be followed by 8 hex digits: \\" - << string(hex_start, p+1-hex_start); - break; } - } - d += runetochar(d, &rune); - break; + default: + LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p; + } + p++; // read past letter we escaped } - default: - LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p; - } - p++; // read past letter we escaped } - } - *d = '\0'; - return d - dest; + *d = '\0'; + return d - dest; } // ---------------------------------------------------------------------- @@ -215,23 +238,21 @@ int UnescapeCEscapeSequences(const char* source, char* dest, // // ---------------------------------------------------------------------- int UnescapeCEscapeString(const string& src, string* dest) { - return UnescapeCEscapeString(src, dest, nullptr); + return UnescapeCEscapeString(src, dest, nullptr); } -int UnescapeCEscapeString(const string& src, string* dest, - vector *errors) { - CHECK(dest); - dest->resize(src.size() + 1); - int len = UnescapeCEscapeSequences(src.c_str(), - const_cast(dest->data()), errors); - dest->resize(len); - return len; +int UnescapeCEscapeString(const string& src, string* dest, vector* errors) { + CHECK(dest); + dest->resize(src.size() + 1); + int len = UnescapeCEscapeSequences(src.c_str(), const_cast(dest->data()), errors); + dest->resize(len); + return len; } string UnescapeCEscapeString(const 
string& src) { - gscoped_array unescaped(new char[src.size() + 1]); - int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), nullptr); - return string(unescaped.get(), len); + gscoped_array unescaped(new char[src.size() + 1]); + int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), nullptr); + return string(unescaped.get(), len); } // ---------------------------------------------------------------------- @@ -251,186 +272,208 @@ string UnescapeCEscapeString(const string& src) { // NOTE: any changes to this function must also be reflected in the older // UnescapeCEscapeSequences(). // ---------------------------------------------------------------------- -static bool CUnescapeInternal(const StringPiece& source, - bool leave_nulls_escaped, - char* dest, - int* dest_len, - string* error) { - char* d = dest; - const char* p = source.data(); - const char* end = source.end(); - const char* last_byte = end - 1; - - // Small optimization for case where source = dest and there's no escaping - while (p == d && p < end && *p != '\\') - p++, d++; - - while (p < end) { - if (*p != '\\') { - *d++ = *p++; - } else { - if (++p > last_byte) { // skip past the '\\' - if (error) *error = "String cannot end with \\"; - return false; - } - switch (*p) { - case 'a': *d++ = '\a'; break; - case 'b': *d++ = '\b'; break; - case 'f': *d++ = '\f'; break; - case 'n': *d++ = '\n'; break; - case 'r': *d++ = '\r'; break; - case 't': *d++ = '\t'; break; - case 'v': *d++ = '\v'; break; - case '\\': *d++ = '\\'; break; - case '?': *d++ = '\?'; break; // \? Who knew? 
- case '\'': *d++ = '\''; break; - case '"': *d++ = '\"'; break; - case '0': case '1': case '2': case '3': // octal digit: 1 to 3 digits - case '4': case '5': case '6': case '7': { - const char *octal_start = p; - unsigned int ch = *p - '0'; - if (p < last_byte && IS_OCTAL_DIGIT(p[1])) - ch = ch * 8 + *++p - '0'; - if (p < last_byte && IS_OCTAL_DIGIT(p[1])) - ch = ch * 8 + *++p - '0'; // now points at last digit - if (ch > 0xff) { - if (error) { - *error = "Value of \\" + - string(octal_start, p + 1 - octal_start) + - " exceeds 0xff"; - } - return false; - } - if ((ch == 0) && leave_nulls_escaped) { - // Copy the escape sequence for the null character - const int octal_size = p + 1 - octal_start; - *d++ = '\\'; - memcpy(d, octal_start, octal_size); - d += octal_size; - break; - } - *d++ = ch; - break; - } - case 'x': case 'X': { - if (p >= last_byte) { - if (error) *error = "String cannot end with \\x"; - return false; - } else if (!ascii_isxdigit(p[1])) { - if (error) *error = "\\x cannot be followed by a non-hex digit"; - return false; - } - unsigned int ch = 0; - const char *hex_start = p; - while (p < last_byte && ascii_isxdigit(p[1])) - // Arbitrarily many hex digits - ch = (ch << 4) + hex_digit_to_int(*++p); - if (ch > 0xFF) { - if (error) { - *error = "Value of \\" + string(hex_start, p + 1 - hex_start) + - " exceeds 0xff"; +static bool CUnescapeInternal(const StringPiece& source, bool leave_nulls_escaped, char* dest, + int* dest_len, string* error) { + char* d = dest; + const char* p = source.data(); + const char* end = source.end(); + const char* last_byte = end - 1; + + // Small optimization for case where source = dest and there's no escaping + while (p == d && p < end && *p != '\\') p++, d++; + + while (p < end) { + if (*p != '\\') { + *d++ = *p++; + } else { + if (++p > last_byte) { // skip past the '\\' + if (error) *error = "String cannot end with \\"; + return false; } - return false; - } - if ((ch == 0) && leave_nulls_escaped) { - // Copy the 
escape sequence for the null character - const int hex_size = p + 1 - hex_start; - *d++ = '\\'; - memcpy(d, hex_start, hex_size); - d += hex_size; - break; - } - *d++ = ch; - break; - } - case 'u': { - // \uhhhh => convert 4 hex digits to UTF-8 - char32 rune = 0; - const char *hex_start = p; - if (p + 4 >= end) { - if (error) { - *error = "\\u must be followed by 4 hex digits: \\" + - string(hex_start, p + 1 - hex_start); + switch (*p) { + case 'a': + *d++ = '\a'; + break; + case 'b': + *d++ = '\b'; + break; + case 'f': + *d++ = '\f'; + break; + case 'n': + *d++ = '\n'; + break; + case 'r': + *d++ = '\r'; + break; + case 't': + *d++ = '\t'; + break; + case 'v': + *d++ = '\v'; + break; + case '\\': + *d++ = '\\'; + break; + case '?': + *d++ = '\?'; + break; // \? Who knew? + case '\'': + *d++ = '\''; + break; + case '"': + *d++ = '\"'; + break; + case '0': + case '1': + case '2': + case '3': // octal digit: 1 to 3 digits + case '4': + case '5': + case '6': + case '7': { + const char* octal_start = p; + unsigned int ch = *p - '0'; + if (p < last_byte && IS_OCTAL_DIGIT(p[1])) ch = ch * 8 + *++p - '0'; + if (p < last_byte && IS_OCTAL_DIGIT(p[1])) + ch = ch * 8 + *++p - '0'; // now points at last digit + if (ch > 0xff) { + if (error) { + *error = "Value of \\" + string(octal_start, p + 1 - octal_start) + + " exceeds 0xff"; + } + return false; + } + if ((ch == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + const int octal_size = p + 1 - octal_start; + *d++ = '\\'; + memcpy(d, octal_start, octal_size); + d += octal_size; + break; + } + *d++ = ch; + break; } - return false; - } - for (int i = 0; i < 4; ++i) { - // Look one char ahead. - if (ascii_isxdigit(p[1])) { - rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p. 
- } else { - if (error) { - *error = "\\u must be followed by 4 hex digits: \\" + - string(hex_start, p + 1 - hex_start); - } - return false; + case 'x': + case 'X': { + if (p >= last_byte) { + if (error) *error = "String cannot end with \\x"; + return false; + } else if (!ascii_isxdigit(p[1])) { + if (error) *error = "\\x cannot be followed by a non-hex digit"; + return false; + } + unsigned int ch = 0; + const char* hex_start = p; + while (p < last_byte && ascii_isxdigit(p[1])) + // Arbitrarily many hex digits + ch = (ch << 4) + hex_digit_to_int(*++p); + if (ch > 0xFF) { + if (error) { + *error = "Value of \\" + string(hex_start, p + 1 - hex_start) + + " exceeds 0xff"; + } + return false; + } + if ((ch == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + const int hex_size = p + 1 - hex_start; + *d++ = '\\'; + memcpy(d, hex_start, hex_size); + d += hex_size; + break; + } + *d++ = ch; + break; } - } - if ((rune == 0) && leave_nulls_escaped) { - // Copy the escape sequence for the null character - *d++ = '\\'; - memcpy(d, hex_start, 5); // u0000 - d += 5; - break; - } - d += runetochar(d, &rune); - break; - } - case 'U': { - // \Uhhhhhhhh => convert 8 hex digits to UTF-8 - char32 rune = 0; - const char *hex_start = p; - if (p + 8 >= end) { - if (error) { - *error = "\\U must be followed by 8 hex digits: \\" + - string(hex_start, p + 1 - hex_start); + case 'u': { + // \uhhhh => convert 4 hex digits to UTF-8 + char32 rune = 0; + const char* hex_start = p; + if (p + 4 >= end) { + if (error) { + *error = "\\u must be followed by 4 hex digits: \\" + + string(hex_start, p + 1 - hex_start); + } + return false; + } + for (int i = 0; i < 4; ++i) { + // Look one char ahead. + if (ascii_isxdigit(p[1])) { + rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p. 
+ } else { + if (error) { + *error = "\\u must be followed by 4 hex digits: \\" + + string(hex_start, p + 1 - hex_start); + } + return false; + } + } + if ((rune == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + *d++ = '\\'; + memcpy(d, hex_start, 5); // u0000 + d += 5; + break; + } + d += runetochar(d, &rune); + break; } - return false; - } - for (int i = 0; i < 8; ++i) { - // Look one char ahead. - if (ascii_isxdigit(p[1])) { - // Don't change rune until we're sure this - // is within the Unicode limit, but do advance p. - char32 newrune = (rune << 4) + hex_digit_to_int(*++p); - if (newrune > 0x10FFFF) { - if (error) { - *error = "Value of \\" + - string(hex_start, p + 1 - hex_start) + - " exceeds Unicode limit (0x10FFFF)"; + case 'U': { + // \Uhhhhhhhh => convert 8 hex digits to UTF-8 + char32 rune = 0; + const char* hex_start = p; + if (p + 8 >= end) { + if (error) { + *error = "\\U must be followed by 8 hex digits: \\" + + string(hex_start, p + 1 - hex_start); + } + return false; + } + for (int i = 0; i < 8; ++i) { + // Look one char ahead. + if (ascii_isxdigit(p[1])) { + // Don't change rune until we're sure this + // is within the Unicode limit, but do advance p. 
+ char32 newrune = (rune << 4) + hex_digit_to_int(*++p); + if (newrune > 0x10FFFF) { + if (error) { + *error = "Value of \\" + string(hex_start, p + 1 - hex_start) + + " exceeds Unicode limit (0x10FFFF)"; + } + return false; + } else { + rune = newrune; + } + } else { + if (error) { + *error = "\\U must be followed by 8 hex digits: \\" + + string(hex_start, p + 1 - hex_start); + } + return false; + } } + if ((rune == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + *d++ = '\\'; + memcpy(d, hex_start, 9); // U00000000 + d += 9; + break; + } + d += runetochar(d, &rune); + break; + } + default: { + if (error) *error = string("Unknown escape sequence: \\") + *p; return false; - } else { - rune = newrune; - } - } else { - if (error) { - *error = "\\U must be followed by 8 hex digits: \\" + - string(hex_start, p + 1 - hex_start); - } - return false; } - } - if ((rune == 0) && leave_nulls_escaped) { - // Copy the escape sequence for the null character - *d++ = '\\'; - memcpy(d, hex_start, 9); // U00000000 - d += 9; - break; - } - d += runetochar(d, &rune); - break; - } - default: { - if (error) *error = string("Unknown escape sequence: \\") + *p; - return false; + } + p++; // read past letter we escaped } - } - p++; // read past letter we escaped } - } - *dest_len = d - dest; - return true; + *dest_len = d - dest; + return true; } // ---------------------------------------------------------------------- @@ -439,21 +482,16 @@ static bool CUnescapeInternal(const StringPiece& source, // Same as above but uses a C++ string for output. 'source' and 'dest' // may be the same. 
// ---------------------------------------------------------------------- -bool CUnescapeInternal(const StringPiece& source, - bool leave_nulls_escaped, - string* dest, +bool CUnescapeInternal(const StringPiece& source, bool leave_nulls_escaped, string* dest, string* error) { - dest->resize(source.size()); - int dest_size; - if (!CUnescapeInternal(source, - leave_nulls_escaped, - const_cast(dest->data()), - &dest_size, - error)) { - return false; - } - dest->resize(dest_size); - return true; + dest->resize(source.size()); + int dest_size; + if (!CUnescapeInternal(source, leave_nulls_escaped, const_cast(dest->data()), &dest_size, + error)) { + return false; + } + dest->resize(dest_size); + return true; } // ---------------------------------------------------------------------- @@ -461,13 +499,12 @@ bool CUnescapeInternal(const StringPiece& source, // // See CUnescapeInternal() for implementation details. // ---------------------------------------------------------------------- -bool CUnescape(const StringPiece& source, char* dest, int* dest_len, - string* error) { - return CUnescapeInternal(source, kUnescapeNulls, dest, dest_len, error); +bool CUnescape(const StringPiece& source, char* dest, int* dest_len, string* error) { + return CUnescapeInternal(source, kUnescapeNulls, dest, dest_len, error); } bool CUnescape(const StringPiece& source, string* dest, string* error) { - return CUnescapeInternal(source, kUnescapeNulls, dest, error); + return CUnescapeInternal(source, kUnescapeNulls, dest, error); } // ---------------------------------------------------------------------- @@ -475,17 +512,13 @@ bool CUnescape(const StringPiece& source, string* dest, string* error) { // // See CUnescapeInternal() for implementation details. 
// ---------------------------------------------------------------------- -bool CUnescapeForNullTerminatedString(const StringPiece& source, - char* dest, - int* dest_len, +bool CUnescapeForNullTerminatedString(const StringPiece& source, char* dest, int* dest_len, string* error) { - return CUnescapeInternal(source, kLeaveNullsEscaped, dest, dest_len, error); + return CUnescapeInternal(source, kLeaveNullsEscaped, dest, dest_len, error); } -bool CUnescapeForNullTerminatedString(const StringPiece& source, - string* dest, - string* error) { - return CUnescapeInternal(source, kLeaveNullsEscaped, dest, error); +bool CUnescapeForNullTerminatedString(const StringPiece& source, string* dest, string* error) { + return CUnescapeInternal(source, kLeaveNullsEscaped, dest, error); } // ---------------------------------------------------------------------- @@ -503,67 +536,82 @@ bool CUnescapeForNullTerminatedString(const StringPiece& source, // // Currently only \n, \r, \t, ", ', \ and !ascii_isprint() chars are escaped. 
// ---------------------------------------------------------------------- -int CEscapeInternal(const char* src, int src_len, char* dest, - int dest_len, bool use_hex, bool utf8_safe) { - const char* src_end = src + src_len; - int used = 0; - bool last_hex_escape = false; // true if last output char was \xNN - - for (; src < src_end; src++) { - if (dest_len - used < 2) // Need space for two letter escape - return -1; - - bool is_hex_escape = false; - switch (*src) { - case '\n': dest[used++] = '\\'; dest[used++] = 'n'; break; - case '\r': dest[used++] = '\\'; dest[used++] = 'r'; break; - case '\t': dest[used++] = '\\'; dest[used++] = 't'; break; - case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break; - case '\'': dest[used++] = '\\'; dest[used++] = '\''; break; - case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break; - default: - // Note that if we emit \xNN and the src character after that is a hex - // digit then that digit must be escaped too to prevent it being - // interpreted as part of the character code by C. - if ((!utf8_safe || *src < 0x80) && - (!ascii_isprint(*src) || - (last_hex_escape && ascii_isxdigit(*src)))) { - if (dest_len - used < 4) // need space for 4 letter escape +int CEscapeInternal(const char* src, int src_len, char* dest, int dest_len, bool use_hex, + bool utf8_safe) { + const char* src_end = src + src_len; + int used = 0; + bool last_hex_escape = false; // true if last output char was \xNN + + for (; src < src_end; src++) { + if (dest_len - used < 2) // Need space for two letter escape return -1; - sprintf(dest + used, (use_hex ? 
"\\x%02x" : "\\%03o"), *src); - is_hex_escape = use_hex; - used += 4; - } else { - dest[used++] = *src; - break; + + bool is_hex_escape = false; + switch (*src) { + case '\n': + dest[used++] = '\\'; + dest[used++] = 'n'; + break; + case '\r': + dest[used++] = '\\'; + dest[used++] = 'r'; + break; + case '\t': + dest[used++] = '\\'; + dest[used++] = 't'; + break; + case '\"': + dest[used++] = '\\'; + dest[used++] = '\"'; + break; + case '\'': + dest[used++] = '\\'; + dest[used++] = '\''; + break; + case '\\': + dest[used++] = '\\'; + dest[used++] = '\\'; + break; + default: + // Note that if we emit \xNN and the src character after that is a hex + // digit then that digit must be escaped too to prevent it being + // interpreted as part of the character code by C. + if ((!utf8_safe || *src < 0x80) && + (!ascii_isprint(*src) || (last_hex_escape && ascii_isxdigit(*src)))) { + if (dest_len - used < 4) // need space for 4 letter escape + return -1; + sprintf(dest + used, (use_hex ? "\\x%02x" : "\\%03o"), *src); + is_hex_escape = use_hex; + used += 4; + } else { + dest[used++] = *src; + break; + } } + last_hex_escape = is_hex_escape; } - last_hex_escape = is_hex_escape; - } - if (dest_len - used < 1) // make sure that there is room for \0 - return -1; + if (dest_len - used < 1) // make sure that there is room for \0 + return -1; - dest[used] = '\0'; // doesn't count towards return value though - return used; + dest[used] = '\0'; // doesn't count towards return value though + return used; } int CEscapeString(const char* src, int src_len, char* dest, int dest_len) { - return CEscapeInternal(src, src_len, dest, dest_len, false, false); + return CEscapeInternal(src, src_len, dest, dest_len, false, false); } int CHexEscapeString(const char* src, int src_len, char* dest, int dest_len) { - return CEscapeInternal(src, src_len, dest, dest_len, true, false); + return CEscapeInternal(src, src_len, dest, dest_len, true, false); } -int Utf8SafeCEscapeString(const char* src, int 
src_len, char* dest, - int dest_len) { - return CEscapeInternal(src, src_len, dest, dest_len, false, true); +int Utf8SafeCEscapeString(const char* src, int src_len, char* dest, int dest_len) { + return CEscapeInternal(src, src_len, dest, dest_len, false, true); } -int Utf8SafeCHexEscapeString(const char* src, int src_len, char* dest, - int dest_len) { - return CEscapeInternal(src, src_len, dest, dest_len, true, true); +int Utf8SafeCHexEscapeString(const char* src, int src_len, char* dest, int dest_len) { + return CEscapeInternal(src, src_len, dest, dest_len, true, true); } // ---------------------------------------------------------------------- @@ -580,87 +628,77 @@ int Utf8SafeCHexEscapeString(const char* src, int src_len, char* dest, // Currently only \n, \r, \t, ", ', \ and !ascii_isprint() chars are escaped. // ---------------------------------------------------------------------- string CEscape(const StringPiece& src) { - const int dest_length = src.size() * 4 + 1; // Maximum possible expansion - gscoped_array dest(new char[dest_length]); - const int len = CEscapeInternal(src.data(), src.size(), - dest.get(), dest_length, false, false); - DCHECK_GE(len, 0); - return string(dest.get(), len); + const int dest_length = src.size() * 4 + 1; // Maximum possible expansion + gscoped_array dest(new char[dest_length]); + const int len = CEscapeInternal(src.data(), src.size(), dest.get(), dest_length, false, false); + DCHECK_GE(len, 0); + return string(dest.get(), len); } string CHexEscape(const StringPiece& src) { - const int dest_length = src.size() * 4 + 1; // Maximum possible expansion - gscoped_array dest(new char[dest_length]); - const int len = CEscapeInternal(src.data(), src.size(), - dest.get(), dest_length, true, false); - DCHECK_GE(len, 0); - return string(dest.get(), len); + const int dest_length = src.size() * 4 + 1; // Maximum possible expansion + gscoped_array dest(new char[dest_length]); + const int len = CEscapeInternal(src.data(), src.size(), 
dest.get(), dest_length, true, false); + DCHECK_GE(len, 0); + return string(dest.get(), len); } string Utf8SafeCEscape(const StringPiece& src) { - const int dest_length = src.size() * 4 + 1; // Maximum possible expansion - gscoped_array dest(new char[dest_length]); - const int len = CEscapeInternal(src.data(), src.size(), - dest.get(), dest_length, false, true); - DCHECK_GE(len, 0); - return string(dest.get(), len); + const int dest_length = src.size() * 4 + 1; // Maximum possible expansion + gscoped_array dest(new char[dest_length]); + const int len = CEscapeInternal(src.data(), src.size(), dest.get(), dest_length, false, true); + DCHECK_GE(len, 0); + return string(dest.get(), len); } string Utf8SafeCHexEscape(const StringPiece& src) { - const int dest_length = src.size() * 4 + 1; // Maximum possible expansion - gscoped_array dest(new char[dest_length]); - const int len = CEscapeInternal(src.data(), src.size(), - dest.get(), dest_length, true, true); - DCHECK_GE(len, 0); - return string(dest.get(), len); + const int dest_length = src.size() * 4 + 1; // Maximum possible expansion + gscoped_array dest(new char[dest_length]); + const int len = CEscapeInternal(src.data(), src.size(), dest.get(), dest_length, true, true); + DCHECK_GE(len, 0); + return string(dest.get(), len); } // ---------------------------------------------------------------------- // BackslashEscape and BackslashUnescape // ---------------------------------------------------------------------- -void BackslashEscape(const StringPiece& src, - const strings::CharSet& to_escape, - string* dest) { - dest->reserve(dest->size() + src.size()); - for (const char *p = src.data(), *end = src.data() + src.size(); - p != end; ) { - // Advance to next character we need to escape, or to end of source - const char* next = p; - while (next < end && !to_escape.Test(*next)) { - next++; +void BackslashEscape(const StringPiece& src, const strings::CharSet& to_escape, string* dest) { + dest->reserve(dest->size() + 
src.size()); + for (const char *p = src.data(), *end = src.data() + src.size(); p != end;) { + // Advance to next character we need to escape, or to end of source + const char* next = p; + while (next < end && !to_escape.Test(*next)) { + next++; + } + // Append the whole run of non-escaped chars + dest->append(p, next - p); + if (next == end) break; + // Char at *next needs to be escaped. Append backslash followed by *next + char c[2]; + c[0] = '\\'; + c[1] = *next; + dest->append(c, 2); + p = next + 1; } - // Append the whole run of non-escaped chars - dest->append(p, next - p); - if (next == end) break; - // Char at *next needs to be escaped. Append backslash followed by *next - char c[2]; - c[0] = '\\'; - c[1] = *next; - dest->append(c, 2); - p = next + 1; - } -} - -void BackslashUnescape(const StringPiece& src, - const strings::CharSet& to_unescape, - string* dest) { - dest->reserve(dest->size() + src.size()); - bool escaped = false; - for (const char* p = src.data(), *end = src.data() + src.size(); - p != end; ++p) { - if (escaped) { - if (!to_unescape.Test(*p)) { - // Keep the backslash - dest->push_back('\\'); - } - dest->push_back(*p); - escaped = false; - } else if (*p == '\\') { - escaped = true; - } else { - dest->push_back(*p); +} + +void BackslashUnescape(const StringPiece& src, const strings::CharSet& to_unescape, string* dest) { + dest->reserve(dest->size() + src.size()); + bool escaped = false; + for (const char *p = src.data(), *end = src.data() + src.size(); p != end; ++p) { + if (escaped) { + if (!to_unescape.Test(*p)) { + // Keep the backslash + dest->push_back('\\'); + } + dest->push_back(*p); + escaped = false; + } else if (*p == '\\') { + escaped = true; + } else { + dest->push_back(*p); + } } - } } // ---------------------------------------------------------------------- @@ -688,36 +726,35 @@ void BackslashUnescape(const StringPiece& src, // See QEncodingUnescape(). 
// ---------------------------------------------------------------------- -int QuotedPrintableUnescape(const char *source, int slen, - char *dest, int szdest) { - char* d = dest; - const char* p = source; - - while ( p < source+slen && *p != '\0' && d < dest+szdest ) { - switch (*p) { - case '=': - // If it's valid, convert to hex and insert or remove line-wrap. - // In the case of line-wrap removal, we allow LF as well as CRLF. - if ( p < source + slen - 1 ) { - if ( p[1] == '\n' ) { - p++; - } else if ( p < source + slen - 2 ) { - if ( ascii_isxdigit(p[1]) && ascii_isxdigit(p[2]) ) { - *d++ = hex_digit_to_int(p[1])*16 + hex_digit_to_int(p[2]); - p += 2; - } else if ( p[1] == '\r' && p[2] == '\n' ) { - p += 2; +int QuotedPrintableUnescape(const char* source, int slen, char* dest, int szdest) { + char* d = dest; + const char* p = source; + + while (p < source + slen && *p != '\0' && d < dest + szdest) { + switch (*p) { + case '=': + // If it's valid, convert to hex and insert or remove line-wrap. + // In the case of line-wrap removal, we allow LF as well as CRLF. + if (p < source + slen - 1) { + if (p[1] == '\n') { + p++; + } else if (p < source + slen - 2) { + if (ascii_isxdigit(p[1]) && ascii_isxdigit(p[2])) { + *d++ = hex_digit_to_int(p[1]) * 16 + hex_digit_to_int(p[2]); + p += 2; + } else if (p[1] == '\r' && p[2] == '\n') { + p += 2; + } + } } - } + p++; + break; + default: + *d++ = *p++; + break; } - p++; - break; - default: - *d++ = *p++; - break; } - } - return (d-dest); + return (d - dest); } // ---------------------------------------------------------------------- @@ -726,89 +763,87 @@ int QuotedPrintableUnescape(const char *source, int slen, // This is very similar to QuotedPrintableUnescape except that we convert // '_'s into spaces. 
(See RFC 2047) // ---------------------------------------------------------------------- -int QEncodingUnescape(const char *source, int slen, - char *dest, int szdest) { - char* d = dest; - const char* p = source; - - while ( p < source+slen && *p != '\0' && d < dest+szdest ) { - switch (*p) { - case '=': - // If it's valid, convert to hex and insert or remove line-wrap. - // In the case of line-wrap removal, the assumption is that this - // is an RFC-compliant message with lines terminated by CRLF. - if (p < source+slen-2) { - if ( ascii_isxdigit(p[1]) && ascii_isxdigit(p[2]) ) { - *d++ = hex_digit_to_int(p[1])*16 + hex_digit_to_int(p[2]); - p += 2; - } else if ( p[1] == '\r' && p[2] == '\n' ) { - p += 2; - } +int QEncodingUnescape(const char* source, int slen, char* dest, int szdest) { + char* d = dest; + const char* p = source; + + while (p < source + slen && *p != '\0' && d < dest + szdest) { + switch (*p) { + case '=': + // If it's valid, convert to hex and insert or remove line-wrap. + // In the case of line-wrap removal, the assumption is that this + // is an RFC-compliant message with lines terminated by CRLF. + if (p < source + slen - 2) { + if (ascii_isxdigit(p[1]) && ascii_isxdigit(p[2])) { + *d++ = hex_digit_to_int(p[1]) * 16 + hex_digit_to_int(p[2]); + p += 2; + } else if (p[1] == '\r' && p[2] == '\n') { + p += 2; + } + } + p++; + break; + case '_': // According to rfc2047, _'s are to be treated as spaces + *d++ = ' '; + p++; + break; + default: + *d++ = *p++; + break; } - p++; - break; - case '_': // According to rfc2047, _'s are to be treated as spaces - *d++ = ' '; - p++; - break; - default: - *d++ = *p++; - break; } - } - return (d-dest); + return (d - dest); } int CalculateBase64EscapedLen(int input_len, bool do_padding) { - // Base64 encodes three bytes of input at a time. If the input is not - // divisible by three, we pad as appropriate. 
- // - // (from http://www.ietf.org/rfc/rfc3548.txt) - // Special processing is performed if fewer than 24 bits are available - // at the end of the data being encoded. A full encoding quantum is - // always completed at the end of a quantity. When fewer than 24 input - // bits are available in an input group, zero bits are added (on the - // right) to form an integral number of 6-bit groups. Padding at the - // end of the data is performed using the '=' character. Since all base - // 64 input is an integral number of octets, only the following cases - // can arise: - - - // Base64 encodes each three bytes of input into four bytes of output. - int len = (input_len / 3) * 4; - - if (input_len % 3 == 0) { - // (from http://www.ietf.org/rfc/rfc3548.txt) - // (1) the final quantum of encoding input is an integral multiple of 24 - // bits; here, the final unit of encoded output will be an integral - // multiple of 4 characters with no "=" padding, - } else if (input_len % 3 == 1) { - // (from http://www.ietf.org/rfc/rfc3548.txt) - // (2) the final quantum of encoding input is exactly 8 bits; here, the - // final unit of encoded output will be two characters followed by two - // "=" padding characters, or - len += 2; - if (do_padding) { - len += 2; - } - } else { // (input_len % 3 == 2) + // Base64 encodes three bytes of input at a time. If the input is not + // divisible by three, we pad as appropriate. + // // (from http://www.ietf.org/rfc/rfc3548.txt) - // (3) the final quantum of encoding input is exactly 16 bits; here, the - // final unit of encoded output will be three characters followed by one - // "=" padding character. - len += 3; - if (do_padding) { - len += 1; + // Special processing is performed if fewer than 24 bits are available + // at the end of the data being encoded. A full encoding quantum is + // always completed at the end of a quantity. 
When fewer than 24 input + // bits are available in an input group, zero bits are added (on the + // right) to form an integral number of 6-bit groups. Padding at the + // end of the data is performed using the '=' character. Since all base + // 64 input is an integral number of octets, only the following cases + // can arise: + + // Base64 encodes each three bytes of input into four bytes of output. + int len = (input_len / 3) * 4; + + if (input_len % 3 == 0) { + // (from http://www.ietf.org/rfc/rfc3548.txt) + // (1) the final quantum of encoding input is an integral multiple of 24 + // bits; here, the final unit of encoded output will be an integral + // multiple of 4 characters with no "=" padding, + } else if (input_len % 3 == 1) { + // (from http://www.ietf.org/rfc/rfc3548.txt) + // (2) the final quantum of encoding input is exactly 8 bits; here, the + // final unit of encoded output will be two characters followed by two + // "=" padding characters, or + len += 2; + if (do_padding) { + len += 2; + } + } else { // (input_len % 3 == 2) + // (from http://www.ietf.org/rfc/rfc3548.txt) + // (3) the final quantum of encoding input is exactly 16 bits; here, the + // final unit of encoded output will be three characters followed by one + // "=" padding character. + len += 3; + if (do_padding) { + len += 1; + } } - } - assert(len >= input_len); // make sure we didn't overflow - return len; + assert(len >= input_len); // make sure we didn't overflow + return len; } // Base64Escape does padding, so this calculation includes padding. int CalculateBase64EscapedLen(int input_len) { - return CalculateBase64EscapedLen(input_len, true); + return CalculateBase64EscapedLen(input_len, true); } // ---------------------------------------------------------------------- @@ -844,232 +879,227 @@ int CalculateBase64EscapedLen(int input_len) { // filename-safe. 
// ---------------------------------------------------------------------- -int Base64UnescapeInternal(const char *src, int szsrc, - char *dest, int szdest, +int Base64UnescapeInternal(const char* src, int szsrc, char* dest, int szdest, const signed char* unbase64) { - static const char kPad64 = '='; - - int decode = 0; - int destidx = 0; - int state = 0; - unsigned int ch = 0; - unsigned int temp = 0; - - // The GET_INPUT macro gets the next input character, skipping - // over any whitespace, and stopping when we reach the end of the - // string or when we read any non-data character. The arguments are - // an arbitrary identifier (used as a label for goto) and the number - // of data bytes that must remain in the input to avoid aborting the - // loop. -#define GET_INPUT(label, remain) \ - label: \ - --szsrc; \ - ch = *src++; \ - decode = unbase64[ch]; \ - if (decode < 0) { \ - if (ascii_isspace(ch) && szsrc >= remain) \ - goto label; \ - state = 4 - remain; \ - break; \ + static const char kPad64 = '='; + + int decode = 0; + int destidx = 0; + int state = 0; + unsigned int ch = 0; + unsigned int temp = 0; + + // The GET_INPUT macro gets the next input character, skipping + // over any whitespace, and stopping when we reach the end of the + // string or when we read any non-data character. The arguments are + // an arbitrary identifier (used as a label for goto) and the number + // of data bytes that must remain in the input to avoid aborting the + // loop. +#define GET_INPUT(label, remain) \ + label: \ + --szsrc; \ + ch = *src++; \ + decode = unbase64[ch]; \ + if (decode < 0) { \ + if (ascii_isspace(ch) && szsrc >= remain) goto label; \ + state = 4 - remain; \ + break; \ } - // if dest is null, we're just checking to see if it's legal input - // rather than producing output. (I suspect this could just be done - // with a regexp...). We duplicate the loop so this test can be - // outside it instead of in every iteration. 
- - if (dest) { - // This loop consumes 4 input bytes and produces 3 output bytes - // per iteration. We can't know at the start that there is enough - // data left in the string for a full iteration, so the loop may - // break out in the middle; if so 'state' will be set to the - // number of input bytes read. - - while (szsrc >= 4) { - // We'll start by optimistically assuming that the next four - // bytes of the string (src[0..3]) are four good data bytes - // (that is, no nulls, whitespace, padding chars, or illegal - // chars). We need to test src[0..2] for nulls individually - // before constructing temp to preserve the property that we - // never read past a null in the string (no matter how long - // szsrc claims the string is). - - if (!src[0] || !src[1] || !src[2] || - (temp = ((unbase64[src[0]] << 18) | - (unbase64[src[1]] << 12) | - (unbase64[src[2]] << 6) | - (unbase64[src[3]]))) & 0x80000000) { - // Iff any of those four characters was bad (null, illegal, - // whitespace, padding), then temp's high bit will be set - // (because unbase64[] is -1 for all bad characters). - // - // We'll back up and resort to the slower decoder, which knows - // how to handle those cases. - - GET_INPUT(first, 4); - temp = decode; - GET_INPUT(second, 3); - temp = (temp << 6) | decode; - GET_INPUT(third, 2); - temp = (temp << 6) | decode; - GET_INPUT(fourth, 1); - temp = (temp << 6) | decode; - } else { - // We really did have four good data bytes, so advance four - // characters in the string. - - szsrc -= 4; - src += 4; - decode = -1; - ch = '\0'; - } - - // temp has 24 bits of input, so write that out as three bytes. 
- - if (destidx+3 > szdest) return -1; - dest[destidx+2] = temp; - temp >>= 8; - dest[destidx+1] = temp; - temp >>= 8; - dest[destidx] = temp; - destidx += 3; - } - } else { - while (szsrc >= 4) { - if (!src[0] || !src[1] || !src[2] || - (temp = ((unbase64[src[0]] << 18) | - (unbase64[src[1]] << 12) | - (unbase64[src[2]] << 6) | - (unbase64[src[3]]))) & 0x80000000) { - GET_INPUT(first_no_dest, 4); - GET_INPUT(second_no_dest, 3); - GET_INPUT(third_no_dest, 2); - GET_INPUT(fourth_no_dest, 1); - } else { - szsrc -= 4; - src += 4; - decode = -1; - ch = '\0'; - } - destidx += 3; + // if dest is null, we're just checking to see if it's legal input + // rather than producing output. (I suspect this could just be done + // with a regexp...). We duplicate the loop so this test can be + // outside it instead of in every iteration. + + if (dest) { + // This loop consumes 4 input bytes and produces 3 output bytes + // per iteration. We can't know at the start that there is enough + // data left in the string for a full iteration, so the loop may + // break out in the middle; if so 'state' will be set to the + // number of input bytes read. + + while (szsrc >= 4) { + // We'll start by optimistically assuming that the next four + // bytes of the string (src[0..3]) are four good data bytes + // (that is, no nulls, whitespace, padding chars, or illegal + // chars). We need to test src[0..2] for nulls individually + // before constructing temp to preserve the property that we + // never read past a null in the string (no matter how long + // szsrc claims the string is). + + if (!src[0] || !src[1] || !src[2] || + (temp = ((unbase64[src[0]] << 18) | (unbase64[src[1]] << 12) | + (unbase64[src[2]] << 6) | (unbase64[src[3]]))) & + 0x80000000) { + // Iff any of those four characters was bad (null, illegal, + // whitespace, padding), then temp's high bit will be set + // (because unbase64[] is -1 for all bad characters). 
+ // + // We'll back up and resort to the slower decoder, which knows + // how to handle those cases. + + GET_INPUT(first, 4); + temp = decode; + GET_INPUT(second, 3); + temp = (temp << 6) | decode; + GET_INPUT(third, 2); + temp = (temp << 6) | decode; + GET_INPUT(fourth, 1); + temp = (temp << 6) | decode; + } else { + // We really did have four good data bytes, so advance four + // characters in the string. + + szsrc -= 4; + src += 4; + decode = -1; + ch = '\0'; + } + + // temp has 24 bits of input, so write that out as three bytes. + + if (destidx + 3 > szdest) return -1; + dest[destidx + 2] = temp; + temp >>= 8; + dest[destidx + 1] = temp; + temp >>= 8; + dest[destidx] = temp; + destidx += 3; + } + } else { + while (szsrc >= 4) { + if (!src[0] || !src[1] || !src[2] || + (temp = ((unbase64[src[0]] << 18) | (unbase64[src[1]] << 12) | + (unbase64[src[2]] << 6) | (unbase64[src[3]]))) & + 0x80000000) { + GET_INPUT(first_no_dest, 4); + GET_INPUT(second_no_dest, 3); + GET_INPUT(third_no_dest, 2); + GET_INPUT(fourth_no_dest, 1); + } else { + szsrc -= 4; + src += 4; + decode = -1; + ch = '\0'; + } + destidx += 3; + } } - } #undef GET_INPUT - // if the loop terminated because we read a bad character, return - // now. - if (decode < 0 && ch != '\0' && ch != kPad64 && !ascii_isspace(ch)) - return -1; - - if (ch == kPad64) { - // if we stopped by hitting an '=', un-read that character -- we'll - // look at it again when we count to check for the proper number of - // equals signs at the end. - ++szsrc; - --src; - } else { - // This loop consumes 1 input byte per iteration. It's used to - // clean up the 0-3 input bytes remaining when the first, faster - // loop finishes. 'temp' contains the data from 'state' input - // characters read by the first loop. 
- while (szsrc > 0) { - --szsrc; - ch = *src++; - decode = unbase64[ch]; - if (decode < 0) { - if (ascii_isspace(ch)) { - continue; - } else if (ch == '\0') { - break; - } else if (ch == kPad64) { - // back up one character; we'll read it again when we check - // for the correct number of equals signs at the end. - ++szsrc; - --src; - break; - } else { - return -1; - } - } - - // Each input character gives us six bits of output. - temp = (temp << 6) | decode; - ++state; - if (state == 4) { - // If we've accumulated 24 bits of output, write that out as - // three bytes. - if (dest) { - if (destidx+3 > szdest) return -1; - dest[destidx+2] = temp; - temp >>= 8; - dest[destidx+1] = temp; - temp >>= 8; - dest[destidx] = temp; + // if the loop terminated because we read a bad character, return + // now. + if (decode < 0 && ch != '\0' && ch != kPad64 && !ascii_isspace(ch)) return -1; + + if (ch == kPad64) { + // if we stopped by hitting an '=', un-read that character -- we'll + // look at it again when we count to check for the proper number of + // equals signs at the end. + ++szsrc; + --src; + } else { + // This loop consumes 1 input byte per iteration. It's used to + // clean up the 0-3 input bytes remaining when the first, faster + // loop finishes. 'temp' contains the data from 'state' input + // characters read by the first loop. + while (szsrc > 0) { + --szsrc; + ch = *src++; + decode = unbase64[ch]; + if (decode < 0) { + if (ascii_isspace(ch)) { + continue; + } else if (ch == '\0') { + break; + } else if (ch == kPad64) { + // back up one character; we'll read it again when we check + // for the correct number of equals signs at the end. + ++szsrc; + --src; + break; + } else { + return -1; + } + } + + // Each input character gives us six bits of output. + temp = (temp << 6) | decode; + ++state; + if (state == 4) { + // If we've accumulated 24 bits of output, write that out as + // three bytes. 
+ if (dest) { + if (destidx + 3 > szdest) return -1; + dest[destidx + 2] = temp; + temp >>= 8; + dest[destidx + 1] = temp; + temp >>= 8; + dest[destidx] = temp; + } + destidx += 3; + state = 0; + temp = 0; + } } - destidx += 3; - state = 0; - temp = 0; - } } - } - // Process the leftover data contained in 'temp' at the end of the input. - int expected_equals = 0; - switch (state) { + // Process the leftover data contained in 'temp' at the end of the input. + int expected_equals = 0; + switch (state) { case 0: - // Nothing left over; output is a multiple of 3 bytes. - break; + // Nothing left over; output is a multiple of 3 bytes. + break; case 1: - // Bad input; we have 6 bits left over. - return -1; + // Bad input; we have 6 bits left over. + return -1; case 2: - // Produce one more output byte from the 12 input bits we have left. - if (dest) { - if (destidx+1 > szdest) return -1; - temp >>= 4; - dest[destidx] = temp; - } - ++destidx; - expected_equals = 2; - break; + // Produce one more output byte from the 12 input bits we have left. + if (dest) { + if (destidx + 1 > szdest) return -1; + temp >>= 4; + dest[destidx] = temp; + } + ++destidx; + expected_equals = 2; + break; case 3: - // Produce two more output bytes from the 18 input bits we have left. - if (dest) { - if (destidx+2 > szdest) return -1; - temp >>= 2; - dest[destidx+1] = temp; - temp >>= 8; - dest[destidx] = temp; - } - destidx += 2; - expected_equals = 1; - break; + // Produce two more output bytes from the 18 input bits we have left. + if (dest) { + if (destidx + 2 > szdest) return -1; + temp >>= 2; + dest[destidx + 1] = temp; + temp >>= 8; + dest[destidx] = temp; + } + destidx += 2; + expected_equals = 1; + break; default: - // state should have no other values at this point. - LOG(FATAL) << "This can't happen; base64 decoder state = " << state; - } + // state should have no other values at this point. 
+ LOG(FATAL) << "This can't happen; base64 decoder state = " << state; + } - // The remainder of the string should be all whitespace, mixed with - // exactly 0 equals signs, or exactly 'expected_equals' equals - // signs. (Always accepting 0 equals signs is a google extension - // not covered in the RFC.) + // The remainder of the string should be all whitespace, mixed with + // exactly 0 equals signs, or exactly 'expected_equals' equals + // signs. (Always accepting 0 equals signs is a google extension + // not covered in the RFC.) - int equals = 0; - while (szsrc > 0 && *src) { - if (*src == kPad64) - ++equals; - else if (!ascii_isspace(*src)) - return -1; - --szsrc; - ++src; - } + int equals = 0; + while (szsrc > 0 && *src) { + if (*src == kPad64) + ++equals; + else if (!ascii_isspace(*src)) + return -1; + --szsrc; + ++src; + } - return (equals == 0 || equals == expected_equals) ? destidx : -1; + return (equals == 0 || equals == expected_equals) ? destidx : -1; } // The arrays below were generated by the following code @@ -1102,240 +1132,221 @@ int Base64UnescapeInternal(const char *src, int szsrc, // where the value of "Base64[]" was replaced by one of the base-64 conversion // tables from the functions below. 
static const signed char kUnBase64[] = { - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */, - 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, - 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, - -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, - 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, - 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, - 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1, - -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, - 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, - 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, - 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1 -}; + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 62 /*+*/, -1, + -1, -1, 63 /*/ */, 52 /*0*/, 53 /*1*/, 54 /*2*/, 55 /*3*/, 56 /*4*/, 57 /*5*/, + 58 /*6*/, 59 /*7*/, 60 /*8*/, 61 /*9*/, -1, -1, -1, -1, -1, + -1, -1, 0 /*A*/, 1 /*B*/, 2 /*C*/, 3 /*D*/, 4 /*E*/, 5 /*F*/, 6 /*G*/, + 07 /*H*/, 8 /*I*/, 9 /*J*/, 10 /*K*/, 11 /*L*/, 12 /*M*/, 13 /*N*/, 14 /*O*/, 15 /*P*/, + 16 /*Q*/, 17 /*R*/, 18 /*S*/, 19 /*T*/, 20 /*U*/, 
21 /*V*/, 22 /*W*/, 23 /*X*/, 24 /*Y*/, + 25 /*Z*/, -1, -1, -1, -1, -1, -1, 26 /*a*/, 27 /*b*/, + 28 /*c*/, 29 /*d*/, 30 /*e*/, 31 /*f*/, 32 /*g*/, 33 /*h*/, 34 /*i*/, 35 /*j*/, 36 /*k*/, + 37 /*l*/, 38 /*m*/, 39 /*n*/, 40 /*o*/, 41 /*p*/, 42 /*q*/, 43 /*r*/, 44 /*s*/, 45 /*t*/, + 46 /*u*/, 47 /*v*/, 48 /*w*/, 49 /*x*/, 50 /*y*/, 51 /*z*/, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1}; static const signed char kUnWebSafeBase64[] = { - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 62/*-*/, -1, -1, - 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, - 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, - -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, - 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, - 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, - 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, 63/*_*/, - -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, - 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, - 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, - 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 
-1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1 -}; - -int Base64Unescape(const char *src, int szsrc, char *dest, int szdest) { - return Base64UnescapeInternal(src, szsrc, dest, szdest, kUnBase64); -} - -int WebSafeBase64Unescape(const char *src, int szsrc, char *dest, int szdest) { - return Base64UnescapeInternal(src, szsrc, dest, szdest, kUnWebSafeBase64); + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + 62 /*-*/, -1, -1, 52 /*0*/, 53 /*1*/, 54 /*2*/, 55 /*3*/, 56 /*4*/, 57 /*5*/, + 58 /*6*/, 59 /*7*/, 60 /*8*/, 61 /*9*/, -1, -1, -1, -1, -1, + -1, -1, 0 /*A*/, 1 /*B*/, 2 /*C*/, 3 /*D*/, 4 /*E*/, 5 /*F*/, 6 /*G*/, + 07 /*H*/, 8 /*I*/, 9 /*J*/, 10 /*K*/, 11 /*L*/, 12 /*M*/, 13 /*N*/, 14 /*O*/, 15 /*P*/, + 16 /*Q*/, 17 /*R*/, 18 /*S*/, 19 /*T*/, 20 /*U*/, 21 /*V*/, 22 /*W*/, 23 /*X*/, 24 /*Y*/, + 25 /*Z*/, -1, -1, -1, -1, 63 /*_*/, -1, 26 /*a*/, 27 /*b*/, + 28 /*c*/, 29 /*d*/, 30 /*e*/, 31 /*f*/, 32 /*g*/, 33 /*h*/, 34 /*i*/, 35 /*j*/, 36 /*k*/, + 37 /*l*/, 38 /*m*/, 39 /*n*/, 40 /*o*/, 41 /*p*/, 42 /*q*/, 43 /*r*/, 44 /*s*/, 45 /*t*/, + 46 /*u*/, 47 /*v*/, 48 /*w*/, 49 /*x*/, 50 /*y*/, 51 /*z*/, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 
-1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1}; + +int Base64Unescape(const char* src, int szsrc, char* dest, int szdest) { + return Base64UnescapeInternal(src, szsrc, dest, szdest, kUnBase64); +} + +int WebSafeBase64Unescape(const char* src, int szsrc, char* dest, int szdest) { + return Base64UnescapeInternal(src, szsrc, dest, szdest, kUnWebSafeBase64); } static bool Base64UnescapeInternal(const char* src, int slen, string* dest, const signed char* unbase64) { - // Determine the size of the output string. Base64 encodes every 3 bytes into - // 4 characters. any leftover chars are added directly for good measure. - // This is documented in the base64 RFC: http://www.ietf.org/rfc/rfc3548.txt - const int dest_len = 3 * (slen / 4) + (slen % 4); - - dest->clear(); - dest->resize(dest_len); - - // We are getting the destination buffer by getting the beginning of the - // string and converting it into a char *. - const int len = Base64UnescapeInternal(src, slen, string_as_array(dest), - dest->size(), unbase64); - if (len < 0) { + // Determine the size of the output string. Base64 encodes every 3 bytes into + // 4 characters. any leftover chars are added directly for good measure. + // This is documented in the base64 RFC: http://www.ietf.org/rfc/rfc3548.txt + const int dest_len = 3 * (slen / 4) + (slen % 4); + dest->clear(); - return false; - } + dest->resize(dest_len); + + // We are getting the destination buffer by getting the beginning of the + // string and converting it into a char *. 
+ const int len = + Base64UnescapeInternal(src, slen, string_as_array(dest), dest->size(), unbase64); + if (len < 0) { + dest->clear(); + return false; + } - // could be shorter if there was padding - DCHECK_LE(len, dest_len); - dest->resize(len); + // could be shorter if there was padding + DCHECK_LE(len, dest_len); + dest->resize(len); - return true; + return true; } -bool Base64Unescape(const char *src, int slen, string* dest) { - return Base64UnescapeInternal(src, slen, dest, kUnBase64); +bool Base64Unescape(const char* src, int slen, string* dest) { + return Base64UnescapeInternal(src, slen, dest, kUnBase64); } -bool WebSafeBase64Unescape(const char *src, int slen, string* dest) { - return Base64UnescapeInternal(src, slen, dest, kUnWebSafeBase64); +bool WebSafeBase64Unescape(const char* src, int slen, string* dest) { + return Base64UnescapeInternal(src, slen, dest, kUnWebSafeBase64); } -int Base64EscapeInternal(const unsigned char *src, int szsrc, - char *dest, int szdest, const char *base64, - bool do_padding) { - static const char kPad64 = '='; +int Base64EscapeInternal(const unsigned char* src, int szsrc, char* dest, int szdest, + const char* base64, bool do_padding) { + static const char kPad64 = '='; - if (szsrc <= 0) return 0; + if (szsrc <= 0) return 0; - char *cur_dest = dest; - const unsigned char *cur_src = src; + char* cur_dest = dest; + const unsigned char* cur_src = src; - // Three bytes of data encodes to four characters of cyphertext. - // So we can pump through three-byte chunks atomically. - while (szsrc > 2) { /* keep going until we have less than 24 bits */ - if ((szdest -= 4) < 0) return 0; - cur_dest[0] = base64[cur_src[0] >> 2]; - cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)]; - cur_dest[2] = base64[((cur_src[1] & 0x0f) << 2) + (cur_src[2] >> 6)]; - cur_dest[3] = base64[cur_src[2] & 0x3f]; + // Three bytes of data encodes to four characters of cyphertext. + // So we can pump through three-byte chunks atomically. 
+ while (szsrc > 2) { /* keep going until we have less than 24 bits */ + if ((szdest -= 4) < 0) return 0; + cur_dest[0] = base64[cur_src[0] >> 2]; + cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)]; + cur_dest[2] = base64[((cur_src[1] & 0x0f) << 2) + (cur_src[2] >> 6)]; + cur_dest[3] = base64[cur_src[2] & 0x3f]; - cur_dest += 4; - cur_src += 3; - szsrc -= 3; - } + cur_dest += 4; + cur_src += 3; + szsrc -= 3; + } - /* now deal with the tail (<=2 bytes) */ - switch (szsrc) { + /* now deal with the tail (<=2 bytes) */ + switch (szsrc) { case 0: - // Nothing left; nothing more to do. - break; + // Nothing left; nothing more to do. + break; case 1: - // One byte left: this encodes to two characters, and (optionally) - // two pad characters to round out the four-character cypherblock. - if ((szdest -= 2) < 0) return 0; - cur_dest[0] = base64[cur_src[0] >> 2]; - cur_dest[1] = base64[(cur_src[0] & 0x03) << 4]; - cur_dest += 2; - if (do_padding) { + // One byte left: this encodes to two characters, and (optionally) + // two pad characters to round out the four-character cypherblock. if ((szdest -= 2) < 0) return 0; - cur_dest[0] = kPad64; - cur_dest[1] = kPad64; + cur_dest[0] = base64[cur_src[0] >> 2]; + cur_dest[1] = base64[(cur_src[0] & 0x03) << 4]; cur_dest += 2; - } - break; + if (do_padding) { + if ((szdest -= 2) < 0) return 0; + cur_dest[0] = kPad64; + cur_dest[1] = kPad64; + cur_dest += 2; + } + break; case 2: - // Two bytes left: this encodes to three characters, and (optionally) - // one pad character to round out the four-character cypherblock. 
- if ((szdest -= 3) < 0) return 0; - cur_dest[0] = base64[cur_src[0] >> 2]; - cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)]; - cur_dest[2] = base64[(cur_src[1] & 0x0f) << 2]; - cur_dest += 3; - if (do_padding) { - if ((szdest -= 1) < 0) return 0; - cur_dest[0] = kPad64; - cur_dest += 1; - } - break; + // Two bytes left: this encodes to three characters, and (optionally) + // one pad character to round out the four-character cypherblock. + if ((szdest -= 3) < 0) return 0; + cur_dest[0] = base64[cur_src[0] >> 2]; + cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)]; + cur_dest[2] = base64[(cur_src[1] & 0x0f) << 2]; + cur_dest += 3; + if (do_padding) { + if ((szdest -= 1) < 0) return 0; + cur_dest[0] = kPad64; + cur_dest += 1; + } + break; default: - // Should not be reached: blocks of 3 bytes are handled - // in the while loop before this switch statement. - LOG_ASSERT(false) << "Logic problem? szsrc = " << szsrc; - break; - } - return (cur_dest - dest); + // Should not be reached: blocks of 3 bytes are handled + // in the while loop before this switch statement. + LOG_ASSERT(false) << "Logic problem? 
szsrc = " << szsrc; + break; + } + return (cur_dest - dest); } static const char kBase64Chars[] = -"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; static const char kWebSafeBase64Chars[] = -"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; -int Base64Escape(const unsigned char *src, int szsrc, char *dest, int szdest) { - return Base64EscapeInternal(src, szsrc, dest, szdest, kBase64Chars, true); +int Base64Escape(const unsigned char* src, int szsrc, char* dest, int szdest) { + return Base64EscapeInternal(src, szsrc, dest, szdest, kBase64Chars, true); } -int WebSafeBase64Escape(const unsigned char *src, int szsrc, char *dest, - int szdest, bool do_padding) { - return Base64EscapeInternal(src, szsrc, dest, szdest, - kWebSafeBase64Chars, do_padding); +int WebSafeBase64Escape(const unsigned char* src, int szsrc, char* dest, int szdest, + bool do_padding) { + return Base64EscapeInternal(src, szsrc, dest, szdest, kWebSafeBase64Chars, do_padding); } -void Base64EscapeInternal(const unsigned char* src, int szsrc, - string* dest, bool do_padding, +void Base64EscapeInternal(const unsigned char* src, int szsrc, string* dest, bool do_padding, const char* base64_chars) { - const int calc_escaped_size = - CalculateBase64EscapedLen(szsrc, do_padding); - dest->clear(); - dest->resize(calc_escaped_size, '\0'); - const int escaped_len = Base64EscapeInternal(src, szsrc, - string_as_array(dest), - dest->size(), - base64_chars, - do_padding); - DCHECK_EQ(calc_escaped_size, escaped_len); + const int calc_escaped_size = CalculateBase64EscapedLen(szsrc, do_padding); + dest->clear(); + dest->resize(calc_escaped_size, '\0'); + const int escaped_len = Base64EscapeInternal(src, szsrc, string_as_array(dest), dest->size(), + base64_chars, do_padding); + DCHECK_EQ(calc_escaped_size, escaped_len); } -void 
Base64Escape(const unsigned char *src, int szsrc, - string* dest, bool do_padding) { - Base64EscapeInternal(src, szsrc, dest, do_padding, kBase64Chars); +void Base64Escape(const unsigned char* src, int szsrc, string* dest, bool do_padding) { + Base64EscapeInternal(src, szsrc, dest, do_padding, kBase64Chars); } -void WebSafeBase64Escape(const unsigned char *src, int szsrc, - string *dest, bool do_padding) { - Base64EscapeInternal(src, szsrc, dest, do_padding, kWebSafeBase64Chars); +void WebSafeBase64Escape(const unsigned char* src, int szsrc, string* dest, bool do_padding) { + Base64EscapeInternal(src, szsrc, dest, do_padding, kWebSafeBase64Chars); } void Base64Escape(const string& src, string* dest) { - Base64Escape(reinterpret_cast(src.data()), - src.size(), dest, true); + Base64Escape(reinterpret_cast(src.data()), src.size(), dest, true); } void WebSafeBase64Escape(const string& src, string* dest) { - WebSafeBase64Escape(reinterpret_cast(src.data()), - src.size(), dest, false); + WebSafeBase64Escape(reinterpret_cast(src.data()), src.size(), dest, + false); } void WebSafeBase64EscapeWithPadding(const string& src, string* dest) { - WebSafeBase64Escape(reinterpret_cast(src.data()), - src.size(), dest, true); + WebSafeBase64Escape(reinterpret_cast(src.data()), src.size(), dest, true); } // Returns true iff c is in the Base 32 alphabet. bool ValidBase32Byte(char c) { - return (c >= 'A' && c <= 'Z') || (c >= '2' && c <= '7') || c == '='; + return (c >= 'A' && c <= 'Z') || (c >= '2' && c <= '7') || c == '='; } // Mapping from number of Base32 escaped characters (0 through 8) to number of @@ -1348,200 +1359,187 @@ bool ValidBase32Byte(char c) { // the length of the buffer to hold unescaped data. // // See http://tools.ietf.org/html/rfc4648#section-6 for details. 
-static const int kBase32NumUnescapedBytes[] = { - 0, 5, 1, 5, 2, 3, 5, 4, 5 -}; +static const int kBase32NumUnescapedBytes[] = {0, 5, 1, 5, 2, 3, 5, 4, 5}; int Base32Unescape(const char* src, int slen, char* dest, int szdest) { - int destidx = 0; - char escaped_bytes[8]; - unsigned char unescaped_bytes[5]; - while (slen > 0) { - // Collect the next 8 escaped bytes and convert to upper case. If there - // are less than 8 bytes left, pad with '=', but keep track of the number - // of non-padded bytes for later. - int non_padded_len = 8; - for (int i = 0; i < 8; ++i) { - escaped_bytes[i] = (i < slen) ? ascii_toupper(src[i]) : '='; - if (!ValidBase32Byte(escaped_bytes[i])) { - return -1; - } - // Stop counting escaped bytes at first '='. - if (escaped_bytes[i] == '=' && non_padded_len == 8) { - non_padded_len = i; - } - } + int destidx = 0; + char escaped_bytes[8]; + unsigned char unescaped_bytes[5]; + while (slen > 0) { + // Collect the next 8 escaped bytes and convert to upper case. If there + // are less than 8 bytes left, pad with '=', but keep track of the number + // of non-padded bytes for later. + int non_padded_len = 8; + for (int i = 0; i < 8; ++i) { + escaped_bytes[i] = (i < slen) ? ascii_toupper(src[i]) : '='; + if (!ValidBase32Byte(escaped_bytes[i])) { + return -1; + } + // Stop counting escaped bytes at first '='. + if (escaped_bytes[i] == '=' && non_padded_len == 8) { + non_padded_len = i; + } + } - // Convert the 8 escaped bytes to 5 unescaped bytes and copy to dest. - EightBase32DigitsToFiveBytes(escaped_bytes, unescaped_bytes); - const int num_unescaped = kBase32NumUnescapedBytes[non_padded_len]; - for (int i = 0; i < num_unescaped; ++i) { - if (destidx == szdest) { - // No more room in dest, so terminate early. - return -1; - } - dest[destidx] = unescaped_bytes[i]; - ++destidx; + // Convert the 8 escaped bytes to 5 unescaped bytes and copy to dest. 
+ EightBase32DigitsToFiveBytes(escaped_bytes, unescaped_bytes); + const int num_unescaped = kBase32NumUnescapedBytes[non_padded_len]; + for (int i = 0; i < num_unescaped; ++i) { + if (destidx == szdest) { + // No more room in dest, so terminate early. + return -1; + } + dest[destidx] = unescaped_bytes[i]; + ++destidx; + } + src += 8; + slen -= 8; } - src += 8; - slen -= 8; - } - return destidx; + return destidx; } bool Base32Unescape(const char* src, int slen, string* dest) { - // Determine the size of the output string. - const int dest_len = 5 * (slen / 8) + kBase32NumUnescapedBytes[slen % 8]; - - dest->clear(); - dest->resize(dest_len); + // Determine the size of the output string. + const int dest_len = 5 * (slen / 8) + kBase32NumUnescapedBytes[slen % 8]; - // We are getting the destination buffer by getting the beginning of the - // string and converting it into a char *. - const int len = Base32Unescape(src, slen, - string_as_array(dest), dest->size()); - if (len < 0) { dest->clear(); - return false; - } - - // Could be shorter if there was padding. - DCHECK_LE(len, dest_len); - dest->resize(len); - - return true; -} - -void GeneralFiveBytesToEightBase32Digits(const unsigned char *in_bytes, - char *out, const char *alphabet) { - // It's easier to just hard code this. 
- // The conversion isbased on the following picture of the division of a - // 40-bit block into 8 5-byte words: - // - // 5 3 2 5 1 4 4 1 5 2 3 5 - // |:::::::|:::::::|:::::::|:::::::|::::::: - // +----+----+----+----+----+----+----+---- - // - out[0] = alphabet[in_bytes[0] >> 3]; - out[1] = alphabet[(in_bytes[0] & 0x07) << 2 | in_bytes[1] >> 6]; - out[2] = alphabet[(in_bytes[1] & 0x3E) >> 1]; - out[3] = alphabet[(in_bytes[1] & 0x01) << 4 | in_bytes[2] >> 4]; - out[4] = alphabet[(in_bytes[2] & 0x0F) << 1 | in_bytes[3] >> 7]; - out[5] = alphabet[(in_bytes[3] & 0x7C) >> 2]; - out[6] = alphabet[(in_bytes[3] & 0x03) << 3 | in_bytes[4] >> 5]; - out[7] = alphabet[(in_bytes[4] & 0x1F)]; -} - -static int GeneralBase32Escape(const unsigned char *src, size_t szsrc, - char *dest, size_t szdest, - const char *alphabet) { - static const char kPad32 = '='; - - if (szsrc == 0) return 0; - - char *cur_dest = dest; - const unsigned char *cur_src = src; - - // Five bytes of data encodes to eight characters of cyphertext. - // So we can pump through three-byte chunks atomically. - while (szsrc > 4) { // keep going until we have less than 40 bits - if ( szdest < 8) return 0; - szdest -= 8; - - GeneralFiveBytesToEightBase32Digits(cur_src, cur_dest, alphabet); - - cur_dest += 8; - cur_src += 5; - szsrc -= 5; - } - - // Now deal with the tail (<=4 bytes). - if (szsrc > 0) { - if ( szdest < 8) return 0; - szdest -= 8; - unsigned char last_chunk[5]; - memcpy(last_chunk, cur_src, szsrc); - - for (size_t i = szsrc; i < 5; ++i) { - last_chunk[i] = '\0'; + dest->resize(dest_len); + + // We are getting the destination buffer by getting the beginning of the + // string and converting it into a char *. 
+ const int len = Base32Unescape(src, slen, string_as_array(dest), dest->size()); + if (len < 0) { + dest->clear(); + return false; } - GeneralFiveBytesToEightBase32Digits(last_chunk, cur_dest, alphabet); - int filled = (szsrc * 8) / 5 + 1; - cur_dest += filled; + // Could be shorter if there was padding. + DCHECK_LE(len, dest_len); + dest->resize(len); + + return true; +} + +void GeneralFiveBytesToEightBase32Digits(const unsigned char* in_bytes, char* out, + const char* alphabet) { + // It's easier to just hard code this. + // The conversion isbased on the following picture of the division of a + // 40-bit block into 8 5-byte words: + // + // 5 3 2 5 1 4 4 1 5 2 3 5 + // |:::::::|:::::::|:::::::|:::::::|::::::: + // +----+----+----+----+----+----+----+---- + // + out[0] = alphabet[in_bytes[0] >> 3]; + out[1] = alphabet[(in_bytes[0] & 0x07) << 2 | in_bytes[1] >> 6]; + out[2] = alphabet[(in_bytes[1] & 0x3E) >> 1]; + out[3] = alphabet[(in_bytes[1] & 0x01) << 4 | in_bytes[2] >> 4]; + out[4] = alphabet[(in_bytes[2] & 0x0F) << 1 | in_bytes[3] >> 7]; + out[5] = alphabet[(in_bytes[3] & 0x7C) >> 2]; + out[6] = alphabet[(in_bytes[3] & 0x03) << 3 | in_bytes[4] >> 5]; + out[7] = alphabet[(in_bytes[4] & 0x1F)]; +} + +static int GeneralBase32Escape(const unsigned char* src, size_t szsrc, char* dest, size_t szdest, + const char* alphabet) { + static const char kPad32 = '='; + + if (szsrc == 0) return 0; + + char* cur_dest = dest; + const unsigned char* cur_src = src; + + // Five bytes of data encodes to eight characters of cyphertext. + // So we can pump through three-byte chunks atomically. + while (szsrc > 4) { // keep going until we have less than 40 bits + if (szdest < 8) return 0; + szdest -= 8; + + GeneralFiveBytesToEightBase32Digits(cur_src, cur_dest, alphabet); + + cur_dest += 8; + cur_src += 5; + szsrc -= 5; + } + + // Now deal with the tail (<=4 bytes). 
+ if (szsrc > 0) { + if (szdest < 8) return 0; + szdest -= 8; + unsigned char last_chunk[5]; + memcpy(last_chunk, cur_src, szsrc); + + for (size_t i = szsrc; i < 5; ++i) { + last_chunk[i] = '\0'; + } + + GeneralFiveBytesToEightBase32Digits(last_chunk, cur_dest, alphabet); + int filled = (szsrc * 8) / 5 + 1; + cur_dest += filled; - // Add on the padding. - for (int i = 0; i < (8 - filled); ++i) { - *(cur_dest++) = kPad32; + // Add on the padding. + for (int i = 0; i < (8 - filled); ++i) { + *(cur_dest++) = kPad32; + } } - } - return cur_dest - dest; + return cur_dest - dest; } -static bool GeneralBase32Escape(const string& src, string* dest, - const char *alphabet) { - const int max_escaped_size = CalculateBase32EscapedLen(src.length()); - dest->clear(); - dest->resize(max_escaped_size + 1, '\0'); - const int escaped_len = - GeneralBase32Escape(reinterpret_cast(src.c_str()), - src.length(), &*dest->begin(), dest->size(), - alphabet); +static bool GeneralBase32Escape(const string& src, string* dest, const char* alphabet) { + const int max_escaped_size = CalculateBase32EscapedLen(src.length()); + dest->clear(); + dest->resize(max_escaped_size + 1, '\0'); + const int escaped_len = + GeneralBase32Escape(reinterpret_cast(src.c_str()), src.length(), + &*dest->begin(), dest->size(), alphabet); - DCHECK_LE(max_escaped_size, escaped_len); + DCHECK_LE(max_escaped_size, escaped_len); - if (escaped_len < 0) { - dest->clear(); - return false; - } + if (escaped_len < 0) { + dest->clear(); + return false; + } - dest->resize(escaped_len); - return true; + dest->resize(escaped_len); + return true; } -static const char Base32Alphabet[] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', - 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', - 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', - 'Y', 'Z', '2', '3', '4', '5', '6', '7' - }; +static const char Base32Alphabet[] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', + 'W', 'X', 'Y', 'Z', '2', 
'3', '4', '5', '6', '7'}; -int Base32Escape(const unsigned char* src, size_t szsrc, - char* dest, size_t szdest) { - return GeneralBase32Escape(src, szsrc, dest, szdest, Base32Alphabet); +int Base32Escape(const unsigned char* src, size_t szsrc, char* dest, size_t szdest) { + return GeneralBase32Escape(src, szsrc, dest, szdest, Base32Alphabet); } bool Base32Escape(const string& src, string* dest) { - return GeneralBase32Escape(src, dest, Base32Alphabet); + return GeneralBase32Escape(src, dest, Base32Alphabet); } -void FiveBytesToEightBase32Digits(const unsigned char *in_bytes, char *out) { - GeneralFiveBytesToEightBase32Digits(in_bytes, out, Base32Alphabet); +void FiveBytesToEightBase32Digits(const unsigned char* in_bytes, char* out) { + GeneralFiveBytesToEightBase32Digits(in_bytes, out, Base32Alphabet); } static const char Base32HexAlphabet[] = { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', - 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', - 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', - }; + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', + 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', +}; -int Base32HexEscape(const unsigned char* src, size_t szsrc, - char* dest, size_t szdest) { - return GeneralBase32Escape(src, szsrc, dest, szdest, Base32HexAlphabet); +int Base32HexEscape(const unsigned char* src, size_t szsrc, char* dest, size_t szdest) { + return GeneralBase32Escape(src, szsrc, dest, szdest, Base32HexAlphabet); } bool Base32HexEscape(const string& src, string* dest) { - return GeneralBase32Escape(src, dest, Base32HexAlphabet); + return GeneralBase32Escape(src, dest, Base32HexAlphabet); } int CalculateBase32EscapedLen(size_t input_len) { - DCHECK_LE(input_len, numeric_limits::max() / 8); - size_t intermediate_result = 8 * input_len + 4; - size_t len = intermediate_result / 5; - len = (len + 7) & ~7; - return len; + DCHECK_LE(input_len, numeric_limits::max() / 8); + 
size_t intermediate_result = 8 * input_len + 4; + size_t len = intermediate_result / 5; + len = (len + 7) & ~7; + return len; } // ---------------------------------------------------------------------- @@ -1556,66 +1554,59 @@ int CalculateBase32EscapedLen(size_t input_len) { // for details on base32. // ---------------------------------------------------------------------- - -void EightBase32DigitsToTenHexDigits(const char *in, char *out) { - unsigned char bytes[5]; - EightBase32DigitsToFiveBytes(in, bytes); - b2a_hex(bytes, out, 5); -} - -void EightBase32DigitsToFiveBytes(const char *in, unsigned char *bytes_out) { - static const char Base32InverseAlphabet[] = { - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 26/*2*/, 27/*3*/, 28/*4*/, 29/*5*/, 30/*6*/, 31/*7*/, - 99, 99, 99, 99, 99, 00/*=*/, 99, 99, - 99, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, - 7/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, - 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, - 23/*X*/, 24/*Y*/, 25/*Z*/, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99 - }; - - // Convert to raw bytes. It's easier to just hard code this. 
- bytes_out[0] = Base32InverseAlphabet[in[0]] << 3 | - Base32InverseAlphabet[in[1]] >> 2; - - bytes_out[1] = Base32InverseAlphabet[in[1]] << 6 | - Base32InverseAlphabet[in[2]] << 1 | - Base32InverseAlphabet[in[3]] >> 4; - - bytes_out[2] = Base32InverseAlphabet[in[3]] << 4 | - Base32InverseAlphabet[in[4]] >> 1; - - bytes_out[3] = Base32InverseAlphabet[in[4]] << 7 | - Base32InverseAlphabet[in[5]] << 2 | - Base32InverseAlphabet[in[6]] >> 3; - - bytes_out[4] = Base32InverseAlphabet[in[6]] << 5 | - Base32InverseAlphabet[in[7]]; +void EightBase32DigitsToTenHexDigits(const char* in, char* out) { + unsigned char bytes[5]; + EightBase32DigitsToFiveBytes(in, bytes); + b2a_hex(bytes, out, 5); +} + +void EightBase32DigitsToFiveBytes(const char* in, unsigned char* bytes_out) { + static const char Base32InverseAlphabet[] = { + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 26 /*2*/, 27 /*3*/, 28 /*4*/, 29 /*5*/, 30 /*6*/, 31 /*7*/, + 99, 99, 99, 99, 99, 00 /*=*/, 99, 99, + 99, 0 /*A*/, 1 /*B*/, 2 /*C*/, 3 /*D*/, 4 /*E*/, 5 /*F*/, 6 /*G*/, + 7 /*H*/, 8 /*I*/, 9 /*J*/, 10 /*K*/, 11 /*L*/, 12 /*M*/, 13 /*N*/, 14 /*O*/, + 15 /*P*/, 16 /*Q*/, 17 /*R*/, 18 /*S*/, 19 /*T*/, 20 /*U*/, 21 /*V*/, 22 /*W*/, + 23 /*X*/, 24 /*Y*/, 25 /*Z*/, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 
99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99}; + + // Convert to raw bytes. It's easier to just hard code this. + bytes_out[0] = Base32InverseAlphabet[in[0]] << 3 | Base32InverseAlphabet[in[1]] >> 2; + + bytes_out[1] = Base32InverseAlphabet[in[1]] << 6 | Base32InverseAlphabet[in[2]] << 1 | + Base32InverseAlphabet[in[3]] >> 4; + + bytes_out[2] = Base32InverseAlphabet[in[3]] << 4 | Base32InverseAlphabet[in[4]] >> 1; + + bytes_out[3] = Base32InverseAlphabet[in[4]] << 7 | Base32InverseAlphabet[in[5]] << 2 | + Base32InverseAlphabet[in[6]] >> 3; + + bytes_out[4] = Base32InverseAlphabet[in[6]] << 5 | Base32InverseAlphabet[in[7]]; } // ---------------------------------------------------------------------- @@ -1628,82 +1619,77 @@ void EightBase32DigitsToFiveBytes(const char *in, unsigned char *bytes_out) { // See RFC3548 at http://www.ietf.org/rfc/rfc3548.txt // for details on base32. // ---------------------------------------------------------------------- -void TenHexDigitsToEightBase32Digits(const char *in, char *out) { - unsigned char bytes[5]; +void TenHexDigitsToEightBase32Digits(const char* in, char* out) { + unsigned char bytes[5]; - // Convert hex to raw bytes. - a2b_hex(in, bytes, 5); - FiveBytesToEightBase32Digits(bytes, out); + // Convert hex to raw bytes. 
+ a2b_hex(in, bytes, 5); + FiveBytesToEightBase32Digits(bytes, out); } // ---------------------------------------------------------------------- // EscapeFileName / UnescapeFileName // ---------------------------------------------------------------------- static const Charmap escape_file_name_exceptions( - "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" // letters - "0123456789" // digits - "-_."); + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" // letters + "0123456789" // digits + "-_."); void EscapeFileName(const StringPiece& src, string* dst) { - // Reserve at least src.size() chars - dst->reserve(dst->size() + src.size()); + // Reserve at least src.size() chars + dst->reserve(dst->size() + src.size()); - for (char c : src) { - // We do not use "isalpha" because we want the behavior to be - // independent of the current locale settings. - if (escape_file_name_exceptions.contains(c)) { - dst->push_back(c); + for (char c : src) { + // We do not use "isalpha" because we want the behavior to be + // independent of the current locale settings. 
+ if (escape_file_name_exceptions.contains(c)) { + dst->push_back(c); - } else if (c == '/') { - dst->push_back('~'); + } else if (c == '/') { + dst->push_back('~'); - } else { - char tmp[2]; - b2a_hex(reinterpret_cast(&c), tmp, 1); - dst->push_back('%'); - dst->append(tmp, 2); + } else { + char tmp[2]; + b2a_hex(reinterpret_cast(&c), tmp, 1); + dst->push_back('%'); + dst->append(tmp, 2); + } } - } } void UnescapeFileName(const StringPiece& src_piece, string* dst) { - const char* src = src_piece.data(); - const int len = src_piece.size(); - for (int i = 0; i < len; ++i) { - const char c = src[i]; - if (c == '~') { - dst->push_back('/'); - - } else if ((c == '%') && (i + 2 < len)) { - unsigned char tmp[1]; - a2b_hex(src + i + 1, &tmp[0], 1); - dst->push_back(tmp[0]); - i += 2; + const char* src = src_piece.data(); + const int len = src_piece.size(); + for (int i = 0; i < len; ++i) { + const char c = src[i]; + if (c == '~') { + dst->push_back('/'); + + } else if ((c == '%') && (i + 2 < len)) { + unsigned char tmp[1]; + a2b_hex(src + i + 1, &tmp[0], 1); + dst->push_back(tmp[0]); + i += 2; - } else { - dst->push_back(c); + } else { + dst->push_back(c); + } } - } } static char hex_value[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, // '0'..'9' - 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'A'..'F' - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'a'..'f' - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, // '0'..'9' + 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'A'..'F' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 11, 12, 13, 14, 15, 0, + 0, 0, 0, 0, 0, 0, 0, 0, // 'a'..'f' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; static char hex_char[] = "0123456789abcdef"; @@ -1712,29 +1698,27 @@ static char hex_char[] = "0123456789abcdef"; // individual characters at a time. template static void a2b_hex_t(const char* a, T b, int num) { - for (int i = 0; i < num; i++) { - b[i] = (hex_value[a[i * 2] & 0xFF] << 4) - + (hex_value[a[i * 2 + 1] & 0xFF]); - } + for (int i = 0; i < num; i++) { + b[i] = (hex_value[a[i * 2] & 0xFF] << 4) + (hex_value[a[i * 2 + 1] & 0xFF]); + } } string a2b_bin(const string& a, bool byte_order_msb) { - string result; - const char *data = a.c_str(); - int num_bytes = (a.size()+7)/8; - for (int byte_offset = 0; byte_offset < num_bytes; ++byte_offset) { - unsigned char c = 0; - for (int bit_offset = 0; bit_offset < 8; ++bit_offset) { - if (*data == '\0') - break; - if (*data++ != '0') { - int bits_to_shift = (byte_order_msb) ? 
7-bit_offset : bit_offset; - c |= (1 << bits_to_shift); - } + string result; + const char* data = a.c_str(); + int num_bytes = (a.size() + 7) / 8; + for (int byte_offset = 0; byte_offset < num_bytes; ++byte_offset) { + unsigned char c = 0; + for (int bit_offset = 0; bit_offset < 8; ++bit_offset) { + if (*data == '\0') break; + if (*data++ != '0') { + int bits_to_shift = (byte_order_msb) ? 7 - bit_offset : bit_offset; + c |= (1 << bits_to_shift); + } + } + result.append(1, c); } - result.append(1, c); - } - return result; + return result; } // This is a templated function so that T can be either a char* @@ -1742,132 +1726,131 @@ string a2b_bin(const string& a, bool byte_order_msb) { // individual characters at a time. template static void b2a_hex_t(const unsigned char* b, T a, int num) { - for (int i = 0; i < num; i++) { - a[i * 2 + 0] = hex_char[b[i] >> 4]; - a[i * 2 + 1] = hex_char[b[i] & 0xf]; - } + for (int i = 0; i < num; i++) { + a[i * 2 + 0] = hex_char[b[i] >> 4]; + a[i * 2 + 1] = hex_char[b[i] & 0xf]; + } } string b2a_bin(const string& b, bool byte_order_msb) { - string result; - for (char c : b) { - for (int bit_offset = 0; bit_offset < 8; ++bit_offset) { - int x = (byte_order_msb) ? 7-bit_offset : bit_offset; - result.append(1, (c & (1 << x)) ? '1' : '0'); + string result; + for (char c : b) { + for (int bit_offset = 0; bit_offset < 8; ++bit_offset) { + int x = (byte_order_msb) ? 7 - bit_offset : bit_offset; + result.append(1, (c & (1 << x)) ? 
'1' : '0'); + } } - } - return result; + return result; } void b2a_hex(const unsigned char* b, char* a, int num) { - b2a_hex_t(b, a, num); + b2a_hex_t(b, a, num); } void a2b_hex(const char* a, unsigned char* b, int num) { - a2b_hex_t(a, b, num); + a2b_hex_t(a, b, num); } void a2b_hex(const char* a, char* b, int num) { - a2b_hex_t(a, b, num); + a2b_hex_t(a, b, num); } string b2a_hex(const char* b, int len) { - string result; - result.resize(len << 1); - b2a_hex_t(reinterpret_cast(b), result, len); - return result; + string result; + result.resize(len << 1); + b2a_hex_t(reinterpret_cast(b), result, len); + return result; } string b2a_hex(const StringPiece& b) { - return b2a_hex(b.data(), b.size()); + return b2a_hex(b.data(), b.size()); } string a2b_hex(const string& a) { - string result; - a2b_hex(a.c_str(), &result, a.size()/2); + string result; + a2b_hex(a.c_str(), &result, a.size() / 2); - return result; + return result; } void b2a_hex(const unsigned char* from, string* to, int num) { - to->resize(num << 1); - b2a_hex_t(from, *to, num); + to->resize(num << 1); + b2a_hex_t(from, *to, num); } void a2b_hex(const char* from, string* to, int num) { - to->resize(num); - a2b_hex_t(from, *to, num); + to->resize(num); + a2b_hex_t(from, *to, num); } const char* kDontNeedShellEscapeChars = -"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.=/:,@"; + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.=/:,@"; string ShellEscape(StringPiece src) { - if (!src.empty() && // empty string needs quotes - src.find_first_not_of(kDontNeedShellEscapeChars) == StringPiece::npos) { - // only contains chars that don't need quotes; it's fine - return src.ToString(); - } else if (src.find('\'') == StringPiece::npos) { - // no single quotes; just wrap it in single quotes - return StrCat("'", src, "'"); - } else { - // needs double quote escaping - string result = "\""; - for (char c : src) { - switch (c) { - case '\\': - case '$': - case '"': - case '`': - 
result.push_back('\\'); - }; - result.push_back(c); + if (!src.empty() && // empty string needs quotes + src.find_first_not_of(kDontNeedShellEscapeChars) == StringPiece::npos) { + // only contains chars that don't need quotes; it's fine + return src.ToString(); + } else if (src.find('\'') == StringPiece::npos) { + // no single quotes; just wrap it in single quotes + return StrCat("'", src, "'"); + } else { + // needs double quote escaping + string result = "\""; + for (char c : src) { + switch (c) { + case '\\': + case '$': + case '"': + case '`': + result.push_back('\\'); + }; + result.push_back(c); + } + result.push_back('"'); + return result; } - result.push_back('"'); - return result; - } -} - -static const char kHexTable[513]= - "000102030405060708090a0b0c0d0e0f" - "101112131415161718191a1b1c1d1e1f" - "202122232425262728292a2b2c2d2e2f" - "303132333435363738393a3b3c3d3e3f" - "404142434445464748494a4b4c4d4e4f" - "505152535455565758595a5b5c5d5e5f" - "606162636465666768696a6b6c6d6e6f" - "707172737475767778797a7b7c7d7e7f" - "808182838485868788898a8b8c8d8e8f" - "909192939495969798999a9b9c9d9e9f" - "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" - "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" - "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" - "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" - "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" - "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; +} + +static const char kHexTable[513] = + "000102030405060708090a0b0c0d0e0f" + "101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f" + "303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f" + "505152535455565758595a5b5c5d5e5f" + "606162636465666768696a6b6c6d6e6f" + "707172737475767778797a7b7c7d7e7f" + "808182838485868788898a8b8c8d8e8f" + "909192939495969798999a9b9c9d9e9f" + "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" + "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" + "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" + "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" + "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; 
//------------------------------------------------------------------------ // ByteStringToAscii // Reads at most bytes_to_read from binary_string and prints it to // ascii_string in downcased hex. //------------------------------------------------------------------------ -void ByteStringToAscii(string const &binary_string, int bytes_to_read, - string * ascii_string ) { - if (binary_string.size() < bytes_to_read) { - bytes_to_read = binary_string.size(); - } +void ByteStringToAscii(string const& binary_string, int bytes_to_read, string* ascii_string) { + if (binary_string.size() < bytes_to_read) { + bytes_to_read = binary_string.size(); + } - CHECK_GE(bytes_to_read, 0); - ascii_string->resize(bytes_to_read*2); + CHECK_GE(bytes_to_read, 0); + ascii_string->resize(bytes_to_read * 2); - string::const_iterator in = binary_string.begin(); - string::iterator out = ascii_string->begin(); + string::const_iterator in = binary_string.begin(); + string::iterator out = ascii_string->begin(); - for (int i = 0; i < bytes_to_read; i++) { - *out++ = kHexTable[(*in)*2]; - *out++ = kHexTable[(*in)*2 + 1]; - ++in; - } + for (int i = 0; i < bytes_to_read; i++) { + *out++ = kHexTable[(*in) * 2]; + *out++ = kHexTable[(*in) * 2 + 1]; + ++in; + } } //------------------------------------------------------------------------ @@ -1878,38 +1861,38 @@ void ByteStringToAscii(string const &binary_string, int bytes_to_read, // Returns false and may modify output if it is // unable to parse the hex string. 
//------------------------------------------------------------------------ -bool ByteStringFromAscii(string const & hex_string, string * binary_string) { - binary_string->clear(); +bool ByteStringFromAscii(string const& hex_string, string* binary_string) { + binary_string->clear(); - if ((hex_string.size()%2) != 0) { - return false; - } + if ((hex_string.size() % 2) != 0) { + return false; + } - int value = 0; - for (int i = 0; i < hex_string.size(); i++) { - char c = hex_string[i]; + int value = 0; + for (int i = 0; i < hex_string.size(); i++) { + char c = hex_string[i]; - if (!ascii_isxdigit(c)) { - return false; - } + if (!ascii_isxdigit(c)) { + return false; + } - if (ascii_isdigit(c)) { - value += c - '0'; - } else if (ascii_islower(c)) { - value += 10 + c - 'a'; - } else { - value += 10 + c - 'A'; - } + if (ascii_isdigit(c)) { + value += c - '0'; + } else if (ascii_islower(c)) { + value += 10 + c - 'a'; + } else { + value += 10 + c - 'A'; + } - if (i & 1) { - binary_string->push_back(value); - value = 0; - } else { - value <<= 4; + if (i & 1) { + binary_string->push_back(value); + value = 0; + } else { + value <<= 4; + } } - } - return true; + return true; } // ---------------------------------------------------------------------- @@ -1945,79 +1928,72 @@ bool ByteStringFromAscii(string const & hex_string, string * binary_string) { // (1) determines the presence of LF (first one is ok) // (2) if yes, removes any CR, else convert every CR to LF -void CleanStringLineEndings(const string& src, string* dst, - bool auto_end_last_line) { - if (dst->empty()) { - dst->append(src); - CleanStringLineEndings(dst, auto_end_last_line); - } else { - string tmp = src; - CleanStringLineEndings(&tmp, auto_end_last_line); - dst->append(tmp); - } +void CleanStringLineEndings(const string& src, string* dst, bool auto_end_last_line) { + if (dst->empty()) { + dst->append(src); + CleanStringLineEndings(dst, auto_end_last_line); + } else { + string tmp = src; + 
CleanStringLineEndings(&tmp, auto_end_last_line); + dst->append(tmp); + } } void CleanStringLineEndings(string* str, bool auto_end_last_line) { - int output_pos = 0; - bool r_seen = false; - int len = str->size(); - - char* p = string_as_array(str); - - for (int input_pos = 0; input_pos < len;) { - if (!r_seen && input_pos + 8 < len) { - uint64 v = UNALIGNED_LOAD64(p + input_pos); - // Loop over groups of 8 bytes at a time until we come across - // a word that has a byte whose value is less than or equal to - // '\r' (i.e. could contain a \n (0x0a) or a \r (0x0d) ). - // - // We use a has_less macro that quickly tests a whole 64-bit - // word to see if any of the bytes has a value < N. - // - // For more details, see: - // http://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord -#define has_less(x, n) (((x)-~0ULL/255*(n))&~(x)&~0ULL/255*128) - if (!has_less(v, '\r' + 1)) { + int output_pos = 0; + bool r_seen = false; + int len = str->size(); + + char* p = string_as_array(str); + + for (int input_pos = 0; input_pos < len;) { + if (!r_seen && input_pos + 8 < len) { + uint64 v = UNALIGNED_LOAD64(p + input_pos); + // Loop over groups of 8 bytes at a time until we come across + // a word that has a byte whose value is less than or equal to + // '\r' (i.e. could contain a \n (0x0a) or a \r (0x0d) ). + // + // We use a has_less macro that quickly tests a whole 64-bit + // word to see if any of the bytes has a value < N. 
+ // + // For more details, see: + // http://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord +#define has_less(x, n) (((x) - ~0ULL / 255 * (n)) & ~(x) & ~0ULL / 255 * 128) + if (!has_less(v, '\r' + 1)) { #undef has_less - // No byte in this word has a value that could be a \r or a \n - if (output_pos != input_pos) - UNALIGNED_STORE64(p + output_pos, v); - input_pos += 8; - output_pos += 8; - continue; - } + // No byte in this word has a value that could be a \r or a \n + if (output_pos != input_pos) UNALIGNED_STORE64(p + output_pos, v); + input_pos += 8; + output_pos += 8; + continue; + } + } + string::const_reference in = p[input_pos]; + if (in == '\r') { + if (r_seen) p[output_pos++] = '\n'; + r_seen = true; + } else if (in == '\n') { + if (input_pos != output_pos) + p[output_pos++] = '\n'; + else + output_pos++; + r_seen = false; + } else { + if (r_seen) p[output_pos++] = '\n'; + r_seen = false; + if (input_pos != output_pos) + p[output_pos++] = in; + else + output_pos++; + } + input_pos++; } - string::const_reference in = p[input_pos]; - if (in == '\r') { - if (r_seen) - p[output_pos++] = '\n'; - r_seen = true; - } else if (in == '\n') { - if (input_pos != output_pos) - p[output_pos++] = '\n'; - else - output_pos++; - r_seen = false; - } else { - if (r_seen) - p[output_pos++] = '\n'; - r_seen = false; - if (input_pos != output_pos) - p[output_pos++] = in; - else - output_pos++; + if (r_seen || (auto_end_last_line && output_pos > 0 && p[output_pos - 1] != '\n')) { + str->resize(output_pos + 1); + str->operator[](output_pos) = '\n'; + } else if (output_pos < len) { + str->resize(output_pos); } - input_pos++; - } - if (r_seen || (auto_end_last_line - && output_pos > 0 - && p[output_pos - 1] != '\n')) { - str->resize(output_pos + 1); - str->operator[](output_pos) = '\n'; - } else if (output_pos < len) { - str->resize(output_pos); - } } - -} // namespace strings +} // namespace strings diff --git a/be/src/gutil/strings/escaping.h 
b/be/src/gutil/strings/escaping.h index 00720eea3e3149..2ab0db7da9f773 100644 --- a/be/src/gutil/strings/escaping.h +++ b/be/src/gutil/strings/escaping.h @@ -22,6 +22,7 @@ #define STRINGS_ESCAPING_H_ #include + #include using std::string; #include @@ -83,8 +84,7 @@ int EscapeStrForCSV(const char* src, char* dest, int dest_len); // *** DEPRECATED: Use CUnescape() in new code *** // ---------------------------------------------------------------------- int UnescapeCEscapeSequences(const char* source, char* dest); -int UnescapeCEscapeSequences(const char* source, char* dest, - vector* errors); +int UnescapeCEscapeSequences(const char* source, char* dest, vector* errors); // ---------------------------------------------------------------------- // UnescapeCEscapeString() @@ -103,8 +103,7 @@ int UnescapeCEscapeSequences(const char* source, char* dest, // *** DEPRECATED: Use CUnescape() in new code *** // ---------------------------------------------------------------------- int UnescapeCEscapeString(const string& src, string* dest); -int UnescapeCEscapeString(const string& src, string* dest, - vector* errors); +int UnescapeCEscapeString(const string& src, string* dest, vector* errors); string UnescapeCEscapeString(const string& src); // ---------------------------------------------------------------------- @@ -137,14 +136,13 @@ string UnescapeCEscapeString(const string& src); // Errors: Sets the description of the first encountered error in // 'error'. To disable error reporting, set 'error' to NULL. // ---------------------------------------------------------------------- -bool CUnescape(const StringPiece& source, char* dest, int* dest_len, - string* error); +bool CUnescape(const StringPiece& source, char* dest, int* dest_len, string* error); bool CUnescape(const StringPiece& source, string* dest, string* error); // A version with no error reporting. 
inline bool CUnescape(const StringPiece& source, string* dest) { - return CUnescape(source, dest, NULL); + return CUnescape(source, dest, NULL); } // ---------------------------------------------------------------------- @@ -159,19 +157,14 @@ inline bool CUnescape(const StringPiece& source, string* dest) { // // ---------------------------------------------------------------------- -bool CUnescapeForNullTerminatedString(const StringPiece& source, - char* dest, - int* dest_len, +bool CUnescapeForNullTerminatedString(const StringPiece& source, char* dest, int* dest_len, string* error); -bool CUnescapeForNullTerminatedString(const StringPiece& source, - string* dest, - string* error); +bool CUnescapeForNullTerminatedString(const StringPiece& source, string* dest, string* error); // A version with no error reporting. -inline bool CUnescapeForNullTerminatedString(const StringPiece& source, - string* dest) { - return CUnescapeForNullTerminatedString(source, dest, NULL); +inline bool CUnescapeForNullTerminatedString(const StringPiece& source, string* dest) { + return CUnescapeForNullTerminatedString(source, dest, NULL); } // ---------------------------------------------------------------------- @@ -191,10 +184,8 @@ inline bool CUnescapeForNullTerminatedString(const StringPiece& source, // ---------------------------------------------------------------------- int CEscapeString(const char* src, int src_len, char* dest, int dest_len); int CHexEscapeString(const char* src, int src_len, char* dest, int dest_len); -int Utf8SafeCEscapeString(const char* src, int src_len, char* dest, - int dest_len); -int Utf8SafeCHexEscapeString(const char* src, int src_len, char* dest, - int dest_len); +int Utf8SafeCEscapeString(const char* src, int src_len, char* dest, int dest_len); +int Utf8SafeCHexEscapeString(const char* src, int src_len, char* dest, int dest_len); // ---------------------------------------------------------------------- // CEscape() @@ -230,25 +221,19 @@ string 
Utf8SafeCHexEscape(const StringPiece& src); // On the other hand, for all strings "src", the following is true: // BackslashUnescape(BackslashEscape(src, ":\\"), ":\\") == src // ---------------------------------------------------------------------- -void BackslashEscape(const StringPiece& src, - const strings::CharSet& to_escape, - string* dest); -void BackslashUnescape(const StringPiece& src, - const strings::CharSet& to_unescape, - string* dest); - -inline string BackslashEscape(const StringPiece& src, - const strings::CharSet& to_escape) { - string s; - BackslashEscape(src, to_escape, &s); - return s; +void BackslashEscape(const StringPiece& src, const strings::CharSet& to_escape, string* dest); +void BackslashUnescape(const StringPiece& src, const strings::CharSet& to_unescape, string* dest); + +inline string BackslashEscape(const StringPiece& src, const strings::CharSet& to_escape) { + string s; + BackslashEscape(src, to_escape, &s); + return s; } -inline string BackslashUnescape(const StringPiece& src, - const strings::CharSet& to_unescape) { - string s; - BackslashUnescape(src, to_unescape, &s); - return s; +inline string BackslashUnescape(const StringPiece& src, const strings::CharSet& to_unescape) { + string s; + BackslashUnescape(src, to_unescape, &s); + return s; } // ---------------------------------------------------------------------- @@ -312,13 +297,13 @@ int QEncodingUnescape(const char* src, int slen, char* dest, int szdest); int Base64Unescape(const char* src, int slen, char* dest, int szdest); bool Base64Unescape(const char* src, int slen, string* dest); inline bool Base64Unescape(const string& src, string* dest) { - return Base64Unescape(src.data(), src.size(), dest); + return Base64Unescape(src.data(), src.size(), dest); } int WebSafeBase64Unescape(const char* src, int slen, char* dest, int szdest); bool WebSafeBase64Unescape(const char* src, int slen, string* dest); inline bool WebSafeBase64Unescape(const string& src, string* dest) { - return 
WebSafeBase64Unescape(src.data(), src.size(), dest); + return WebSafeBase64Unescape(src.data(), src.size(), dest); } // Return the length to use for the output buffer given to the base64 escape @@ -342,8 +327,8 @@ int CalculateBase64EscapedLen(int input_len); // which when set to false will prevent padding with "=". // ---------------------------------------------------------------------- int Base64Escape(const unsigned char* src, int slen, char* dest, int szdest); -int WebSafeBase64Escape(const unsigned char* src, int slen, char* dest, - int szdest, bool do_padding); +int WebSafeBase64Escape(const unsigned char* src, int slen, char* dest, int szdest, + bool do_padding); // Encode src into dest with padding. void Base64Escape(const string& src, string* dest); // Encode src into dest web-safely without padding. @@ -351,10 +336,8 @@ void WebSafeBase64Escape(const string& src, string* dest); // Encode src into dest web-safely with padding. void WebSafeBase64EscapeWithPadding(const string& src, string* dest); -void Base64Escape(const unsigned char* src, int szsrc, - string* dest, bool do_padding); -void WebSafeBase64Escape(const unsigned char* src, int szsrc, - string* dest, bool do_padding); +void Base64Escape(const unsigned char* src, int szsrc, string* dest, bool do_padding); +void WebSafeBase64Escape(const unsigned char* src, int szsrc, string* dest, bool do_padding); // ---------------------------------------------------------------------- // Base32Unescape() @@ -365,7 +348,7 @@ void WebSafeBase64Escape(const unsigned char* src, int szsrc, int Base32Unescape(const char* src, int slen, char* dest, int szdest); bool Base32Unescape(const char* src, int slen, string* dest); inline bool Base32Unescape(const string& src, string* dest) { - return Base32Unescape(src.data(), src.size(), dest); + return Base32Unescape(src.data(), src.size(), dest); } // ---------------------------------------------------------------------- @@ -378,8 +361,7 @@ inline bool 
Base32Unescape(const string& src, string* dest) { // // Note that this is "Base 32 Encoding" from RFC 4648 section 6. // ---------------------------------------------------------------------- -int Base32Escape(const unsigned char* src, size_t szsrc, - char* dest, size_t szdest); +int Base32Escape(const unsigned char* src, size_t szsrc, char* dest, size_t szdest); bool Base32Escape(const string& src, string* dest); // ---------------------------------------------------------------------- @@ -393,8 +375,7 @@ bool Base32Escape(const string& src, string* dest); // Note that this is "Base 32 Encoding with Extended Hex Alphabet" // from RFC 4648 section 7. // ---------------------------------------------------------------------- -int Base32HexEscape(const unsigned char* src, size_t szsrc, - char* dest, size_t szdest); +int Base32HexEscape(const unsigned char* src, size_t szsrc, char* dest, size_t szdest); bool Base32HexEscape(const string& src, string* dest); // Return the length to use for the output buffer given to the base32 escape @@ -466,14 +447,14 @@ void FiveBytesToEightBase32Digits(const unsigned char* in_bytes, char* out); void EscapeFileName(const StringPiece& src, string* dst); void UnescapeFileName(const StringPiece& src, string* dst); inline string EscapeFileName(const StringPiece& src) { - string r; - EscapeFileName(src, &r); - return r; + string r; + EscapeFileName(src, &r); + return r; } inline string UnescapeFileName(const StringPiece& src) { - string r; - UnescapeFileName(src, &r); - return r; + string r; + UnescapeFileName(src, &r); + return r; } // ---------------------------------------------------------------------- @@ -481,24 +462,24 @@ inline string UnescapeFileName(const StringPiece& src) { // ---------------------------------------------------------------------- inline int int_to_hex_digit(int i) { - DCHECK((i >= 0) && (i <= 15)); - return ((i < 10) ? (i + '0') : ((i - 10) + 'A')); + DCHECK((i >= 0) && (i <= 15)); + return ((i < 10) ? 
(i + '0') : ((i - 10) + 'A')); } inline int int_to_lower_hex_digit(int i) { - DCHECK((i >= 0) && (i <= 15)); - return (i < 10) ? (i + '0') : ((i - 10) + 'a'); + DCHECK((i >= 0) && (i <= 15)); + return (i < 10) ? (i + '0') : ((i - 10) + 'a'); } inline int hex_digit_to_int(char c) { - /* Assume ASCII. */ - DCHECK('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61); - DCHECK(ascii_isxdigit(c)); - int x = static_cast(c); - if (x > '9') { - x += 9; - } - return x & 0xf; + /* Assume ASCII. */ + DCHECK('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61); + DCHECK(ascii_isxdigit(c)); + int x = static_cast(c); + if (x > '9') { + x += 9; + } + return x & 0xf; } // ---------------------------------------------------------------------- @@ -568,24 +549,22 @@ string ShellEscape(StringPiece src); // returns the resulting string. template string ShellEscapeCommandLine(InputIterator begin, const InputIterator& end) { - string result; - for (; begin != end; ++begin) { - if (!result.empty()) result.append(" "); - result.append(ShellEscape(*begin)); - } - return result; + string result; + for (; begin != end; ++begin) { + if (!result.empty()) result.append(" "); + result.append(ShellEscape(*begin)); + } + return result; } // Reads at most bytes_to_read from binary_string and writes it to // ascii_string in lower case hex. 
-void ByteStringToAscii(const string& binary_string, int bytes_to_read, - string* ascii_string); - -inline string ByteStringToAscii(const string& binary_string, - int bytes_to_read) { - string result; - ByteStringToAscii(binary_string, bytes_to_read, &result); - return result; +void ByteStringToAscii(const string& binary_string, int bytes_to_read, string* ascii_string); + +inline string ByteStringToAscii(const string& binary_string, int bytes_to_read) { + string result; + ByteStringToAscii(binary_string, bytes_to_read, &result); + return result; } // Converts the hex from ascii_string into binary data and @@ -620,13 +599,12 @@ bool ByteStringFromAscii(const string& ascii_string, string* binary_string); // This does not do the right thing for CRCRLF files created by // broken programs that do another Unix->DOS conversion on files // that are already in CRLF format. -void CleanStringLineEndings(const string& src, string* dst, - bool auto_end_last_line); +void CleanStringLineEndings(const string& src, string* dst, bool auto_end_last_line); // Same as above, but transforms the argument in place. void CleanStringLineEndings(string* str, bool auto_end_last_line); -} // namespace strings +} // namespace strings // The following functions used to be defined in strutil.h in the top-level // namespace, so we alias them here. Do not add new functions here. @@ -673,4 +651,4 @@ using strings::ByteStringFromAscii; using strings::ByteStringToAscii; using strings::CleanStringLineEndings; -#endif // STRINGS_ESCAPING_H_ +#endif // STRINGS_ESCAPING_H_ diff --git a/be/src/gutil/strings/fastmem.h b/be/src/gutil/strings/fastmem.h index ec84071f0ed292..db5696482d97b2 100644 --- a/be/src/gutil/strings/fastmem.h +++ b/be/src/gutil/strings/fastmem.h @@ -35,96 +35,128 @@ namespace strings { // somewhere in their last 8 bytes. Further optimizations can be added later // if it makes sense to do so. 
inline bool memeq(const void* a_v, const void* b_v, size_t n) { - const uint8_t *a = reinterpret_cast(a_v); - const uint8_t *b = reinterpret_cast(b_v); + const uint8_t* a = reinterpret_cast(a_v); + const uint8_t* b = reinterpret_cast(b_v); - size_t n_rounded_down = n & ~static_cast(7); - if (PREDICT_FALSE(n_rounded_down == 0)) { // n <= 7 - return memcmp(a, b, n) == 0; - } - // n >= 8 - uint64 u = UNALIGNED_LOAD64(a) ^ UNALIGNED_LOAD64(b); - uint64 v = UNALIGNED_LOAD64(a + n - 8) ^ UNALIGNED_LOAD64(b + n - 8); - if ((u | v) != 0) { // The first or last 8 bytes differ. - return false; - } - a += 8; - b += 8; - n = n_rounded_down - 8; - if (n > 128) { - // As of 2012, memcmp on x86-64 uses a big unrolled loop with SSE2 - // instructions, and while we could try to do something faster, it - // doesn't seem worth pursuing. - return memcmp(a, b, n) == 0; - } - for (; n >= 16; n -= 16) { - uint64 x = UNALIGNED_LOAD64(a) ^ UNALIGNED_LOAD64(b); - uint64 y = UNALIGNED_LOAD64(a + 8) ^ UNALIGNED_LOAD64(b + 8); - if ((x | y) != 0) { - return false; + size_t n_rounded_down = n & ~static_cast(7); + if (PREDICT_FALSE(n_rounded_down == 0)) { // n <= 7 + return memcmp(a, b, n) == 0; } - a += 16; - b += 16; - } - // n must be 0 or 8 now because it was a multiple of 8 at the top of the loop. - return n == 0 || UNALIGNED_LOAD64(a) == UNALIGNED_LOAD64(b); + // n >= 8 + uint64 u = UNALIGNED_LOAD64(a) ^ UNALIGNED_LOAD64(b); + uint64 v = UNALIGNED_LOAD64(a + n - 8) ^ UNALIGNED_LOAD64(b + n - 8); + if ((u | v) != 0) { // The first or last 8 bytes differ. + return false; + } + a += 8; + b += 8; + n = n_rounded_down - 8; + if (n > 128) { + // As of 2012, memcmp on x86-64 uses a big unrolled loop with SSE2 + // instructions, and while we could try to do something faster, it + // doesn't seem worth pursuing. 
+ return memcmp(a, b, n) == 0; + } + for (; n >= 16; n -= 16) { + uint64 x = UNALIGNED_LOAD64(a) ^ UNALIGNED_LOAD64(b); + uint64 y = UNALIGNED_LOAD64(a + 8) ^ UNALIGNED_LOAD64(b + 8); + if ((x | y) != 0) { + return false; + } + a += 16; + b += 16; + } + // n must be 0 or 8 now because it was a multiple of 8 at the top of the loop. + return n == 0 || UNALIGNED_LOAD64(a) == UNALIGNED_LOAD64(b); } -inline int fastmemcmp_inlined(const void *a_void, const void *b_void, size_t n) { - const uint8_t *a = reinterpret_cast(a_void); - const uint8_t *b = reinterpret_cast(b_void); +inline int fastmemcmp_inlined(const void* a_void, const void* b_void, size_t n) { + const uint8_t* a = reinterpret_cast(a_void); + const uint8_t* b = reinterpret_cast(b_void); - if (n >= 64) { - return memcmp(a, b, n); - } - const void* a_limit = a + n; - const size_t sizeof_uint64 = sizeof(uint64); // NOLINT(runtime/sizeof) - while (a + sizeof_uint64 <= a_limit && - UNALIGNED_LOAD64(a) == UNALIGNED_LOAD64(b)) { - a += sizeof_uint64; - b += sizeof_uint64; - } - const size_t sizeof_uint32 = sizeof(uint32); // NOLINT(runtime/sizeof) - if (a + sizeof_uint32 <= a_limit && - UNALIGNED_LOAD32(a) == UNALIGNED_LOAD32(b)) { - a += sizeof_uint32; - b += sizeof_uint32; - } - while (a < a_limit) { - int d = static_cast(*a++) - static_cast(*b++); - if (d) return d; - } - return 0; + if (n >= 64) { + return memcmp(a, b, n); + } + const void* a_limit = a + n; + const size_t sizeof_uint64 = sizeof(uint64); // NOLINT(runtime/sizeof) + while (a + sizeof_uint64 <= a_limit && UNALIGNED_LOAD64(a) == UNALIGNED_LOAD64(b)) { + a += sizeof_uint64; + b += sizeof_uint64; + } + const size_t sizeof_uint32 = sizeof(uint32); // NOLINT(runtime/sizeof) + if (a + sizeof_uint32 <= a_limit && UNALIGNED_LOAD32(a) == UNALIGNED_LOAD32(b)) { + a += sizeof_uint32; + b += sizeof_uint32; + } + while (a < a_limit) { + int d = static_cast(*a++) - static_cast(*b++); + if (d) return d; + } + return 0; } // The standard memcpy operation is slow 
for variable small sizes. // This implementation inlines the optimal realization for sizes 1 to 16. // To avoid code bloat don't use it in case of not performance-critical spots, // nor when you don't expect very frequent values of size <= 16. -inline void memcpy_inlined(void *dst, const void *src, size_t size) { - // Compiler inlines code with minimal amount of data movement when third - // parameter of memcpy is a constant. - switch (size) { - case 1: memcpy(dst, src, 1); break; - case 2: memcpy(dst, src, 2); break; - case 3: memcpy(dst, src, 3); break; - case 4: memcpy(dst, src, 4); break; - case 5: memcpy(dst, src, 5); break; - case 6: memcpy(dst, src, 6); break; - case 7: memcpy(dst, src, 7); break; - case 8: memcpy(dst, src, 8); break; - case 9: memcpy(dst, src, 9); break; - case 10: memcpy(dst, src, 10); break; - case 11: memcpy(dst, src, 11); break; - case 12: memcpy(dst, src, 12); break; - case 13: memcpy(dst, src, 13); break; - case 14: memcpy(dst, src, 14); break; - case 15: memcpy(dst, src, 15); break; - case 16: memcpy(dst, src, 16); break; - default: memcpy(dst, src, size); break; - } +inline void memcpy_inlined(void* dst, const void* src, size_t size) { + // Compiler inlines code with minimal amount of data movement when third + // parameter of memcpy is a constant. 
+ switch (size) { + case 1: + memcpy(dst, src, 1); + break; + case 2: + memcpy(dst, src, 2); + break; + case 3: + memcpy(dst, src, 3); + break; + case 4: + memcpy(dst, src, 4); + break; + case 5: + memcpy(dst, src, 5); + break; + case 6: + memcpy(dst, src, 6); + break; + case 7: + memcpy(dst, src, 7); + break; + case 8: + memcpy(dst, src, 8); + break; + case 9: + memcpy(dst, src, 9); + break; + case 10: + memcpy(dst, src, 10); + break; + case 11: + memcpy(dst, src, 11); + break; + case 12: + memcpy(dst, src, 12); + break; + case 13: + memcpy(dst, src, 13); + break; + case 14: + memcpy(dst, src, 14); + break; + case 15: + memcpy(dst, src, 15); + break; + case 16: + memcpy(dst, src, 16); + break; + default: + memcpy(dst, src, size); + break; + } } -} // namespace strings +} // namespace strings -#endif // STRINGS_FASTMEM_H_ +#endif // STRINGS_FASTMEM_H_ diff --git a/be/src/gutil/strings/join.cc b/be/src/gutil/strings/join.cc index a9b02f1a552766..a432aac89dda35 100644 --- a/be/src/gutil/strings/join.cc +++ b/be/src/gutil/strings/join.cc @@ -3,6 +3,7 @@ #include "gutil/strings/join.h" #include + #include "gutil/gscoped_ptr.h" #include "gutil/strings/ascii_ctype.h" #include "gutil/strings/escaping.h" @@ -18,20 +19,14 @@ // If result_length_p is not NULL, it will contain the length of the // result string (not including the trailing '\0'). // ---------------------------------------------------------------------- -char* JoinUsing(const vector& components, - const char* delim, - int* result_length_p) { - const int num_components = components.size(); - const int delim_length = strlen(delim); - int num_chars = (num_components > 1) - ? 
delim_length * (num_components - 1) - : 0; - for (int i = 0; i < num_components; ++i) - num_chars += strlen(components[i]); - - auto res_buffer = new char[num_chars + 1]; - return JoinUsingToBuffer(components, delim, num_chars+1, - res_buffer, result_length_p); +char* JoinUsing(const vector& components, const char* delim, int* result_length_p) { + const int num_components = components.size(); + const int delim_length = strlen(delim); + int num_chars = (num_components > 1) ? delim_length * (num_components - 1) : 0; + for (int i = 0; i < num_components; ++i) num_chars += strlen(components[i]); + + auto res_buffer = new char[num_chars + 1]; + return JoinUsingToBuffer(components, delim, num_chars + 1, res_buffer, result_length_p); } // ---------------------------------------------------------------------- @@ -44,41 +39,37 @@ char* JoinUsing(const vector& components, // If result_length_p is not NULL, it will contain the length of the // result string (not including the trailing '\0'). // ---------------------------------------------------------------------- -char* JoinUsingToBuffer(const vector& components, - const char* delim, - int result_buffer_size, - char* result_buffer, - int* result_length_p) { - CHECK(result_buffer != nullptr); - const int num_components = components.size(); - const int max_str_len = result_buffer_size - 1; - char* curr_dest = result_buffer; - int num_chars = 0; - for (int i = 0; (i < num_components) && (num_chars < max_str_len); ++i) { - const char* curr_src = components[i]; - while ((*curr_src != '\0') && (num_chars < max_str_len)) { - *curr_dest = *curr_src; - ++num_chars; - ++curr_dest; - ++curr_src; - } - if (i != (num_components-1)) { // not the last component ==> add separator - curr_src = delim; - while ((*curr_src != '\0') && (num_chars < max_str_len)) { - *curr_dest = *curr_src; - ++num_chars; - ++curr_dest; - ++curr_src; - } +char* JoinUsingToBuffer(const vector& components, const char* delim, + int result_buffer_size, char* 
result_buffer, int* result_length_p) { + CHECK(result_buffer != nullptr); + const int num_components = components.size(); + const int max_str_len = result_buffer_size - 1; + char* curr_dest = result_buffer; + int num_chars = 0; + for (int i = 0; (i < num_components) && (num_chars < max_str_len); ++i) { + const char* curr_src = components[i]; + while ((*curr_src != '\0') && (num_chars < max_str_len)) { + *curr_dest = *curr_src; + ++num_chars; + ++curr_dest; + ++curr_src; + } + if (i != (num_components - 1)) { // not the last component ==> add separator + curr_src = delim; + while ((*curr_src != '\0') && (num_chars < max_str_len)) { + *curr_dest = *curr_src; + ++num_chars; + ++curr_dest; + ++curr_src; + } + } } - } - if (result_buffer_size > 0) - *curr_dest = '\0'; // add null termination - if (result_length_p != nullptr) // set string length value - *result_length_p = num_chars; + if (result_buffer_size > 0) *curr_dest = '\0'; // add null termination + if (result_length_p != nullptr) // set string length value + *result_length_p = num_chars; - return result_buffer; + return result_buffer; } // ---------------------------------------------------------------------- @@ -90,28 +81,21 @@ char* JoinUsingToBuffer(const vector& components, // // ---------------------------------------------------------------------- -void JoinStringsInArray(string const* const* components, - int num_components, - const char* delim, - string * result) { - CHECK(result != nullptr); - result->clear(); - for (int i = 0; i < num_components; i++) { - if (i>0) { - (*result) += delim; +void JoinStringsInArray(string const* const* components, int num_components, const char* delim, + string* result) { + CHECK(result != nullptr); + result->clear(); + for (int i = 0; i < num_components; i++) { + if (i > 0) { + (*result) += delim; + } + (*result) += *(components[i]); } - (*result) += *(components[i]); - } } -void JoinStringsInArray(string const *components, - int num_components, - const char *delim, - 
string *result) { - JoinStringsIterator(components, - components + num_components, - delim, - result); +void JoinStringsInArray(string const* components, int num_components, const char* delim, + string* result) { + JoinStringsIterator(components, components + num_components, delim, result); } // ---------------------------------------------------------------------- @@ -124,22 +108,17 @@ void JoinStringsInArray(string const *components, // as the last argument). // ---------------------------------------------------------------------- -void JoinMapKeysAndValues(const map& components, - const StringPiece& intra_delim, - const StringPiece& inter_delim, - string* result) { - JoinKeysAndValuesIterator(components.begin(), components.end(), - intra_delim, inter_delim, - result); +void JoinMapKeysAndValues(const map& components, const StringPiece& intra_delim, + const StringPiece& inter_delim, string* result) { + JoinKeysAndValuesIterator(components.begin(), components.end(), intra_delim, inter_delim, + result); } -void JoinVectorKeysAndValues(const vector< pair>& components, - const StringPiece& intra_delim, - const StringPiece& inter_delim, +void JoinVectorKeysAndValues(const vector>& components, + const StringPiece& intra_delim, const StringPiece& inter_delim, string* result) { - JoinKeysAndValuesIterator(components.begin(), components.end(), - intra_delim, inter_delim, - result); + JoinKeysAndValuesIterator(components.begin(), components.end(), intra_delim, inter_delim, + result); } // ---------------------------------------------------------------------- @@ -158,53 +137,50 @@ void JoinVectorKeysAndValues(const vector< pair>& components, // [Google], [x], [Buchheit, Paul], [string with " quote in it], [ space ] // ---> [Google,x,"Buchheit, Paul","string with "" quote in it"," space "] // ---------------------------------------------------------------------- -void JoinCSVLineWithDelimiter(const vector& cols, char delimiter, - string* output) { - CHECK(output); - 
CHECK(output->empty()); - vector quoted_cols; - - const string delimiter_str(1, delimiter); - const string escape_chars = delimiter_str + "\""; - - // If the string contains the delimiter or " anywhere, or begins or ends with - // whitespace (ie ascii_isspace() returns true), escape all double-quotes and - // bracket the string in double quotes. string.rbegin() evaluates to the last - // character of the string. - for (const auto& col : cols) { - if ((col.find_first_of(escape_chars) != string::npos) || - (!col.empty() && (ascii_isspace(*col.begin()) || - ascii_isspace(*col.rbegin())))) { - // Double the original size, for escaping, plus two bytes for - // the bracketing double-quotes, and one byte for the closing \0. - int size = 2 * col.size() + 3; - gscoped_array buf(new char[size]); - - // Leave space at beginning and end for bracketing double-quotes. - int escaped_size = strings::EscapeStrForCSV(col.c_str(), - buf.get() + 1, size - 2); - CHECK_GE(escaped_size, 0) << "Buffer somehow wasn't large enough."; - CHECK_GE(size, escaped_size + 3) - << "Buffer should have one space at the beginning for a " - << "double-quote, one at the end for a double-quote, and " - << "one at the end for a closing '\0'"; - *buf.get() = '"'; - *((buf.get() + 1) + escaped_size) = '"'; - *((buf.get() + 1) + escaped_size + 1) = '\0'; - quoted_cols.push_back(string(buf.get(), buf.get() + escaped_size + 2)); - } else { - quoted_cols.push_back(col); +void JoinCSVLineWithDelimiter(const vector& cols, char delimiter, string* output) { + CHECK(output); + CHECK(output->empty()); + vector quoted_cols; + + const string delimiter_str(1, delimiter); + const string escape_chars = delimiter_str + "\""; + + // If the string contains the delimiter or " anywhere, or begins or ends with + // whitespace (ie ascii_isspace() returns true), escape all double-quotes and + // bracket the string in double quotes. string.rbegin() evaluates to the last + // character of the string. 
+ for (const auto& col : cols) { + if ((col.find_first_of(escape_chars) != string::npos) || + (!col.empty() && (ascii_isspace(*col.begin()) || ascii_isspace(*col.rbegin())))) { + // Double the original size, for escaping, plus two bytes for + // the bracketing double-quotes, and one byte for the closing \0. + int size = 2 * col.size() + 3; + gscoped_array buf(new char[size]); + + // Leave space at beginning and end for bracketing double-quotes. + int escaped_size = strings::EscapeStrForCSV(col.c_str(), buf.get() + 1, size - 2); + CHECK_GE(escaped_size, 0) << "Buffer somehow wasn't large enough."; + CHECK_GE(size, escaped_size + 3) + << "Buffer should have one space at the beginning for a " + << "double-quote, one at the end for a double-quote, and " + << "one at the end for a closing '\0'"; + *buf.get() = '"'; + *((buf.get() + 1) + escaped_size) = '"'; + *((buf.get() + 1) + escaped_size + 1) = '\0'; + quoted_cols.push_back(string(buf.get(), buf.get() + escaped_size + 2)); + } else { + quoted_cols.push_back(col); + } } - } - JoinStrings(quoted_cols, delimiter_str, output); + JoinStrings(quoted_cols, delimiter_str, output); } void JoinCSVLine(const vector& cols, string* output) { - JoinCSVLineWithDelimiter(cols, ',', output); + JoinCSVLineWithDelimiter(cols, ',', output); } string JoinCSVLine(const vector& cols) { - string output; - JoinCSVLine(cols, &output); - return output; + string output; + JoinCSVLine(cols, &output); + return output; } diff --git a/be/src/gutil/strings/join.h b/be/src/gutil/strings/join.h index 097cf7aa42b957..f8a22f583be115 100644 --- a/be/src/gutil/strings/join.h +++ b/be/src/gutil/strings/join.h @@ -9,12 +9,13 @@ #include #include + #include using __gnu_cxx::hash; -using __gnu_cxx::hash_map; // Not used in this file. +using __gnu_cxx::hash_map; // Not used in this file. #include using __gnu_cxx::hash; -using __gnu_cxx::hash_set; // Not used in this file. +using __gnu_cxx::hash_set; // Not used in this file. 
#include using std::back_insert_iterator; using std::iterator_traits; @@ -32,13 +33,13 @@ using std::pair; #include using std::vector; +#include "gutil/hash/hash.h" #include "gutil/integral_types.h" #include "gutil/macros.h" -#include "gutil/template_util.h" #include "gutil/strings/numbers.h" -#include "gutil/strings/strcat.h" // For backward compatibility. +#include "gutil/strings/strcat.h" // For backward compatibility. #include "gutil/strings/stringpiece.h" -#include "gutil/hash/hash.h" +#include "gutil/template_util.h" // ---------------------------------------------------------------------- // JoinUsing() @@ -53,9 +54,7 @@ using std::vector; // If result_length_p is not NULL, it will contain the length of the // result string (not including the trailing '\0'). // ---------------------------------------------------------------------- -char* JoinUsing(const vector& components, - const char* delim, - int* result_length_p); +char* JoinUsing(const vector& components, const char* delim, int* result_length_p); // ---------------------------------------------------------------------- // JoinUsingToBuffer() @@ -67,11 +66,8 @@ char* JoinUsing(const vector& components, // If result_length_p is not NULL, it will contain the length of the // result string (not including the trailing '\0'). // ---------------------------------------------------------------------- -char* JoinUsingToBuffer(const vector& components, - const char* delim, - int result_buffer_size, - char* result_buffer, - int* result_length_p); +char* JoinUsingToBuffer(const vector& components, const char* delim, + int result_buffer_size, char* result_buffer, int* result_length_p); // ---------------------------------------------------------------------- // JoinStrings(), JoinStringsIterator(), JoinStringsInArray() @@ -98,174 +94,139 @@ char* JoinUsingToBuffer(const vector& components, // the latter case the target string is cleared and overwritten. 
// ---------------------------------------------------------------------- template -void JoinStrings(const CONTAINER& components, - const StringPiece& delim, - string* result); +void JoinStrings(const CONTAINER& components, const StringPiece& delim, string* result); template -string JoinStrings(const CONTAINER& components, - const StringPiece& delim); +string JoinStrings(const CONTAINER& components, const StringPiece& delim); template -void JoinStringsIterator(const ITERATOR& start, - const ITERATOR& end, - const StringPiece& delim, +void JoinStringsIterator(const ITERATOR& start, const ITERATOR& end, const StringPiece& delim, string* result); template -string JoinStringsIterator(const ITERATOR& start, - const ITERATOR& end, - const StringPiece& delim); +string JoinStringsIterator(const ITERATOR& start, const ITERATOR& end, const StringPiece& delim); // Join the keys of a map using the specified delimiter. -template -void JoinKeysIterator(const ITERATOR& start, - const ITERATOR& end, - const StringPiece& delim, - string *result) { - result->clear(); - for (ITERATOR iter = start; iter != end; ++iter) { - if (iter == start) { - StrAppend(result, iter->first); - } else { - StrAppend(result, delim, iter->first); +template +void JoinKeysIterator(const ITERATOR& start, const ITERATOR& end, const StringPiece& delim, + string* result) { + result->clear(); + for (ITERATOR iter = start; iter != end; ++iter) { + if (iter == start) { + StrAppend(result, iter->first); + } else { + StrAppend(result, delim, iter->first); + } } - } } template -string JoinKeysIterator(const ITERATOR& start, - const ITERATOR& end, - const StringPiece& delim) { - string result; - JoinKeysIterator(start, end, delim, &result); - return result; +string JoinKeysIterator(const ITERATOR& start, const ITERATOR& end, const StringPiece& delim) { + string result; + JoinKeysIterator(start, end, delim, &result); + return result; } // Join the keys and values of a map using the specified delimiters. 
-template -void JoinKeysAndValuesIterator(const ITERATOR& start, - const ITERATOR& end, - const StringPiece& intra_delim, - const StringPiece& inter_delim, - string *result) { - result->clear(); - for (ITERATOR iter = start; iter != end; ++iter) { - if (iter == start) { - StrAppend(result, iter->first, intra_delim, iter->second); - } else { - StrAppend(result, inter_delim, iter->first, intra_delim, iter->second); +template +void JoinKeysAndValuesIterator(const ITERATOR& start, const ITERATOR& end, + const StringPiece& intra_delim, const StringPiece& inter_delim, + string* result) { + result->clear(); + for (ITERATOR iter = start; iter != end; ++iter) { + if (iter == start) { + StrAppend(result, iter->first, intra_delim, iter->second); + } else { + StrAppend(result, inter_delim, iter->first, intra_delim, iter->second); + } } - } } template -string JoinKeysAndValuesIterator(const ITERATOR& start, - const ITERATOR& end, - const StringPiece& intra_delim, - const StringPiece& inter_delim) { - string result; - JoinKeysAndValuesIterator(start, end, intra_delim, inter_delim, &result); - return result; +string JoinKeysAndValuesIterator(const ITERATOR& start, const ITERATOR& end, + const StringPiece& intra_delim, const StringPiece& inter_delim) { + string result; + JoinKeysAndValuesIterator(start, end, intra_delim, inter_delim, &result); + return result; } -void JoinStringsInArray(string const* const* components, - int num_components, - const char* delim, +void JoinStringsInArray(string const* const* components, int num_components, const char* delim, string* result); -void JoinStringsInArray(string const* components, - int num_components, - const char* delim, +void JoinStringsInArray(string const* components, int num_components, const char* delim, string* result); -string JoinStringsInArray(string const* const* components, - int num_components, - const char* delim); -string JoinStringsInArray(string const* components, - int num_components, - const char* delim); +string 
JoinStringsInArray(string const* const* components, int num_components, const char* delim); +string JoinStringsInArray(string const* components, int num_components, const char* delim); // ---------------------------------------------------------------------- // Definitions of above JoinStrings* methods // ---------------------------------------------------------------------- template -inline void JoinStrings(const CONTAINER& components, - const StringPiece& delim, - string* result) { - JoinStringsIterator(components.begin(), components.end(), delim, result); +inline void JoinStrings(const CONTAINER& components, const StringPiece& delim, string* result) { + JoinStringsIterator(components.begin(), components.end(), delim, result); } template -inline string JoinStrings(const CONTAINER& components, - const StringPiece& delim) { - string result; - JoinStrings(components, delim, &result); - return result; +inline string JoinStrings(const CONTAINER& components, const StringPiece& delim) { + string result; + JoinStrings(components, delim, &result); + return result; } // Join the strings produced by calling 'functor' on each element of // 'components'. 
-template -string JoinMapped(const CONTAINER& components, - const FUNC& functor, - const StringPiece& delim) { - string result; - for (typename CONTAINER::const_iterator iter = components.begin(); - iter != components.end(); - iter++) { - if (iter != components.begin()) { - result.append(delim.data(), delim.size()); +template +string JoinMapped(const CONTAINER& components, const FUNC& functor, const StringPiece& delim) { + string result; + for (typename CONTAINER::const_iterator iter = components.begin(); iter != components.end(); + iter++) { + if (iter != components.begin()) { + result.append(delim.data(), delim.size()); + } + result.append(functor(*iter)); } - result.append(functor(*iter)); - } - return result; + return result; } template -void JoinStringsIterator(const ITERATOR& start, - const ITERATOR& end, - const StringPiece& delim, +void JoinStringsIterator(const ITERATOR& start, const ITERATOR& end, const StringPiece& delim, string* result) { - result->clear(); - - // Precompute resulting length so we can reserve() memory in one shot. - if (start != end) { - int length = delim.size()*(distance(start, end)-1); - for (ITERATOR iter = start; iter != end; ++iter) { - length += iter->size(); + result->clear(); + + // Precompute resulting length so we can reserve() memory in one shot. + if (start != end) { + int length = delim.size() * (distance(start, end) - 1); + for (ITERATOR iter = start; iter != end; ++iter) { + length += iter->size(); + } + result->reserve(length); } - result->reserve(length); - } - // Now combine everything. - for (ITERATOR iter = start; iter != end; ++iter) { - if (iter != start) { - result->append(delim.data(), delim.size()); + // Now combine everything. 
+ for (ITERATOR iter = start; iter != end; ++iter) { + if (iter != start) { + result->append(delim.data(), delim.size()); + } + result->append(iter->data(), iter->size()); } - result->append(iter->data(), iter->size()); - } } template -inline string JoinStringsIterator(const ITERATOR& start, - const ITERATOR& end, +inline string JoinStringsIterator(const ITERATOR& start, const ITERATOR& end, const StringPiece& delim) { - string result; - JoinStringsIterator(start, end, delim, &result); - return result; + string result; + JoinStringsIterator(start, end, delim, &result); + return result; } -inline string JoinStringsInArray(string const* const* components, - int num_components, +inline string JoinStringsInArray(string const* const* components, int num_components, const char* delim) { - string result; - JoinStringsInArray(components, num_components, delim, &result); - return result; + string result; + JoinStringsInArray(components, num_components, delim, &result); + return result; } -inline string JoinStringsInArray(string const* components, - int num_components, - const char* delim) { - string result; - JoinStringsInArray(components, num_components, delim, &result); - return result; +inline string JoinStringsInArray(string const* components, int num_components, const char* delim) { + string result; + JoinStringsInArray(components, num_components, delim, &result); + return result; } // ---------------------------------------------------------------------- @@ -279,24 +240,17 @@ inline string JoinStringsInArray(string const* components, // as the last argument). 
// ---------------------------------------------------------------------- -void JoinMapKeysAndValues(const map& components, - const StringPiece& intra_delim, - const StringPiece& inter_delim, - string* result); -void JoinVectorKeysAndValues(const vector< pair>& components, - const StringPiece& intra_delim, - const StringPiece& inter_delim, +void JoinMapKeysAndValues(const map& components, const StringPiece& intra_delim, + const StringPiece& inter_delim, string* result); +void JoinVectorKeysAndValues(const vector>& components, + const StringPiece& intra_delim, const StringPiece& inter_delim, string* result); // DEPRECATED(jyrki): use JoinKeysAndValuesIterator directly. -template -void JoinHashMapKeysAndValues(const T& container, - const StringPiece& intra_delim, - const StringPiece& inter_delim, - string* result) { - JoinKeysAndValuesIterator(container.begin(), container.end(), - intra_delim, inter_delim, - result); +template +void JoinHashMapKeysAndValues(const T& container, const StringPiece& intra_delim, + const StringPiece& inter_delim, string* result) { + JoinKeysAndValuesIterator(container.begin(), container.end(), intra_delim, inter_delim, result); } // ---------------------------------------------------------------------- @@ -321,9 +275,7 @@ void JoinHashMapKeysAndValues(const T& container, // ---------------------------------------------------------------------- void JoinCSVLine(const vector& original_cols, string* output); string JoinCSVLine(const vector& original_cols); -void JoinCSVLineWithDelimiter(const vector& original_cols, - char delimiter, - string* output); +void JoinCSVLineWithDelimiter(const vector& original_cols, char delimiter, string* output); // ---------------------------------------------------------------------- // JoinElements() @@ -337,53 +289,43 @@ void JoinCSVLineWithDelimiter(const vector& original_cols, // ---------------------------------------------------------------------- template -void JoinElementsIterator(ITERATOR first, - 
ITERATOR last, - StringPiece delim, - string* result) { - result->clear(); - for (ITERATOR it = first; it != last; ++it) { - if (it != first) { - StrAppend(result, delim); +void JoinElementsIterator(ITERATOR first, ITERATOR last, StringPiece delim, string* result) { + result->clear(); + for (ITERATOR it = first; it != last; ++it) { + if (it != first) { + StrAppend(result, delim); + } + StrAppend(result, *it); } - StrAppend(result, *it); - } } template -string JoinElementsIterator(ITERATOR first, - ITERATOR last, - StringPiece delim) { - string result; - JoinElementsIterator(first, last, delim, &result); - return result; +string JoinElementsIterator(ITERATOR first, ITERATOR last, StringPiece delim) { + string result; + JoinElementsIterator(first, last, delim, &result); + return result; } template -inline void JoinElements(const CONTAINER& components, - StringPiece delim, - string* result) { - JoinElementsIterator(components.begin(), components.end(), delim, result); +inline void JoinElements(const CONTAINER& components, StringPiece delim, string* result) { + JoinElementsIterator(components.begin(), components.end(), delim, result); } template inline string JoinElements(const CONTAINER& components, StringPiece delim) { - string result; - JoinElements(components, delim, &result); - return result; + string result; + JoinElements(components, delim, &result); + return result; } template -void JoinInts(const CONTAINER& components, - const char* delim, - string* result) { - JoinElements(components, delim, result); +void JoinInts(const CONTAINER& components, const char* delim, string* result) { + JoinElements(components, delim, result); } template -inline string JoinInts(const CONTAINER& components, - const char* delim) { - return JoinElements(components, delim); +inline string JoinInts(const CONTAINER& components, const char* delim) { + return JoinElements(components, delim); } -#endif // STRINGS_JOIN_H_ +#endif // STRINGS_JOIN_H_ diff --git 
a/be/src/gutil/strings/memutil.cc b/be/src/gutil/strings/memutil.cc index ace02b359ba7cc..501de634eb75a7 100644 --- a/be/src/gutil/strings/memutil.cc +++ b/be/src/gutil/strings/memutil.cc @@ -4,135 +4,123 @@ #include "gutil/strings/memutil.h" -#include // for malloc, NULL +#include // for malloc, NULL -#include "gutil/strings/ascii_ctype.h" // for ascii_tolower +#include "gutil/strings/ascii_ctype.h" // for ascii_tolower -int memcasecmp(const char *s1, const char *s2, size_t len) { - const unsigned char *us1 = reinterpret_cast(s1); - const unsigned char *us2 = reinterpret_cast(s2); +int memcasecmp(const char* s1, const char* s2, size_t len) { + const unsigned char* us1 = reinterpret_cast(s1); + const unsigned char* us2 = reinterpret_cast(s2); - for ( int i = 0; i < len; i++ ) { - const int diff = - static_cast(static_cast(ascii_tolower(us1[i]))) - - static_cast(static_cast(ascii_tolower(us2[i]))); - if (diff != 0) return diff; - } - return 0; + for (int i = 0; i < len; i++) { + const int diff = static_cast(static_cast(ascii_tolower(us1[i]))) - + static_cast(static_cast(ascii_tolower(us2[i]))); + if (diff != 0) return diff; + } + return 0; } -char *memdup(const char *s, size_t slen) { - void *copy; - if ( (copy=malloc(slen)) == nullptr ) - return nullptr; - memcpy(copy, s, slen); - return reinterpret_cast(copy); +char* memdup(const char* s, size_t slen) { + void* copy; + if ((copy = malloc(slen)) == nullptr) return nullptr; + memcpy(copy, s, slen); + return reinterpret_cast(copy); } -char *memrchr(const char *s, int c, size_t slen) { - for (const char* e = s + slen-1; e >= s; e--) { - if (*e == c) - return const_cast(e); - } - return nullptr; +char* memrchr(const char* s, int c, size_t slen) { + for (const char* e = s + slen - 1; e >= s; e--) { + if (*e == c) return const_cast(e); + } + return nullptr; } -size_t memspn(const char *s, size_t slen, const char *accept) { - const char *p = s, *spanp; - char c, sc; - - cont: - c = *p++; - if ( slen-- == 0 ) - return p-1 
- s; - for (spanp = accept; (sc=*spanp++) != '\0';) - if (sc == c) - goto cont; - return p-1 - s; -} +size_t memspn(const char* s, size_t slen, const char* accept) { + const char *p = s, *spanp; + char c, sc; +cont: + c = *p++; + if (slen-- == 0) return p - 1 - s; + for (spanp = accept; (sc = *spanp++) != '\0';) + if (sc == c) goto cont; + return p - 1 - s; +} -size_t memcspn(const char *s, size_t slen, const char *reject) { - const char *p = s, *spanp; - char c, sc; +size_t memcspn(const char* s, size_t slen, const char* reject) { + const char *p = s, *spanp; + char c, sc; - while ( slen-- != 0 ) { - c = *p++; - for (spanp = reject; (sc=*spanp++) != '\0';) - if (sc == c) - return p-1 - s; - } - return p - s; + while (slen-- != 0) { + c = *p++; + for (spanp = reject; (sc = *spanp++) != '\0';) + if (sc == c) return p - 1 - s; + } + return p - s; } -char *mempbrk(const char *s, size_t slen, const char *accept) { - const char *scanp; - int sc; +char* mempbrk(const char* s, size_t slen, const char* accept) { + const char* scanp; + int sc; - for ( ; slen; ++s, --slen ) { - for (scanp = accept; (sc=*scanp++) != '\0';) - if (sc == *s) - return const_cast(s); - } - return nullptr; + for (; slen; ++s, --slen) { + for (scanp = accept; (sc = *scanp++) != '\0';) + if (sc == *s) return const_cast(s); + } + return nullptr; } -template -const char *int_memmatch(const char *phaystack, size_t haylen, - const char *pneedle, size_t neelen) { - if (0 == neelen) { - return phaystack; // even if haylen is 0 - } - const unsigned char *haystack = (const unsigned char *) phaystack; - const unsigned char *hayend = (const unsigned char *) phaystack + haylen; - const unsigned char *needlestart = (const unsigned char *) pneedle; - const unsigned char *needle = (const unsigned char *) pneedle; - const unsigned char *needleend = (const unsigned char *) pneedle + neelen; - - for (; haystack < hayend; ++haystack) { - unsigned char hay = case_sensitive ? 
*haystack : - static_cast(ascii_tolower(*haystack)); - unsigned char nee = case_sensitive ? *needle : - static_cast(ascii_tolower(*needle)); - if (hay == nee) { - if (++needle == needleend) { - return (const char *) (haystack + 1 - neelen); - } - } else if (needle != needlestart) { - // must back up haystack in case a prefix matched (find "aab" in "aaab") - haystack -= needle - needlestart; // for loop will advance one more - needle = needlestart; +template +const char* int_memmatch(const char* phaystack, size_t haylen, const char* pneedle, size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + const unsigned char* haystack = (const unsigned char*)phaystack; + const unsigned char* hayend = (const unsigned char*)phaystack + haylen; + const unsigned char* needlestart = (const unsigned char*)pneedle; + const unsigned char* needle = (const unsigned char*)pneedle; + const unsigned char* needleend = (const unsigned char*)pneedle + neelen; + + for (; haystack < hayend; ++haystack) { + unsigned char hay = + case_sensitive ? *haystack : static_cast(ascii_tolower(*haystack)); + unsigned char nee = + case_sensitive ? 
*needle : static_cast(ascii_tolower(*needle)); + if (hay == nee) { + if (++needle == needleend) { + return (const char*)(haystack + 1 - neelen); + } + } else if (needle != needlestart) { + // must back up haystack in case a prefix matched (find "aab" in "aaab") + haystack -= needle - needlestart; // for loop will advance one more + needle = needlestart; + } } - } - return nullptr; + return nullptr; } // explicit template instantiations -template const char *int_memmatch(const char *phaystack, size_t haylen, - const char *pneedle, size_t neelen); -template const char *int_memmatch(const char *phaystack, size_t haylen, - const char *pneedle, size_t neelen); +template const char* int_memmatch(const char* phaystack, size_t haylen, const char* pneedle, + size_t neelen); +template const char* int_memmatch(const char* phaystack, size_t haylen, const char* pneedle, + size_t neelen); // This is significantly faster for case-sensitive matches with very // few possible matches. See unit test for benchmarks. -const char *memmatch(const char *phaystack, size_t haylen, - const char *pneedle, size_t neelen) { - if (0 == neelen) { - return phaystack; // even if haylen is 0 - } - if (haylen < neelen) +const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + if (haylen < neelen) return nullptr; + + const char* match; + const char* hayend = phaystack + haylen - neelen + 1; + // A C-style cast is used here to work around the fact that memchr returns a + // void* on Posix-compliant systems and const void* on Windows. 
+ while ((match = (const char*)(memchr(phaystack, pneedle[0], hayend - phaystack)))) { + if (memcmp(match, pneedle, neelen) == 0) + return match; + else + phaystack = match + 1; + } return nullptr; - - const char* match; - const char* hayend = phaystack + haylen - neelen + 1; - // A C-style cast is used here to work around the fact that memchr returns a - // void* on Posix-compliant systems and const void* on Windows. - while ((match = (const char*)(memchr(phaystack, pneedle[0], - hayend - phaystack)))) { - if (memcmp(match, pneedle, neelen) == 0) - return match; - else - phaystack = match + 1; - } - return nullptr; } diff --git a/be/src/gutil/strings/memutil.h b/be/src/gutil/strings/memutil.h index b230149f0bcc01..83425da9b5fd13 100644 --- a/be/src/gutil/strings/memutil.h +++ b/be/src/gutil/strings/memutil.h @@ -54,100 +54,87 @@ #define STRINGS_MEMUTIL_H_ #include -#include // to get the POSIX mem*() routines +#include // to get the POSIX mem*() routines -#include "gutil/port.h" // disable some warnings on Windows +#include "gutil/port.h" // disable some warnings on Windows -inline char *memcat(char *dest, size_t destlen, - const char *src, size_t srclen) { - return reinterpret_cast(memcpy(dest + destlen, src, srclen)); +inline char* memcat(char* dest, size_t destlen, const char* src, size_t srclen) { + return reinterpret_cast(memcpy(dest + destlen, src, srclen)); } -int memcasecmp(const char *s1, const char *s2, size_t len); -char *memdup(const char *s, size_t slen); -char *memrchr(const char *s, int c, size_t slen); -size_t memspn(const char *s, size_t slen, const char *accept); -size_t memcspn(const char *s, size_t slen, const char *reject); -char *mempbrk(const char *s, size_t slen, const char *accept); +int memcasecmp(const char* s1, const char* s2, size_t len); +char* memdup(const char* s, size_t slen); +char* memrchr(const char* s, int c, size_t slen); +size_t memspn(const char* s, size_t slen, const char* accept); +size_t memcspn(const char* s, size_t 
slen, const char* reject); +char* mempbrk(const char* s, size_t slen, const char* accept); // This is for internal use only. Don't call this directly -template -const char * int_memmatch(const char * phaystack, size_t haylen, - const char * pneedle, size_t neelen); +template +const char* int_memmatch(const char* phaystack, size_t haylen, const char* pneedle, size_t neelen); // These are the guys you can call directly -inline const char * memstr(const char *phaystack, size_t haylen, - const char *pneedle) { - return int_memmatch(phaystack, haylen, pneedle, strlen(pneedle)); +inline const char* memstr(const char* phaystack, size_t haylen, const char* pneedle) { + return int_memmatch(phaystack, haylen, pneedle, strlen(pneedle)); } -inline const char * memcasestr(const char *phaystack, size_t haylen, - const char *pneedle) { - return int_memmatch(phaystack, haylen, pneedle, strlen(pneedle)); +inline const char* memcasestr(const char* phaystack, size_t haylen, const char* pneedle) { + return int_memmatch(phaystack, haylen, pneedle, strlen(pneedle)); } -inline const char * memmem(const char *phaystack, size_t haylen, - const char *pneedle, size_t needlelen) { - return int_memmatch(phaystack, haylen, pneedle, needlelen); +inline const char* memmem(const char* phaystack, size_t haylen, const char* pneedle, + size_t needlelen) { + return int_memmatch(phaystack, haylen, pneedle, needlelen); } -inline const char * memcasemem(const char *phaystack, size_t haylen, - const char *pneedle, size_t needlelen) { - return int_memmatch(phaystack, haylen, pneedle, needlelen); +inline const char* memcasemem(const char* phaystack, size_t haylen, const char* pneedle, + size_t needlelen) { + return int_memmatch(phaystack, haylen, pneedle, needlelen); } // This is significantly faster for case-sensitive matches with very // few possible matches. See unit test for benchmarks. 
-const char *memmatch(const char *phaystack, size_t haylen, - const char *pneedle, size_t neelen); +const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, size_t neelen); // The ""'s catch people who don't pass in a literal for "str" -#define strliterallen(str) (sizeof("" str "")-1) +#define strliterallen(str) (sizeof("" str "") - 1) // Must use a string literal for prefix. -#define memprefix(str, len, prefix) \ - ( (((len) >= strliterallen(prefix)) \ - && memcmp(str, prefix, strliterallen(prefix)) == 0) \ - ? str + strliterallen(prefix) \ - : NULL ) - -#define memcaseprefix(str, len, prefix) \ - ( (((len) >= strliterallen(prefix)) \ - && memcasecmp(str, prefix, strliterallen(prefix)) == 0) \ - ? str + strliterallen(prefix) \ - : NULL ) +#define memprefix(str, len, prefix) \ + ((((len) >= strliterallen(prefix)) && memcmp(str, prefix, strliterallen(prefix)) == 0) \ + ? str + strliterallen(prefix) \ + : NULL) -// Must use a string literal for suffix. -#define memsuffix(str, len, suffix) \ - ( (((len) >= strliterallen(suffix)) \ - && memcmp(str + (len) - strliterallen(suffix), suffix, \ - strliterallen(suffix)) == 0) \ - ? str + (len) - strliterallen(suffix) \ - : NULL ) +#define memcaseprefix(str, len, prefix) \ + ((((len) >= strliterallen(prefix)) && memcasecmp(str, prefix, strliterallen(prefix)) == 0) \ + ? str + strliterallen(prefix) \ + : NULL) -#define memcasesuffix(str, len, suffix) \ - ( (((len) >= strliterallen(suffix)) \ - && memcasecmp(str + (len) - strliterallen(suffix), suffix, \ - strliterallen(suffix)) == 0) \ - ? str + (len) - strliterallen(suffix) \ - : NULL ) +// Must use a string literal for suffix. +#define memsuffix(str, len, suffix) \ + ((((len) >= strliterallen(suffix)) && \ + memcmp(str + (len)-strliterallen(suffix), suffix, strliterallen(suffix)) == 0) \ + ? 
str + (len)-strliterallen(suffix) \ + : NULL) -#define memis(str, len, literal) \ - ( (((len) == strliterallen(literal)) \ - && memcmp(str, literal, strliterallen(literal)) == 0) ) +#define memcasesuffix(str, len, suffix) \ + ((((len) >= strliterallen(suffix)) && \ + memcasecmp(str + (len)-strliterallen(suffix), suffix, strliterallen(suffix)) == 0) \ + ? str + (len)-strliterallen(suffix) \ + : NULL) -#define memcaseis(str, len, literal) \ - ( (((len) == strliterallen(literal)) \ - && memcasecmp(str, literal, strliterallen(literal)) == 0) ) +#define memis(str, len, literal) \ + ((((len) == strliterallen(literal)) && memcmp(str, literal, strliterallen(literal)) == 0)) +#define memcaseis(str, len, literal) \ + ((((len) == strliterallen(literal)) && memcasecmp(str, literal, strliterallen(literal)) == 0)) inline int memcount(const char* buf, size_t len, char c) { - int num = 0; - for (int i = 0; i < len; i++) { - if (buf[i] == c) - num++; - } - return num; + int num = 0; + for (int i = 0; i < len; i++) { + if (buf[i] == c) num++; + } + return num; } -#endif // STRINGS_MEMUTIL_H_ +#endif // STRINGS_MEMUTIL_H_ diff --git a/be/src/gutil/strings/numbers.cc b/be/src/gutil/strings/numbers.cc index 05ce40f692768b..83fd47a5597a19 100644 --- a/be/src/gutil/strings/numbers.cc +++ b/be/src/gutil/strings/numbers.cc @@ -9,23 +9,25 @@ #include #include #include -#include // for DBL_DIG and FLT_DIG -#include // for HUGE_VAL +#include // for DBL_DIG and FLT_DIG +#include // for HUGE_VAL #include #include #include + #include using std::numeric_limits; #include using std::string; -#include "gutil/int128.h" -#include "gutil/integral_types.h" #include + #include "gutil/gscoped_ptr.h" +#include "gutil/int128.h" +#include "gutil/integral_types.h" #include "gutil/stringprintf.h" -#include "gutil/strtoint.h" #include "gutil/strings/ascii_ctype.h" +#include "gutil/strtoint.h" // Reads a in *text, which may not be whitespace-initiated. 
// *len is the length, or -1 if text is '\0'-terminated, which is more @@ -39,66 +41,58 @@ using std::string; // the last symbol seen was a '.', which will be ignored. This is // useful in case that an initial '-' or final '.' would have another // meaning (as a separator, e.g.). -static inline bool EatADouble(const char** text, int* len, bool allow_question, - double* val, bool* initial_minus, - bool* final_period) { - const char* pos = *text; - int rem = *len; // remaining length, or -1 if null-terminated - - if (pos == nullptr || rem == 0) - return false; - - if (allow_question && (*pos == '?')) { - *text = pos + 1; - if (rem != -1) - *len = rem - 1; - return true; - } +static inline bool EatADouble(const char** text, int* len, bool allow_question, double* val, + bool* initial_minus, bool* final_period) { + const char* pos = *text; + int rem = *len; // remaining length, or -1 if null-terminated - if (initial_minus) { - if ((*initial_minus = (*pos == '-'))) { // Yes, we want assignment. - if (rem == 1) - return false; - ++pos; - if (rem != -1) - --rem; + if (pos == nullptr || rem == 0) return false; + + if (allow_question && (*pos == '?')) { + *text = pos + 1; + if (rem != -1) *len = rem - 1; + return true; } - } - - // a double has to begin one of these (we don't allow 'inf' or whitespace) - // this also serves as an optimization. - if (!strchr("-+.0123456789", *pos)) - return false; - - // strtod is evil in that the second param is a non-const char** - char* end_nonconst; - double retval; - if (rem == -1) { - retval = strtod(pos, &end_nonconst); - } else { - // not '\0'-terminated & no obvious terminator found. must copy. 
- gscoped_array buf(new char[rem + 1]); - memcpy(buf.get(), pos, rem); - buf[rem] = '\0'; - retval = strtod(buf.get(), &end_nonconst); - end_nonconst = const_cast(pos) + (end_nonconst - buf.get()); - } - - if (pos == end_nonconst) - return false; - - if (final_period) { - *final_period = (end_nonconst[-1] == '.'); - if (*final_period) { - --end_nonconst; + + if (initial_minus) { + if ((*initial_minus = (*pos == '-'))) { // Yes, we want assignment. + if (rem == 1) return false; + ++pos; + if (rem != -1) --rem; + } + } + + // a double has to begin one of these (we don't allow 'inf' or whitespace) + // this also serves as an optimization. + if (!strchr("-+.0123456789", *pos)) return false; + + // strtod is evil in that the second param is a non-const char** + char* end_nonconst; + double retval; + if (rem == -1) { + retval = strtod(pos, &end_nonconst); + } else { + // not '\0'-terminated & no obvious terminator found. must copy. + gscoped_array buf(new char[rem + 1]); + memcpy(buf.get(), pos, rem); + buf[rem] = '\0'; + retval = strtod(buf.get(), &end_nonconst); + end_nonconst = const_cast(pos) + (end_nonconst - buf.get()); + } + + if (pos == end_nonconst) return false; + + if (final_period) { + *final_period = (end_nonconst[-1] == '.'); + if (*final_period) { + --end_nonconst; + } } - } - *text = end_nonconst; - *val = retval; - if (rem != -1) - *len = rem - (end_nonconst - pos); - return true; + *text = end_nonconst; + *val = retval; + if (rem != -1) *len = rem - (end_nonconst - pos); + return true; } // If update, consume one of acceptable_chars from string *text of @@ -106,156 +100,139 @@ static inline bool EatADouble(const char** text, int* len, bool allow_question, // *text is null-terminated. If update is false, don't alter *text and // *len. If null_ok, then update must be false, and, if text has no // more chars, then return '\1' (arbitrary nonzero). 
-static inline char EatAChar(const char** text, int* len, - const char* acceptable_chars, - bool update, bool null_ok) { - assert(!(update && null_ok)); - if ((*len == 0) || (**text == '\0')) - return (null_ok ? '\1' : '\0'); // if null_ok, we're in predicate mode. - - if (strchr(acceptable_chars, **text)) { - char result = **text; - if (update) { - ++(*text); - if (*len != -1) - --(*len); +static inline char EatAChar(const char** text, int* len, const char* acceptable_chars, bool update, + bool null_ok) { + assert(!(update && null_ok)); + if ((*len == 0) || (**text == '\0')) + return (null_ok ? '\1' : '\0'); // if null_ok, we're in predicate mode. + + if (strchr(acceptable_chars, **text)) { + char result = **text; + if (update) { + ++(*text); + if (*len != -1) --(*len); + } + return result; } - return result; - } - return '\0'; // no match; no update + return '\0'; // no match; no update } // Parse an expression in 'text' of the form: or // See full comments in header file. -bool ParseDoubleRange(const char* text, int len, const char** end, - double* from, double* to, bool* is_currency, - const DoubleRangeOptions& opts) { - const double from_default = opts.dont_modify_unbounded ? *from : -HUGE_VAL; - - if (!opts.dont_modify_unbounded) { - *from = -HUGE_VAL; - *to = HUGE_VAL; - } - if (opts.allow_currency && (is_currency != nullptr)) - *is_currency = false; - - assert(len >= -1); - assert(opts.separators && (*opts.separators != '\0')); - // these aren't valid separators - assert(strlen(opts.separators) == - strcspn(opts.separators, "+0123456789eE$")); - assert(opts.num_required_bounds <= 2); - - // Handle easier cases of comparators (<, >) first - if (opts.allow_comparators) { - char comparator = EatAChar(&text, &len, "<>", true, false); - if (comparator) { - double* dest = (comparator == '>') ? 
from : to; - EatAChar(&text, &len, "=", true, false); - if (opts.allow_currency && EatAChar(&text, &len, "$", true, false)) - if (is_currency != nullptr) - *is_currency = true; - if (!EatADouble(&text, &len, opts.allow_unbounded_markers, dest, nullptr, - nullptr)) - return false; - *end = text; - return EatAChar(&text, &len, opts.acceptable_terminators, false, - opts.null_terminator_ok); +bool ParseDoubleRange(const char* text, int len, const char** end, double* from, double* to, + bool* is_currency, const DoubleRangeOptions& opts) { + const double from_default = opts.dont_modify_unbounded ? *from : -HUGE_VAL; + + if (!opts.dont_modify_unbounded) { + *from = -HUGE_VAL; + *to = HUGE_VAL; + } + if (opts.allow_currency && (is_currency != nullptr)) *is_currency = false; + + assert(len >= -1); + assert(opts.separators && (*opts.separators != '\0')); + // these aren't valid separators + assert(strlen(opts.separators) == strcspn(opts.separators, "+0123456789eE$")); + assert(opts.num_required_bounds <= 2); + + // Handle easier cases of comparators (<, >) first + if (opts.allow_comparators) { + char comparator = EatAChar(&text, &len, "<>", true, false); + if (comparator) { + double* dest = (comparator == '>') ? from : to; + EatAChar(&text, &len, "=", true, false); + if (opts.allow_currency && EatAChar(&text, &len, "$", true, false)) + if (is_currency != nullptr) *is_currency = true; + if (!EatADouble(&text, &len, opts.allow_unbounded_markers, dest, nullptr, nullptr)) + return false; + *end = text; + return EatAChar(&text, &len, opts.acceptable_terminators, false, + opts.null_terminator_ok); + } } - } - - bool seen_dollar = (opts.allow_currency && - EatAChar(&text, &len, "$", true, false)); - - // If we see a '-', two things could be happening: - or - // ... where is negative. Treat initial minus sign as a - // separator if '-' is a valid separator. - // Similarly, we prepare for the possibility of seeing a '.' at the - // end of the number, in case '.' 
(which really means '..') is a - // separator. - bool initial_minus_sign = false; - bool final_period = false; - bool* check_initial_minus = (strchr(opts.separators, '-') && !seen_dollar - && (opts.num_required_bounds < 2)) ? - (&initial_minus_sign) : nullptr; - bool* check_final_period = strchr(opts.separators, '.') ? (&final_period) - : nullptr; - bool double_seen = EatADouble(&text, &len, opts.allow_unbounded_markers, - from, check_initial_minus, check_final_period); - - // if 2 bounds required, must see a double (or '?' if allowed) - if ((opts.num_required_bounds == 2) && !double_seen) return false; - - if (seen_dollar && !double_seen) { - --text; - if (len != -1) - ++len; - seen_dollar = false; - } - // If we're here, we've read the first double and now expect a - // separator and another . - char separator = EatAChar(&text, &len, opts.separators, true, false); - if (separator == '.') { - // seen one '.' as separator; must check for another; perhaps set seplen=2 - if (EatAChar(&text, &len, ".", true, false)) { - if (final_period) { - // We may have three periods in a row. The first is part of the - // first number, the others are a separator. Policy: 234...567 - // is "234." to "567", not "234" to ".567". - EatAChar(&text, &len, ".", true, false); - } - } else if (!EatAChar(&text, &len, opts.separators, true, false)) { - // just one '.' and no other separator; uneat the first '.' we saw - --text; - if (len != -1) - ++len; - separator = '\0'; + + bool seen_dollar = (opts.allow_currency && EatAChar(&text, &len, "$", true, false)); + + // If we see a '-', two things could be happening: - or + // ... where is negative. Treat initial minus sign as a + // separator if '-' is a valid separator. + // Similarly, we prepare for the possibility of seeing a '.' at the + // end of the number, in case '.' (which really means '..') is a + // separator. 
+ bool initial_minus_sign = false; + bool final_period = false; + bool* check_initial_minus = + (strchr(opts.separators, '-') && !seen_dollar && (opts.num_required_bounds < 2)) + ? (&initial_minus_sign) + : nullptr; + bool* check_final_period = strchr(opts.separators, '.') ? (&final_period) : nullptr; + bool double_seen = EatADouble(&text, &len, opts.allow_unbounded_markers, from, + check_initial_minus, check_final_period); + + // if 2 bounds required, must see a double (or '?' if allowed) + if ((opts.num_required_bounds == 2) && !double_seen) return false; + + if (seen_dollar && !double_seen) { + --text; + if (len != -1) ++len; + seen_dollar = false; } - } - // By now, we've consumed whatever separator there may have been, - // and separator is true iff there was one. - if (!separator) { - if (final_period) // final period now considered part of first double - EatAChar(&text, &len, ".", true, false); - if (initial_minus_sign && double_seen) { - *to = *from; - *from = from_default; - } else if (opts.require_separator || - (opts.num_required_bounds > 0 && !double_seen) || - (opts.num_required_bounds > 1) ) { - return false; + // If we're here, we've read the first double and now expect a + // separator and another . + char separator = EatAChar(&text, &len, opts.separators, true, false); + if (separator == '.') { + // seen one '.' as separator; must check for another; perhaps set seplen=2 + if (EatAChar(&text, &len, ".", true, false)) { + if (final_period) { + // We may have three periods in a row. The first is part of the + // first number, the others are a separator. Policy: 234...567 + // is "234." to "567", not "234" to ".567". + EatAChar(&text, &len, ".", true, false); + } + } else if (!EatAChar(&text, &len, opts.separators, true, false)) { + // just one '.' and no other separator; uneat the first '.' 
we saw + --text; + if (len != -1) ++len; + separator = '\0'; + } } - } else { - if (initial_minus_sign && double_seen) - *from = -(*from); - // read second - bool second_dollar_seen = (seen_dollar - || (opts.allow_currency && !double_seen)) - && EatAChar(&text, &len, "$", true, false); - bool second_double_seen = EatADouble( - &text, &len, opts.allow_unbounded_markers, to, nullptr, nullptr); - if (opts.num_required_bounds > double_seen + second_double_seen) - return false; - if (second_dollar_seen && !second_double_seen) { - --text; - if (len != -1) - ++len; - second_dollar_seen = false; + // By now, we've consumed whatever separator there may have been, + // and separator is true iff there was one. + if (!separator) { + if (final_period) // final period now considered part of first double + EatAChar(&text, &len, ".", true, false); + if (initial_minus_sign && double_seen) { + *to = *from; + *from = from_default; + } else if (opts.require_separator || (opts.num_required_bounds > 0 && !double_seen) || + (opts.num_required_bounds > 1)) { + return false; + } + } else { + if (initial_minus_sign && double_seen) *from = -(*from); + // read second + bool second_dollar_seen = (seen_dollar || (opts.allow_currency && !double_seen)) && + EatAChar(&text, &len, "$", true, false); + bool second_double_seen = + EatADouble(&text, &len, opts.allow_unbounded_markers, to, nullptr, nullptr); + if (opts.num_required_bounds > double_seen + second_double_seen) return false; + if (second_dollar_seen && !second_double_seen) { + --text; + if (len != -1) ++len; + second_dollar_seen = false; + } + seen_dollar = seen_dollar || second_dollar_seen; } - seen_dollar = seen_dollar || second_dollar_seen; - } - - if (seen_dollar && (is_currency != nullptr)) - *is_currency = true; - // We're done. But we have to check that the next char is a proper - // terminator. 
- *end = text; - char terminator = EatAChar(&text, &len, opts.acceptable_terminators, false, - opts.null_terminator_ok); - if (terminator == '.') - --(*end); - return terminator; + + if (seen_dollar && (is_currency != nullptr)) *is_currency = true; + // We're done. But we have to check that the next char is a proper + // terminator. + *end = text; + char terminator = + EatAChar(&text, &len, opts.acceptable_terminators, false, opts.null_terminator_ok); + if (terminator == '.') --(*end); + return terminator; } // ---------------------------------------------------------------------- @@ -264,23 +241,20 @@ bool ParseDoubleRange(const char* text, int len, const char** end, // of nothing but zeroes, in which case one is kept: 0...0 becomes 0). // -------------------------------------------------------------------- -void ConsumeStrayLeadingZeroes(string *const str) { - const string::size_type len(str->size()); - if (len > 1 && (*str)[0] == '0') { - const char - *const begin(str->c_str()), - *const end(begin + len), - *ptr(begin + 1); - while (ptr != end && *ptr == '0') { - ++ptr; - } - string::size_type remove(ptr - begin); - DCHECK_GT(ptr, begin); - if (remove == len) { - --remove; // if they are all zero, leave one... +void ConsumeStrayLeadingZeroes(string* const str) { + const string::size_type len(str->size()); + if (len > 1 && (*str)[0] == '0') { + const char *const begin(str->c_str()), *const end(begin + len), *ptr(begin + 1); + while (ptr != end && *ptr == '0') { + ++ptr; + } + string::size_type remove(ptr - begin); + DCHECK_GT(ptr, begin); + if (remove == len) { + --remove; // if they are all zero, leave one... + } + str->erase(0, remove); } - str->erase(0, remove); - } } // ---------------------------------------------------------------------- @@ -291,39 +265,39 @@ void ConsumeStrayLeadingZeroes(string *const str) { // This cannot handle decimal numbers with leading 0s. 
// -------------------------------------------------------------------- -int32 ParseLeadingInt32Value(const char *str, int32 deflt) { - char *error = nullptr; - long value = strtol(str, &error, 0); - // Limit long values to int32 min/max. Needed for lp64; no-op on 32 bits. - if (value > numeric_limits::max()) { - value = numeric_limits::max(); - } else if (value < numeric_limits::min()) { - value = numeric_limits::min(); - } - return (error == str) ? deflt : value; +int32 ParseLeadingInt32Value(const char* str, int32 deflt) { + char* error = nullptr; + long value = strtol(str, &error, 0); + // Limit long values to int32 min/max. Needed for lp64; no-op on 32 bits. + if (value > numeric_limits::max()) { + value = numeric_limits::max(); + } else if (value < numeric_limits::min()) { + value = numeric_limits::min(); + } + return (error == str) ? deflt : value; } -uint32 ParseLeadingUInt32Value(const char *str, uint32 deflt) { - if (numeric_limits::max() == numeric_limits::max()) { - // When long is 32 bits, we can use strtoul. - char *error = nullptr; - const uint32 value = strtoul(str, &error, 0); - return (error == str) ? deflt : value; - } else { - // When long is 64 bits, we must use strto64 and handle limits - // by hand. The reason we cannot use a 64-bit strtoul is that - // it would be impossible to differentiate "-2" (that should wrap - // around to the value UINT_MAX-1) from a string with ULONG_MAX-1 - // (that should be pegged to UINT_MAX due to overflow). - char *error = nullptr; - int64 value = strto64(str, &error, 0); - if (value > numeric_limits::max() || - value < -static_cast(numeric_limits::max())) { - value = numeric_limits::max(); +uint32 ParseLeadingUInt32Value(const char* str, uint32 deflt) { + if (numeric_limits::max() == numeric_limits::max()) { + // When long is 32 bits, we can use strtoul. + char* error = nullptr; + const uint32 value = strtoul(str, &error, 0); + return (error == str) ? 
deflt : value; + } else { + // When long is 64 bits, we must use strto64 and handle limits + // by hand. The reason we cannot use a 64-bit strtoul is that + // it would be impossible to differentiate "-2" (that should wrap + // around to the value UINT_MAX-1) from a string with ULONG_MAX-1 + // (that should be pegged to UINT_MAX due to overflow). + char* error = nullptr; + int64 value = strto64(str, &error, 0); + if (value > numeric_limits::max() || + value < -static_cast(numeric_limits::max())) { + value = numeric_limits::max(); + } + // Within these limits, truncation to 32 bits handles negatives correctly. + return (error == str) ? deflt : value; } - // Within these limits, truncation to 32 bits handles negatives correctly. - return (error == str) ? deflt : value; - } } // ---------------------------------------------------------------------- @@ -335,39 +309,39 @@ uint32 ParseLeadingUInt32Value(const char *str, uint32 deflt) { // This can handle strings with leading 0s. // -------------------------------------------------------------------- -int32 ParseLeadingDec32Value(const char *str, int32 deflt) { - char *error = nullptr; - long value = strtol(str, &error, 10); - // Limit long values to int32 min/max. Needed for lp64; no-op on 32 bits. - if (value > numeric_limits::max()) { - value = numeric_limits::max(); - } else if (value < numeric_limits::min()) { - value = numeric_limits::min(); - } - return (error == str) ? deflt : value; +int32 ParseLeadingDec32Value(const char* str, int32 deflt) { + char* error = nullptr; + long value = strtol(str, &error, 10); + // Limit long values to int32 min/max. Needed for lp64; no-op on 32 bits. + if (value > numeric_limits::max()) { + value = numeric_limits::max(); + } else if (value < numeric_limits::min()) { + value = numeric_limits::min(); + } + return (error == str) ? 
deflt : value; } -uint32 ParseLeadingUDec32Value(const char *str, uint32 deflt) { - if (numeric_limits::max() == numeric_limits::max()) { - // When long is 32 bits, we can use strtoul. - char *error = nullptr; - const uint32 value = strtoul(str, &error, 10); - return (error == str) ? deflt : value; - } else { - // When long is 64 bits, we must use strto64 and handle limits - // by hand. The reason we cannot use a 64-bit strtoul is that - // it would be impossible to differentiate "-2" (that should wrap - // around to the value UINT_MAX-1) from a string with ULONG_MAX-1 - // (that should be pegged to UINT_MAX due to overflow). - char *error = nullptr; - int64 value = strto64(str, &error, 10); - if (value > numeric_limits::max() || - value < -static_cast(numeric_limits::max())) { - value = numeric_limits::max(); +uint32 ParseLeadingUDec32Value(const char* str, uint32 deflt) { + if (numeric_limits::max() == numeric_limits::max()) { + // When long is 32 bits, we can use strtoul. + char* error = nullptr; + const uint32 value = strtoul(str, &error, 10); + return (error == str) ? deflt : value; + } else { + // When long is 64 bits, we must use strto64 and handle limits + // by hand. The reason we cannot use a 64-bit strtoul is that + // it would be impossible to differentiate "-2" (that should wrap + // around to the value UINT_MAX-1) from a string with ULONG_MAX-1 + // (that should be pegged to UINT_MAX due to overflow). + char* error = nullptr; + int64 value = strto64(str, &error, 10); + if (value > numeric_limits::max() || + value < -static_cast(numeric_limits::max())) { + value = numeric_limits::max(); + } + // Within these limits, truncation to 32 bits handles negatives correctly. + return (error == str) ? deflt : value; } - // Within these limits, truncation to 32 bits handles negatives correctly. - return (error == str) ? 
deflt : value; - } } // ---------------------------------------------------------------------- @@ -378,22 +352,22 @@ uint32 ParseLeadingUDec32Value(const char *str, uint32 deflt) { // valid integer is found; else returns deflt // UInt64 and Int64 cannot handle decimal numbers with leading 0s. // -------------------------------------------------------------------- -uint64 ParseLeadingUInt64Value(const char *str, uint64 deflt) { - char *error = nullptr; - const uint64 value = strtou64(str, &error, 0); - return (error == str) ? deflt : value; +uint64 ParseLeadingUInt64Value(const char* str, uint64 deflt) { + char* error = nullptr; + const uint64 value = strtou64(str, &error, 0); + return (error == str) ? deflt : value; } -int64 ParseLeadingInt64Value(const char *str, int64 deflt) { - char *error = nullptr; - const int64 value = strto64(str, &error, 0); - return (error == str) ? deflt : value; +int64 ParseLeadingInt64Value(const char* str, int64 deflt) { + char* error = nullptr; + const int64 value = strto64(str, &error, 0); + return (error == str) ? deflt : value; } -uint64 ParseLeadingHex64Value(const char *str, uint64 deflt) { - char *error = nullptr; - const uint64 value = strtou64(str, &error, 16); - return (error == str) ? deflt : value; +uint64 ParseLeadingHex64Value(const char* str, uint64 deflt) { + char* error = nullptr; + const uint64 value = strtou64(str, &error, 16); + return (error == str) ? deflt : value; } // ---------------------------------------------------------------------- @@ -405,16 +379,16 @@ uint64 ParseLeadingHex64Value(const char *str, uint64 deflt) { // This can handle strings with leading 0s. // -------------------------------------------------------------------- -int64 ParseLeadingDec64Value(const char *str, int64 deflt) { - char *error = nullptr; - const int64 value = strto64(str, &error, 10); - return (error == str) ? 
deflt : value; +int64 ParseLeadingDec64Value(const char* str, int64 deflt) { + char* error = nullptr; + const int64 value = strto64(str, &error, 10); + return (error == str) ? deflt : value; } -uint64 ParseLeadingUDec64Value(const char *str, uint64 deflt) { - char *error = nullptr; - const uint64 value = strtou64(str, &error, 10); - return (error == str) ? deflt : value; +uint64 ParseLeadingUDec64Value(const char* str, uint64 deflt) { + char* error = nullptr; + const uint64 value = strtou64(str, &error, 10); + return (error == str) ? deflt : value; } // ---------------------------------------------------------------------- @@ -423,16 +397,16 @@ uint64 ParseLeadingUDec64Value(const char *str, uint64 deflt) { // if a valid value is found; else returns deflt // -------------------------------------------------------------------- -double ParseLeadingDoubleValue(const char *str, double deflt) { - char *error = nullptr; - errno = 0; - const double value = strtod(str, &error); - if (errno != 0 || // overflow/underflow happened - error == str) { // no valid parse - return deflt; - } else { - return value; - } +double ParseLeadingDoubleValue(const char* str, double deflt) { + char* error = nullptr; + errno = 0; + const double value = strtod(str, &error); + if (errno != 0 || // overflow/underflow happened + error == str) { // no valid parse + return deflt; + } else { + return value; + } } // ---------------------------------------------------------------------- @@ -442,47 +416,38 @@ double ParseLeadingDoubleValue(const char *str, double deflt) { // whitespace, is case insensitive, and recognizes these forms: // 0/1, false/true, no/yes, n/y // -------------------------------------------------------------------- -bool ParseLeadingBoolValue(const char *str, bool deflt) { - static const int kMaxLen = 5; - char value[kMaxLen + 1]; - // Skip whitespace - while (ascii_isspace(*str)) { - ++str; - } - int len = 0; - for (; len <= kMaxLen && ascii_isalnum(*str); ++str) - value[len++] 
= ascii_tolower(*str); - if (len == 0 || len > kMaxLen) - return deflt; - value[len] = '\0'; - switch (len) { +bool ParseLeadingBoolValue(const char* str, bool deflt) { + static const int kMaxLen = 5; + char value[kMaxLen + 1]; + // Skip whitespace + while (ascii_isspace(*str)) { + ++str; + } + int len = 0; + for (; len <= kMaxLen && ascii_isalnum(*str); ++str) value[len++] = ascii_tolower(*str); + if (len == 0 || len > kMaxLen) return deflt; + value[len] = '\0'; + switch (len) { case 1: - if (value[0] == '0' || value[0] == 'n') - return false; - if (value[0] == '1' || value[0] == 'y') - return true; - break; + if (value[0] == '0' || value[0] == 'n') return false; + if (value[0] == '1' || value[0] == 'y') return true; + break; case 2: - if (!strcmp(value, "no")) - return false; - break; + if (!strcmp(value, "no")) return false; + break; case 3: - if (!strcmp(value, "yes")) - return true; - break; + if (!strcmp(value, "yes")) return true; + break; case 4: - if (!strcmp(value, "true")) - return true; - break; + if (!strcmp(value, "true")) return true; + break; case 5: - if (!strcmp(value, "false")) - return false; - break; - } - return deflt; + if (!strcmp(value, "false")) return false; + break; + } + return deflt; } - // ---------------------------------------------------------------------- // FpToString() // FloatToString() @@ -492,19 +457,17 @@ bool ParseLeadingBoolValue(const char *str, bool deflt) { // ---------------------------------------------------------------------- string FpToString(Fprint fp) { - char buf[17]; - snprintf(buf, sizeof(buf), "%016" PRIx64, fp); - return string(buf); + char buf[17]; + snprintf(buf, sizeof(buf), "%016" PRIx64, fp); + return string(buf); } // Default arguments string Uint128ToHexString(uint128 ui128) { - char buf[33]; - snprintf(buf, sizeof(buf), "%016" PRIx64, - Uint128High64(ui128)); - snprintf(buf + 16, sizeof(buf) - 16, "%016" PRIx64, - Uint128Low64(ui128)); - return string(buf); + char buf[33]; + snprintf(buf, 
sizeof(buf), "%016" PRIx64, Uint128High64(ui128)); + snprintf(buf + 16, sizeof(buf) - 16, "%016" PRIx64, Uint128Low64(ui128)); + return string(buf); } namespace { @@ -513,217 +476,205 @@ namespace { // Uses 36 to indicate an invalid character since we support // bases up to 36. static const int8 kAsciiToInt[256] = { - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s. - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 36, 36, 36, 36, 36, 36, 36, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 36, 36, 36, 36, 36, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36 }; + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s. 
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 36, 36, + 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36}; // Input format based on POSIX.1-2008 strtol // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html -template -bool safe_int_internal(const char* start, const char* end, int base, - IntType* value_p) { - // Consume whitespace. - while (start < end && ascii_isspace(start[0])) { - ++start; - } - while (start < end && ascii_isspace(end[-1])) { - --end; - } - if (start >= end) { - return false; - } - - // Consume sign. - const bool negative = (start[0] == '-'); - if (negative || start[0] == '+') { - ++start; - if (start >= end) { - return false; +template +bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) { + // Consume whitespace. + while (start < end && ascii_isspace(start[0])) { + ++start; } - } - - // Consume base-dependent prefix. - // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10 - // base 16: "0x" -> base 16 - // Also validate the base. 
- if (base == 0) { - if (end - start >= 2 && start[0] == '0' && - (start[1] == 'x' || start[1] == 'X')) { - base = 16; - start += 2; - } else if (end - start >= 1 && start[0] == '0') { - base = 8; - start += 1; - } else { - base = 10; + while (start < end && ascii_isspace(end[-1])) { + --end; } - } else if (base == 16) { - if (end - start >= 2 && start[0] == '0' && - (start[1] == 'x' || start[1] == 'X')) { - start += 2; + if (start >= end) { + return false; } - } else if (base >= 2 && base <= 36) { - // okay - } else { - return false; - } - - // Consume digits. - // - // The classic loop: - // - // for each digit - // value = value * base + digit - // value *= sign - // - // The classic loop needs overflow checking. It also fails on the most - // negative integer, -2147483648 in 32-bit two's complement representation. - // - // My improved loop: - // - // if (!negative) - // for each digit - // value = value * base - // value = value + digit - // else - // for each digit - // value = value * base - // value = value - digit - // - // Overflow checking becomes simple. - // - // I present the positive code first for easier reading. - IntType value = 0; - if (!negative) { - const IntType vmax = std::numeric_limits::max(); - assert(vmax > 0); - assert(vmax >= base); - const IntType vmax_over_base = vmax / base; - // loop over digits - // loop body is interleaved for perf, not readability - for (; start < end; ++start) { - unsigned char c = static_cast(start[0]); - int digit = kAsciiToInt[c]; - if (value > vmax_over_base) return false; - value *= base; - if (digit >= base) return false; - if (value > vmax - digit) return false; - value += digit; + + // Consume sign. 
+ const bool negative = (start[0] == '-'); + if (negative || start[0] == '+') { + ++start; + if (start >= end) { + return false; + } } - } else { - const IntType vmin = std::numeric_limits::min(); - assert(vmin < 0); - assert(vmin <= 0 - base); - IntType vmin_over_base = vmin / base; - // 2003 c++ standard [expr.mul] - // "... the sign of the remainder is implementation-defined." - // Although (vmin/base)*base + vmin%base is always vmin. - // 2011 c++ standard tightens the spec but we cannot rely on it. - if (vmin % base > 0) { - vmin_over_base += 1; + + // Consume base-dependent prefix. + // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10 + // base 16: "0x" -> base 16 + // Also validate the base. + if (base == 0) { + if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) { + base = 16; + start += 2; + } else if (end - start >= 1 && start[0] == '0') { + base = 8; + start += 1; + } else { + base = 10; + } + } else if (base == 16) { + if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) { + start += 2; + } + } else if (base >= 2 && base <= 36) { + // okay + } else { + return false; } - // loop over digits - // loop body is interleaved for perf, not readability - for (; start < end; ++start) { - unsigned char c = static_cast(start[0]); - int digit = kAsciiToInt[c]; - if (value < vmin_over_base) return false; - value *= base; - if (digit >= base) return false; - if (value < vmin + digit) return false; - value -= digit; + + // Consume digits. + // + // The classic loop: + // + // for each digit + // value = value * base + digit + // value *= sign + // + // The classic loop needs overflow checking. It also fails on the most + // negative integer, -2147483648 in 32-bit two's complement representation. 
+ // + // My improved loop: + // + // if (!negative) + // for each digit + // value = value * base + // value = value + digit + // else + // for each digit + // value = value * base + // value = value - digit + // + // Overflow checking becomes simple. + // + // I present the positive code first for easier reading. + IntType value = 0; + if (!negative) { + const IntType vmax = std::numeric_limits::max(); + assert(vmax > 0); + assert(vmax >= base); + const IntType vmax_over_base = vmax / base; + // loop over digits + // loop body is interleaved for perf, not readability + for (; start < end; ++start) { + unsigned char c = static_cast(start[0]); + int digit = kAsciiToInt[c]; + if (value > vmax_over_base) return false; + value *= base; + if (digit >= base) return false; + if (value > vmax - digit) return false; + value += digit; + } + } else { + const IntType vmin = std::numeric_limits::min(); + assert(vmin < 0); + assert(vmin <= 0 - base); + IntType vmin_over_base = vmin / base; + // 2003 c++ standard [expr.mul] + // "... the sign of the remainder is implementation-defined." + // Although (vmin/base)*base + vmin%base is always vmin. + // 2011 c++ standard tightens the spec but we cannot rely on it. + if (vmin % base > 0) { + vmin_over_base += 1; + } + // loop over digits + // loop body is interleaved for perf, not readability + for (; start < end; ++start) { + unsigned char c = static_cast(start[0]); + int digit = kAsciiToInt[c]; + if (value < vmin_over_base) return false; + value *= base; + if (digit >= base) return false; + if (value < vmin + digit) return false; + value -= digit; + } } - } - // Store output. - *value_p = value; - return true; + // Store output. 
+ *value_p = value; + return true; } -} // anonymous namespace +} // anonymous namespace -bool safe_strto32_base(const char* startptr, const int buffer_size, - int32* v, int base) { - return safe_int_internal(startptr, startptr + buffer_size, base, v); +bool safe_strto32_base(const char* startptr, const int buffer_size, int32* v, int base) { + return safe_int_internal(startptr, startptr + buffer_size, base, v); } -bool safe_strto64_base(const char* startptr, const int buffer_size, - int64* v, int base) { - return safe_int_internal(startptr, startptr + buffer_size, base, v); +bool safe_strto64_base(const char* startptr, const int buffer_size, int64* v, int base) { + return safe_int_internal(startptr, startptr + buffer_size, base, v); } bool safe_strto32(const char* startptr, const int buffer_size, int32* value) { - return safe_int_internal(startptr, startptr + buffer_size, 10, value); + return safe_int_internal(startptr, startptr + buffer_size, 10, value); } bool safe_strto64(const char* startptr, const int buffer_size, int64* value) { - return safe_int_internal(startptr, startptr + buffer_size, 10, value); + return safe_int_internal(startptr, startptr + buffer_size, 10, value); } bool safe_strto32_base(const char* str, int32* value, int base) { - char* endptr; - errno = 0; // errno only gets set on errors - *value = strto32(str, &endptr, base); - if (endptr != str) { - while (ascii_isspace(*endptr)) ++endptr; - } - return *str != '\0' && *endptr == '\0' && errno == 0; + char* endptr; + errno = 0; // errno only gets set on errors + *value = strto32(str, &endptr, base); + if (endptr != str) { + while (ascii_isspace(*endptr)) ++endptr; + } + return *str != '\0' && *endptr == '\0' && errno == 0; } bool safe_strto64_base(const char* str, int64* value, int base) { - char* endptr; - errno = 0; // errno only gets set on errors - *value = strto64(str, &endptr, base); - if (endptr != str) { - while (ascii_isspace(*endptr)) ++endptr; - } - return *str != '\0' && *endptr == 
'\0' && errno == 0; + char* endptr; + errno = 0; // errno only gets set on errors + *value = strto64(str, &endptr, base); + if (endptr != str) { + while (ascii_isspace(*endptr)) ++endptr; + } + return *str != '\0' && *endptr == '\0' && errno == 0; } bool safe_strtou32_base(const char* str, uint32* value, int base) { - // strtoul does not give any errors on negative numbers, so we have to - // search the string for '-' manually. - while (ascii_isspace(*str)) ++str; - if (*str == '-') return false; - - char* endptr; - errno = 0; // errno only gets set on errors - *value = strtou32(str, &endptr, base); - if (endptr != str) { - while (ascii_isspace(*endptr)) ++endptr; - } - return *str != '\0' && *endptr == '\0' && errno == 0; + // strtoul does not give any errors on negative numbers, so we have to + // search the string for '-' manually. + while (ascii_isspace(*str)) ++str; + if (*str == '-') return false; + + char* endptr; + errno = 0; // errno only gets set on errors + *value = strtou32(str, &endptr, base); + if (endptr != str) { + while (ascii_isspace(*endptr)) ++endptr; + } + return *str != '\0' && *endptr == '\0' && errno == 0; } bool safe_strtou64_base(const char* str, uint64* value, int base) { - // strtou64 does not give any errors on negative numbers, so we have to - // search the string for '-' manually. - while (ascii_isspace(*str)) ++str; - if (*str == '-') return false; - - char* endptr; - errno = 0; // errno only gets set on errors - *value = strtou64(str, &endptr, base); - if (endptr != str) { - while (ascii_isspace(*endptr)) ++endptr; - } - return *str != '\0' && *endptr == '\0' && errno == 0; + // strtou64 does not give any errors on negative numbers, so we have to + // search the string for '-' manually. 
+ while (ascii_isspace(*str)) ++str; + if (*str == '-') return false; + + char* endptr; + errno = 0; // errno only gets set on errors + *value = strtou64(str, &endptr, base); + if (endptr != str) { + while (ascii_isspace(*endptr)) ++endptr; + } + return *str != '\0' && *endptr == '\0' && errno == 0; } // ---------------------------------------------------------------------- @@ -731,39 +682,35 @@ bool safe_strtou64_base(const char* str, uint64* value, int base) { // Converts unsigned number to string representation in base-36. // -------------------------------------------------------------------- size_t u64tostr_base36(uint64 number, size_t buf_size, char* buffer) { - CHECK_GT(buf_size, 0); - CHECK(buffer); - static const char kAlphabet[] = "0123456789abcdefghijklmnopqrstuvwxyz"; + CHECK_GT(buf_size, 0); + CHECK(buffer); + static const char kAlphabet[] = "0123456789abcdefghijklmnopqrstuvwxyz"; - buffer[buf_size - 1] = '\0'; - size_t result_size = 1; + buffer[buf_size - 1] = '\0'; + size_t result_size = 1; - do { - if (buf_size == result_size) { // Ran out of space. - return 0; - } - int remainder = number % 36; - number /= 36; - buffer[buf_size - result_size - 1] = kAlphabet[remainder]; - result_size++; - } while (number); + do { + if (buf_size == result_size) { // Ran out of space. + return 0; + } + int remainder = number % 36; + number /= 36; + buffer[buf_size - result_size - 1] = kAlphabet[remainder]; + result_size++; + } while (number); - memmove(buffer, buffer + buf_size - result_size, result_size); + memmove(buffer, buffer + buf_size - result_size, result_size); - return result_size - 1; + return result_size - 1; } // Generate functions that wrap safe_strtoXXX_base. 
-#define GEN_SAFE_STRTO(name, type) \ -bool name##_base(const string& str, type* value, int base) { \ - return name##_base(str.c_str(), value, base); \ -} \ -bool name(const char* str, type* value) { \ - return name##_base(str, value, 10); \ -} \ -bool name(const string& str, type* value) { \ - return name##_base(str.c_str(), value, 10); \ -} +#define GEN_SAFE_STRTO(name, type) \ + bool name##_base(const string& str, type* value, int base) { \ + return name##_base(str.c_str(), value, base); \ + } \ + bool name(const char* str, type* value) { return name##_base(str, value, 10); } \ + bool name(const string& str, type* value) { return name##_base(str.c_str(), value, 10); } GEN_SAFE_STRTO(safe_strto32, int32); GEN_SAFE_STRTO(safe_strtou32, uint32); GEN_SAFE_STRTO(safe_strto64, int64); @@ -771,68 +718,68 @@ GEN_SAFE_STRTO(safe_strtou64, uint64); #undef GEN_SAFE_STRTO bool safe_strtof(const char* str, float* value) { - char* endptr; -#ifdef _MSC_VER // has no strtof() - *value = strtod(str, &endptr); + char* endptr; +#ifdef _MSC_VER // has no strtof() + *value = strtod(str, &endptr); #else - *value = strtof(str, &endptr); + *value = strtof(str, &endptr); #endif - if (endptr != str) { - while (ascii_isspace(*endptr)) ++endptr; - } - // Ignore range errors from strtod/strtof. - // The values it returns on underflow and - // overflow are the right fallback in a - // robust setting. - return *str != '\0' && *endptr == '\0'; + if (endptr != str) { + while (ascii_isspace(*endptr)) ++endptr; + } + // Ignore range errors from strtod/strtof. + // The values it returns on underflow and + // overflow are the right fallback in a + // robust setting. + return *str != '\0' && *endptr == '\0'; } bool safe_strtod(const char* str, double* value) { - char* endptr; - *value = strtod(str, &endptr); - if (endptr != str) { - while (ascii_isspace(*endptr)) ++endptr; - } - // Ignore range errors from strtod. 
The values it - // returns on underflow and overflow are the right - // fallback in a robust setting. - return *str != '\0' && *endptr == '\0'; + char* endptr; + *value = strtod(str, &endptr); + if (endptr != str) { + while (ascii_isspace(*endptr)) ++endptr; + } + // Ignore range errors from strtod. The values it + // returns on underflow and overflow are the right + // fallback in a robust setting. + return *str != '\0' && *endptr == '\0'; } bool safe_strtof(const string& str, float* value) { - return safe_strtof(str.c_str(), value); + return safe_strtof(str.c_str(), value); } bool safe_strtod(const string& str, double* value) { - return safe_strtod(str.c_str(), value); + return safe_strtod(str.c_str(), value); } uint64 atoi_kmgt(const char* s) { - char* endptr; - uint64 n = strtou64(s, &endptr, 10); - uint64 scale = 1; - char c = *endptr; - if (c != '\0') { - c = ascii_toupper(c); - switch (c) { - case 'K': - scale = GG_ULONGLONG(1) << 10; - break; - case 'M': - scale = GG_ULONGLONG(1) << 20; - break; - case 'G': - scale = GG_ULONGLONG(1) << 30; - break; - case 'T': - scale = GG_ULONGLONG(1) << 40; - break; - default: - LOG(FATAL) << "Invalid mnemonic: `" << c << "';" - << " should be one of `K', `M', `G', and `T'."; + char* endptr; + uint64 n = strtou64(s, &endptr, 10); + uint64 scale = 1; + char c = *endptr; + if (c != '\0') { + c = ascii_toupper(c); + switch (c) { + case 'K': + scale = GG_ULONGLONG(1) << 10; + break; + case 'M': + scale = GG_ULONGLONG(1) << 20; + break; + case 'G': + scale = GG_ULONGLONG(1) << 30; + break; + case 'T': + scale = GG_ULONGLONG(1) << 40; + break; + default: + LOG(FATAL) << "Invalid mnemonic: `" << c << "';" + << " should be one of `K', `M', `G', and `T'."; + } } - } - return n * scale; + return n * scale; } // ---------------------------------------------------------------------- @@ -860,52 +807,52 @@ uint64 atoi_kmgt(const char* s) { // for FastTimeToBuffer(), we guarantee that it is.) 
// ---------------------------------------------------------------------- -char *FastInt64ToBuffer(int64 i, char* buffer) { - FastInt64ToBufferLeft(i, buffer); - return buffer; +char* FastInt64ToBuffer(int64 i, char* buffer) { + FastInt64ToBufferLeft(i, buffer); + return buffer; } -char *FastInt32ToBuffer(int32 i, char* buffer) { - FastInt32ToBufferLeft(i, buffer); - return buffer; +char* FastInt32ToBuffer(int32 i, char* buffer) { + FastInt32ToBufferLeft(i, buffer); + return buffer; } -char *FastHexToBuffer(int i, char* buffer) { - CHECK_GE(i, 0) << "FastHexToBuffer() wants non-negative integers, not " << i; - - static const char *hexdigits = "0123456789abcdef"; - char *p = buffer + 21; - *p-- = '\0'; - do { - *p-- = hexdigits[i & 15]; // mod by 16 - i >>= 4; // divide by 16 - } while (i > 0); - return p + 1; +char* FastHexToBuffer(int i, char* buffer) { + CHECK_GE(i, 0) << "FastHexToBuffer() wants non-negative integers, not " << i; + + static const char* hexdigits = "0123456789abcdef"; + char* p = buffer + 21; + *p-- = '\0'; + do { + *p-- = hexdigits[i & 15]; // mod by 16 + i >>= 4; // divide by 16 + } while (i > 0); + return p + 1; } -char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) { - static const char *hexdigits = "0123456789abcdef"; - buffer[num_byte] = '\0'; - for (int i = num_byte - 1; i >= 0; i--) { - buffer[i] = hexdigits[value & 0xf]; - value >>= 4; - } - return buffer; +char* InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) { + static const char* hexdigits = "0123456789abcdef"; + buffer[num_byte] = '\0'; + for (int i = num_byte - 1; i >= 0; i--) { + buffer[i] = hexdigits[value & 0xf]; + value >>= 4; + } + return buffer; } -char *FastHex64ToBuffer(uint64 value, char* buffer) { - return InternalFastHexToBuffer(value, buffer, 16); +char* FastHex64ToBuffer(uint64 value, char* buffer) { + return InternalFastHexToBuffer(value, buffer, 16); } -char *FastHex32ToBuffer(uint32 value, char* buffer) { - return 
InternalFastHexToBuffer(value, buffer, 8); +char* FastHex32ToBuffer(uint32 value, char* buffer) { + return InternalFastHexToBuffer(value, buffer, 8); } // TODO(user): revisit the two_ASCII_digits optimization. // // Several converters use this table to reduce // division and modulo operations. -extern const char two_ASCII_digits[100][2]; // from strutil.cc +extern const char two_ASCII_digits[100][2]; // from strutil.cc // ---------------------------------------------------------------------- // FastInt32ToBufferLeft() @@ -924,155 +871,155 @@ extern const char two_ASCII_digits[100][2]; // from strutil.cc // ---------------------------------------------------------------------- char* FastUInt32ToBufferLeft(uint32 u, char* buffer) { - uint digits; - const char *ASCII_digits = nullptr; - // The idea of this implementation is to trim the number of divides to as few - // as possible by using multiplication and subtraction rather than mod (%), - // and by outputting two digits at a time rather than one. - // The huge-number case is first, in the hopes that the compiler will output - // that case in one branch-free block of code, and only output conditional - // branches into it from below. - if (u >= 1000000000) { // >= 1,000,000,000 - digits = u / 100000000; // 100,000,000 + uint digits; + const char* ASCII_digits = nullptr; + // The idea of this implementation is to trim the number of divides to as few + // as possible by using multiplication and subtraction rather than mod (%), + // and by outputting two digits at a time rather than one. + // The huge-number case is first, in the hopes that the compiler will output + // that case in one branch-free block of code, and only output conditional + // branches into it from below. 
+ if (u >= 1000000000) { // >= 1,000,000,000 + digits = u / 100000000; // 100,000,000 + ASCII_digits = two_ASCII_digits[digits]; + buffer[0] = ASCII_digits[0]; + buffer[1] = ASCII_digits[1]; + buffer += 2; + sublt100_000_000: + u -= digits * 100000000; // 100,000,000 + lt100_000_000: + digits = u / 1000000; // 1,000,000 + ASCII_digits = two_ASCII_digits[digits]; + buffer[0] = ASCII_digits[0]; + buffer[1] = ASCII_digits[1]; + buffer += 2; + sublt1_000_000: + u -= digits * 1000000; // 1,000,000 + lt1_000_000: + digits = u / 10000; // 10,000 + ASCII_digits = two_ASCII_digits[digits]; + buffer[0] = ASCII_digits[0]; + buffer[1] = ASCII_digits[1]; + buffer += 2; + sublt10_000: + u -= digits * 10000; // 10,000 + lt10_000: + digits = u / 100; + ASCII_digits = two_ASCII_digits[digits]; + buffer[0] = ASCII_digits[0]; + buffer[1] = ASCII_digits[1]; + buffer += 2; + sublt100: + u -= digits * 100; + lt100: + digits = u; + ASCII_digits = two_ASCII_digits[digits]; + buffer[0] = ASCII_digits[0]; + buffer[1] = ASCII_digits[1]; + buffer += 2; + done: + *buffer = 0; + return buffer; + } + + if (u < 100) { + digits = u; + if (u >= 10) goto lt100; + *buffer++ = '0' + digits; + goto done; + } + if (u < 10000) { // 10,000 + if (u >= 1000) goto lt10_000; + digits = u / 100; + *buffer++ = '0' + digits; + goto sublt100; + } + if (u < 1000000) { // 1,000,000 + if (u >= 100000) goto lt1_000_000; + digits = u / 10000; // 10,000 + *buffer++ = '0' + digits; + goto sublt10_000; + } + if (u < 100000000) { // 100,000,000 + if (u >= 10000000) goto lt100_000_000; + digits = u / 1000000; // 1,000,000 + *buffer++ = '0' + digits; + goto sublt1_000_000; + } + // we already know that u < 1,000,000,000 + digits = u / 100000000; // 100,000,000 + *buffer++ = '0' + digits; + goto sublt100_000_000; +} + +char* FastInt32ToBufferLeft(int32 i, char* buffer) { + uint32 u = i; + if (i < 0) { + *buffer++ = '-'; + // We need to do the negation in modular (i.e., "unsigned") + // arithmetic; MSVC++ apprently warns for 
plain "-u", so + // we write the equivalent expression "0 - u" instead. + u = 0 - u; + } + return FastUInt32ToBufferLeft(u, buffer); +} + +char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) { + uint digits; + const char* ASCII_digits = nullptr; + + uint32 u = static_cast(u64); + if (u == u64) return FastUInt32ToBufferLeft(u, buffer); + + uint64 top_11_digits = u64 / 1000000000; + buffer = FastUInt64ToBufferLeft(top_11_digits, buffer); + u = u64 - (top_11_digits * 1000000000); + + digits = u / 10000000; // 10,000,000 + DCHECK_LT(digits, 100); ASCII_digits = two_ASCII_digits[digits]; buffer[0] = ASCII_digits[0]; buffer[1] = ASCII_digits[1]; buffer += 2; - sublt100_000_000: - u -= digits * 100000000; // 100,000,000 - lt100_000_000: - digits = u / 1000000; // 1,000,000 + u -= digits * 10000000; // 10,000,000 + digits = u / 100000; // 100,000 ASCII_digits = two_ASCII_digits[digits]; buffer[0] = ASCII_digits[0]; buffer[1] = ASCII_digits[1]; buffer += 2; - sublt1_000_000: - u -= digits * 1000000; // 1,000,000 - lt1_000_000: - digits = u / 10000; // 10,000 + u -= digits * 100000; // 100,000 + digits = u / 1000; // 1,000 ASCII_digits = two_ASCII_digits[digits]; buffer[0] = ASCII_digits[0]; buffer[1] = ASCII_digits[1]; buffer += 2; - sublt10_000: - u -= digits * 10000; // 10,000 - lt10_000: - digits = u / 100; + u -= digits * 1000; // 1,000 + digits = u / 10; ASCII_digits = two_ASCII_digits[digits]; buffer[0] = ASCII_digits[0]; buffer[1] = ASCII_digits[1]; buffer += 2; - sublt100: - u -= digits * 100; - lt100: + u -= digits * 10; digits = u; - ASCII_digits = two_ASCII_digits[digits]; - buffer[0] = ASCII_digits[0]; - buffer[1] = ASCII_digits[1]; - buffer += 2; - done: + *buffer++ = '0' + digits; *buffer = 0; return buffer; - } - - if (u < 100) { - digits = u; - if (u >= 10) goto lt100; - *buffer++ = '0' + digits; - goto done; - } - if (u < 10000) { // 10,000 - if (u >= 1000) goto lt10_000; - digits = u / 100; - *buffer++ = '0' + digits; - goto sublt100; - } - if (u < 
1000000) { // 1,000,000 - if (u >= 100000) goto lt1_000_000; - digits = u / 10000; // 10,000 - *buffer++ = '0' + digits; - goto sublt10_000; - } - if (u < 100000000) { // 100,000,000 - if (u >= 10000000) goto lt100_000_000; - digits = u / 1000000; // 1,000,000 - *buffer++ = '0' + digits; - goto sublt1_000_000; - } - // we already know that u < 1,000,000,000 - digits = u / 100000000; // 100,000,000 - *buffer++ = '0' + digits; - goto sublt100_000_000; -} - -char* FastInt32ToBufferLeft(int32 i, char* buffer) { - uint32 u = i; - if (i < 0) { - *buffer++ = '-'; - // We need to do the negation in modular (i.e., "unsigned") - // arithmetic; MSVC++ apprently warns for plain "-u", so - // we write the equivalent expression "0 - u" instead. - u = 0 - u; - } - return FastUInt32ToBufferLeft(u, buffer); -} - -char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) { - uint digits; - const char *ASCII_digits = nullptr; - - uint32 u = static_cast(u64); - if (u == u64) return FastUInt32ToBufferLeft(u, buffer); - - uint64 top_11_digits = u64 / 1000000000; - buffer = FastUInt64ToBufferLeft(top_11_digits, buffer); - u = u64 - (top_11_digits * 1000000000); - - digits = u / 10000000; // 10,000,000 - DCHECK_LT(digits, 100); - ASCII_digits = two_ASCII_digits[digits]; - buffer[0] = ASCII_digits[0]; - buffer[1] = ASCII_digits[1]; - buffer += 2; - u -= digits * 10000000; // 10,000,000 - digits = u / 100000; // 100,000 - ASCII_digits = two_ASCII_digits[digits]; - buffer[0] = ASCII_digits[0]; - buffer[1] = ASCII_digits[1]; - buffer += 2; - u -= digits * 100000; // 100,000 - digits = u / 1000; // 1,000 - ASCII_digits = two_ASCII_digits[digits]; - buffer[0] = ASCII_digits[0]; - buffer[1] = ASCII_digits[1]; - buffer += 2; - u -= digits * 1000; // 1,000 - digits = u / 10; - ASCII_digits = two_ASCII_digits[digits]; - buffer[0] = ASCII_digits[0]; - buffer[1] = ASCII_digits[1]; - buffer += 2; - u -= digits * 10; - digits = u; - *buffer++ = '0' + digits; - *buffer = 0; - return buffer; } char* 
FastInt64ToBufferLeft(int64 i, char* buffer) { - uint64 u = i; - if (i < 0) { - *buffer++ = '-'; - u = 0 - u; - } - return FastUInt64ToBufferLeft(u, buffer); + uint64 u = i; + if (i < 0) { + *buffer++ = '-'; + u = 0 - u; + } + return FastUInt64ToBufferLeft(u, buffer); } int HexDigitsPrefix(const char* buf, int num_digits) { - for (int i = 0; i < num_digits; i++) - if (!ascii_isxdigit(buf[i])) - return 0; // This also detects end of string as '\0' is not xdigit. - return 1; + for (int i = 0; i < num_digits; i++) + if (!ascii_isxdigit(buf[i])) + return 0; // This also detects end of string as '\0' is not xdigit. + return 1; } // ---------------------------------------------------------------------- @@ -1098,88 +1045,85 @@ int HexDigitsPrefix(const char* buf, int num_digits) { // strict mode, but "01" == "1" otherwise. // ---------------------------------------------------------------------- -int AutoDigitStrCmp(const char* a, int alen, - const char* b, int blen, - bool strict) { - int aindex = 0; - int bindex = 0; - while ((aindex < alen) && (bindex < blen)) { - if (isdigit(a[aindex]) && isdigit(b[bindex])) { - // Compare runs of digits. Instead of extracting numbers, we - // just skip leading zeroes, and then get the run-lengths. This - // allows us to handle arbitrary precision numbers. We remember - // how many zeroes we found so that we can differentiate between - // "1" and "01" in strict mode. 
- - // Skip leading zeroes, but remember how many we found - int azeroes = aindex; - int bzeroes = bindex; - while ((aindex < alen) && (a[aindex] == '0')) aindex++; - while ((bindex < blen) && (b[bindex] == '0')) bindex++; - azeroes = aindex - azeroes; - bzeroes = bindex - bzeroes; - - // Count digit lengths - int astart = aindex; - int bstart = bindex; - while ((aindex < alen) && isdigit(a[aindex])) aindex++; - while ((bindex < blen) && isdigit(b[bindex])) bindex++; - if (aindex - astart < bindex - bstart) { - // a has shorter run of digits: so smaller - return -1; - } else if (aindex - astart > bindex - bstart) { - // a has longer run of digits: so larger - return 1; - } else { - // Same lengths, so compare digit by digit - for (int i = 0; i < aindex-astart; i++) { - if (a[astart+i] < b[bstart+i]) { - return -1; - } else if (a[astart+i] > b[bstart+i]) { - return 1; - } - } - // Equal: did one have more leading zeroes? - if (strict && azeroes != bzeroes) { - if (azeroes > bzeroes) { - // a has more leading zeroes: a < b +int AutoDigitStrCmp(const char* a, int alen, const char* b, int blen, bool strict) { + int aindex = 0; + int bindex = 0; + while ((aindex < alen) && (bindex < blen)) { + if (isdigit(a[aindex]) && isdigit(b[bindex])) { + // Compare runs of digits. Instead of extracting numbers, we + // just skip leading zeroes, and then get the run-lengths. This + // allows us to handle arbitrary precision numbers. We remember + // how many zeroes we found so that we can differentiate between + // "1" and "01" in strict mode. 
+ + // Skip leading zeroes, but remember how many we found + int azeroes = aindex; + int bzeroes = bindex; + while ((aindex < alen) && (a[aindex] == '0')) aindex++; + while ((bindex < blen) && (b[bindex] == '0')) bindex++; + azeroes = aindex - azeroes; + bzeroes = bindex - bzeroes; + + // Count digit lengths + int astart = aindex; + int bstart = bindex; + while ((aindex < alen) && isdigit(a[aindex])) aindex++; + while ((bindex < blen) && isdigit(b[bindex])) bindex++; + if (aindex - astart < bindex - bstart) { + // a has shorter run of digits: so smaller + return -1; + } else if (aindex - astart > bindex - bstart) { + // a has longer run of digits: so larger + return 1; + } else { + // Same lengths, so compare digit by digit + for (int i = 0; i < aindex - astart; i++) { + if (a[astart + i] < b[bstart + i]) { + return -1; + } else if (a[astart + i] > b[bstart + i]) { + return 1; + } + } + // Equal: did one have more leading zeroes? + if (strict && azeroes != bzeroes) { + if (azeroes > bzeroes) { + // a has more leading zeroes: a < b + return -1; + } else { + // b has more leading zeroes: a > b + return 1; + } + } + // Equal: so continue scanning + } + } else if (a[aindex] < b[bindex]) { return -1; - } else { - // b has more leading zeroes: a > b + } else if (a[aindex] > b[bindex]) { return 1; - } + } else { + aindex++; + bindex++; } - // Equal: so continue scanning - } - } else if (a[aindex] < b[bindex]) { - return -1; - } else if (a[aindex] > b[bindex]) { - return 1; - } else { - aindex++; - bindex++; } - } - if (aindex < alen) { - // b is prefix of a - return 1; - } else if (bindex < blen) { - // a is prefix of b - return -1; - } else { - // a is equal to b - return 0; - } + if (aindex < alen) { + // b is prefix of a + return 1; + } else if (bindex < blen) { + // a is prefix of b + return -1; + } else { + // a is equal to b + return 0; + } } bool AutoDigitLessThan(const char* a, int alen, const char* b, int blen) { - return AutoDigitStrCmp(a, alen, b, blen, false) 
< 0; + return AutoDigitStrCmp(a, alen, b, blen, false) < 0; } -bool StrictAutoDigitLessThan(const char* a, int alen, - const char* b, int blen) { - return AutoDigitStrCmp(a, alen, b, blen, true) < 0; +bool StrictAutoDigitLessThan(const char* a, int alen, const char* b, int blen) { + return AutoDigitStrCmp(a, alen, b, blen, true) < 0; } // ---------------------------------------------------------------------- @@ -1224,113 +1168,105 @@ bool StrictAutoDigitLessThan(const char* a, int alen, // ---------------------------------------------------------------------- string SimpleDtoa(double value) { - char buffer[kDoubleToBufferSize]; - return DoubleToBuffer(value, buffer); + char buffer[kDoubleToBufferSize]; + return DoubleToBuffer(value, buffer); } string SimpleFtoa(float value) { - char buffer[kFloatToBufferSize]; - return FloatToBuffer(value, buffer); + char buffer[kFloatToBufferSize]; + return FloatToBuffer(value, buffer); } char* DoubleToBuffer(double value, char* buffer) { - // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all - // platforms these days. Just in case some system exists where DBL_DIG - // is significantly larger -- and risks overflowing our buffer -- we have - // this assert. - COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big); + // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all + // platforms these days. Just in case some system exists where DBL_DIG + // is significantly larger -- and risks overflowing our buffer -- we have + // this assert. + COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big); - int snprintf_result = - snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value); + int snprintf_result = snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value); - // The snprintf should never overflow because the buffer is significantly - // larger than the precision we asked for. 
- DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); + // The snprintf should never overflow because the buffer is significantly + // larger than the precision we asked for. + DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); - if (strtod(buffer, nullptr) != value) { - snprintf_result = - snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value); + if (strtod(buffer, nullptr) != value) { + snprintf_result = snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG + 2, value); - // Should never overflow; see above. - DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); - } - return buffer; + // Should never overflow; see above. + DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); + } + return buffer; } char* FloatToBuffer(float value, char* buffer) { - // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all - // platforms these days. Just in case some system exists where FLT_DIG - // is significantly larger -- and risks overflowing our buffer -- we have - // this assert. - COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big); + // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all + // platforms these days. Just in case some system exists where FLT_DIG + // is significantly larger -- and risks overflowing our buffer -- we have + // this assert. + COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big); - int snprintf_result = - snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value); + int snprintf_result = snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value); - // The snprintf should never overflow because the buffer is significantly - // larger than the precision we asked for. - DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); + // The snprintf should never overflow because the buffer is significantly + // larger than the precision we asked for. 
+ DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); - float parsed_value; - if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { - snprintf_result = - snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+2, value); + float parsed_value; + if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { + snprintf_result = snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG + 2, value); - // Should never overflow; see above. - DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); - } - return buffer; + // Should never overflow; see above. + DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); + } + return buffer; } -int DoubleToBuffer(double value, int width, char *buffer) { - // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all - // platforms these days. Just in case some system exists where DBL_DIG - // is significantly larger -- and risks overflowing our buffer -- we have - // this assert. - COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big); +int DoubleToBuffer(double value, int width, char* buffer) { + // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all + // platforms these days. Just in case some system exists where DBL_DIG + // is significantly larger -- and risks overflowing our buffer -- we have + // this assert. + COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big); - int snprintf_result = - snprintf(buffer, width, "%.*g", DBL_DIG, value); + int snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG, value); - // The snprintf should never overflow because the buffer is significantly - // larger than the precision we asked for. - DCHECK(snprintf_result > 0 && snprintf_result < width); + // The snprintf should never overflow because the buffer is significantly + // larger than the precision we asked for. 
+ DCHECK(snprintf_result > 0 && snprintf_result < width); - if (strtod(buffer, nullptr) != value) { - snprintf_result = - snprintf(buffer, width, "%.*g", DBL_DIG+2, value); + if (strtod(buffer, nullptr) != value) { + snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG + 2, value); - // Should never overflow; see above. - DCHECK(snprintf_result > 0 && snprintf_result < width); - } + // Should never overflow; see above. + DCHECK(snprintf_result > 0 && snprintf_result < width); + } - return snprintf_result; + return snprintf_result; } -int FloatToBuffer(float value, int width, char *buffer) { - // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all - // platforms these days. Just in case some system exists where FLT_DIG - // is significantly larger -- and risks overflowing our buffer -- we have - // this assert. - COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big); +int FloatToBuffer(float value, int width, char* buffer) { + // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all + // platforms these days. Just in case some system exists where FLT_DIG + // is significantly larger -- and risks overflowing our buffer -- we have + // this assert. + COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big); - int snprintf_result = - snprintf(buffer, width, "%.*g", FLT_DIG, value); + int snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG, value); - // The snprintf should never overflow because the buffer is significantly - // larger than the precision we asked for. - DCHECK(snprintf_result > 0 && snprintf_result < width); + // The snprintf should never overflow because the buffer is significantly + // larger than the precision we asked for. 
+ DCHECK(snprintf_result > 0 && snprintf_result < width); - float parsed_value; - if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { - snprintf_result = - snprintf(buffer, width, "%.*g", FLT_DIG+2, value); + float parsed_value; + if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { + snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG + 2, value); - // Should never overflow; see above. - DCHECK(snprintf_result > 0 && snprintf_result < width); - } + // Should never overflow; see above. + DCHECK(snprintf_result > 0 && snprintf_result < width); + } - return snprintf_result; + return snprintf_result; } // ---------------------------------------------------------------------- @@ -1342,116 +1278,112 @@ int FloatToBuffer(float value, int width, char *buffer) { // Return value: string // ---------------------------------------------------------------------- string SimpleItoaWithCommas(int32 i) { - // 10 digits, 3 commas, and sign are good for 32-bit or smaller ints. - // Longest is -2,147,483,648. - char local[14]; - char *p = local + sizeof(local); - // Need to use uint32 instead of int32 to correctly handle - // -2,147,483,648. - uint32 n = i; - if (i < 0) - n = 0 - n; // negate the unsigned value to avoid overflow - *--p = '0' + n % 10; // this case deals with the number "0" - n /= 10; - while (n) { - *--p = '0' + n % 10; + // 10 digits, 3 commas, and sign are good for 32-bit or smaller ints. + // Longest is -2,147,483,648. + char local[14]; + char* p = local + sizeof(local); + // Need to use uint32 instead of int32 to correctly handle + // -2,147,483,648. 
+ uint32 n = i; + if (i < 0) n = 0 - n; // negate the unsigned value to avoid overflow + *--p = '0' + n % 10; // this case deals with the number "0" n /= 10; - if (n == 0) break; - - *--p = '0' + n % 10; - n /= 10; - if (n == 0) break; - - *--p = ','; - *--p = '0' + n % 10; - n /= 10; - // For this unrolling, we check if n == 0 in the main while loop - } - if (i < 0) - *--p = '-'; - return string(p, local + sizeof(local)); + while (n) { + *--p = '0' + n % 10; + n /= 10; + if (n == 0) break; + + *--p = '0' + n % 10; + n /= 10; + if (n == 0) break; + + *--p = ','; + *--p = '0' + n % 10; + n /= 10; + // For this unrolling, we check if n == 0 in the main while loop + } + if (i < 0) *--p = '-'; + return string(p, local + sizeof(local)); } // We need this overload because otherwise SimpleItoaWithCommas(5U) wouldn't // compile. string SimpleItoaWithCommas(uint32 i) { - // 10 digits and 3 commas are good for 32-bit or smaller ints. - // Longest is 4,294,967,295. - char local[13]; - char *p = local + sizeof(local); - *--p = '0' + i % 10; // this case deals with the number "0" - i /= 10; - while (i) { - *--p = '0' + i % 10; - i /= 10; - if (i == 0) break; - - *--p = '0' + i % 10; - i /= 10; - if (i == 0) break; - - *--p = ','; - *--p = '0' + i % 10; + // 10 digits and 3 commas are good for 32-bit or smaller ints. + // Longest is 4,294,967,295. + char local[13]; + char* p = local + sizeof(local); + *--p = '0' + i % 10; // this case deals with the number "0" i /= 10; - // For this unrolling, we check if i == 0 in the main while loop - } - return string(p, local + sizeof(local)); + while (i) { + *--p = '0' + i % 10; + i /= 10; + if (i == 0) break; + + *--p = '0' + i % 10; + i /= 10; + if (i == 0) break; + + *--p = ','; + *--p = '0' + i % 10; + i /= 10; + // For this unrolling, we check if i == 0 in the main while loop + } + return string(p, local + sizeof(local)); } string SimpleItoaWithCommas(int64 i) { - // 19 digits, 6 commas, and sign are good for 64-bit or smaller ints. 
- char local[26]; - char *p = local + sizeof(local); - // Need to use uint64 instead of int64 to correctly handle - // -9,223,372,036,854,775,808. - uint64 n = i; - if (i < 0) - n = 0 - n; - *--p = '0' + n % 10; // this case deals with the number "0" - n /= 10; - while (n) { - *--p = '0' + n % 10; - n /= 10; - if (n == 0) break; - - *--p = '0' + n % 10; + // 19 digits, 6 commas, and sign are good for 64-bit or smaller ints. + char local[26]; + char* p = local + sizeof(local); + // Need to use uint64 instead of int64 to correctly handle + // -9,223,372,036,854,775,808. + uint64 n = i; + if (i < 0) n = 0 - n; + *--p = '0' + n % 10; // this case deals with the number "0" n /= 10; - if (n == 0) break; - - *--p = ','; - *--p = '0' + n % 10; - n /= 10; - // For this unrolling, we check if n == 0 in the main while loop - } - if (i < 0) - *--p = '-'; - return string(p, local + sizeof(local)); + while (n) { + *--p = '0' + n % 10; + n /= 10; + if (n == 0) break; + + *--p = '0' + n % 10; + n /= 10; + if (n == 0) break; + + *--p = ','; + *--p = '0' + n % 10; + n /= 10; + // For this unrolling, we check if n == 0 in the main while loop + } + if (i < 0) *--p = '-'; + return string(p, local + sizeof(local)); } // We need this overload because otherwise SimpleItoaWithCommas(5ULL) wouldn't // compile. string SimpleItoaWithCommas(uint64 i) { - // 20 digits and 6 commas are good for 64-bit or smaller ints. - // Longest is 18,446,744,073,709,551,615. - char local[26]; - char *p = local + sizeof(local); - *--p = '0' + i % 10; // this case deals with the number "0" - i /= 10; - while (i) { - *--p = '0' + i % 10; + // 20 digits and 6 commas are good for 64-bit or smaller ints. + // Longest is 18,446,744,073,709,551,615. 
+ char local[26]; + char* p = local + sizeof(local); + *--p = '0' + i % 10; // this case deals with the number "0" i /= 10; - if (i == 0) break; - - *--p = '0' + i % 10; - i /= 10; - if (i == 0) break; - - *--p = ','; - *--p = '0' + i % 10; - i /= 10; - // For this unrolling, we check if i == 0 in the main while loop - } - return string(p, local + sizeof(local)); + while (i) { + *--p = '0' + i % 10; + i /= 10; + if (i == 0) break; + + *--p = '0' + i % 10; + i /= 10; + if (i == 0) break; + + *--p = ','; + *--p = '0' + i % 10; + i /= 10; + // For this unrolling, we check if i == 0 in the main while loop + } + return string(p, local + sizeof(local)); } // ---------------------------------------------------------------------- @@ -1464,48 +1396,47 @@ string SimpleItoaWithCommas(uint64 i) { // Return value: string // ---------------------------------------------------------------------- string ItoaKMGT(int64 i) { - const char *sign = "", *suffix = ""; - if (i < 0) { - // We lose some accuracy if the caller passes LONG_LONG_MIN, but - // that's OK as this function is only for human readability - if (i == numeric_limits::min()) i++; - sign = "-"; - i = -i; - } - - int64 val; - - if ((val = (i >> 40)) > 1) { - suffix = "T"; - } else if ((val = (i >> 30)) > 1) { - suffix = "G"; - } else if ((val = (i >> 20)) > 1) { - suffix = "M"; - } else if ((val = (i >> 10)) > 1) { - suffix = "K"; - } else { - val = i; - } - - return StringPrintf("%s%" PRId64 "%s", sign, val, suffix); + const char *sign = "", *suffix = ""; + if (i < 0) { + // We lose some accuracy if the caller passes LONG_LONG_MIN, but + // that's OK as this function is only for human readability + if (i == numeric_limits::min()) i++; + sign = "-"; + i = -i; + } + + int64 val; + + if ((val = (i >> 40)) > 1) { + suffix = "T"; + } else if ((val = (i >> 30)) > 1) { + suffix = "G"; + } else if ((val = (i >> 20)) > 1) { + suffix = "M"; + } else if ((val = (i >> 10)) > 1) { + suffix = "K"; + } else { + val = i; + } + + return 
StringPrintf("%s%" PRId64 "%s", sign, val, suffix); } // DEPRECATED(wadetregaskis). // These are non-inline because some BUILD files turn on -Wformat-non-literal. string FloatToString(float f, const char* format) { - return StringPrintf(format, f); + return StringPrintf(format, f); } string IntToString(int i, const char* format) { - return StringPrintf(format, i); + return StringPrintf(format, i); } string Int64ToString(int64 i64, const char* format) { - return StringPrintf(format, i64); + return StringPrintf(format, i64); } string UInt64ToString(uint64 ui64, const char* format) { - return StringPrintf(format, ui64); + return StringPrintf(format, ui64); } - diff --git a/be/src/gutil/strings/numbers.h b/be/src/gutil/strings/numbers.h index cfdf75747cfe75..5f2acb216f2021 100644 --- a/be/src/gutil/strings/numbers.h +++ b/be/src/gutil/strings/numbers.h @@ -10,6 +10,7 @@ #include #include #include + #include using std::binary_function; using std::less; @@ -26,7 +27,6 @@ using std::vector; #include "gutil/port.h" #include "gutil/stringprintf.h" - // START DOXYGEN NumbersFunctions grouping /* @defgroup NumbersFunctions * @{ */ @@ -76,10 +76,8 @@ bool safe_strto64_base(const string& str, int64* value, int base); bool safe_strtou32_base(const string& str, uint32* value, int base); bool safe_strtou64_base(const string& str, uint64* value, int base); -bool safe_strto32_base(const char* startptr, int buffer_size, - int32* value, int base); -bool safe_strto64_base(const char* startptr, int buffer_size, - int64* value, int base); +bool safe_strto32_base(const char* startptr, int buffer_size, int32* value, int base); +bool safe_strto64_base(const char* startptr, int buffer_size, int64* value, int base); // u64tostr_base36() // The inverse of safe_strtou64_base, converts the number agument to @@ -92,7 +90,9 @@ size_t u64tostr_base36(uint64 number, size_t buf_size, char* buffer); // Similar to atoi(s), except s could be like "16k", "32M", "2G", "4t". 
uint64 atoi_kmgt(const char* s); -inline uint64 atoi_kmgt(const string& s) { return atoi_kmgt(s.c_str()); } +inline uint64 atoi_kmgt(const string& s) { + return atoi_kmgt(s.c_str()); +} // ---------------------------------------------------------------------- // FastIntToBuffer() @@ -144,12 +144,10 @@ char* FastHex32ToBuffer(uint32 i, char* buffer); // at least 22 bytes long inline char* FastIntToBuffer(int i, char* buffer) { - return (sizeof(i) == 4 ? - FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); + return (sizeof(i) == 4 ? FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); } inline char* FastUIntToBuffer(unsigned int i, char* buffer) { - return (sizeof(i) == 4 ? - FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); + return (sizeof(i) == 4 ? FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); } // ---------------------------------------------------------------------- @@ -168,19 +166,19 @@ inline char* FastUIntToBuffer(unsigned int i, char* buffer) { // terminating the string). // ---------------------------------------------------------------------- -char* FastInt32ToBufferLeft(int32 i, char* buffer); // at least 12 bytes -char* FastUInt32ToBufferLeft(uint32 i, char* buffer); // at least 12 bytes -char* FastInt64ToBufferLeft(int64 i, char* buffer); // at least 22 bytes -char* FastUInt64ToBufferLeft(uint64 i, char* buffer); // at least 22 bytes +char* FastInt32ToBufferLeft(int32 i, char* buffer); // at least 12 bytes +char* FastUInt32ToBufferLeft(uint32 i, char* buffer); // at least 12 bytes +char* FastInt64ToBufferLeft(int64 i, char* buffer); // at least 22 bytes +char* FastUInt64ToBufferLeft(uint64 i, char* buffer); // at least 22 bytes // Just define these in terms of the above. 
inline char* FastUInt32ToBuffer(uint32 i, char* buffer) { - FastUInt32ToBufferLeft(i, buffer); - return buffer; + FastUInt32ToBufferLeft(i, buffer); + return buffer; } inline char* FastUInt64ToBuffer(uint64 i, char* buffer) { - FastUInt64ToBufferLeft(i, buffer); - return buffer; + FastUInt64ToBufferLeft(i, buffer); + return buffer; } // ---------------------------------------------------------------------- @@ -207,7 +205,7 @@ void ConsumeStrayLeadingZeroes(string* str); // -------------------------------------------------------------------- int32 ParseLeadingInt32Value(const char* str, int32 deflt); inline int32 ParseLeadingInt32Value(const string& str, int32 deflt) { - return ParseLeadingInt32Value(str.c_str(), deflt); + return ParseLeadingInt32Value(str.c_str(), deflt); } // ParseLeadingUInt32Value @@ -219,7 +217,7 @@ inline int32 ParseLeadingInt32Value(const string& str, int32 deflt) { // -------------------------------------------------------------------- uint32 ParseLeadingUInt32Value(const char* str, uint32 deflt); inline uint32 ParseLeadingUInt32Value(const string& str, uint32 deflt) { - return ParseLeadingUInt32Value(str.c_str(), deflt); + return ParseLeadingUInt32Value(str.c_str(), deflt); } // ---------------------------------------------------------------------- @@ -233,7 +231,7 @@ inline uint32 ParseLeadingUInt32Value(const string& str, uint32 deflt) { // -------------------------------------------------------------------- int32 ParseLeadingDec32Value(const char* str, int32 deflt); inline int32 ParseLeadingDec32Value(const string& str, int32 deflt) { - return ParseLeadingDec32Value(str.c_str(), deflt); + return ParseLeadingDec32Value(str.c_str(), deflt); } // ParseLeadingUDec32Value @@ -246,7 +244,7 @@ inline int32 ParseLeadingDec32Value(const string& str, int32 deflt) { // -------------------------------------------------------------------- uint32 ParseLeadingUDec32Value(const char* str, uint32 deflt); inline uint32 ParseLeadingUDec32Value(const 
string& str, uint32 deflt) { - return ParseLeadingUDec32Value(str.c_str(), deflt); + return ParseLeadingUDec32Value(str.c_str(), deflt); } // ---------------------------------------------------------------------- @@ -261,23 +259,23 @@ inline uint32 ParseLeadingUDec32Value(const string& str, uint32 deflt) { // -------------------------------------------------------------------- uint64 ParseLeadingUInt64Value(const char* str, uint64 deflt); inline uint64 ParseLeadingUInt64Value(const string& str, uint64 deflt) { - return ParseLeadingUInt64Value(str.c_str(), deflt); + return ParseLeadingUInt64Value(str.c_str(), deflt); } int64 ParseLeadingInt64Value(const char* str, int64 deflt); inline int64 ParseLeadingInt64Value(const string& str, int64 deflt) { - return ParseLeadingInt64Value(str.c_str(), deflt); + return ParseLeadingInt64Value(str.c_str(), deflt); } uint64 ParseLeadingHex64Value(const char* str, uint64 deflt); inline uint64 ParseLeadingHex64Value(const string& str, uint64 deflt) { - return ParseLeadingHex64Value(str.c_str(), deflt); + return ParseLeadingHex64Value(str.c_str(), deflt); } int64 ParseLeadingDec64Value(const char* str, int64 deflt); inline int64 ParseLeadingDec64Value(const string& str, int64 deflt) { - return ParseLeadingDec64Value(str.c_str(), deflt); + return ParseLeadingDec64Value(str.c_str(), deflt); } uint64 ParseLeadingUDec64Value(const char* str, uint64 deflt); inline uint64 ParseLeadingUDec64Value(const string& str, uint64 deflt) { - return ParseLeadingUDec64Value(str.c_str(), deflt); + return ParseLeadingUDec64Value(str.c_str(), deflt); } // ---------------------------------------------------------------------- @@ -288,7 +286,7 @@ inline uint64 ParseLeadingUDec64Value(const string& str, uint64 deflt) { // -------------------------------------------------------------------- double ParseLeadingDoubleValue(const char* str, double deflt); inline double ParseLeadingDoubleValue(const string& str, double deflt) { - return 
ParseLeadingDoubleValue(str.c_str(), deflt); + return ParseLeadingDoubleValue(str.c_str(), deflt); } // ---------------------------------------------------------------------- @@ -300,7 +298,7 @@ inline double ParseLeadingDoubleValue(const string& str, double deflt) { // -------------------------------------------------------------------- bool ParseLeadingBoolValue(const char* str, bool deflt); inline bool ParseLeadingBoolValue(const string& str, bool deflt) { - return ParseLeadingBoolValue(str.c_str(), deflt); + return ParseLeadingBoolValue(str.c_str(), deflt); } // ---------------------------------------------------------------------- @@ -326,42 +324,34 @@ inline bool ParseLeadingBoolValue(const string& str, bool deflt) { // strict mode, but "01" == "1" otherwise. // ---------------------------------------------------------------------- -int AutoDigitStrCmp(const char* a, int alen, - const char* b, int blen, - bool strict); +int AutoDigitStrCmp(const char* a, int alen, const char* b, int blen, bool strict); -bool AutoDigitLessThan(const char* a, int alen, - const char* b, int blen); +bool AutoDigitLessThan(const char* a, int alen, const char* b, int blen); -bool StrictAutoDigitLessThan(const char* a, int alen, - const char* b, int blen); +bool StrictAutoDigitLessThan(const char* a, int alen, const char* b, int blen); -struct autodigit_less - : public binary_function { - bool operator()(const string& a, const string& b) const { - return AutoDigitLessThan(a.data(), a.size(), b.data(), b.size()); - } +struct autodigit_less : public binary_function { + bool operator()(const string& a, const string& b) const { + return AutoDigitLessThan(a.data(), a.size(), b.data(), b.size()); + } }; -struct autodigit_greater - : public binary_function { - bool operator()(const string& a, const string& b) const { - return AutoDigitLessThan(b.data(), b.size(), a.data(), a.size()); - } +struct autodigit_greater : public binary_function { + bool operator()(const string& a, const string& 
b) const { + return AutoDigitLessThan(b.data(), b.size(), a.data(), a.size()); + } }; -struct strict_autodigit_less - : public binary_function { - bool operator()(const string& a, const string& b) const { - return StrictAutoDigitLessThan(a.data(), a.size(), b.data(), b.size()); - } +struct strict_autodigit_less : public binary_function { + bool operator()(const string& a, const string& b) const { + return StrictAutoDigitLessThan(a.data(), a.size(), b.data(), b.size()); + } }; -struct strict_autodigit_greater - : public binary_function { - bool operator()(const string& a, const string& b) const { - return StrictAutoDigitLessThan(b.data(), b.size(), a.data(), a.size()); - } +struct strict_autodigit_greater : public binary_function { + bool operator()(const string& a, const string& b) const { + return StrictAutoDigitLessThan(b.data(), b.size(), a.data(), a.size()); + } }; // ---------------------------------------------------------------------- @@ -372,25 +362,25 @@ struct strict_autodigit_greater // Return value: string // ---------------------------------------------------------------------- inline string SimpleItoa(int32 i) { - char buf[16]; // Longest is -2147483648 - return string(buf, FastInt32ToBufferLeft(i, buf)); + char buf[16]; // Longest is -2147483648 + return string(buf, FastInt32ToBufferLeft(i, buf)); } // We need this overload because otherwise SimpleItoa(5U) wouldn't compile. inline string SimpleItoa(uint32 i) { - char buf[16]; // Longest is 4294967295 - return string(buf, FastUInt32ToBufferLeft(i, buf)); + char buf[16]; // Longest is 4294967295 + return string(buf, FastUInt32ToBufferLeft(i, buf)); } inline string SimpleItoa(int64 i) { - char buf[32]; // Longest is -9223372036854775808 - return string(buf, FastInt64ToBufferLeft(i, buf)); + char buf[32]; // Longest is -9223372036854775808 + return string(buf, FastInt64ToBufferLeft(i, buf)); } // We need this overload because otherwise SimpleItoa(5ULL) wouldn't compile. 
inline string SimpleItoa(uint64 i) { - char buf[32]; // Longest is 18446744073709551615 - return string(buf, FastUInt64ToBufferLeft(i, buf)); + char buf[32]; // Longest is 18446744073709551615 + return string(buf, FastUInt64ToBufferLeft(i, buf)); } // SimpleAtoi converts a string to an integer. @@ -402,27 +392,26 @@ inline string SimpleItoa(uint64 i) { // Returns true if parsing was successful. template bool MUST_USE_RESULT SimpleAtoi(const char* s, int_type* out) { - // Must be of integer type (not pointer type), with more than 16-bitwidth. - COMPILE_ASSERT(sizeof(*out) == 4 || sizeof(*out) == 8, - SimpleAtoiWorksWith32Or64BitInts); - if (std::numeric_limits::is_signed) { // Signed - if (sizeof(*out) == 64 / 8) { // 64-bit - return safe_strto64(s, reinterpret_cast(out)); - } else { // 32-bit - return safe_strto32(s, reinterpret_cast(out)); - } - } else { // Unsigned - if (sizeof(*out) == 64 / 8) { // 64-bit - return safe_strtou64(s, reinterpret_cast(out)); - } else { // 32-bit - return safe_strtou32(s, reinterpret_cast(out)); + // Must be of integer type (not pointer type), with more than 16-bitwidth. 
+ COMPILE_ASSERT(sizeof(*out) == 4 || sizeof(*out) == 8, SimpleAtoiWorksWith32Or64BitInts); + if (std::numeric_limits::is_signed) { // Signed + if (sizeof(*out) == 64 / 8) { // 64-bit + return safe_strto64(s, reinterpret_cast(out)); + } else { // 32-bit + return safe_strto32(s, reinterpret_cast(out)); + } + } else { // Unsigned + if (sizeof(*out) == 64 / 8) { // 64-bit + return safe_strtou64(s, reinterpret_cast(out)); + } else { // 32-bit + return safe_strtou32(s, reinterpret_cast(out)); + } } - } } template bool MUST_USE_RESULT SimpleAtoi(const string& s, int_type* out) { - return SimpleAtoi(s.c_str(), out); + return SimpleAtoi(s.c_str(), out); } // ---------------------------------------------------------------------- @@ -447,8 +436,8 @@ bool MUST_USE_RESULT SimpleAtoi(const string& s, int_type* out) { string SimpleDtoa(double value); string SimpleFtoa(float value); -int DoubleToBuffer(double i, int width, char *buffer); -int FloatToBuffer(float i, int width, char *buffer); +int DoubleToBuffer(double i, int width, char* buffer); +int FloatToBuffer(float i, int width, char* buffer); char* DoubleToBuffer(double i, char* buffer); char* FloatToBuffer(float i, char* buffer); @@ -517,15 +506,15 @@ string ItoaKMGT(int64 i); // '\0'-terminated, which is more efficient. 
// ---------------------------------------------------------------------- struct DoubleRangeOptions { - const char* separators; - bool require_separator; - const char* acceptable_terminators; - bool null_terminator_ok; - bool allow_unbounded_markers; - uint32 num_required_bounds; - bool dont_modify_unbounded; - bool allow_currency; - bool allow_comparators; + const char* separators; + bool require_separator; + const char* acceptable_terminators; + bool null_terminator_ok; + bool allow_unbounded_markers; + uint32 num_required_bounds; + bool dont_modify_unbounded; + bool allow_currency; + bool allow_comparators; }; // NOTE: The instruction below creates a Module titled @@ -533,9 +522,8 @@ struct DoubleRangeOptions { // This instruction is needed to expose global functions that are not // within a namespace. // -bool ParseDoubleRange(const char* text, int len, const char** end, - double* from, double* to, bool* is_currency, - const DoubleRangeOptions& opts); +bool ParseDoubleRange(const char* text, int len, const char** end, double* from, double* to, + bool* is_currency, const DoubleRangeOptions& opts); // END DOXYGEN SplitFunctions grouping /* @} */ @@ -575,4 +563,4 @@ bool ParseDoubleRange(const char* text, int len, const char** end, // return StringPrintf("%7" PRIu64, ui64); // } -#endif // STRINGS_NUMBERS_H_ +#endif // STRINGS_NUMBERS_H_ diff --git a/be/src/gutil/strings/split.cc b/be/src/gutil/strings/split.cc index 4d8cee523db7cf..0e865632e2d3b3 100644 --- a/be/src/gutil/strings/split.cc +++ b/be/src/gutil/strings/split.cc @@ -7,6 +7,7 @@ #include #include #include + #include using std::back_insert_iterator; using std::iterator_traits; @@ -16,14 +17,15 @@ using std::numeric_limits; using std::unordered_map; using std::unordered_set; -#include "gutil/integral_types.h" #include + +#include "gutil/hash/hash.h" +#include "gutil/integral_types.h" #include "gutil/logging-inl.h" #include "gutil/macros.h" -#include "gutil/strtoint.h" #include 
"gutil/strings/ascii_ctype.h" #include "gutil/strings/util.h" -#include "gutil/hash/hash.h" +#include "gutil/strtoint.h" // Implementations for some of the Split2 API. Much of the Split2 API is // templated so it exists in header files, either strings/split.h or @@ -40,72 +42,59 @@ namespace { // will ultimately use StringPiece::find(), and the AnyOf delimiter will use // StringPiece::find_first_of(). template -StringPiece GenericFind( - StringPiece text, - StringPiece delimiter, - FindPolicy find_policy) { - if (delimiter.empty() && text.length() > 0) { - // Special case for empty string delimiters: always return a zero-length - // StringPiece referring to the item at position 1. - return StringPiece(text.begin() + 1, 0); - } - int found_pos = StringPiece::npos; - StringPiece found(text.end(), 0); // By default, not found - found_pos = find_policy.Find(text, delimiter); - if (found_pos != StringPiece::npos) { - found.set(text.data() + found_pos, find_policy.Length(delimiter)); - } - return found; +StringPiece GenericFind(StringPiece text, StringPiece delimiter, FindPolicy find_policy) { + if (delimiter.empty() && text.length() > 0) { + // Special case for empty string delimiters: always return a zero-length + // StringPiece referring to the item at position 1. + return StringPiece(text.begin() + 1, 0); + } + int found_pos = StringPiece::npos; + StringPiece found(text.end(), 0); // By default, not found + found_pos = find_policy.Find(text, delimiter); + if (found_pos != StringPiece::npos) { + found.set(text.data() + found_pos, find_policy.Length(delimiter)); + } + return found; } // Finds using StringPiece::find(), therefore the length of the found delimiter // is delimiter.length(). 
struct LiteralPolicy { - int Find(StringPiece text, StringPiece delimiter) { - return text.find(delimiter); - } - int Length(StringPiece delimiter) { - return delimiter.length(); - } + int Find(StringPiece text, StringPiece delimiter) { return text.find(delimiter); } + int Length(StringPiece delimiter) { return delimiter.length(); } }; // Finds using StringPiece::find_first_of(), therefore the length of the found // delimiter is 1. struct AnyOfPolicy { - size_t Find(StringPiece text, StringPiece delimiter) { - return text.find_first_of(delimiter); - } - int Length(StringPiece delimiter) { - return 1; - } + size_t Find(StringPiece text, StringPiece delimiter) { return text.find_first_of(delimiter); } + int Length(StringPiece delimiter) { return 1; } }; -} // namespace +} // namespace // // Literal // -Literal::Literal(StringPiece sp) : delimiter_(sp.ToString()) { -} +Literal::Literal(StringPiece sp) : delimiter_(sp.ToString()) {} StringPiece Literal::Find(StringPiece text) const { - return GenericFind(text, delimiter_, LiteralPolicy()); + return GenericFind(text, delimiter_, LiteralPolicy()); } // // AnyOf // -AnyOf::AnyOf(StringPiece sp) : delimiters_(sp.ToString()) { -} +AnyOf::AnyOf(StringPiece sp) : delimiters_(sp.ToString()) {} StringPiece AnyOf::Find(StringPiece text) const { - return GenericFind(text, delimiters_, AnyOfPolicy()); + return GenericFind(text, delimiters_, AnyOfPolicy()); } -} // namespace delimiter -} // namespace strings +} // namespace delimiter +} // namespace strings // // ==================== LEGACY SPLIT FUNCTIONS ==================== @@ -124,8 +113,8 @@ namespace { // - unordered_map - to change append semantics template void AppendToImpl(Container* container, Splitter splitter) { - Container c = splitter; // Calls implicit conversion operator. - std::copy(c.begin(), c.end(), std::inserter(*container, container->end())); + Container c = splitter; // Calls implicit conversion operator. 
+ std::copy(c.begin(), c.end(), std::inserter(*container, container->end())); } // Overload of AppendToImpl() that is optimized for appending to vector. @@ -133,12 +122,12 @@ void AppendToImpl(Container* container, Splitter splitter) { // as the intermediate container. template void AppendToImpl(vector* container, Splitter splitter) { - vector vsp = splitter; // Calls implicit conversion operator. - size_t container_size = container->size(); - container->resize(container_size + vsp.size()); - for (const auto& sp : vsp) { - sp.CopyToString(&(*container)[container_size++]); - } + vector vsp = splitter; // Calls implicit conversion operator. + size_t container_size = container->size(); + container->resize(container_size + vsp.size()); + for (const auto& sp : vsp) { + sp.CopyToString(&(*container)[container_size++]); + } } // Here we define two AppendToImpl() overloads for map<> and unordered_map<>. Both of @@ -163,20 +152,20 @@ void AppendToImpl(vector* container, Splitter splitter) { // template void AppendToMap(Map* m, Splitter splitter) { - Map tmp = splitter; // Calls implicit conversion operator. - for (typename Map::const_iterator it = tmp.begin(); it != tmp.end(); ++it) { - (*m)[it->first] = it->second; - } + Map tmp = splitter; // Calls implicit conversion operator. + for (typename Map::const_iterator it = tmp.begin(); it != tmp.end(); ++it) { + (*m)[it->first] = it->second; + } } template void AppendToImpl(map* map_container, Splitter splitter) { - AppendToMap(map_container, splitter); + AppendToMap(map_container, splitter); } template void AppendToImpl(unordered_map* map_container, Splitter splitter) { - AppendToMap(map_container, splitter); + AppendToMap(map_container, splitter); } // Appends the results of a call to strings::Split() to the specified container. 
@@ -193,17 +182,17 @@ void AppendToImpl(unordered_map* map_container, Splitter splitte // template void AppendTo(Container* container, Splitter splitter) { - if (container->empty()) { - // "Appending" to an empty container is by far the common case. For this we - // assign directly to the output container, which is more efficient than - // explicitly appending. - *container = splitter; // Calls implicit conversion operator. - } else { - AppendToImpl(container, splitter); - } + if (container->empty()) { + // "Appending" to an empty container is by far the common case. For this we + // assign directly to the output container, which is more efficient than + // explicitly appending. + *container = splitter; // Calls implicit conversion operator. + } else { + AppendToImpl(container, splitter); + } } -} // anonymous namespace +} // anonymous namespace // Constants for ClipString() static const int kMaxOverCut = 12; @@ -216,27 +205,24 @@ static const int kCutStrSize = sizeof(kCutStr) - 1; // if the string doesn't need to be clipped. // ---------------------------------------------------------------------- static int ClipStringHelper(const char* str, int max_len, bool use_ellipsis) { - if (strlen(str) <= max_len) - return -1; - - int max_substr_len = max_len; - - if (use_ellipsis && max_len > kCutStrSize) { - max_substr_len -= kCutStrSize; - } - - const char* cut_by = - (max_substr_len < kMaxOverCut ? str : str + max_len - kMaxOverCut); - const char* cut_at = str + max_substr_len; - while (!ascii_isspace(*cut_at) && cut_at > cut_by) - cut_at--; - - if (cut_at == cut_by) { - // No space was found - return max_substr_len; - } else { - return cut_at-str; - } + if (strlen(str) <= max_len) return -1; + + int max_substr_len = max_len; + + if (use_ellipsis && max_len > kCutStrSize) { + max_substr_len -= kCutStrSize; + } + + const char* cut_by = (max_substr_len < kMaxOverCut ? 
str : str + max_len - kMaxOverCut); + const char* cut_at = str + max_substr_len; + while (!ascii_isspace(*cut_at) && cut_at > cut_by) cut_at--; + + if (cut_at == cut_by) { + // No space was found + return max_substr_len; + } else { + return cut_at - str; + } } // ---------------------------------------------------------------------- @@ -247,14 +233,14 @@ static int ClipStringHelper(const char* str, int max_len, bool use_ellipsis) { // ---------------------------------------------------------------------- void ClipString(char* str, int max_len) { - int cut_at = ClipStringHelper(str, max_len, true); - if (cut_at != -1) { - if (max_len > kCutStrSize) { - strcpy(str+cut_at, kCutStr); - } else { - strcpy(str+cut_at, ""); + int cut_at = ClipStringHelper(str, max_len, true); + if (cut_at != -1) { + if (max_len > kCutStrSize) { + strcpy(str + cut_at, kCutStr); + } else { + strcpy(str + cut_at, ""); + } } - } } // ---------------------------------------------------------------------- @@ -262,13 +248,13 @@ void ClipString(char* str, int max_len) { // Version of ClipString() that uses string instead of char*. 
// ---------------------------------------------------------------------- void ClipString(string* full_str, int max_len) { - int cut_at = ClipStringHelper(full_str->c_str(), max_len, true); - if (cut_at != -1) { - full_str->erase(cut_at); - if (max_len > kCutStrSize) { - full_str->append(kCutStr); + int cut_at = ClipStringHelper(full_str->c_str(), max_len, true); + if (cut_at != -1) { + full_str->erase(cut_at); + if (max_len > kCutStrSize) { + full_str->append(kCutStr); + } } - } } // ---------------------------------------------------------------------- @@ -285,41 +271,36 @@ void ClipString(string* full_str, int max_len) { // If "pieces" is negative for some reason, it returns the whole string // ---------------------------------------------------------------------- template -static inline -void SplitStringToIteratorAllowEmpty(const StringType& full, - const char* delim, - int pieces, - ITR& result) { - string::size_type begin_index, end_index; - begin_index = 0; - - for (int i = 0; (i < pieces-1) || (pieces == 0); i++) { - end_index = full.find_first_of(delim, begin_index); - if (end_index == string::npos) { - *result++ = full.substr(begin_index); - return; +static inline void SplitStringToIteratorAllowEmpty(const StringType& full, const char* delim, + int pieces, ITR& result) { + string::size_type begin_index, end_index; + begin_index = 0; + + for (int i = 0; (i < pieces - 1) || (pieces == 0); i++) { + end_index = full.find_first_of(delim, begin_index); + if (end_index == string::npos) { + *result++ = full.substr(begin_index); + return; + } + *result++ = full.substr(begin_index, (end_index - begin_index)); + begin_index = end_index + 1; } - *result++ = full.substr(begin_index, (end_index - begin_index)); - begin_index = end_index + 1; - } - *result++ = full.substr(begin_index); + *result++ = full.substr(begin_index); } -void SplitStringIntoNPiecesAllowEmpty(const string& full, - const char* delim, - int pieces, +void SplitStringIntoNPiecesAllowEmpty(const 
string& full, const char* delim, int pieces, vector* result) { - if (pieces == 0) { - // No limit when pieces is 0. - AppendTo(result, strings::Split(full, AnyOf(delim))); - } else { - // The input argument "pieces" specifies the max size that *result should - // be. However, the argument to the Limit() delimiter is the max number of - // delimiters, which should be one less than "pieces". Example: "a,b,c" has - // 3 pieces and two comma delimiters. - int limit = std::max(pieces - 1, 0); - AppendTo(result, strings::Split(full, Limit(AnyOf(delim), limit))); - } + if (pieces == 0) { + // No limit when pieces is 0. + AppendTo(result, strings::Split(full, AnyOf(delim))); + } else { + // The input argument "pieces" specifies the max size that *result should + // be. However, the argument to the Limit() delimiter is the max number of + // delimiters, which should be one less than "pieces". Example: "a,b,c" has + // 3 pieces and two comma delimiters. + int limit = std::max(pieces - 1, 0); + AppendTo(result, strings::Split(full, Limit(AnyOf(delim), limit))); + } } // ---------------------------------------------------------------------- @@ -328,9 +309,8 @@ void SplitStringIntoNPiecesAllowEmpty(const string& full, // to 'result'. If there are consecutive delimiters, this function // will return corresponding empty strings. // ---------------------------------------------------------------------- -void SplitStringAllowEmpty(const string& full, const char* delim, - vector* result) { - AppendTo(result, strings::Split(full, AnyOf(delim))); +void SplitStringAllowEmpty(const string& full, const char* delim, vector* result) { + AppendTo(result, strings::Split(full, AnyOf(delim))); } // If we know how much to allocate for a vector of strings, we can @@ -345,24 +325,24 @@ void SplitStringAllowEmpty(const string& full, const char* delim, // and use the existing template function, but probably this is more clear // and more sure to get optimized to reasonable code. 
static int CalculateReserveForVector(const string& full, const char* delim) { - int count = 0; - if (delim[0] != '\0' && delim[1] == '\0') { - // Optimize the common case where delim is a single character. - char c = delim[0]; - const char* p = full.data(); - const char* end = p + full.size(); - while (p != end) { - if (*p == c) { // This could be optimized with hasless(v,1) trick. - ++p; - } else { - while (++p != end && *p != c) { - // Skip to the next occurence of the delimiter. + int count = 0; + if (delim[0] != '\0' && delim[1] == '\0') { + // Optimize the common case where delim is a single character. + char c = delim[0]; + const char* p = full.data(); + const char* end = p + full.size(); + while (p != end) { + if (*p == c) { // This could be optimized with hasless(v,1) trick. + ++p; + } else { + while (++p != end && *p != c) { + // Skip to the next occurence of the delimiter. + } + ++count; + } } - ++count; - } } - } - return count; + return count; } // ---------------------------------------------------------------------- @@ -378,68 +358,62 @@ static int CalculateReserveForVector(const string& full, const char* delim) { // the characters in the string, not the entire string as a single delimiter. // ---------------------------------------------------------------------- template -static inline -void SplitStringToIteratorUsing(const StringType& full, - const char* delim, - ITR& result) { - // Optimize the common case where delim is a single character. - if (delim[0] != '\0' && delim[1] == '\0') { - char c = delim[0]; - const char* p = full.data(); - const char* end = p + full.size(); - while (p != end) { - if (*p == c) { - ++p; - } else { - const char* start = p; - while (++p != end && *p != c) { - // Skip to the next occurence of the delimiter. +static inline void SplitStringToIteratorUsing(const StringType& full, const char* delim, + ITR& result) { + // Optimize the common case where delim is a single character. 
+ if (delim[0] != '\0' && delim[1] == '\0') { + char c = delim[0]; + const char* p = full.data(); + const char* end = p + full.size(); + while (p != end) { + if (*p == c) { + ++p; + } else { + const char* start = p; + while (++p != end && *p != c) { + // Skip to the next occurence of the delimiter. + } + *result++ = StringType(start, p - start); + } } - *result++ = StringType(start, p - start); - } + return; } - return; - } - - string::size_type begin_index, end_index; - begin_index = full.find_first_not_of(delim); - while (begin_index != string::npos) { - end_index = full.find_first_of(delim, begin_index); - if (end_index == string::npos) { - *result++ = full.substr(begin_index); - return; + + string::size_type begin_index, end_index; + begin_index = full.find_first_not_of(delim); + while (begin_index != string::npos) { + end_index = full.find_first_of(delim, begin_index); + if (end_index == string::npos) { + *result++ = full.substr(begin_index); + return; + } + *result++ = full.substr(begin_index, (end_index - begin_index)); + begin_index = full.find_first_not_of(delim, end_index); } - *result++ = full.substr(begin_index, (end_index - begin_index)); - begin_index = full.find_first_not_of(delim, end_index); - } } -void SplitStringUsing(const string& full, - const char* delim, - vector* result) { - result->reserve(result->size() + CalculateReserveForVector(full, delim)); - std::back_insert_iterator< vector> it(*result); - SplitStringToIteratorUsing(full, delim, it); +void SplitStringUsing(const string& full, const char* delim, vector* result) { + result->reserve(result->size() + CalculateReserveForVector(full, delim)); + std::back_insert_iterator> it(*result); + SplitStringToIteratorUsing(full, delim, it); } void SplitStringToHashsetUsing(const string& full, const char* delim, unordered_set* result) { - AppendTo(result, strings::Split(full, AnyOf(delim), strings::SkipEmpty())); + AppendTo(result, strings::Split(full, AnyOf(delim), strings::SkipEmpty())); } -void 
SplitStringToSetUsing(const string& full, const char* delim, - set* result) { - AppendTo(result, strings::Split(full, AnyOf(delim), strings::SkipEmpty())); +void SplitStringToSetUsing(const string& full, const char* delim, set* result) { + AppendTo(result, strings::Split(full, AnyOf(delim), strings::SkipEmpty())); } -void SplitStringToMapUsing(const string& full, const char* delim, - map* result) { - AppendTo(result, strings::Split(full, AnyOf(delim), strings::SkipEmpty())); +void SplitStringToMapUsing(const string& full, const char* delim, map* result) { + AppendTo(result, strings::Split(full, AnyOf(delim), strings::SkipEmpty())); } void SplitStringToHashmapUsing(const string& full, const char* delim, unordered_map* result) { - AppendTo(result, strings::Split(full, AnyOf(delim), strings::SkipEmpty())); + AppendTo(result, strings::Split(full, AnyOf(delim), strings::SkipEmpty())); } // ---------------------------------------------------------------------- @@ -449,15 +423,13 @@ void SplitStringToHashmapUsing(const string& full, const char* delim, // If omit empty strings is true, empty strings are omitted // from the resulting vector. 
// ---------------------------------------------------------------------- -void SplitStringPieceToVector(const StringPiece& full, - const char* delim, - vector* vec, +void SplitStringPieceToVector(const StringPiece& full, const char* delim, vector* vec, bool omit_empty_strings) { - if (omit_empty_strings) { - AppendTo(vec, strings::Split(full, AnyOf(delim), SkipEmpty())); - } else { - AppendTo(vec, strings::Split(full, AnyOf(delim))); - } + if (omit_empty_strings) { + AppendTo(vec, strings::Split(full, AnyOf(delim), SkipEmpty())); + } else { + AppendTo(vec, strings::Split(full, AnyOf(delim))); + } } // ---------------------------------------------------------------------- @@ -467,59 +439,58 @@ void SplitStringPieceToVector(const StringPiece& full, // ---------------------------------------------------------------------- vector* SplitUsing(char* full, const char* delim) { - auto vec = new vector; - SplitToVector(full, delim, vec, true); // Omit empty strings - return vec; + auto vec = new vector; + SplitToVector(full, delim, vec, true); // Omit empty strings + return vec; } -void SplitToVector(char* full, const char* delim, vector* vec, - bool omit_empty_strings) { - char* next = full; - while ((next = gstrsep(&full, delim)) != nullptr) { - if (omit_empty_strings && next[0] == '\0') continue; - vec->push_back(next); - } - // Add last element (or full string if no delimiter found): - if (full != nullptr) { - vec->push_back(full); - } +void SplitToVector(char* full, const char* delim, vector* vec, bool omit_empty_strings) { + char* next = full; + while ((next = gstrsep(&full, delim)) != nullptr) { + if (omit_empty_strings && next[0] == '\0') continue; + vec->push_back(next); + } + // Add last element (or full string if no delimiter found): + if (full != nullptr) { + vec->push_back(full); + } } void SplitToVector(char* full, const char* delim, vector* vec, bool omit_empty_strings) { - char* next = full; - while ((next = gstrsep(&full, delim)) != nullptr) { - if 
(omit_empty_strings && next[0] == '\0') continue; - vec->push_back(next); - } - // Add last element (or full string if no delimiter found): - if (full != nullptr) { - vec->push_back(full); - } + char* next = full; + while ((next = gstrsep(&full, delim)) != nullptr) { + if (omit_empty_strings && next[0] == '\0') continue; + vec->push_back(next); + } + // Add last element (or full string if no delimiter found): + if (full != nullptr) { + vec->push_back(full); + } } // ---------------------------------------------------------------------- // SplitOneStringToken() // Mainly a stringified wrapper around strpbrk() // ---------------------------------------------------------------------- -string SplitOneStringToken(const char ** source, const char * delim) { - assert(source); - assert(delim); - if (!*source) { - return string(); - } - const char * begin = *source; - // Optimize the common case where delim is a single character. - if (delim[0] != '\0' && delim[1] == '\0') { - *source = strchr(*source, delim[0]); - } else { - *source = strpbrk(*source, delim); - } - if (*source) { - return string(begin, (*source)++); - } else { - return string(begin); - } +string SplitOneStringToken(const char** source, const char* delim) { + assert(source); + assert(delim); + if (!*source) { + return string(); + } + const char* begin = *source; + // Optimize the common case where delim is a single character. + if (delim[0] != '\0' && delim[1] == '\0') { + *source = strchr(*source, delim[0]); + } else { + *source = strpbrk(*source, delim); + } + if (*source) { + return string(begin, (*source)++); + } else { + return string(begin); + } } // ---------------------------------------------------------------------- @@ -531,72 +502,65 @@ string SplitOneStringToken(const char ** source, const char * delim) { // account. '\' is not allowed as a delimiter. 
// ---------------------------------------------------------------------- template -static inline -void SplitStringWithEscapingToIterator(const string& src, - const strings::CharSet& delimiters, - const bool allow_empty, - ITR* result) { - CHECK(!delimiters.Test('\\')) << "\\ is not allowed as a delimiter."; - CHECK(result); - string part; - - for (uint32 i = 0; i < src.size(); ++i) { - char current_char = src[i]; - if (delimiters.Test(current_char)) { - // Push substrings when we encounter delimiters. - if (allow_empty || !part.empty()) { - *(*result)++ = part; - part.clear(); - } - } else if (current_char == '\\' && ++i < src.size()) { - // If we see a backslash, the next delimiter or backslash is literal. - current_char = src[i]; - if (current_char != '\\' && !delimiters.Test(current_char)) { - // Don't honour unknown escape sequences: emit \f for \f. - part.push_back('\\'); - } - part.push_back(current_char); - } else { - // Otherwise, we have a normal character or trailing backslash. - part.push_back(current_char); +static inline void SplitStringWithEscapingToIterator(const string& src, + const strings::CharSet& delimiters, + const bool allow_empty, ITR* result) { + CHECK(!delimiters.Test('\\')) << "\\ is not allowed as a delimiter."; + CHECK(result); + string part; + + for (uint32 i = 0; i < src.size(); ++i) { + char current_char = src[i]; + if (delimiters.Test(current_char)) { + // Push substrings when we encounter delimiters. + if (allow_empty || !part.empty()) { + *(*result)++ = part; + part.clear(); + } + } else if (current_char == '\\' && ++i < src.size()) { + // If we see a backslash, the next delimiter or backslash is literal. + current_char = src[i]; + if (current_char != '\\' && !delimiters.Test(current_char)) { + // Don't honour unknown escape sequences: emit \f for \f. + part.push_back('\\'); + } + part.push_back(current_char); + } else { + // Otherwise, we have a normal character or trailing backslash. 
+ part.push_back(current_char); + } } - } - // Push the trailing part. - if (allow_empty || !part.empty()) { - *(*result)++ = part; - } + // Push the trailing part. + if (allow_empty || !part.empty()) { + *(*result)++ = part; + } } -void SplitStringWithEscaping(const string &full, - const strings::CharSet& delimiters, - vector *result) { - std::back_insert_iterator< vector> it(*result); - SplitStringWithEscapingToIterator(full, delimiters, false, &it); +void SplitStringWithEscaping(const string& full, const strings::CharSet& delimiters, + vector* result) { + std::back_insert_iterator> it(*result); + SplitStringWithEscapingToIterator(full, delimiters, false, &it); } -void SplitStringWithEscapingAllowEmpty(const string &full, - const strings::CharSet& delimiters, - vector *result) { - std::back_insert_iterator< vector> it(*result); - SplitStringWithEscapingToIterator(full, delimiters, true, &it); +void SplitStringWithEscapingAllowEmpty(const string& full, const strings::CharSet& delimiters, + vector* result) { + std::back_insert_iterator> it(*result); + SplitStringWithEscapingToIterator(full, delimiters, true, &it); } -void SplitStringWithEscapingToSet(const string &full, - const strings::CharSet& delimiters, - set *result) { - std::insert_iterator< set> it(*result, result->end()); - SplitStringWithEscapingToIterator(full, delimiters, false, &it); +void SplitStringWithEscapingToSet(const string& full, const strings::CharSet& delimiters, + set* result) { + std::insert_iterator> it(*result, result->end()); + SplitStringWithEscapingToIterator(full, delimiters, false, &it); } -void SplitStringWithEscapingToHashset(const string &full, - const strings::CharSet& delimiters, - unordered_set *result) { - std::insert_iterator< unordered_set> it(*result, result->end()); - SplitStringWithEscapingToIterator(full, delimiters, false, &it); +void SplitStringWithEscapingToHashset(const string& full, const strings::CharSet& delimiters, + unordered_set* result) { + 
std::insert_iterator> it(*result, result->end()); + SplitStringWithEscapingToIterator(full, delimiters, false, &it); } - // ---------------------------------------------------------------------- // SplitOneIntToken() // SplitOneInt32Token() @@ -615,49 +579,55 @@ void SplitStringWithEscapingToHashset(const string &full, // Mainly a stringified wrapper around strtol/strtoul/strtod // ---------------------------------------------------------------------- // Curried functions for the macro below -static inline long strto32_0(const char * source, char ** end) { - return strto32(source, end, 0); } -static inline unsigned long strtou32_0(const char * source, char ** end) { - return strtou32(source, end, 0); } -static inline int64 strto64_0(const char * source, char ** end) { - return strto64(source, end, 0); } -static inline uint64 strtou64_0(const char * source, char ** end) { - return strtou64(source, end, 0); } -static inline long strto32_10(const char * source, char ** end) { - return strto32(source, end, 10); } -static inline unsigned long strtou32_10(const char * source, char ** end) { - return strtou32(source, end, 10); } -static inline int64 strto64_10(const char * source, char ** end) { - return strto64(source, end, 10); } -static inline uint64 strtou64_10(const char * source, char ** end) { - return strtou64(source, end, 10); } -static inline uint32 strtou32_16(const char * source, char ** end) { - return strtou32(source, end, 16); } -static inline uint64 strtou64_16(const char * source, char ** end) { - return strtou64(source, end, 16); } - -#define DEFINE_SPLIT_ONE_NUMBER_TOKEN(name, type, function) \ -bool SplitOne##name##Token(const char ** source, const char * delim, \ - type * value) { \ - assert(source); \ - assert(delim); \ - assert(value); \ - if (!*source) \ - return false; \ - /* Parse int */ \ - char * end; \ - *value = function(*source, &end); \ - if (end == *source) \ - return false; /* number not present at start of string */ \ - if (end[0] && 
!strchr(delim, end[0])) \ - return false; /* Garbage characters after int */ \ - /* Advance past token */ \ - if (*end != '\0') \ - *source = const_cast(end+1); \ - else \ - *source = NULL; \ - return true; \ -} +static inline long strto32_0(const char* source, char** end) { + return strto32(source, end, 0); +} +static inline unsigned long strtou32_0(const char* source, char** end) { + return strtou32(source, end, 0); +} +static inline int64 strto64_0(const char* source, char** end) { + return strto64(source, end, 0); +} +static inline uint64 strtou64_0(const char* source, char** end) { + return strtou64(source, end, 0); +} +static inline long strto32_10(const char* source, char** end) { + return strto32(source, end, 10); +} +static inline unsigned long strtou32_10(const char* source, char** end) { + return strtou32(source, end, 10); +} +static inline int64 strto64_10(const char* source, char** end) { + return strto64(source, end, 10); +} +static inline uint64 strtou64_10(const char* source, char** end) { + return strtou64(source, end, 10); +} +static inline uint32 strtou32_16(const char* source, char** end) { + return strtou32(source, end, 16); +} +static inline uint64 strtou64_16(const char* source, char** end) { + return strtou64(source, end, 16); +} + +#define DEFINE_SPLIT_ONE_NUMBER_TOKEN(name, type, function) \ + bool SplitOne##name##Token(const char** source, const char* delim, type* value) { \ + assert(source); \ + assert(delim); \ + assert(value); \ + if (!*source) return false; \ + /* Parse int */ \ + char* end; \ + *value = function(*source, &end); \ + if (end == *source) return false; /* number not present at start of string */ \ + if (end[0] && !strchr(delim, end[0])) return false; /* Garbage characters after int */ \ + /* Advance past token */ \ + if (*end != '\0') \ + *source = const_cast(end + 1); \ + else \ + *source = NULL; \ + return true; \ + } DEFINE_SPLIT_ONE_NUMBER_TOKEN(Int, int, strto32_0) DEFINE_SPLIT_ONE_NUMBER_TOKEN(Int32, int32, 
strto32_0) @@ -665,7 +635,7 @@ DEFINE_SPLIT_ONE_NUMBER_TOKEN(Uint32, uint32, strtou32_0) DEFINE_SPLIT_ONE_NUMBER_TOKEN(Int64, int64, strto64_0) DEFINE_SPLIT_ONE_NUMBER_TOKEN(Uint64, uint64, strtou64_0) DEFINE_SPLIT_ONE_NUMBER_TOKEN(Double, double, strtod) -#ifdef _MSC_VER // has no strtof() +#ifdef _MSC_VER // has no strtof() // Note: does an implicit cast to float. DEFINE_SPLIT_ONE_NUMBER_TOKEN(Float, float, strtod) #else @@ -679,7 +649,6 @@ DEFINE_SPLIT_ONE_NUMBER_TOKEN(DecimalUint64, uint64, strtou64_10) DEFINE_SPLIT_ONE_NUMBER_TOKEN(HexUint32, uint32, strtou32_16) DEFINE_SPLIT_ONE_NUMBER_TOKEN(HexUint64, uint64, strtou64_16) - // ---------------------------------------------------------------------- // SplitRange() // Splits a string of the form "-". Either or both can be @@ -690,104 +659,97 @@ DEFINE_SPLIT_ONE_NUMBER_TOKEN(HexUint64, uint64, strtou64_16) // terminated either by "\0" or by whitespace. // ---------------------------------------------------------------------- -#define EOS(ch) ( (ch) == '\0' || ascii_isspace(ch) ) +#define EOS(ch) ((ch) == '\0' || ascii_isspace(ch)) bool SplitRange(const char* rangestr, int* from, int* to) { - // We need to do the const-cast because strol takes a char**, not const char** - char* val = const_cast(rangestr); - if (val == nullptr || EOS(*val)) return true; // we'll say nothingness is ok + // We need to do the const-cast because strol takes a char**, not const char** + char* val = const_cast(rangestr); + if (val == nullptr || EOS(*val)) return true; // we'll say nothingness is ok - if ( val[0] == '-' && EOS(val[1]) ) // CASE 1: - - return true; // nothing changes + if (val[0] == '-' && EOS(val[1])) // CASE 1: - + return true; // nothing changes - if ( val[0] == '-' ) { // CASE 2: - - const int int2 = strto32(val+1, &val, 10); - if ( !EOS(*val) ) return false; // not a valid integer - *to = int2; // only "to" changes - return true; + if (val[0] == '-') { // CASE 2: - + const int int2 = strto32(val + 1, &val, 10); + if 
(!EOS(*val)) return false; // not a valid integer + *to = int2; // only "to" changes + return true; - } else { - const int int1 = strto32(val, &val, 10); - if ( EOS(*val) || (*val == '-' && EOS(*(val+1))) ) { - *from = int1; // CASE 3: , same as - - return true; // only "from" changes - } else if (*val != '-') { // not a valid range - return false; + } else { + const int int1 = strto32(val, &val, 10); + if (EOS(*val) || (*val == '-' && EOS(*(val + 1)))) { + *from = int1; // CASE 3: , same as - + return true; // only "from" changes + } else if (*val != '-') { // not a valid range + return false; + } + const int int2 = strto32(val + 1, &val, 10); + if (!EOS(*val)) return false; // not a valid integer + *from = int1; // CASE 4: - + *to = int2; + return true; } - const int int2 = strto32(val+1, &val, 10); - if ( !EOS(*val) ) return false; // not a valid integer - *from = int1; // CASE 4: - - *to = int2; - return true; - } } -void SplitCSVLineWithDelimiter(char* line, char delimiter, - vector* cols) { - char* end_of_line = line + strlen(line); - char* end; - char* start; - - for (; line < end_of_line; line++) { - // Skip leading whitespace, unless said whitespace is the delimiter. - while (ascii_isspace(*line) && *line != delimiter) - ++line; - - if (*line == '"' && delimiter == ',') { // Quoted value... - start = ++line; - end = start; - for (; *line; line++) { - if (*line == '"') { - line++; - if (*line != '"') // [""] is an escaped ["] - break; // but just ["] is end of value +void SplitCSVLineWithDelimiter(char* line, char delimiter, vector* cols) { + char* end_of_line = line + strlen(line); + char* end; + char* start; + + for (; line < end_of_line; line++) { + // Skip leading whitespace, unless said whitespace is the delimiter. + while (ascii_isspace(*line) && *line != delimiter) ++line; + + if (*line == '"' && delimiter == ',') { // Quoted value... 
+ start = ++line; + end = start; + for (; *line; line++) { + if (*line == '"') { + line++; + if (*line != '"') // [""] is an escaped ["] + break; // but just ["] is end of value + } + *end++ = *line; + } + // All characters after the closing quote and before the comma + // are ignored. + line = strchr(line, delimiter); + if (!line) line = end_of_line; + } else { + start = line; + line = strchr(line, delimiter); + if (!line) line = end_of_line; + // Skip all trailing whitespace, unless said whitespace is the delimiter. + for (end = line; end > start; --end) { + if (!ascii_isspace(end[-1]) || end[-1] == delimiter) break; + } } - *end++ = *line; - } - // All characters after the closing quote and before the comma - // are ignored. - line = strchr(line, delimiter); - if (!line) line = end_of_line; - } else { - start = line; - line = strchr(line, delimiter); - if (!line) line = end_of_line; - // Skip all trailing whitespace, unless said whitespace is the delimiter. - for (end = line; end > start; --end) { - if (!ascii_isspace(end[-1]) || end[-1] == delimiter) - break; - } + const bool need_another_column = (*line == delimiter) && (line == end_of_line - 1); + *end = '\0'; + cols->push_back(start); + // If line was something like [paul,] (comma is the last character + // and is not proceeded by whitespace or quote) then we are about + // to eliminate the last column (which is empty). This would be + // incorrect. + if (need_another_column) cols->push_back(end); + + assert(*line == '\0' || *line == delimiter); } - const bool need_another_column = - (*line == delimiter) && (line == end_of_line - 1); - *end = '\0'; - cols->push_back(start); - // If line was something like [paul,] (comma is the last character - // and is not proceeded by whitespace or quote) then we are about - // to eliminate the last column (which is empty). This would be - // incorrect. 
- if (need_another_column) - cols->push_back(end); - - assert(*line == '\0' || *line == delimiter); - } } void SplitCSVLine(char* line, vector* cols) { - SplitCSVLineWithDelimiter(line, ',', cols); + SplitCSVLineWithDelimiter(line, ',', cols); } -void SplitCSVLineWithDelimiterForStrings(const string &line, - char delimiter, - vector *cols) { - // Unfortunately, the interface requires char* instead of const char* - // which requires copying the string. - char *cline = strndup_with_new(line.c_str(), line.size()); - vector v; - SplitCSVLineWithDelimiter(cline, delimiter, &v); - for (vector::const_iterator ci = v.begin(); ci != v.end(); ++ci) { - cols->push_back(*ci); - } - delete[] cline; +void SplitCSVLineWithDelimiterForStrings(const string& line, char delimiter, vector* cols) { + // Unfortunately, the interface requires char* instead of const char* + // which requires copying the string. + char* cline = strndup_with_new(line.c_str(), line.size()); + vector v; + SplitCSVLineWithDelimiter(cline, delimiter, &v); + for (vector::const_iterator ci = v.begin(); ci != v.end(); ++ci) { + cols->push_back(*ci); + } + delete[] cline; } // ---------------------------------------------------------------------- @@ -795,234 +757,205 @@ namespace { // Helper class used by SplitStructuredLineInternal. class ClosingSymbolLookup { - public: - explicit ClosingSymbolLookup(const char* symbol_pairs) - : closing_(), - valid_closing_() { - // Initialize the opening/closing arrays. - for (const char* symbol = symbol_pairs; *symbol != 0; ++symbol) { - unsigned char opening = *symbol; - ++symbol; - // If the string ends before the closing character has been found, - // use the opening character as the closing character. - unsigned char closing = *symbol != 0 ? 
*symbol : opening; - closing_[opening] = closing; - valid_closing_[closing] = true; - if (*symbol == 0) break; +public: + explicit ClosingSymbolLookup(const char* symbol_pairs) : closing_(), valid_closing_() { + // Initialize the opening/closing arrays. + for (const char* symbol = symbol_pairs; *symbol != 0; ++symbol) { + unsigned char opening = *symbol; + ++symbol; + // If the string ends before the closing character has been found, + // use the opening character as the closing character. + unsigned char closing = *symbol != 0 ? *symbol : opening; + closing_[opening] = closing; + valid_closing_[closing] = true; + if (*symbol == 0) break; + } } - } - - // Returns the closing character corresponding to an opening one, - // or 0 if the argument is not an opening character. - char GetClosingChar(char opening) const { - return closing_[static_cast(opening)]; - } - - // Returns true if the argument is a closing character. - bool IsClosing(char c) const { - return valid_closing_[static_cast(c)]; - } - - private: - // Maps an opening character to its closing. If the entry contains 0, - // the character is not in the opening set. - char closing_[256]; - // Valid closing characters. - bool valid_closing_[256]; - - DISALLOW_COPY_AND_ASSIGN(ClosingSymbolLookup); -}; -char* SplitStructuredLineInternal(char* line, - char delimiter, - const char* symbol_pairs, - vector* cols, - bool with_escapes) { - ClosingSymbolLookup lookup(symbol_pairs); - - // Stack of symbols expected to close the current opened expressions. - vector expected_to_close; - bool in_escape = false; - - CHECK(cols); - cols->push_back(line); - char* current; - for (current = line; *current; ++current) { - char c = *current; - if (in_escape) { - in_escape = false; - } else if (with_escapes && c == '\\') { - // We are escaping the next character. Note the escape still appears - // in the output. 
- in_escape = true; - } else if (expected_to_close.empty() && c == delimiter) { - // We don't have any open expression, this is a valid separator. - *current = 0; - cols->push_back(current + 1); - } else if (!expected_to_close.empty() && c == expected_to_close.back()) { - // Can we close the currently open expression? - expected_to_close.pop_back(); - } else if (lookup.GetClosingChar(c)) { - // If this is an opening symbol, we open a new expression and push - // the expected closing symbol on the stack. - expected_to_close.push_back(lookup.GetClosingChar(c)); - } else if (lookup.IsClosing(c)) { - // Error: mismatched closing symbol. - return current; + // Returns the closing character corresponding to an opening one, + // or 0 if the argument is not an opening character. + char GetClosingChar(char opening) const { + return closing_[static_cast(opening)]; } - } - if (!expected_to_close.empty()) { - return current; // Missing closing symbol(s) - } - return nullptr; // Success -} -bool SplitStructuredLineInternal(StringPiece line, - char delimiter, - const char* symbol_pairs, - vector* cols, - bool with_escapes) { - ClosingSymbolLookup lookup(symbol_pairs); - - // Stack of symbols expected to close the current opened expressions. - vector expected_to_close; - bool in_escape = false; - - CHECK_NOTNULL(cols); - cols->push_back(line); - for (int i = 0; i < line.size(); ++i) { - char c = line[i]; - if (in_escape) { - in_escape = false; - } else if (with_escapes && c == '\\') { - // We are escaping the next character. Note the escape still appears - // in the output. - in_escape = true; - } else if (expected_to_close.empty() && c == delimiter) { - // We don't have any open expression, this is a valid separator. - cols->back().remove_suffix(line.size() - i); - cols->push_back(StringPiece(line, i + 1)); - } else if (!expected_to_close.empty() && c == expected_to_close.back()) { - // Can we close the currently open expression? 
- expected_to_close.pop_back(); - } else if (lookup.GetClosingChar(c)) { - // If this is an opening symbol, we open a new expression and push - // the expected closing symbol on the stack. - expected_to_close.push_back(lookup.GetClosingChar(c)); - } else if (lookup.IsClosing(c)) { - // Error: mismatched closing symbol. - return false; + // Returns true if the argument is a closing character. + bool IsClosing(char c) const { return valid_closing_[static_cast(c)]; } + +private: + // Maps an opening character to its closing. If the entry contains 0, + // the character is not in the opening set. + char closing_[256]; + // Valid closing characters. + bool valid_closing_[256]; + + DISALLOW_COPY_AND_ASSIGN(ClosingSymbolLookup); +}; + +char* SplitStructuredLineInternal(char* line, char delimiter, const char* symbol_pairs, + vector* cols, bool with_escapes) { + ClosingSymbolLookup lookup(symbol_pairs); + + // Stack of symbols expected to close the current opened expressions. + vector expected_to_close; + bool in_escape = false; + + CHECK(cols); + cols->push_back(line); + char* current; + for (current = line; *current; ++current) { + char c = *current; + if (in_escape) { + in_escape = false; + } else if (with_escapes && c == '\\') { + // We are escaping the next character. Note the escape still appears + // in the output. + in_escape = true; + } else if (expected_to_close.empty() && c == delimiter) { + // We don't have any open expression, this is a valid separator. + *current = 0; + cols->push_back(current + 1); + } else if (!expected_to_close.empty() && c == expected_to_close.back()) { + // Can we close the currently open expression? + expected_to_close.pop_back(); + } else if (lookup.GetClosingChar(c)) { + // If this is an opening symbol, we open a new expression and push + // the expected closing symbol on the stack. + expected_to_close.push_back(lookup.GetClosingChar(c)); + } else if (lookup.IsClosing(c)) { + // Error: mismatched closing symbol. 
+ return current; + } + } + if (!expected_to_close.empty()) { + return current; // Missing closing symbol(s) + } + return nullptr; // Success +} + +bool SplitStructuredLineInternal(StringPiece line, char delimiter, const char* symbol_pairs, + vector* cols, bool with_escapes) { + ClosingSymbolLookup lookup(symbol_pairs); + + // Stack of symbols expected to close the current opened expressions. + vector expected_to_close; + bool in_escape = false; + + CHECK_NOTNULL(cols); + cols->push_back(line); + for (int i = 0; i < line.size(); ++i) { + char c = line[i]; + if (in_escape) { + in_escape = false; + } else if (with_escapes && c == '\\') { + // We are escaping the next character. Note the escape still appears + // in the output. + in_escape = true; + } else if (expected_to_close.empty() && c == delimiter) { + // We don't have any open expression, this is a valid separator. + cols->back().remove_suffix(line.size() - i); + cols->push_back(StringPiece(line, i + 1)); + } else if (!expected_to_close.empty() && c == expected_to_close.back()) { + // Can we close the currently open expression? + expected_to_close.pop_back(); + } else if (lookup.GetClosingChar(c)) { + // If this is an opening symbol, we open a new expression and push + // the expected closing symbol on the stack. + expected_to_close.push_back(lookup.GetClosingChar(c)); + } else if (lookup.IsClosing(c)) { + // Error: mismatched closing symbol. 
+ return false; + } + } + if (!expected_to_close.empty()) { + return false; // Missing closing symbol(s) } - } - if (!expected_to_close.empty()) { - return false; // Missing closing symbol(s) - } - return true; // Success + return true; // Success } -} // anonymous namespace +} // anonymous namespace -char* SplitStructuredLine(char* line, - char delimiter, - const char *symbol_pairs, +char* SplitStructuredLine(char* line, char delimiter, const char* symbol_pairs, vector* cols) { - return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols, - false); + return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols, false); } -bool SplitStructuredLine(StringPiece line, - char delimiter, - const char* symbol_pairs, +bool SplitStructuredLine(StringPiece line, char delimiter, const char* symbol_pairs, vector* cols) { - return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols, - false); + return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols, false); } -char* SplitStructuredLineWithEscapes(char* line, - char delimiter, - const char *symbol_pairs, +char* SplitStructuredLineWithEscapes(char* line, char delimiter, const char* symbol_pairs, vector* cols) { - return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols, - true); + return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols, true); } -bool SplitStructuredLineWithEscapes(StringPiece line, - char delimiter, - const char* symbol_pairs, - vector* cols) { - return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols, - true); +bool SplitStructuredLineWithEscapes(StringPiece line, char delimiter, const char* symbol_pairs, + vector* cols) { + return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols, true); } - // ---------------------------------------------------------------------- // SplitStringIntoKeyValues() // ---------------------------------------------------------------------- -bool SplitStringIntoKeyValues(const 
string& line, - const string& key_value_delimiters, - const string& value_value_delimiters, - string *key, vector *values) { - key->clear(); - values->clear(); - - // find the key string - size_t end_key_pos = line.find_first_of(key_value_delimiters); - if (end_key_pos == string::npos) { - VLOG(1) << "cannot parse key from line: " << line; - return false; // no key - } - key->assign(line, 0, end_key_pos); - - // find the values string - string remains(line, end_key_pos, line.size() - end_key_pos); - size_t begin_values_pos = remains.find_first_not_of(key_value_delimiters); - if (begin_values_pos == string::npos) { - VLOG(1) << "cannot parse value from line: " << line; - return false; // no value - } - string values_string(remains, - begin_values_pos, - remains.size() - begin_values_pos); - - // construct the values vector - if (value_value_delimiters.empty()) { // one value - values->push_back(values_string); - } else { // multiple values - SplitStringUsing(values_string, value_value_delimiters.c_str(), values); - if (values->size() < 1) { - VLOG(1) << "cannot parse value from line: " << line; - return false; // no value +bool SplitStringIntoKeyValues(const string& line, const string& key_value_delimiters, + const string& value_value_delimiters, string* key, + vector* values) { + key->clear(); + values->clear(); + + // find the key string + size_t end_key_pos = line.find_first_of(key_value_delimiters); + if (end_key_pos == string::npos) { + VLOG(1) << "cannot parse key from line: " << line; + return false; // no key + } + key->assign(line, 0, end_key_pos); + + // find the values string + string remains(line, end_key_pos, line.size() - end_key_pos); + size_t begin_values_pos = remains.find_first_not_of(key_value_delimiters); + if (begin_values_pos == string::npos) { + VLOG(1) << "cannot parse value from line: " << line; + return false; // no value } - } - return true; + string values_string(remains, begin_values_pos, remains.size() - begin_values_pos); + + // 
construct the values vector + if (value_value_delimiters.empty()) { // one value + values->push_back(values_string); + } else { // multiple values + SplitStringUsing(values_string, value_value_delimiters.c_str(), values); + if (values->size() < 1) { + VLOG(1) << "cannot parse value from line: " << line; + return false; // no value + } + } + return true; } -bool SplitStringIntoKeyValuePairs(const string& line, - const string& key_value_delimiters, +bool SplitStringIntoKeyValuePairs(const string& line, const string& key_value_delimiters, const string& key_value_pair_delimiters, vector>* kv_pairs) { - kv_pairs->clear(); - - vector pairs; - SplitStringUsing(line, key_value_pair_delimiters.c_str(), &pairs); - - bool success = true; - for (const auto& pair : pairs) { - string key; - vector value; - if (!SplitStringIntoKeyValues(pair, - key_value_delimiters, - "", &key, &value)) { - // Don't return here, to allow for keys without associated - // values; just record that our split failed. - success = false; + kv_pairs->clear(); + + vector pairs; + SplitStringUsing(line, key_value_pair_delimiters.c_str(), &pairs); + + bool success = true; + for (const auto& pair : pairs) { + string key; + vector value; + if (!SplitStringIntoKeyValues(pair, key_value_delimiters, "", &key, &value)) { + // Don't return here, to allow for keys without associated + // values; just record that our split failed. + success = false; + } + // we expect atmost one value because we passed in an empty vsep to + // SplitStringIntoKeyValues + DCHECK_LE(value.size(), 1); + kv_pairs->push_back(make_pair(key, value.empty() ? "" : value[0])); } - // we expect atmost one value because we passed in an empty vsep to - // SplitStringIntoKeyValues - DCHECK_LE(value.size(), 1); - kv_pairs->push_back(make_pair(key, value.empty()? 
"" : value[0])); - } - return success; + return success; } // ---------------------------------------------------------------------- @@ -1034,58 +967,51 @@ bool SplitStringIntoKeyValuePairs(const string& line, // whitespace (does not consume trailing whitespace), and returns // a pointer beyond the last character parsed. // -------------------------------------------------------------------- -const char* SplitLeadingDec32Values(const char *str, vector *result) { - for (;;) { - char *end = nullptr; - long value = strtol(str, &end, 10); - if (end == str) - break; - // Limit long values to int32 min/max. Needed for lp64. - if (value > numeric_limits::max()) { - value = numeric_limits::max(); - } else if (value < numeric_limits::min()) { - value = numeric_limits::min(); +const char* SplitLeadingDec32Values(const char* str, vector* result) { + for (;;) { + char* end = nullptr; + long value = strtol(str, &end, 10); + if (end == str) break; + // Limit long values to int32 min/max. Needed for lp64. 
+ if (value > numeric_limits::max()) { + value = numeric_limits::max(); + } else if (value < numeric_limits::min()) { + value = numeric_limits::min(); + } + result->push_back(value); + str = end; + if (!ascii_isspace(*end)) break; } - result->push_back(value); - str = end; - if (!ascii_isspace(*end)) - break; - } - return str; + return str; } -const char* SplitLeadingDec64Values(const char *str, vector *result) { - for (;;) { - char *end = nullptr; - const int64 value = strtoll(str, &end, 10); - if (end == str) - break; - result->push_back(value); - str = end; - if (!ascii_isspace(*end)) - break; - } - return str; +const char* SplitLeadingDec64Values(const char* str, vector* result) { + for (;;) { + char* end = nullptr; + const int64 value = strtoll(str, &end, 10); + if (end == str) break; + result->push_back(value); + str = end; + if (!ascii_isspace(*end)) break; + } + return str; } -void SplitStringToLines(const char* full, - int max_len, - int num_lines, - vector* result) { - if (max_len <= 0) { - return; - } - int pos = 0; - for (int i = 0; (i < num_lines || num_lines <= 0); i++) { - int cut_at = ClipStringHelper(full+pos, max_len, (i == num_lines - 1)); - if (cut_at == -1) { - result->push_back(string(full+pos)); - return; +void SplitStringToLines(const char* full, int max_len, int num_lines, vector* result) { + if (max_len <= 0) { + return; } - result->push_back(string(full+pos, cut_at)); - if (i == num_lines - 1 && max_len > kCutStrSize) { - result->at(i).append(kCutStr); + int pos = 0; + for (int i = 0; (i < num_lines || num_lines <= 0); i++) { + int cut_at = ClipStringHelper(full + pos, max_len, (i == num_lines - 1)); + if (cut_at == -1) { + result->push_back(string(full + pos)); + return; + } + result->push_back(string(full + pos, cut_at)); + if (i == num_lines - 1 && max_len > kCutStrSize) { + result->at(i).append(kCutStr); + } + pos += cut_at; } - pos += cut_at; - } } diff --git a/be/src/gutil/strings/split.h b/be/src/gutil/strings/split.h index 
ab4afb9d332db3..c7ca1e13f4de0f 100644 --- a/be/src/gutil/strings/split.h +++ b/be/src/gutil/strings/split.h @@ -21,6 +21,7 @@ #define STRINGS_SPLIT_H_ #include + #include using std::copy; using std::max; @@ -44,11 +45,11 @@ using std::make_pair; using std::pair; #include using std::vector; +#include + #include #include -#include - #include "gutil/integral_types.h" #include "gutil/logging-inl.h" #include "gutil/strings/charset.h" @@ -292,13 +293,12 @@ namespace strings { // Definitions of the main Split() function. template inline internal::Splitter Split(StringPiece text, Delimiter d) { - return internal::Splitter(text, d); + return internal::Splitter(text, d); } template -inline internal::Splitter Split( - StringPiece text, Delimiter d, Predicate p) { - return internal::Splitter(text, d, p); +inline internal::Splitter Split(StringPiece text, Delimiter d, Predicate p) { + return internal::Splitter(text, d, p); } namespace delimiter { @@ -347,12 +347,12 @@ namespace delimiter { // assert(v[2] == "c"); // class Literal { - public: - explicit Literal(StringPiece sp); - StringPiece Find(StringPiece text) const; +public: + explicit Literal(StringPiece sp); + StringPiece Find(StringPiece text) const; - private: - const string delimiter_; +private: + const string delimiter_; }; // Represents a delimiter that will match any of the given byte-sized @@ -374,12 +374,12 @@ class Literal { // Note: The string passed to AnyOf is assumed to be a string of single-byte // ASCII characters. AnyOf does not work with multi-byte characters. class AnyOf { - public: - explicit AnyOf(StringPiece sp); - StringPiece Find(StringPiece text) const; +public: + explicit AnyOf(StringPiece sp); + StringPiece Find(StringPiece text) const; - private: - const string delimiters_; +private: + const string delimiters_; }; // Wraps another delimiter and sets a max number of matches for that delimiter. 
@@ -395,20 +395,20 @@ class AnyOf { // template class LimitImpl { - public: - LimitImpl(Delimiter delimiter, int limit) - : delimiter_(std::move(delimiter)), limit_(limit), count_(0) {} - StringPiece Find(StringPiece text) { - if (count_++ == limit_) { - return StringPiece(text.end(), 0); // No more matches. +public: + LimitImpl(Delimiter delimiter, int limit) + : delimiter_(std::move(delimiter)), limit_(limit), count_(0) {} + StringPiece Find(StringPiece text) { + if (count_++ == limit_) { + return StringPiece(text.end(), 0); // No more matches. + } + return delimiter_.Find(text); } - return delimiter_.Find(text); - } - private: - Delimiter delimiter_; - const int limit_; - int count_; +private: + Delimiter delimiter_; + const int limit_; + int count_; }; // Overloaded Limit() function to create LimitImpl<> objects. Uses the Delimiter @@ -416,22 +416,22 @@ class LimitImpl { // parameter. This is similar to the overloads for Split() below. template inline LimitImpl Limit(Delimiter delim, int limit) { - return LimitImpl(delim, limit); + return LimitImpl(delim, limit); } inline LimitImpl Limit(const char* s, int limit) { - return LimitImpl(Literal(s), limit); + return LimitImpl(Literal(s), limit); } inline LimitImpl Limit(const string& s, int limit) { - return LimitImpl(Literal(s), limit); + return LimitImpl(Literal(s), limit); } inline LimitImpl Limit(StringPiece s, int limit) { - return LimitImpl(Literal(s), limit); + return LimitImpl(Literal(s), limit); } -} // namespace delimiter +} // namespace delimiter // // Predicates are functors that return bool indicating whether the given @@ -448,9 +448,7 @@ inline LimitImpl Limit(StringPiece s, int limit) { // vector v = Split(" a , ,,b,", ",", AllowEmpty()); // EXPECT_THAT(v, ElementsAre(" a ", " ", "", "b", "")); struct AllowEmpty { - bool operator()(StringPiece sp) const { - return true; - } + bool operator()(StringPiece sp) const { return true; } }; // Returns false if the given StringPiece is empty, indicating that 
the @@ -459,9 +457,7 @@ struct AllowEmpty { // vector v = Split(" a , ,,b,", ",", SkipEmpty()); // EXPECT_THAT(v, ElementsAre(" a ", " ", "b")); struct SkipEmpty { - bool operator()(StringPiece sp) const { - return !sp.empty(); - } + bool operator()(StringPiece sp) const { return !sp.empty(); } }; // Returns false if the given StringPiece is empty or contains only whitespace, @@ -470,10 +466,10 @@ struct SkipEmpty { // vector v = Split(" a , ,,b,", ",", SkipWhitespace()); // EXPECT_THAT(v, ElementsAre(" a ", "b")); struct SkipWhitespace { - bool operator()(StringPiece sp) const { - StripWhiteSpace(&sp); - return !sp.empty(); - } + bool operator()(StringPiece sp) const { + StripWhiteSpace(&sp); + return !sp.empty(); + } }; // Split() function overloads to effectively give Split() a default Delimiter @@ -489,47 +485,42 @@ struct SkipWhitespace { // - const string& // - StringPiece -inline internal::Splitter Split( - StringPiece text, const char* delimiter) { - return internal::Splitter( - text, delimiter::Literal(delimiter)); +inline internal::Splitter Split(StringPiece text, const char* delimiter) { + return internal::Splitter(text, delimiter::Literal(delimiter)); } -inline internal::Splitter Split( - StringPiece text, const string& delimiter) { - return internal::Splitter( - text, delimiter::Literal(delimiter)); +inline internal::Splitter Split(StringPiece text, const string& delimiter) { + return internal::Splitter(text, delimiter::Literal(delimiter)); } -inline internal::Splitter Split( - StringPiece text, StringPiece delimiter) { - return internal::Splitter( - text, delimiter::Literal(delimiter)); +inline internal::Splitter Split(StringPiece text, StringPiece delimiter) { + return internal::Splitter(text, delimiter::Literal(delimiter)); } // Same overloads as above, but also including a Predicate argument. 
template -inline internal::Splitter Split( - StringPiece text, const char* delimiter, Predicate p) { - return internal::Splitter( - text, delimiter::Literal(delimiter), p); +inline internal::Splitter Split(StringPiece text, + const char* delimiter, Predicate p) { + return internal::Splitter(text, delimiter::Literal(delimiter), + p); } template -inline internal::Splitter Split( - StringPiece text, const string& delimiter, Predicate p) { - return internal::Splitter( - text, delimiter::Literal(delimiter), p); +inline internal::Splitter Split(StringPiece text, + const string& delimiter, + Predicate p) { + return internal::Splitter(text, delimiter::Literal(delimiter), + p); } template -inline internal::Splitter Split( - StringPiece text, StringPiece delimiter, Predicate p) { - return internal::Splitter( - text, delimiter::Literal(delimiter), p); +inline internal::Splitter Split(StringPiece text, + StringPiece delimiter, Predicate p) { + return internal::Splitter(text, delimiter::Literal(delimiter), + p); } -} // namespace strings +} // namespace strings // // ==================== LEGACY SPLIT FUNCTIONS ==================== @@ -580,10 +571,7 @@ void ClipString(string* full_str, int max_len); // appends a "..." to the end of the last line if the string is too // long to fit completely into 'num_lines' lines. // ---------------------------------------------------------------------- -void SplitStringToLines(const char* full, - int max_len, - int num_lines, - vector* result); +void SplitStringToLines(const char* full, int max_len, int num_lines, vector* result); // ---------------------------------------------------------------------- // SplitOneStringToken() @@ -623,11 +611,8 @@ vector* SplitUsing(char* full, const char* delimiters); // strings to 'vec'. Modifies "full". If omit empty strings is // true, empty strings are omitted from the resulting vector. 
// ---------------------------------------------------------------------- -void SplitToVector(char* full, const char* delimiters, - vector* vec, - bool omit_empty_strings); -void SplitToVector(char* full, const char* delimiters, - vector* vec, +void SplitToVector(char* full, const char* delimiters, vector* vec, bool omit_empty_strings); +void SplitToVector(char* full, const char* delimiters, vector* vec, bool omit_empty_strings); // ---------------------------------------------------------------------- @@ -639,9 +624,7 @@ void SplitToVector(char* full, const char* delimiters, // Expects the original string (from which 'full' is derived) to exist // for the full lifespan of 'vec'. // ---------------------------------------------------------------------- -void SplitStringPieceToVector(const StringPiece& full, - const char* delim, - vector* vec, +void SplitStringPieceToVector(const StringPiece& full, const char* delim, vector* vec, bool omit_empty_strings); // ---------------------------------------------------------------------- @@ -672,19 +655,16 @@ void SplitStringPieceToVector(const StringPiece& full, // For even better performance, store the result in a vector // to avoid string copies. // ---------------------------------------------------------------------- -void SplitStringUsing(const string& full, const char* delimiters, - vector* result); +void SplitStringUsing(const string& full, const char* delimiters, vector* result); void SplitStringToHashsetUsing(const string& full, const char* delimiters, std::unordered_set* result); -void SplitStringToSetUsing(const string& full, const char* delimiters, - set* result); +void SplitStringToSetUsing(const string& full, const char* delimiters, set* result); // The even-positioned (0-based) components become the keys for the // odd-positioned components that follow them. 
When there is an odd // number of components, the value for the last key will be unchanged // if the key was already present in the hash table, or will be the // empty string if the key is a newly inserted key. -void SplitStringToMapUsing(const string& full, const char* delim, - map* result); +void SplitStringToMapUsing(const string& full, const char* delim, map* result); void SplitStringToHashmapUsing(const string& full, const char* delim, std::unordered_map* result); @@ -708,8 +688,7 @@ void SplitStringToHashmapUsing(const string& full, const char* delim, // For even better performance, store the result in a vector to // avoid string copies. // ---------------------------------------------------------------------- -void SplitStringAllowEmpty(const string& full, const char* delim, - vector* result); +void SplitStringAllowEmpty(const string& full, const char* delim, vector* result); // ---------------------------------------------------------------------- // SplitStringWithEscaping() @@ -729,17 +708,13 @@ void SplitStringAllowEmpty(const string& full, const char* delim, // // All versions other than "AllowEmpty" discard any empty substrings. 
// ---------------------------------------------------------------------- -void SplitStringWithEscaping(const string& full, - const strings::CharSet& delimiters, +void SplitStringWithEscaping(const string& full, const strings::CharSet& delimiters, vector* result); -void SplitStringWithEscapingAllowEmpty(const string& full, - const strings::CharSet& delimiters, +void SplitStringWithEscapingAllowEmpty(const string& full, const strings::CharSet& delimiters, vector* result); -void SplitStringWithEscapingToSet(const string& full, - const strings::CharSet& delimiters, +void SplitStringWithEscapingToSet(const string& full, const strings::CharSet& delimiters, set* result); -void SplitStringWithEscapingToHashset(const string& full, - const strings::CharSet& delimiters, +void SplitStringWithEscapingToHashset(const string& full, const strings::CharSet& delimiters, std::unordered_set* result); // ---------------------------------------------------------------------- @@ -755,9 +730,7 @@ void SplitStringWithEscapingToHashset(const string& full, // // If "full" is the empty string, yields an empty string as the only value. 
// ---------------------------------------------------------------------- -void SplitStringIntoNPiecesAllowEmpty(const string& full, - const char* delimiters, - int pieces, +void SplitStringIntoNPiecesAllowEmpty(const string& full, const char* delimiters, int pieces, vector* result); // ---------------------------------------------------------------------- @@ -802,19 +775,17 @@ void SplitStringIntoNPiecesAllowEmpty(const string& full, // ---------------------------------------------------------------------- template bool SplitStringAndParse(StringPiece source, StringPiece delim, - bool (*parse)(const string& str, T* value), - vector* result); + bool (*parse)(const string& str, T* value), vector* result); template -bool SplitStringAndParseToContainer( - StringPiece source, StringPiece delim, - bool (*parse)(const string& str, typename Container::value_type* value), - Container* result); +bool SplitStringAndParseToContainer(StringPiece source, StringPiece delim, + bool (*parse)(const string& str, + typename Container::value_type* value), + Container* result); template -bool SplitStringAndParseToList( - StringPiece source, StringPiece delim, - bool (*parse)(const string& str, typename List::value_type* value), - List* result); +bool SplitStringAndParseToList(StringPiece source, StringPiece delim, + bool (*parse)(const string& str, typename List::value_type* value), + List* result); // ---------------------------------------------------------------------- // SplitRange() // Splits a string of the form "-". Either or both can be @@ -863,11 +834,9 @@ bool SplitRange(const char* rangestr, int* from, int* to); // // ---------------------------------------------------------------------- void SplitCSVLine(char* line, vector* cols); -void SplitCSVLineWithDelimiter(char* line, char delimiter, - vector* cols); +void SplitCSVLineWithDelimiter(char* line, char delimiter, vector* cols); // SplitCSVLine string wrapper that internally makes a copy of string line. 
-void SplitCSVLineWithDelimiterForStrings(const string& line, char delimiter, - vector* cols); +void SplitCSVLineWithDelimiterForStrings(const string& line, char delimiter, vector* cols); // ---------------------------------------------------------------------- // SplitStructuredLine() @@ -891,16 +860,12 @@ void SplitCSVLineWithDelimiterForStrings(const string& line, char delimiter, // function will return the position of the problem : ] // // ---------------------------------------------------------------------- -char* SplitStructuredLine(char* line, - char delimiter, - const char* symbol_pairs, +char* SplitStructuredLine(char* line, char delimiter, const char* symbol_pairs, vector* cols); // Similar to the function with the same name above, but splits a StringPiece // into StringPiece parts. Returns true if successful. -bool SplitStructuredLine(StringPiece line, - char delimiter, - const char* symbol_pairs, +bool SplitStructuredLine(StringPiece line, char delimiter, const char* symbol_pairs, vector* cols); // ---------------------------------------------------------------------- @@ -922,16 +887,12 @@ bool SplitStructuredLine(StringPiece line, // "item4\,item5", "[5,{6,7}]" } // // ---------------------------------------------------------------------- -char* SplitStructuredLineWithEscapes(char* line, - char delimiter, - const char* symbol_pairs, +char* SplitStructuredLineWithEscapes(char* line, char delimiter, const char* symbol_pairs, vector* cols); // Similar to the function with the same name above, but splits a StringPiece // into StringPiece parts. Returns true if successful. 
-bool SplitStructuredLineWithEscapes(StringPiece line, - char delimiter, - const char* symbol_pairs, +bool SplitStructuredLineWithEscapes(StringPiece line, char delimiter, const char* symbol_pairs, vector* cols); // ---------------------------------------------------------------------- @@ -983,10 +944,9 @@ bool SplitStructuredLineWithEscapes(StringPiece line, // vector values = Split(key_values.second, AnyOf(vv_delim)); // // ---------------------------------------------------------------------- -bool SplitStringIntoKeyValues(const string& line, - const string& key_value_delimiters, - const string& value_value_delimiters, - string* key, vector* values); +bool SplitStringIntoKeyValues(const string& line, const string& key_value_delimiters, + const string& value_value_delimiters, string* key, + vector* values); // ---------------------------------------------------------------------- // SplitStringIntoKeyValuePairs() @@ -1029,12 +989,10 @@ bool SplitStringIntoKeyValues(const string& line, // } // // ---------------------------------------------------------------------- -bool SplitStringIntoKeyValuePairs(const string& line, - const string& key_value_delimiters, +bool SplitStringIntoKeyValuePairs(const string& line, const string& key_value_delimiters, const string& key_value_pair_delimiters, vector>* kv_pairs); - // ---------------------------------------------------------------------- // SplitLeadingDec32Values() // SplitLeadingDec64Values() @@ -1067,33 +1025,24 @@ const char* SplitLeadingDec64Values(const char* next, vector* result); // They cannot handle decimal numbers with leading 0s, since they will be // treated as octal. 
// ---------------------------------------------------------------------- -bool SplitOneIntToken(const char** source, const char* delim, - int* value); -bool SplitOneInt32Token(const char** source, const char* delim, - int32* value); -bool SplitOneUint32Token(const char** source, const char* delim, - uint32* value); -bool SplitOneInt64Token(const char** source, const char* delim, - int64* value); -bool SplitOneUint64Token(const char** source, const char* delim, - uint64* value); -bool SplitOneDoubleToken(const char** source, const char* delim, - double* value); -bool SplitOneFloatToken(const char** source, const char* delim, - float* value); +bool SplitOneIntToken(const char** source, const char* delim, int* value); +bool SplitOneInt32Token(const char** source, const char* delim, int32* value); +bool SplitOneUint32Token(const char** source, const char* delim, uint32* value); +bool SplitOneInt64Token(const char** source, const char* delim, int64* value); +bool SplitOneUint64Token(const char** source, const char* delim, uint64* value); +bool SplitOneDoubleToken(const char** source, const char* delim, double* value); +bool SplitOneFloatToken(const char** source, const char* delim, float* value); // Some aliases, so that the function names are standardized against the names // of the reflection setters/getters in proto2. This makes it easier to use // certain macros with reflection when creating custom text formats for protos. 
-inline bool SplitOneUInt32Token(const char** source, const char* delim, - uint32* value) { - return SplitOneUint32Token(source, delim, value); +inline bool SplitOneUInt32Token(const char** source, const char* delim, uint32* value) { + return SplitOneUint32Token(source, delim, value); } -inline bool SplitOneUInt64Token(const char** source, const char* delim, - uint64* value) { - return SplitOneUint64Token(source, delim, value); +inline bool SplitOneUInt64Token(const char** source, const char* delim, uint64* value) { + return SplitOneUint64Token(source, delim, value); } // ---------------------------------------------------------------------- @@ -1105,101 +1054,94 @@ inline bool SplitOneUInt64Token(const char** source, const char* delim, // Parse a single "delim"-delimited number from "*source" into "*value". // Unlike SplitOneIntToken, etc., this function always interprets // the numbers as decimal. -bool SplitOneDecimalIntToken(const char** source, const char* delim, - int* value); -bool SplitOneDecimalInt32Token(const char** source, const char* delim, - int32* value); -bool SplitOneDecimalUint32Token(const char** source, const char* delim, - uint32* value); -bool SplitOneDecimalInt64Token(const char** source, const char* delim, - int64* value); -bool SplitOneDecimalUint64Token(const char** source, const char* delim, - uint64* value); +bool SplitOneDecimalIntToken(const char** source, const char* delim, int* value); +bool SplitOneDecimalInt32Token(const char** source, const char* delim, int32* value); +bool SplitOneDecimalUint32Token(const char** source, const char* delim, uint32* value); +bool SplitOneDecimalInt64Token(const char** source, const char* delim, int64* value); +bool SplitOneDecimalUint64Token(const char** source, const char* delim, uint64* value); // ---------------------------------------------------------------------- // SplitOneHexUint32Token() // SplitOneHexUint64Token() // Once more, for hexadecimal numbers (unsigned only). 
-bool SplitOneHexUint32Token(const char** source, const char* delim, - uint32* value); -bool SplitOneHexUint64Token(const char** source, const char* delim, - uint64* value); - +bool SplitOneHexUint32Token(const char** source, const char* delim, uint32* value); +bool SplitOneHexUint64Token(const char** source, const char* delim, uint64* value); // ###################### TEMPLATE INSTANTIATIONS BELOW ####################### // SplitStringAndParse() -- see description above template bool SplitStringAndParse(StringPiece source, StringPiece delim, - bool (*parse)(const string& str, T* value), - vector* result) { - return SplitStringAndParseToList(source, delim, parse, result); + bool (*parse)(const string& str, T* value), vector* result) { + return SplitStringAndParseToList(source, delim, parse, result); } namespace strings { namespace internal { template -bool SplitStringAndParseToInserter( - StringPiece source, StringPiece delim, - bool (*parse)(const string& str, typename Container::value_type* value), - Container* result, InsertPolicy insert_policy) { - CHECK(NULL != parse); - CHECK(NULL != result); - CHECK(NULL != delim.data()); - CHECK_GT(delim.size(), 0); - bool retval = true; - vector pieces = strings::Split(source, - strings::delimiter::AnyOf(delim), - strings::SkipEmpty()); - for (const auto& piece : pieces) { - typename Container::value_type t; - if (parse(piece.as_string(), &t)) { - insert_policy(result, t); - } else { - retval = false; +bool SplitStringAndParseToInserter(StringPiece source, StringPiece delim, + bool (*parse)(const string& str, + typename Container::value_type* value), + Container* result, InsertPolicy insert_policy) { + CHECK(NULL != parse); + CHECK(NULL != result); + CHECK(NULL != delim.data()); + CHECK_GT(delim.size(), 0); + bool retval = true; + vector pieces = + strings::Split(source, strings::delimiter::AnyOf(delim), strings::SkipEmpty()); + for (const auto& piece : pieces) { + typename Container::value_type t; + if 
(parse(piece.as_string(), &t)) { + insert_policy(result, t); + } else { + retval = false; + } } - } - return retval; + return retval; } // Cannot use output iterator here (e.g. std::inserter, std::back_inserter) // because some callers use non-standard containers that don't have iterators, // only an insert() or push_back() method. struct BasicInsertPolicy { - template - void operator()(C* c, const V& v) const { c->insert(v); } + template + void operator()(C* c, const V& v) const { + c->insert(v); + } }; struct BackInsertPolicy { - template - void operator()(C* c, const V& v) const { c->push_back(v); } + template + void operator()(C* c, const V& v) const { + c->push_back(v); + } }; -} // namespace internal -} // namespace strings +} // namespace internal +} // namespace strings // SplitStringAndParseToContainer() -- see description above template -bool SplitStringAndParseToContainer( - StringPiece source, StringPiece delim, - bool (*parse)(const string& str, typename Container::value_type* value), - Container* result) { - return strings::internal::SplitStringAndParseToInserter( - source, delim, parse, result, strings::internal::BasicInsertPolicy()); +bool SplitStringAndParseToContainer(StringPiece source, StringPiece delim, + bool (*parse)(const string& str, + typename Container::value_type* value), + Container* result) { + return strings::internal::SplitStringAndParseToInserter(source, delim, parse, result, + strings::internal::BasicInsertPolicy()); } // SplitStringAndParseToList() -- see description above template -bool SplitStringAndParseToList( - StringPiece source, StringPiece delim, - bool (*parse)(const string& str, typename List::value_type* value), - List* result) { - return strings::internal::SplitStringAndParseToInserter( - source, delim, parse, result, strings::internal::BackInsertPolicy()); +bool SplitStringAndParseToList(StringPiece source, StringPiece delim, + bool (*parse)(const string& str, typename List::value_type* value), + List* result) { + 
return strings::internal::SplitStringAndParseToInserter(source, delim, parse, result, + strings::internal::BackInsertPolicy()); } // END DOXYGEN SplitFunctions grouping /* @} */ -#endif // STRINGS_SPLIT_H_ +#endif // STRINGS_SPLIT_H_ diff --git a/be/src/gutil/strings/split_internal.h b/be/src/gutil/strings/split_internal.h index f044990bf4bee3..5c3f817eba4bd4 100644 --- a/be/src/gutil/strings/split_internal.h +++ b/be/src/gutil/strings/split_internal.h @@ -26,13 +26,13 @@ using std::multimap; #include using std::vector; -#include "gutil/port.h" // for LANG_CXX11 +#include "gutil/port.h" // for LANG_CXX11 #include "gutil/strings/stringpiece.h" #ifdef LANG_CXX11 // This must be included after "base/port.h", which defines LANG_CXX11. #include -#endif // LANG_CXX11 +#endif // LANG_CXX11 namespace strings { @@ -40,9 +40,7 @@ namespace internal { // The default Predicate object, which doesn't filter out anything. struct NoFilter { - bool operator()(StringPiece /* ignored */) { - return true; - } + bool operator()(StringPiece /* ignored */) { return true; } }; // This class splits a string using the given delimiter, returning the split @@ -67,87 +65,78 @@ struct NoFilter { // The two-argument constructor is used to split the given text using the given // delimiter. template -class SplitIterator - : public std::iterator { - public: - // Two constructors for "end" iterators. - explicit SplitIterator(Delimiter d) - : delimiter_(std::move(d)), predicate_(), is_end_(true) {} - SplitIterator(Delimiter d, Predicate p) - : delimiter_(std::move(d)), predicate_(std::move(p)), is_end_(true) {} - // Two constructors taking the text to iterator. 
- SplitIterator(StringPiece text, Delimiter d) - : text_(std::move(text)), - delimiter_(std::move(d)), - predicate_(), - is_end_(false) { - ++(*this); - } - SplitIterator(StringPiece text, Delimiter d, Predicate p) - : text_(std::move(text)), - delimiter_(std::move(d)), - predicate_(std::move(p)), - is_end_(false) { - ++(*this); - } - - StringPiece operator*() { return curr_piece_; } - StringPiece* operator->() { return &curr_piece_; } - - SplitIterator& operator++() { - do { - if (text_.end() == curr_piece_.end()) { - // Already consumed all of text_, so we're done. - is_end_ = true; +class SplitIterator : public std::iterator { +public: + // Two constructors for "end" iterators. + explicit SplitIterator(Delimiter d) : delimiter_(std::move(d)), predicate_(), is_end_(true) {} + SplitIterator(Delimiter d, Predicate p) + : delimiter_(std::move(d)), predicate_(std::move(p)), is_end_(true) {} + // Two constructors taking the text to iterator. + SplitIterator(StringPiece text, Delimiter d) + : text_(std::move(text)), delimiter_(std::move(d)), predicate_(), is_end_(false) { + ++(*this); + } + SplitIterator(StringPiece text, Delimiter d, Predicate p) + : text_(std::move(text)), + delimiter_(std::move(d)), + predicate_(std::move(p)), + is_end_(false) { + ++(*this); + } + + StringPiece operator*() { return curr_piece_; } + StringPiece* operator->() { return &curr_piece_; } + + SplitIterator& operator++() { + do { + if (text_.end() == curr_piece_.end()) { + // Already consumed all of text_, so we're done. + is_end_ = true; + return *this; + } + StringPiece found_delimiter = delimiter_.Find(text_); + assert(found_delimiter.data() != NULL); + assert(text_.begin() <= found_delimiter.begin()); + assert(found_delimiter.end() <= text_.end()); + // found_delimiter is allowed to be empty. + // Sets curr_piece_ to all text up to but excluding the delimiter itself. + // Sets text_ to remaining data after the delimiter. 
+ curr_piece_.set(text_.begin(), found_delimiter.begin() - text_.begin()); + text_.remove_prefix(found_delimiter.end() - text_.begin()); + } while (!predicate_(curr_piece_)); return *this; - } - StringPiece found_delimiter = delimiter_.Find(text_); - assert(found_delimiter.data() != NULL); - assert(text_.begin() <= found_delimiter.begin()); - assert(found_delimiter.end() <= text_.end()); - // found_delimiter is allowed to be empty. - // Sets curr_piece_ to all text up to but excluding the delimiter itself. - // Sets text_ to remaining data after the delimiter. - curr_piece_.set(text_.begin(), found_delimiter.begin() - text_.begin()); - text_.remove_prefix(found_delimiter.end() - text_.begin()); - } while (!predicate_(curr_piece_)); - return *this; - } - - SplitIterator operator++(int /* postincrement */) { - SplitIterator old(*this); - ++(*this); - return old; - } - - bool operator==(const SplitIterator& other) const { - // Two "end" iterators are always equal. If the two iterators being compared - // aren't both end iterators, then we fallback to comparing their fields. - // Importantly, the text being split must be equal and the current piece - // within the text being split must also be equal. The delimiter_ and - // predicate_ fields need not be checked here because they're template - // parameters that are already part of the SplitIterator's type. - return (is_end_ && other.is_end_) || - (is_end_ == other.is_end_ && - text_ == other.text_ && - text_.data() == other.text_.data() && - curr_piece_ == other.curr_piece_ && - curr_piece_.data() == other.curr_piece_.data()); - } - - bool operator!=(const SplitIterator& other) const { - return !(*this == other); - } - - private: - // The text being split. Modified as delimited pieces are consumed. - StringPiece text_; - Delimiter delimiter_; - Predicate predicate_; - bool is_end_; - // Holds the currently split piece of text. Will always refer to string data - // within text_. 
This value is returned when the iterator is dereferenced. - StringPiece curr_piece_; + } + + SplitIterator operator++(int /* postincrement */) { + SplitIterator old(*this); + ++(*this); + return old; + } + + bool operator==(const SplitIterator& other) const { + // Two "end" iterators are always equal. If the two iterators being compared + // aren't both end iterators, then we fallback to comparing their fields. + // Importantly, the text being split must be equal and the current piece + // within the text being split must also be equal. The delimiter_ and + // predicate_ fields need not be checked here because they're template + // parameters that are already part of the SplitIterator's type. + return (is_end_ && other.is_end_) || + (is_end_ == other.is_end_ && text_ == other.text_ && + text_.data() == other.text_.data() && curr_piece_ == other.curr_piece_ && + curr_piece_.data() == other.curr_piece_.data()); + } + + bool operator!=(const SplitIterator& other) const { return !(*this == other); } + +private: + // The text being split. Modified as delimited pieces are consumed. + StringPiece text_; + Delimiter delimiter_; + Predicate predicate_; + bool is_end_; + // Holds the currently split piece of text. Will always refer to string data + // within text_. This value is returned when the iterator is dereferenced. + StringPiece curr_piece_; }; // Declares a functor that can convert a StringPiece to another type. This works @@ -158,25 +147,19 @@ class SplitIterator // then a solution for error reporting would need to be devised. template struct StringPieceTo { - To operator()(StringPiece from) const { - return To(from); - } + To operator()(StringPiece from) const { return To(from); } }; // Specialization for converting to string. template <> struct StringPieceTo { - string operator()(StringPiece from) const { - return from.ToString(); - } + string operator()(StringPiece from) const { return from.ToString(); } }; // Specialization for converting to *const* string. 
template <> struct StringPieceTo { - string operator()(StringPiece from) const { - return from.ToString(); - } + string operator()(StringPiece from) const { return from.ToString(); } }; #ifdef LANG_CXX11 @@ -184,11 +167,11 @@ struct StringPieceTo { // details below in Splitter<> where this is used. template struct IsNotInitializerList { - typedef void type; + typedef void type; }; template struct IsNotInitializerList> {}; -#endif // LANG_CXX11 +#endif // LANG_CXX11 // This class implements the behavior of the split API by giving callers access // to the underlying split substrings in various convenient ways, such as @@ -208,24 +191,22 @@ struct IsNotInitializerList> {}; // which does not filter out anything. template class Splitter { - public: - typedef internal::SplitIterator Iterator; +public: + typedef internal::SplitIterator Iterator; - Splitter(StringPiece text, Delimiter d) - : begin_(text, d), end_(d) {} + Splitter(StringPiece text, Delimiter d) : begin_(text, d), end_(d) {} - Splitter(StringPiece text, Delimiter d, Predicate p) - : begin_(text, d, p), end_(d, p) {} + Splitter(StringPiece text, Delimiter d, Predicate p) : begin_(text, d, p), end_(d, p) {} - // Range functions that iterate the split substrings as StringPiece objects. - // These methods enable a Splitter to be used in a range-based for loop in - // C++11, for example: - // - // for (StringPiece sp : my_splitter) { - // DoWork(sp); - // } - const Iterator& begin() const { return begin_; } - const Iterator& end() const { return end_; } + // Range functions that iterate the split substrings as StringPiece objects. 
+ // These methods enable a Splitter to be used in a range-based for loop in + // C++11, for example: + // + // for (StringPiece sp : my_splitter) { + // DoWork(sp); + // } + const Iterator& begin() const { return begin_; } + const Iterator& end() const { return end_; } #ifdef LANG_CXX11 // Support for default template arguments for function templates was added in @@ -243,171 +224,170 @@ class Splitter { #pragma GCC diagnostic ignored "-Wpragmas" #pragma GCC diagnostic ignored "-Wc++98-compat" - // Uses SFINAE to restrict conversion to container-like types (by testing for - // the presence of a const_iterator member type) and also to disable - // conversion to an initializer_list (which also has a const_iterator). - // Otherwise, code compiled in C++11 will get an error due to ambiguous - // conversion paths (in C++11 vector::operator= is overloaded to take - // either a vector or an initializer_list). - // - // This trick was taken from util/gtl/container_literal.h - template ::type, - typename ContainerChecker = - typename Container::const_iterator> - operator Container() { - return SelectContainer::value>()(this); - } + // Uses SFINAE to restrict conversion to container-like types (by testing for + // the presence of a const_iterator member type) and also to disable + // conversion to an initializer_list (which also has a const_iterator). + // Otherwise, code compiled in C++11 will get an error due to ambiguous + // conversion paths (in C++11 vector::operator= is overloaded to take + // either a vector or an initializer_list). + // + // This trick was taken from util/gtl/container_literal.h + template ::type, + typename ContainerChecker = typename Container::const_iterator> + operator Container() { + return SelectContainer::value>()(this); + } // Restores diagnostic settings, i.e., removes the "ignore" on -Wpragmas and // -Wc++98-compat. 
#pragma GCC diagnostic pop #else - // Not under LANG_CXX11 - template - operator Container() { - return SelectContainer::value>()(this); - } -#endif // LANG_CXX11 - - template - operator std::pair() { - return ToPair(); - } - - private: - // is_map::value is true iff there exists a type T::mapped_type. This is - // used to dispatch to one of the SelectContainer<> functors (below) from the - // implicit conversion operator (above). - template - struct is_map { - template static base::big_ test(typename U::mapped_type*); - template static base::small_ test(...); - static const bool value = (sizeof(test(0)) == sizeof(base::big_)); - }; - - // Base template handles splitting to non-map containers - template - struct SelectContainer { - Container operator()(Splitter* splitter) const { - return splitter->template ToContainer(); + // Not under LANG_CXX11 + template + operator Container() { + return SelectContainer::value>()(this); } - }; +#endif // LANG_CXX11 - // Partial template specialization for splitting to map-like containers. - template - struct SelectContainer { - Container operator()(Splitter* splitter) const { - return splitter->template ToMap(); + template + operator std::pair() { + return ToPair(); } - }; - - // Inserts split results into the container. To do this the results are first - // stored in a vector. This is where the input text is actually - // "parsed". This vector is then used to possibly reserve space in the output - // container, and the StringPieces in "v" are converted as necessary to the - // output container's value type. - // - // The reason to use an intermediate vector of StringPiece is so we can learn - // the needed capacity of the output container. This is needed when the output - // container is a vector in which case resizes can be expensive due to - // copying of the ::string objects. 
- // - // At some point in the future we might add a C++11 move constructor to - // ::string, in which case the vector resizes are much less expensive and the - // use of this intermediate vector "v" can be removed. - template - Container ToContainer() { - vector v; - for (Iterator it = begin(); it != end_; ++it) { - v.push_back(*it); + +private: + // is_map::value is true iff there exists a type T::mapped_type. This is + // used to dispatch to one of the SelectContainer<> functors (below) from the + // implicit conversion operator (above). + template + struct is_map { + template + static base::big_ test(typename U::mapped_type*); + template + static base::small_ test(...); + static const bool value = (sizeof(test(0)) == sizeof(base::big_)); + }; + + // Base template handles splitting to non-map containers + template + struct SelectContainer { + Container operator()(Splitter* splitter) const { + return splitter->template ToContainer(); + } + }; + + // Partial template specialization for splitting to map-like containers. + template + struct SelectContainer { + Container operator()(Splitter* splitter) const { + return splitter->template ToMap(); + } + }; + + // Inserts split results into the container. To do this the results are first + // stored in a vector. This is where the input text is actually + // "parsed". This vector is then used to possibly reserve space in the output + // container, and the StringPieces in "v" are converted as necessary to the + // output container's value type. + // + // The reason to use an intermediate vector of StringPiece is so we can learn + // the needed capacity of the output container. This is needed when the output + // container is a vector in which case resizes can be expensive due to + // copying of the ::string objects. 
+ // + // At some point in the future we might add a C++11 move constructor to + // ::string, in which case the vector resizes are much less expensive and the + // use of this intermediate vector "v" can be removed. + template + Container ToContainer() { + vector v; + for (Iterator it = begin(); it != end_; ++it) { + v.push_back(*it); + } + typedef typename Container::value_type ToType; + internal::StringPieceTo converter; + Container c; + ReserveCapacity(&c, v.size()); + std::insert_iterator inserter(c, c.begin()); + for (const auto& sp : v) { + *inserter++ = converter(sp); + } + return c; } - typedef typename Container::value_type ToType; - internal::StringPieceTo converter; - Container c; - ReserveCapacity(&c, v.size()); - std::insert_iterator inserter(c, c.begin()); - for (const auto& sp : v) { - *inserter++ = converter(sp); + + // The algorithm is to insert a new pair into the map for each even-numbered + // item, with the even-numbered item as the key with a default-constructed + // value. Each odd-numbered item will then be assigned to the last pair's + // value. + template + Map ToMap() { + typedef typename Map::key_type Key; + typedef typename Map::mapped_type Data; + Map m; + StringPieceTo key_converter; + StringPieceTo val_converter; + typename Map::iterator curr_pair; + bool is_even = true; + for (Iterator it = begin(); it != end_; ++it) { + if (is_even) { + curr_pair = InsertInMap(std::make_pair(key_converter(*it), Data()), &m); + } else { + curr_pair->second = val_converter(*it); + } + is_even = !is_even; + } + return m; } - return c; - } - - // The algorithm is to insert a new pair into the map for each even-numbered - // item, with the even-numbered item as the key with a default-constructed - // value. Each odd-numbered item will then be assigned to the last pair's - // value. 
- template - Map ToMap() { - typedef typename Map::key_type Key; - typedef typename Map::mapped_type Data; - Map m; - StringPieceTo key_converter; - StringPieceTo val_converter; - typename Map::iterator curr_pair; - bool is_even = true; - for (Iterator it = begin(); it != end_; ++it) { - if (is_even) { - curr_pair = InsertInMap(std::make_pair(key_converter(*it), Data()), &m); - } else { - curr_pair->second = val_converter(*it); - } - is_even = !is_even; + + // Returns a pair with its .first and .second members set to the first two + // strings returned by the begin() iterator. Either/both of .first and .second + // will be empty strings if the iterator doesn't have a corresponding value. + template + std::pair ToPair() { + StringPieceTo first_converter; + StringPieceTo second_converter; + StringPiece first, second; + Iterator it = begin(); + if (it != end()) { + first = *it; + if (++it != end()) { + second = *it; + } + } + return std::make_pair(first_converter(first), second_converter(second)); } - return m; - } - - // Returns a pair with its .first and .second members set to the first two - // strings returned by the begin() iterator. Either/both of .first and .second - // will be empty strings if the iterator doesn't have a corresponding value. - template - std::pair ToPair() { - StringPieceTo first_converter; - StringPieceTo second_converter; - StringPiece first, second; - Iterator it = begin(); - if (it != end()) { - first = *it; - if (++it != end()) { - second = *it; - } + + // Overloaded InsertInMap() function. The first overload is the commonly used + // one for most map-like objects. The second overload is a special case for + // multimap, because multimap's insert() member function directly returns an + // iterator, rather than a pair like map's. 
+ template + typename Map::iterator InsertInMap(const typename Map::value_type& value, Map* map) { + return map->insert(value).first; } - return std::make_pair(first_converter(first), second_converter(second)); - } - - // Overloaded InsertInMap() function. The first overload is the commonly used - // one for most map-like objects. The second overload is a special case for - // multimap, because multimap's insert() member function directly returns an - // iterator, rather than a pair like map's. - template - typename Map::iterator InsertInMap( - const typename Map::value_type& value, Map* map) { - return map->insert(value).first; - } - - // InsertInMap overload for multimap. - template - typename std::multimap::iterator InsertInMap( - const typename std::multimap::value_type& value, - typename std::multimap* map) { - return map->insert(value); - } - - // Reserves the given amount of capacity in a vector - template - void ReserveCapacity(vector* v, size_t size) { - v->reserve(size); - } - void ReserveCapacity(...) {} - - const Iterator begin_; - const Iterator end_; + + // InsertInMap overload for multimap. + template + typename std::multimap::iterator InsertInMap( + const typename std::multimap::value_type& value, + typename std::multimap* map) { + return map->insert(value); + } + + // Reserves the given amount of capacity in a vector + template + void ReserveCapacity(vector* v, size_t size) { + v->reserve(size); + } + void ReserveCapacity(...) 
{} + + const Iterator begin_; + const Iterator end_; }; -} // namespace internal +} // namespace internal -} // namespace strings +} // namespace strings -#endif // STRINGS_SPLIT_INTERNAL_H_ +#endif // STRINGS_SPLIT_INTERNAL_H_ diff --git a/be/src/gutil/strings/strcat.cc b/be/src/gutil/strings/strcat.cc index 1322f44dc6c6cc..55ea2c94c2dd22 100644 --- a/be/src/gutil/strings/strcat.cc +++ b/be/src/gutil/strings/strcat.cc @@ -2,16 +2,16 @@ #include "gutil/strings/strcat.h" +#include #include #include #include #include -#include #include "gutil/gscoped_ptr.h" +#include "gutil/stl_util.h" #include "gutil/strings/ascii_ctype.h" #include "gutil/strings/escaping.h" -#include "gutil/stl_util.h" AlphaNum gEmptyAlphaNum(""); @@ -25,120 +25,112 @@ AlphaNum gEmptyAlphaNum(""); // Append is merely a version of memcpy that returns the address of the byte // after the area just overwritten. It comes in multiple flavors to minimize // call overhead. -static char *Append1(char *out, const AlphaNum &x) { - memcpy(out, x.data(), x.size()); - return out + x.size(); -} - -static char *Append2(char *out, const AlphaNum &x1, const AlphaNum &x2) { - memcpy(out, x1.data(), x1.size()); - out += x1.size(); - - memcpy(out, x2.data(), x2.size()); - return out + x2.size(); -} - -static char *Append4(char *out, - const AlphaNum &x1, const AlphaNum &x2, - const AlphaNum &x3, const AlphaNum &x4) { - memcpy(out, x1.data(), x1.size()); - out += x1.size(); - - memcpy(out, x2.data(), x2.size()); - out += x2.size(); - - memcpy(out, x3.data(), x3.size()); - out += x3.size(); - - memcpy(out, x4.data(), x4.size()); - return out + x4.size(); -} - -string StrCat(const AlphaNum &a) { - return string(a.data(), a.size()); -} - -string StrCat(const AlphaNum &a, const AlphaNum &b) { - string result; - STLStringResizeUninitialized(&result, a.size() + b.size()); - char *const begin = &*result.begin(); - char *out = Append2(begin, a, b); - DCHECK_EQ(out, begin + result.size()); - return result; -} - -string 
StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) { - string result; - STLStringResizeUninitialized(&result, a.size() + b.size() + c.size()); - char *const begin = &*result.begin(); - char *out = Append2(begin, a, b); - out = Append1(out, c); - DCHECK_EQ(out, begin + result.size()); - return result; -} - -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d) { - string result; - STLStringResizeUninitialized(&result, - a.size() + b.size() + c.size() + d.size()); - char *const begin = &*result.begin(); - char *out = Append4(begin, a, b, c, d); - DCHECK_EQ(out, begin + result.size()); - return result; -} - -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e) { - string result; - STLStringResizeUninitialized(&result, - a.size() + b.size() + c.size() + d.size() + e.size()); - char *const begin = &*result.begin(); - char *out = Append4(begin, a, b, c, d); - out = Append1(out, e); - DCHECK_EQ(out, begin + result.size()); - return result; -} - -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f) { - string result; - STLStringResizeUninitialized(&result, - a.size() + b.size() + c.size() + d.size() + e.size() + f.size()); - char *const begin = &*result.begin(); - char *out = Append4(begin, a, b, c, d); - out = Append2(out, e, f); - DCHECK_EQ(out, begin + result.size()); - return result; -} - -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g) { - string result; - STLStringResizeUninitialized(&result, - a.size() + b.size() + c.size() + d.size() + e.size() - + f.size() + g.size()); - char *const begin = &*result.begin(); - char *out = Append4(begin, a, b, c, d); - out = Append2(out, e, f); - out = Append1(out, g); - DCHECK_EQ(out, begin + result.size()); - return result; -} - -string 
StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h) { - string result; - STLStringResizeUninitialized(&result, - a.size() + b.size() + c.size() + d.size() + e.size() - + f.size() + g.size() + h.size()); - char *const begin = &*result.begin(); - char *out = Append4(begin, a, b, c, d); - out = Append4(out, e, f, g, h); - DCHECK_EQ(out, begin + result.size()); - return result; +static char* Append1(char* out, const AlphaNum& x) { + memcpy(out, x.data(), x.size()); + return out + x.size(); +} + +static char* Append2(char* out, const AlphaNum& x1, const AlphaNum& x2) { + memcpy(out, x1.data(), x1.size()); + out += x1.size(); + + memcpy(out, x2.data(), x2.size()); + return out + x2.size(); +} + +static char* Append4(char* out, const AlphaNum& x1, const AlphaNum& x2, const AlphaNum& x3, + const AlphaNum& x4) { + memcpy(out, x1.data(), x1.size()); + out += x1.size(); + + memcpy(out, x2.data(), x2.size()); + out += x2.size(); + + memcpy(out, x3.data(), x3.size()); + out += x3.size(); + + memcpy(out, x4.data(), x4.size()); + return out + x4.size(); +} + +string StrCat(const AlphaNum& a) { + return string(a.data(), a.size()); +} + +string StrCat(const AlphaNum& a, const AlphaNum& b) { + string result; + STLStringResizeUninitialized(&result, a.size() + b.size()); + char* const begin = &*result.begin(); + char* out = Append2(begin, a, b); + DCHECK_EQ(out, begin + result.size()); + return result; +} + +string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c) { + string result; + STLStringResizeUninitialized(&result, a.size() + b.size() + c.size()); + char* const begin = &*result.begin(); + char* out = Append2(begin, a, b); + out = Append1(out, c); + DCHECK_EQ(out, begin + result.size()); + return result; +} + +string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d) { + string result; + STLStringResizeUninitialized(&result, 
a.size() + b.size() + c.size() + d.size()); + char* const begin = &*result.begin(); + char* out = Append4(begin, a, b, c, d); + DCHECK_EQ(out, begin + result.size()); + return result; +} + +string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e) { + string result; + STLStringResizeUninitialized(&result, a.size() + b.size() + c.size() + d.size() + e.size()); + char* const begin = &*result.begin(); + char* out = Append4(begin, a, b, c, d); + out = Append1(out, e); + DCHECK_EQ(out, begin + result.size()); + return result; +} + +string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f) { + string result; + STLStringResizeUninitialized(&result, + a.size() + b.size() + c.size() + d.size() + e.size() + f.size()); + char* const begin = &*result.begin(); + char* out = Append4(begin, a, b, c, d); + out = Append2(out, e, f); + DCHECK_EQ(out, begin + result.size()); + return result; +} + +string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g) { + string result; + STLStringResizeUninitialized( + &result, a.size() + b.size() + c.size() + d.size() + e.size() + f.size() + g.size()); + char* const begin = &*result.begin(); + char* out = Append4(begin, a, b, c, d); + out = Append2(out, e, f); + out = Append1(out, g); + DCHECK_EQ(out, begin + result.size()); + return result; +} + +string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h) { + string result; + STLStringResizeUninitialized(&result, a.size() + b.size() + c.size() + d.size() + e.size() + + f.size() + g.size() + h.size()); + char* const begin = &*result.begin(); + char* out = Append4(begin, a, b, c, d); + out = Append4(out, e, f, g, h); + DCHECK_EQ(out, begin + result.size()); + return result; } 
namespace strings { @@ -147,30 +139,30 @@ namespace internal { // StrCat with this many params is exceedingly rare, but it has been // requested... therefore we'll rely on default arguments to make calling // slightly less efficient, to preserve code size. -string StrCatNineOrMore(const AlphaNum *a, ...) { - string result; - - va_list args; - va_start(args, a); - size_t size = a->size(); - while (const AlphaNum *arg = va_arg(args, const AlphaNum *)) { - size += arg->size(); - } - STLStringResizeUninitialized(&result, size); - va_end(args); - va_start(args, a); - char *const begin = &*result.begin(); - char *out = Append1(begin, *a); - while (const AlphaNum *arg = va_arg(args, const AlphaNum *)) { - out = Append1(out, *arg); - } - va_end(args); - DCHECK_EQ(out, begin + size); - return result; -} - -} // namespace internal -} // namespace strings +string StrCatNineOrMore(const AlphaNum* a, ...) { + string result; + + va_list args; + va_start(args, a); + size_t size = a->size(); + while (const AlphaNum* arg = va_arg(args, const AlphaNum*)) { + size += arg->size(); + } + STLStringResizeUninitialized(&result, size); + va_end(args); + va_start(args, a); + char* const begin = &*result.begin(); + char* out = Append1(begin, *a); + while (const AlphaNum* arg = va_arg(args, const AlphaNum*)) { + out = Append1(out, *arg); + } + va_end(args); + DCHECK_EQ(out, begin + size); + return result; +} + +} // namespace internal +} // namespace strings // It's possible to call StrAppend with a StringPiece that is itself a fragment // of the string we're appending to. However the results of this are random. @@ -179,73 +171,68 @@ string StrCatNineOrMore(const AlphaNum *a, ...) 
{ #define DCHECK_NO_OVERLAP(dest, src) \ DCHECK_GT(uintptr_t((src).data() - (dest).data()), uintptr_t((dest).size())) -void StrAppend(string *result, const AlphaNum &a) { - DCHECK_NO_OVERLAP(*result, a); - result->append(a.data(), a.size()); -} - -void StrAppend(string *result, const AlphaNum &a, const AlphaNum &b) { - DCHECK_NO_OVERLAP(*result, a); - DCHECK_NO_OVERLAP(*result, b); - string::size_type old_size = result->size(); - STLStringResizeUninitialized(result, old_size + a.size() + b.size()); - char *const begin = &*result->begin(); - char *out = Append2(begin + old_size, a, b); - DCHECK_EQ(out, begin + result->size()); -} - -void StrAppend(string *result, - const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) { - DCHECK_NO_OVERLAP(*result, a); - DCHECK_NO_OVERLAP(*result, b); - DCHECK_NO_OVERLAP(*result, c); - string::size_type old_size = result->size(); - STLStringResizeUninitialized(result, - old_size + a.size() + b.size() + c.size()); - char *const begin = &*result->begin(); - char *out = Append2(begin + old_size, a, b); - out = Append1(out, c); - DCHECK_EQ(out, begin + result->size()); -} - -void StrAppend(string *result, - const AlphaNum &a, const AlphaNum &b, - const AlphaNum &c, const AlphaNum &d) { - DCHECK_NO_OVERLAP(*result, a); - DCHECK_NO_OVERLAP(*result, b); - DCHECK_NO_OVERLAP(*result, c); - DCHECK_NO_OVERLAP(*result, d); - string::size_type old_size = result->size(); - STLStringResizeUninitialized(result, - old_size + a.size() + b.size() + c.size() + d.size()); - char *const begin = &*result->begin(); - char *out = Append4(begin + old_size, a, b, c, d); - DCHECK_EQ(out, begin + result->size()); +void StrAppend(string* result, const AlphaNum& a) { + DCHECK_NO_OVERLAP(*result, a); + result->append(a.data(), a.size()); +} + +void StrAppend(string* result, const AlphaNum& a, const AlphaNum& b) { + DCHECK_NO_OVERLAP(*result, a); + DCHECK_NO_OVERLAP(*result, b); + string::size_type old_size = result->size(); + 
STLStringResizeUninitialized(result, old_size + a.size() + b.size()); + char* const begin = &*result->begin(); + char* out = Append2(begin + old_size, a, b); + DCHECK_EQ(out, begin + result->size()); +} + +void StrAppend(string* result, const AlphaNum& a, const AlphaNum& b, const AlphaNum& c) { + DCHECK_NO_OVERLAP(*result, a); + DCHECK_NO_OVERLAP(*result, b); + DCHECK_NO_OVERLAP(*result, c); + string::size_type old_size = result->size(); + STLStringResizeUninitialized(result, old_size + a.size() + b.size() + c.size()); + char* const begin = &*result->begin(); + char* out = Append2(begin + old_size, a, b); + out = Append1(out, c); + DCHECK_EQ(out, begin + result->size()); +} + +void StrAppend(string* result, const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, + const AlphaNum& d) { + DCHECK_NO_OVERLAP(*result, a); + DCHECK_NO_OVERLAP(*result, b); + DCHECK_NO_OVERLAP(*result, c); + DCHECK_NO_OVERLAP(*result, d); + string::size_type old_size = result->size(); + STLStringResizeUninitialized(result, old_size + a.size() + b.size() + c.size() + d.size()); + char* const begin = &*result->begin(); + char* out = Append4(begin + old_size, a, b, c, d); + DCHECK_EQ(out, begin + result->size()); } // StrAppend with this many params is even rarer than with StrCat. // Therefore we'll again rely on default arguments to make calling // slightly less efficient, to preserve code size. 
-void StrAppend(string *result, - const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i) { - DCHECK_NO_OVERLAP(*result, a); - DCHECK_NO_OVERLAP(*result, b); - DCHECK_NO_OVERLAP(*result, c); - DCHECK_NO_OVERLAP(*result, d); - DCHECK_NO_OVERLAP(*result, e); - DCHECK_NO_OVERLAP(*result, f); - DCHECK_NO_OVERLAP(*result, g); - DCHECK_NO_OVERLAP(*result, h); - DCHECK_NO_OVERLAP(*result, i); - string::size_type old_size = result->size(); - STLStringResizeUninitialized(result, - old_size + a.size() + b.size() + c.size() + d.size() - + e.size() + f.size() + g.size() + h.size() + i.size()); - char *const begin = &*result->begin(); - char *out = Append4(begin + old_size, a, b, c, d); - out = Append4(out, e, f, g, h); - out = Append1(out, i); - DCHECK_EQ(out, begin + result->size()); +void StrAppend(string* result, const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, + const AlphaNum& d, const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, + const AlphaNum& h, const AlphaNum& i) { + DCHECK_NO_OVERLAP(*result, a); + DCHECK_NO_OVERLAP(*result, b); + DCHECK_NO_OVERLAP(*result, c); + DCHECK_NO_OVERLAP(*result, d); + DCHECK_NO_OVERLAP(*result, e); + DCHECK_NO_OVERLAP(*result, f); + DCHECK_NO_OVERLAP(*result, g); + DCHECK_NO_OVERLAP(*result, h); + DCHECK_NO_OVERLAP(*result, i); + string::size_type old_size = result->size(); + STLStringResizeUninitialized(result, old_size + a.size() + b.size() + c.size() + d.size() + + e.size() + f.size() + g.size() + h.size() + + i.size()); + char* const begin = &*result->begin(); + char* out = Append4(begin + old_size, a, b, c, d); + out = Append4(out, e, f, g, h); + out = Append1(out, i); + DCHECK_EQ(out, begin + result->size()); } diff --git a/be/src/gutil/strings/strcat.h b/be/src/gutil/strings/strcat.h index 4493a0bd2a03ca..819db4fd840b05 100644 --- a/be/src/gutil/strings/strcat.h +++ b/be/src/gutil/strings/strcat.h @@ 
-36,42 +36,41 @@ using std::string; // http://goto/style-guide-exception-20978288 // struct AlphaNum { - StringPiece piece; - char digits[kFastToBufferSize]; + StringPiece piece; + char digits[kFastToBufferSize]; - // No bool ctor -- bools convert to an integral type. - // A bool ctor would also convert incoming pointers (bletch). + // No bool ctor -- bools convert to an integral type. + // A bool ctor would also convert incoming pointers (bletch). - AlphaNum(int32 i32) // NOLINT(runtime/explicit) - : piece(digits, FastInt32ToBufferLeft(i32, digits) - &digits[0]) {} - AlphaNum(uint32 u32) // NOLINT(runtime/explicit) - : piece(digits, FastUInt32ToBufferLeft(u32, digits) - &digits[0]) {} - AlphaNum(int64 i64) // NOLINT(runtime/explicit) - : piece(digits, FastInt64ToBufferLeft(i64, digits) - &digits[0]) {} - AlphaNum(uint64 u64) // NOLINT(runtime/explicit) - : piece(digits, FastUInt64ToBufferLeft(u64, digits) - &digits[0]) {} + AlphaNum(int32 i32) // NOLINT(runtime/explicit) + : piece(digits, FastInt32ToBufferLeft(i32, digits) - &digits[0]) {} + AlphaNum(uint32 u32) // NOLINT(runtime/explicit) + : piece(digits, FastUInt32ToBufferLeft(u32, digits) - &digits[0]) {} + AlphaNum(int64 i64) // NOLINT(runtime/explicit) + : piece(digits, FastInt64ToBufferLeft(i64, digits) - &digits[0]) {} + AlphaNum(uint64 u64) // NOLINT(runtime/explicit) + : piece(digits, FastUInt64ToBufferLeft(u64, digits) - &digits[0]) {} #if defined(__APPLE__) - AlphaNum(size_t size) // NOLINT(runtime/explicit) - : piece(digits, FastUInt64ToBufferLeft(size, digits) - &digits[0]) {} + AlphaNum(size_t size) // NOLINT(runtime/explicit) + : piece(digits, FastUInt64ToBufferLeft(size, digits) - &digits[0]) {} #endif - AlphaNum(float f) // NOLINT(runtime/explicit) - : piece(digits, strlen(FloatToBuffer(f, digits))) {} - AlphaNum(double f) // NOLINT(runtime/explicit) - : piece(digits, strlen(DoubleToBuffer(f, digits))) {} + AlphaNum(float f) // NOLINT(runtime/explicit) + : piece(digits, strlen(FloatToBuffer(f, 
digits))) {} + AlphaNum(double f) // NOLINT(runtime/explicit) + : piece(digits, strlen(DoubleToBuffer(f, digits))) {} - AlphaNum(const char *c_str) : piece(c_str) {} // NOLINT(runtime/explicit) - AlphaNum(StringPiece pc) - : piece(std::move(pc)) {} // NOLINT(runtime/explicit) - AlphaNum(const string &s) : piece(s) {} // NOLINT(runtime/explicit) + AlphaNum(const char* c_str) : piece(c_str) {} // NOLINT(runtime/explicit) + AlphaNum(StringPiece pc) : piece(std::move(pc)) {} // NOLINT(runtime/explicit) + AlphaNum(const string& s) : piece(s) {} // NOLINT(runtime/explicit) - StringPiece::size_type size() const { return piece.size(); } - const char *data() const { return piece.data(); } + StringPiece::size_type size() const { return piece.size(); } + const char* data() const { return piece.data(); } - private: - // Use ":" not ':' - AlphaNum(char c); // NOLINT(runtime/explicit) +private: + // Use ":" not ':' + AlphaNum(char c); // NOLINT(runtime/explicit) }; extern AlphaNum gEmptyAlphaNum; @@ -99,246 +98,206 @@ extern AlphaNum gEmptyAlphaNum; // be a reference into str. 
// ---------------------------------------------------------------------- -string StrCat(const AlphaNum &a); -string StrCat(const AlphaNum &a, const AlphaNum &b); -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c); -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d); -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e); -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f); -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g); -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h); +string StrCat(const AlphaNum& a); +string StrCat(const AlphaNum& a, const AlphaNum& b); +string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c); +string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d); +string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e); +string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f); +string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g); +string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h); namespace strings { namespace internal { // Do not call directly - this is not part of the public API. 
-string StrCatNineOrMore(const AlphaNum *a1, ...); +string StrCatNineOrMore(const AlphaNum* a1, ...); -} // namespace internal -} // namespace strings +} // namespace internal +} // namespace strings // Support 9 or more arguments -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, + null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, 
&k, null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, + null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k, const AlphaNum &l) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, &k, &l, null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k, const AlphaNum& l) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, + null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k, const AlphaNum &l, - const AlphaNum &m) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, &k, &l, &m, null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k, const AlphaNum& l, + const AlphaNum& m) { + const AlphaNum* null_alphanum = NULL; + return 
strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, &m, + null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k, const AlphaNum &l, - const AlphaNum &m, const AlphaNum &n) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, &k, &l, &m, &n, null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k, const AlphaNum& l, + const AlphaNum& m, const AlphaNum& n) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, &m, + &n, null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k, const AlphaNum &l, - const AlphaNum &m, const AlphaNum &n, const AlphaNum &o) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, &k, &l, &m, &n, &o, - null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k, const AlphaNum& l, + const AlphaNum& m, const AlphaNum& n, const AlphaNum& o) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, &m, + &n, &o, null_alphanum); } -inline string 
StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k, const AlphaNum &l, - const AlphaNum &m, const AlphaNum &n, const AlphaNum &o, - const AlphaNum &p) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, &k, &l, &m, &n, &o, &p, - null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k, const AlphaNum& l, + const AlphaNum& m, const AlphaNum& n, const AlphaNum& o, const AlphaNum& p) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, &m, + &n, &o, &p, null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k, const AlphaNum &l, - const AlphaNum &m, const AlphaNum &n, const AlphaNum &o, - const AlphaNum &p, const AlphaNum &q) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, &k, &l, &m, &n, &o, &p, &q, - null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k, const AlphaNum& l, + const AlphaNum& m, const AlphaNum& n, const AlphaNum& o, const AlphaNum& p, + const AlphaNum& q) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, 
&i, &j, &k, &l, &m, + &n, &o, &p, &q, null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k, const AlphaNum &l, - const AlphaNum &m, const AlphaNum &n, const AlphaNum &o, - const AlphaNum &p, const AlphaNum &q, const AlphaNum &r) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, &k, &l, &m, &n, &o, &p, &q, &r, - null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k, const AlphaNum& l, + const AlphaNum& m, const AlphaNum& n, const AlphaNum& o, const AlphaNum& p, + const AlphaNum& q, const AlphaNum& r) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, &m, + &n, &o, &p, &q, &r, null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k, const AlphaNum &l, - const AlphaNum &m, const AlphaNum &n, const AlphaNum &o, - const AlphaNum &p, const AlphaNum &q, const AlphaNum &r, - const AlphaNum &s) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, &k, &l, &m, &n, &o, &p, &q, &r, - &s, null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k, const AlphaNum& l, + const 
AlphaNum& m, const AlphaNum& n, const AlphaNum& o, const AlphaNum& p, + const AlphaNum& q, const AlphaNum& r, const AlphaNum& s) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, &m, + &n, &o, &p, &q, &r, &s, null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k, const AlphaNum &l, - const AlphaNum &m, const AlphaNum &n, const AlphaNum &o, - const AlphaNum &p, const AlphaNum &q, const AlphaNum &r, - const AlphaNum &s, const AlphaNum &t) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, &k, &l, &m, &n, &o, &p, &q, &r, - &s, &t, null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k, const AlphaNum& l, + const AlphaNum& m, const AlphaNum& n, const AlphaNum& o, const AlphaNum& p, + const AlphaNum& q, const AlphaNum& r, const AlphaNum& s, const AlphaNum& t) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, &m, + &n, &o, &p, &q, &r, &s, &t, null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k, const AlphaNum &l, - const AlphaNum &m, const AlphaNum &n, const AlphaNum &o, - const AlphaNum &p, const AlphaNum &q, const AlphaNum &r, - const AlphaNum &s, const AlphaNum &t, const AlphaNum &u) { - const AlphaNum* null_alphanum = NULL; - return 
strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, &k, &l, &m, &n, &o, &p, &q, &r, - &s, &t, &u, null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k, const AlphaNum& l, + const AlphaNum& m, const AlphaNum& n, const AlphaNum& o, const AlphaNum& p, + const AlphaNum& q, const AlphaNum& r, const AlphaNum& s, const AlphaNum& t, + const AlphaNum& u) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, &m, + &n, &o, &p, &q, &r, &s, &t, &u, null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k, const AlphaNum &l, - const AlphaNum &m, const AlphaNum &n, const AlphaNum &o, - const AlphaNum &p, const AlphaNum &q, const AlphaNum &r, - const AlphaNum &s, const AlphaNum &t, const AlphaNum &u, - const AlphaNum &v) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, &k, &l, &m, &n, &o, &p, &q, &r, - &s, &t, &u, &v, null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k, const AlphaNum& l, + const AlphaNum& m, const AlphaNum& n, const AlphaNum& o, const AlphaNum& p, + const AlphaNum& q, const AlphaNum& r, const AlphaNum& s, const AlphaNum& t, + const AlphaNum& u, const AlphaNum& v) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, &m, 
+ &n, &o, &p, &q, &r, &s, &t, &u, &v, null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k, const AlphaNum &l, - const AlphaNum &m, const AlphaNum &n, const AlphaNum &o, - const AlphaNum &p, const AlphaNum &q, const AlphaNum &r, - const AlphaNum &s, const AlphaNum &t, const AlphaNum &u, - const AlphaNum &v, const AlphaNum &w) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, &k, &l, &m, &n, &o, &p, &q, &r, - &s, &t, &u, &v, &w, null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k, const AlphaNum& l, + const AlphaNum& m, const AlphaNum& n, const AlphaNum& o, const AlphaNum& p, + const AlphaNum& q, const AlphaNum& r, const AlphaNum& s, const AlphaNum& t, + const AlphaNum& u, const AlphaNum& v, const AlphaNum& w) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, &m, + &n, &o, &p, &q, &r, &s, &t, &u, &v, &w, + null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k, const AlphaNum &l, - const AlphaNum &m, const AlphaNum &n, const AlphaNum &o, - const AlphaNum &p, const AlphaNum &q, const AlphaNum &r, - const AlphaNum &s, const AlphaNum &t, const AlphaNum &u, - const AlphaNum &v, const AlphaNum &w, const AlphaNum &x) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, 
&f, &g, &h, &i, - &j, &k, &l, &m, &n, &o, &p, &q, &r, - &s, &t, &u, &v, &w, &x, - null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k, const AlphaNum& l, + const AlphaNum& m, const AlphaNum& n, const AlphaNum& o, const AlphaNum& p, + const AlphaNum& q, const AlphaNum& r, const AlphaNum& s, const AlphaNum& t, + const AlphaNum& u, const AlphaNum& v, const AlphaNum& w, const AlphaNum& x) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, &m, + &n, &o, &p, &q, &r, &s, &t, &u, &v, &w, &x, + null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k, const AlphaNum &l, - const AlphaNum &m, const AlphaNum &n, const AlphaNum &o, - const AlphaNum &p, const AlphaNum &q, const AlphaNum &r, - const AlphaNum &s, const AlphaNum &t, const AlphaNum &u, - const AlphaNum &v, const AlphaNum &w, const AlphaNum &x, - const AlphaNum &y) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, &k, &l, &m, &n, &o, &p, &q, &r, - &s, &t, &u, &v, &w, &x, &y, - null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k, const AlphaNum& l, + const AlphaNum& m, const AlphaNum& n, const AlphaNum& o, const AlphaNum& p, + const AlphaNum& q, const AlphaNum& r, const AlphaNum& s, const AlphaNum& t, + const AlphaNum& u, const AlphaNum& v, const AlphaNum& w, const AlphaNum& 
x, + const AlphaNum& y) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, &m, + &n, &o, &p, &q, &r, &s, &t, &u, &v, &w, &x, &y, + null_alphanum); } -inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i, - const AlphaNum &j, const AlphaNum &k, const AlphaNum &l, - const AlphaNum &m, const AlphaNum &n, const AlphaNum &o, - const AlphaNum &p, const AlphaNum &q, const AlphaNum &r, - const AlphaNum &s, const AlphaNum &t, const AlphaNum &u, - const AlphaNum &v, const AlphaNum &w, const AlphaNum &x, - const AlphaNum &y, const AlphaNum &z) { - const AlphaNum* null_alphanum = NULL; - return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, - &j, &k, &l, &m, &n, &o, &p, &q, &r, - &s, &t, &u, &v, &w, &x, &y, &z, - null_alphanum); +inline string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i, const AlphaNum& j, const AlphaNum& k, const AlphaNum& l, + const AlphaNum& m, const AlphaNum& n, const AlphaNum& o, const AlphaNum& p, + const AlphaNum& q, const AlphaNum& r, const AlphaNum& s, const AlphaNum& t, + const AlphaNum& u, const AlphaNum& v, const AlphaNum& w, const AlphaNum& x, + const AlphaNum& y, const AlphaNum& z) { + const AlphaNum* null_alphanum = NULL; + return strings::internal::StrCatNineOrMore(&a, &b, &c, &d, &e, &f, &g, &h, &i, &j, &k, &l, &m, + &n, &o, &p, &q, &r, &s, &t, &u, &v, &w, &x, &y, &z, + null_alphanum); } // ---------------------------------------------------------------------- @@ -362,19 +321,16 @@ inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, // worked around as consecutive calls to StrAppend are quite efficient. 
// ---------------------------------------------------------------------- -void StrAppend(string *dest, const AlphaNum &a); -void StrAppend(string *dest, const AlphaNum &a, const AlphaNum &b); -void StrAppend(string *dest, const AlphaNum &a, const AlphaNum &b, - const AlphaNum &c); -void StrAppend(string *dest, const AlphaNum &a, const AlphaNum &b, - const AlphaNum &c, const AlphaNum &d); +void StrAppend(string* dest, const AlphaNum& a); +void StrAppend(string* dest, const AlphaNum& a, const AlphaNum& b); +void StrAppend(string* dest, const AlphaNum& a, const AlphaNum& b, const AlphaNum& c); +void StrAppend(string* dest, const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, + const AlphaNum& d); // Support up to 9 params by using a default empty AlphaNum. -void StrAppend(string *dest, const AlphaNum &a, const AlphaNum &b, - const AlphaNum &c, const AlphaNum &d, const AlphaNum &e, - const AlphaNum &f = gEmptyAlphaNum, - const AlphaNum &g = gEmptyAlphaNum, - const AlphaNum &h = gEmptyAlphaNum, - const AlphaNum &i = gEmptyAlphaNum); - -#endif // STRINGS_STRCAT_H_ +void StrAppend(string* dest, const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, + const AlphaNum& d, const AlphaNum& e, const AlphaNum& f = gEmptyAlphaNum, + const AlphaNum& g = gEmptyAlphaNum, const AlphaNum& h = gEmptyAlphaNum, + const AlphaNum& i = gEmptyAlphaNum); + +#endif // STRINGS_STRCAT_H_ diff --git a/be/src/gutil/strings/stringpiece.cc b/be/src/gutil/strings/stringpiece.cc index 0c27ac81552733..094e8e27b61874 100644 --- a/be/src/gutil/strings/stringpiece.cc +++ b/be/src/gutil/strings/stringpiece.cc @@ -4,10 +4,11 @@ #include "gutil/strings/stringpiece.h" -#include -#include #include #include + +#include +#include #include #include "gutil/hash/hash.h" @@ -23,87 +24,83 @@ using std::swap; using std::string; namespace std { - size_t hash::operator()(StringPiece s) const { +size_t hash::operator()(StringPiece s) const { return HashTo32(s.data(), s.size()); - } +} } // namespace std 
std::ostream& operator<<(std::ostream& o, StringPiece piece) { - o.write(piece.data(), piece.size()); - return o; + o.write(piece.data(), piece.size()); + return o; } -StringPiece::StringPiece(StringPiece x, int pos) - : ptr_(x.ptr_ + pos), length_(x.length_ - pos) { - DCHECK_LE(0, pos); - DCHECK_LE(pos, x.length_); +StringPiece::StringPiece(StringPiece x, int pos) : ptr_(x.ptr_ + pos), length_(x.length_ - pos) { + DCHECK_LE(0, pos); + DCHECK_LE(pos, x.length_); } StringPiece::StringPiece(StringPiece x, int pos, int len) - : ptr_(x.ptr_ + pos), length_(min(len, x.length_ - pos)) { - DCHECK_LE(0, pos); - DCHECK_LE(pos, x.length_); - DCHECK_GE(len, 0); + : ptr_(x.ptr_ + pos), length_(min(len, x.length_ - pos)) { + DCHECK_LE(0, pos); + DCHECK_LE(pos, x.length_); + DCHECK_GE(len, 0); } void StringPiece::CopyToString(string* target) const { - STLAssignToString(target, ptr_, length_); + STLAssignToString(target, ptr_, length_); } void StringPiece::AppendToString(string* target) const { - STLAppendToString(target, ptr_, length_); + STLAppendToString(target, ptr_, length_); } int StringPiece::copy(char* buf, size_type n, size_type pos) const { - int ret = min(length_ - pos, n); - memcpy(buf, ptr_ + pos, ret); - return ret; + int ret = min(length_ - pos, n); + memcpy(buf, ptr_ + pos, ret); + return ret; } bool StringPiece::contains(StringPiece s) const { - return find(s, 0) != npos; + return find(s, 0) != npos; } int StringPiece::find(StringPiece s, size_type pos) const { - if (length_ <= 0 || pos > static_cast(length_)) { - if (length_ == 0 && pos == 0 && s.length_ == 0) return 0; - return npos; - } - const char *result = memmatch(ptr_ + pos, length_ - pos, - s.ptr_, s.length_); - return result ? result - ptr_ : npos; + if (length_ <= 0 || pos > static_cast(length_)) { + if (length_ == 0 && pos == 0 && s.length_ == 0) return 0; + return npos; + } + const char* result = memmatch(ptr_ + pos, length_ - pos, s.ptr_, s.length_); + return result ? 
result - ptr_ : npos; } int StringPiece::find(char c, size_type pos) const { - if (length_ <= 0 || pos >= static_cast(length_)) { - return npos; - } - const char* result = static_cast( - memchr(ptr_ + pos, c, length_ - pos)); - return result != nullptr ? result - ptr_ : npos; + if (length_ <= 0 || pos >= static_cast(length_)) { + return npos; + } + const char* result = static_cast(memchr(ptr_ + pos, c, length_ - pos)); + return result != nullptr ? result - ptr_ : npos; } int StringPiece::rfind(StringPiece s, size_type pos) const { - if (length_ < s.length_) return npos; - const size_t ulen = length_; - if (s.length_ == 0) return min(ulen, pos); + if (length_ < s.length_) return npos; + const size_t ulen = length_; + if (s.length_ == 0) return min(ulen, pos); - const char* last = ptr_ + min(ulen - s.length_, pos) + s.length_; - const char* result = std::find_end(ptr_, last, s.ptr_, s.ptr_ + s.length_); - return result != last ? result - ptr_ : npos; + const char* last = ptr_ + min(ulen - s.length_, pos) + s.length_; + const char* result = std::find_end(ptr_, last, s.ptr_, s.ptr_ + s.length_); + return result != last ? result - ptr_ : npos; } // Search range is [0..pos] inclusive. If pos == npos, search everything. int StringPiece::rfind(char c, size_type pos) const { - // Note: memrchr() is not available on Windows. - if (length_ <= 0) return npos; - for (int i = min(pos, static_cast(length_ - 1)); - i >= 0; --i) { - if (ptr_[i] == c) { - return i; + // Note: memrchr() is not available on Windows. + if (length_ <= 0) return npos; + for (int i = min(pos, static_cast(length_ - 1)); i >= 0; --i) { + if (ptr_[i] == c) { + return i; + } } - } - return npos; + return npos; } // For each character in characters_wanted, sets the index corresponding @@ -114,110 +111,107 @@ int StringPiece::rfind(char c, size_type pos) const { // the possible values of an unsigned char. 
Thus it should be be declared // as follows: // bool table[UCHAR_MAX + 1] -static inline void BuildLookupTable(StringPiece characters_wanted, - bool* table) { - const int length = characters_wanted.length(); - const char* const data = characters_wanted.data(); - for (int i = 0; i < length; ++i) { - table[static_cast(data[i])] = true; - } +static inline void BuildLookupTable(StringPiece characters_wanted, bool* table) { + const int length = characters_wanted.length(); + const char* const data = characters_wanted.data(); + for (int i = 0; i < length; ++i) { + table[static_cast(data[i])] = true; + } } int StringPiece::find_first_of(StringPiece s, size_type pos) const { - if (length_ <= 0 || s.length_ <= 0) { - return npos; - } - // Avoid the cost of BuildLookupTable() for a single-character search. - if (s.length_ == 1) return find_first_of(s.ptr_[0], pos); - - bool lookup[UCHAR_MAX + 1] = { false }; - BuildLookupTable(s, lookup); - for (int i = pos; i < length_; ++i) { - if (lookup[static_cast(ptr_[i])]) { - return i; + if (length_ <= 0 || s.length_ <= 0) { + return npos; } - } - return npos; + // Avoid the cost of BuildLookupTable() for a single-character search. + if (s.length_ == 1) return find_first_of(s.ptr_[0], pos); + + bool lookup[UCHAR_MAX + 1] = {false}; + BuildLookupTable(s, lookup); + for (int i = pos; i < length_; ++i) { + if (lookup[static_cast(ptr_[i])]) { + return i; + } + } + return npos; } int StringPiece::find_first_not_of(StringPiece s, size_type pos) const { - if (length_ <= 0) return npos; - if (s.length_ <= 0) return 0; - // Avoid the cost of BuildLookupTable() for a single-character search. 
- if (s.length_ == 1) return find_first_not_of(s.ptr_[0], pos); - - bool lookup[UCHAR_MAX + 1] = { false }; - BuildLookupTable(s, lookup); - for (int i = pos; i < length_; ++i) { - if (!lookup[static_cast(ptr_[i])]) { - return i; + if (length_ <= 0) return npos; + if (s.length_ <= 0) return 0; + // Avoid the cost of BuildLookupTable() for a single-character search. + if (s.length_ == 1) return find_first_not_of(s.ptr_[0], pos); + + bool lookup[UCHAR_MAX + 1] = {false}; + BuildLookupTable(s, lookup); + for (int i = pos; i < length_; ++i) { + if (!lookup[static_cast(ptr_[i])]) { + return i; + } } - } - return npos; + return npos; } int StringPiece::find_first_not_of(char c, size_type pos) const { - if (length_ <= 0) return npos; + if (length_ <= 0) return npos; - for (; pos < static_cast(length_); ++pos) { - if (ptr_[pos] != c) { - return pos; + for (; pos < static_cast(length_); ++pos) { + if (ptr_[pos] != c) { + return pos; + } } - } - return npos; + return npos; } int StringPiece::find_last_of(StringPiece s, size_type pos) const { - if (length_ <= 0 || s.length_ <= 0) return npos; - // Avoid the cost of BuildLookupTable() for a single-character search. - if (s.length_ == 1) return find_last_of(s.ptr_[0], pos); - - bool lookup[UCHAR_MAX + 1] = { false }; - BuildLookupTable(s, lookup); - for (int i = min(pos, static_cast(length_ - 1)); - i >= 0; --i) { - if (lookup[static_cast(ptr_[i])]) { - return i; + if (length_ <= 0 || s.length_ <= 0) return npos; + // Avoid the cost of BuildLookupTable() for a single-character search. 
+ if (s.length_ == 1) return find_last_of(s.ptr_[0], pos); + + bool lookup[UCHAR_MAX + 1] = {false}; + BuildLookupTable(s, lookup); + for (int i = min(pos, static_cast(length_ - 1)); i >= 0; --i) { + if (lookup[static_cast(ptr_[i])]) { + return i; + } } - } - return npos; + return npos; } int StringPiece::find_last_not_of(StringPiece s, size_type pos) const { - if (length_ <= 0) return npos; + if (length_ <= 0) return npos; - int i = min(pos, static_cast(length_ - 1)); - if (s.length_ <= 0) return i; + int i = min(pos, static_cast(length_ - 1)); + if (s.length_ <= 0) return i; - // Avoid the cost of BuildLookupTable() for a single-character search. - if (s.length_ == 1) return find_last_not_of(s.ptr_[0], pos); + // Avoid the cost of BuildLookupTable() for a single-character search. + if (s.length_ == 1) return find_last_not_of(s.ptr_[0], pos); - bool lookup[UCHAR_MAX + 1] = { false }; - BuildLookupTable(s, lookup); - for (; i >= 0; --i) { - if (!lookup[static_cast(ptr_[i])]) { - return i; + bool lookup[UCHAR_MAX + 1] = {false}; + BuildLookupTable(s, lookup); + for (; i >= 0; --i) { + if (!lookup[static_cast(ptr_[i])]) { + return i; + } } - } - return npos; + return npos; } int StringPiece::find_last_not_of(char c, size_type pos) const { - if (length_ <= 0) return npos; + if (length_ <= 0) return npos; - for (int i = min(pos, static_cast(length_ - 1)); - i >= 0; --i) { - if (ptr_[i] != c) { - return i; + for (int i = min(pos, static_cast(length_ - 1)); i >= 0; --i) { + if (ptr_[i] != c) { + return i; + } } - } - return npos; + return npos; } StringPiece StringPiece::substr(size_type pos, size_type n) const { - if (pos > length_) pos = length_; - if (n > length_ - pos) n = length_ - pos; - return StringPiece(ptr_ + pos, n); + if (pos > length_) pos = length_; + if (n > length_ - pos) n = length_ - pos; + return StringPiece(ptr_ + pos, n); } const StringPiece::size_type StringPiece::npos = size_type(-1); diff --git a/be/src/gutil/strings/stringpiece.h 
b/be/src/gutil/strings/stringpiece.h index 0f59b38e20786b..c76ecf53457a04 100644 --- a/be/src/gutil/strings/stringpiece.h +++ b/be/src/gutil/strings/stringpiece.h @@ -112,231 +112,223 @@ #ifndef STRINGS_STRINGPIECE_H_ #define STRINGS_STRINGPIECE_H_ - #include +#include +#include + #include #include #include -#include -#include #include +#include "gutil/hash/hash.h" #include "gutil/integral_types.h" #include "gutil/port.h" -#include "gutil/type_traits.h" #include "gutil/strings/fastmem.h" -#include "gutil/hash/hash.h" +#include "gutil/type_traits.h" class StringPiece { - private: - const char* ptr_; - int length_; - - public: - // We provide non-explicit singleton constructors so users can pass - // in a "const char*" or a "string" wherever a "StringPiece" is - // expected. - // - // Style guide exception granted: - // http://goto/style-guide-exception-20978288 - StringPiece() : ptr_(NULL), length_(0) {} - StringPiece(const char* str) // NOLINT(runtime/explicit) - : ptr_(str), length_(0) { - if (str != NULL) { - size_t length = strlen(str); - assert(length <= static_cast(std::numeric_limits::max())); - length_ = static_cast(length); +private: + const char* ptr_; + int length_; + +public: + // We provide non-explicit singleton constructors so users can pass + // in a "const char*" or a "string" wherever a "StringPiece" is + // expected. 
+ // + // Style guide exception granted: + // http://goto/style-guide-exception-20978288 + StringPiece() : ptr_(NULL), length_(0) {} + StringPiece(const char* str) // NOLINT(runtime/explicit) + : ptr_(str), length_(0) { + if (str != NULL) { + size_t length = strlen(str); + assert(length <= static_cast(std::numeric_limits::max())); + length_ = static_cast(length); + } + } + StringPiece(const std::string& str) // NOLINT(runtime/explicit) + : ptr_(str.data()), length_(0) { + size_t length = str.size(); + assert(length <= static_cast(std::numeric_limits::max())); + length_ = static_cast(length); + } + StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { assert(len >= 0); } + + // Substring of another StringPiece. + // pos must be non-negative and <= x.length(). + StringPiece(StringPiece x, int pos); + // Substring of another StringPiece. + // pos must be non-negative and <= x.length(). + // len must be non-negative and will be pinned to at most x.length() - pos. + StringPiece(StringPiece x, int pos, int len); + + // data() may return a pointer to a buffer with embedded NULs, and the + // returned buffer may or may not be null terminated. Therefore it is + // typically a mistake to pass data() to a routine that expects a NUL + // terminated string. 
+ const char* data() const { return ptr_; } + int size() const { return length_; } + int length() const { return length_; } + bool empty() const { return length_ == 0; } + + void clear() { + ptr_ = NULL; + length_ = 0; + } + + void set(const char* data, int len) { + assert(len >= 0); + ptr_ = data; + length_ = len; + } + + void set(const char* str) { + ptr_ = str; + if (str != NULL) + length_ = static_cast(strlen(str)); + else + length_ = 0; + } + void set(const void* data, int len) { + ptr_ = reinterpret_cast(data); + length_ = len; + } + + char operator[](int i) const { + assert(0 <= i); + assert(i < length_); + return ptr_[i]; + } + + void remove_prefix(int n) { + assert(length_ >= n); + ptr_ += n; + length_ -= n; + } + + void remove_suffix(int n) { + assert(length_ >= n); + length_ -= n; + } + + // returns {-1, 0, 1} + int compare(StringPiece x) const { + const int min_size = length_ < x.length_ ? length_ : x.length_; + int r = memcmp(ptr_, x.ptr_, min_size); + if (r < 0) return -1; + if (r > 0) return 1; + if (length_ < x.length_) return -1; + if (length_ > x.length_) return 1; + return 0; + } + + std::string as_string() const { return ToString(); } + // We also define ToString() here, since many other string-like + // interfaces name the routine that converts to a C++ string + // "ToString", and it's confusing to have the method that does that + // for a StringPiece be called "as_string()". We also leave the + // "as_string()" method defined here for existing code. + std::string ToString() const { + if (ptr_ == NULL) return std::string(); + return std::string(data(), size()); } - } - StringPiece(const std::string& str) // NOLINT(runtime/explicit) - : ptr_(str.data()), length_(0) { - size_t length = str.size(); - assert(length <= static_cast(std::numeric_limits::max())); - length_ = static_cast(length); - } - StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { - assert(len >= 0); - } - - // Substring of another StringPiece. 
- // pos must be non-negative and <= x.length(). - StringPiece(StringPiece x, int pos); - // Substring of another StringPiece. - // pos must be non-negative and <= x.length(). - // len must be non-negative and will be pinned to at most x.length() - pos. - StringPiece(StringPiece x, int pos, int len); - - // data() may return a pointer to a buffer with embedded NULs, and the - // returned buffer may or may not be null terminated. Therefore it is - // typically a mistake to pass data() to a routine that expects a NUL - // terminated string. - const char* data() const { return ptr_; } - int size() const { return length_; } - int length() const { return length_; } - bool empty() const { return length_ == 0; } - - void clear() { - ptr_ = NULL; - length_ = 0; - } - - void set(const char* data, int len) { - assert(len >= 0); - ptr_ = data; - length_ = len; - } - - void set(const char* str) { - ptr_ = str; - if (str != NULL) - length_ = static_cast(strlen(str)); - else - length_ = 0; - } - void set(const void* data, int len) { - ptr_ = reinterpret_cast(data); - length_ = len; - } - - char operator[](int i) const { - assert(0 <= i); - assert(i < length_); - return ptr_[i]; - } - - void remove_prefix(int n) { - assert(length_ >= n); - ptr_ += n; - length_ -= n; - } - - void remove_suffix(int n) { - assert(length_ >= n); - length_ -= n; - } - - // returns {-1, 0, 1} - int compare(StringPiece x) const { - const int min_size = length_ < x.length_ ? length_ : x.length_; - int r = memcmp(ptr_, x.ptr_, min_size); - if (r < 0) return -1; - if (r > 0) return 1; - if (length_ < x.length_) return -1; - if (length_ > x.length_) return 1; - return 0; - } - - std::string as_string() const { - return ToString(); - } - // We also define ToString() here, since many other string-like - // interfaces name the routine that converts to a C++ string - // "ToString", and it's confusing to have the method that does that - // for a StringPiece be called "as_string()". 
We also leave the - // "as_string()" method defined here for existing code. - std::string ToString() const { - if (ptr_ == NULL) return std::string(); - return std::string(data(), size()); - } - - void CopyToString(std::string* target) const; - void AppendToString(std::string* target) const; - - bool starts_with(StringPiece x) const { - return (length_ >= x.length_) && (memcmp(ptr_, x.ptr_, x.length_) == 0); - } - - bool ends_with(StringPiece x) const { - return ((length_ >= x.length_) && - (memcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0)); - } - - // standard STL container boilerplate - typedef char value_type; - typedef const char* pointer; - typedef const char& reference; - typedef const char& const_reference; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - static const size_type npos; - typedef const char* const_iterator; - typedef const char* iterator; - typedef std::reverse_iterator const_reverse_iterator; - typedef std::reverse_iterator reverse_iterator; - iterator begin() const { return ptr_; } - iterator end() const { return ptr_ + length_; } - const_reverse_iterator rbegin() const { - return const_reverse_iterator(ptr_ + length_); - } - const_reverse_iterator rend() const { - return const_reverse_iterator(ptr_); - } - // STLS says return size_type, but Google says return int - int max_size() const { return length_; } - int capacity() const { return length_; } - - // cpplint.py emits a false positive [build/include_what_you_use] - int copy(char* buf, size_type n, size_type pos = 0) const; // NOLINT - - bool contains(StringPiece s) const; - - int find(StringPiece s, size_type pos = 0) const; - int find(char c, size_type pos = 0) const; - int rfind(StringPiece s, size_type pos = npos) const; - int rfind(char c, size_type pos = npos) const; - - int find_first_of(StringPiece s, size_type pos = 0) const; - int find_first_of(char c, size_type pos = 0) const { return find(c, pos); } - int find_first_not_of(StringPiece s, size_type 
pos = 0) const; - int find_first_not_of(char c, size_type pos = 0) const; - int find_last_of(StringPiece s, size_type pos = npos) const; - int find_last_of(char c, size_type pos = npos) const { return rfind(c, pos); } - int find_last_not_of(StringPiece s, size_type pos = npos) const; - int find_last_not_of(char c, size_type pos = npos) const; - - StringPiece substr(size_type pos, size_type n = npos) const; + + void CopyToString(std::string* target) const; + void AppendToString(std::string* target) const; + + bool starts_with(StringPiece x) const { + return (length_ >= x.length_) && (memcmp(ptr_, x.ptr_, x.length_) == 0); + } + + bool ends_with(StringPiece x) const { + return ((length_ >= x.length_) && + (memcmp(ptr_ + (length_ - x.length_), x.ptr_, x.length_) == 0)); + } + + // standard STL container boilerplate + typedef char value_type; + typedef const char* pointer; + typedef const char& reference; + typedef const char& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + static const size_type npos; + typedef const char* const_iterator; + typedef const char* iterator; + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + iterator begin() const { return ptr_; } + iterator end() const { return ptr_ + length_; } + const_reverse_iterator rbegin() const { return const_reverse_iterator(ptr_ + length_); } + const_reverse_iterator rend() const { return const_reverse_iterator(ptr_); } + // STLS says return size_type, but Google says return int + int max_size() const { return length_; } + int capacity() const { return length_; } + + // cpplint.py emits a false positive [build/include_what_you_use] + int copy(char* buf, size_type n, size_type pos = 0) const; // NOLINT + + bool contains(StringPiece s) const; + + int find(StringPiece s, size_type pos = 0) const; + int find(char c, size_type pos = 0) const; + int rfind(StringPiece s, size_type pos = npos) const; + int rfind(char c, size_type 
pos = npos) const; + + int find_first_of(StringPiece s, size_type pos = 0) const; + int find_first_of(char c, size_type pos = 0) const { return find(c, pos); } + int find_first_not_of(StringPiece s, size_type pos = 0) const; + int find_first_not_of(char c, size_type pos = 0) const; + int find_last_of(StringPiece s, size_type pos = npos) const; + int find_last_of(char c, size_type pos = npos) const { return rfind(c, pos); } + int find_last_not_of(StringPiece s, size_type pos = npos) const; + int find_last_not_of(char c, size_type pos = npos) const; + + StringPiece substr(size_type pos, size_type n = npos) const; }; #ifndef SWIG -DECLARE_POD(StringPiece); // So vector becomes really fast +DECLARE_POD(StringPiece); // So vector becomes really fast #endif // This large function is defined inline so that in a fairly common case where // one of the arguments is a literal, the compiler can elide a lot of the // following comparisons. inline bool operator==(StringPiece x, StringPiece y) { - int len = x.size(); - if (len != y.size()) { - return false; - } + int len = x.size(); + if (len != y.size()) { + return false; + } - return x.data() == y.data() || len <= 0 || - strings::memeq(x.data(), y.data(), len); + return x.data() == y.data() || len <= 0 || strings::memeq(x.data(), y.data(), len); } inline bool operator!=(StringPiece x, StringPiece y) { - return !(x == y); + return !(x == y); } inline bool operator<(StringPiece x, StringPiece y) { - const int min_size = x.size() < y.size() ? x.size() : y.size(); - const int r = memcmp(x.data(), y.data(), min_size); - return (r < 0) || (r == 0 && x.size() < y.size()); + const int min_size = x.size() < y.size() ? 
x.size() : y.size(); + const int r = memcmp(x.data(), y.data(), min_size); + return (r < 0) || (r == 0 && x.size() < y.size()); } inline bool operator>(StringPiece x, StringPiece y) { - return y < x; + return y < x; } inline bool operator<=(StringPiece x, StringPiece y) { - return !(x > y); + return !(x > y); } inline bool operator>=(StringPiece x, StringPiece y) { - return !(x < y); + return !(x < y); } class StringPiece; -template struct GoodFastHash; +template +struct GoodFastHash; // ------------------------------------------------------------------ // Functions used to create STL containers that use StringPiece @@ -349,29 +341,25 @@ template struct GoodFastHash; #ifndef SWIG namespace std { -template<> struct hash { - size_t operator()(StringPiece s) const; +template <> +struct hash { + size_t operator()(StringPiece s) const; }; -} // namespace std - +} // namespace std // An implementation of GoodFastHash for StringPiece. See // GoodFastHash values. -template<> struct GoodFastHash { - size_t operator()(StringPiece s) const { - return HashStringThoroughly(s.data(), s.size()); - } - // Less than operator, for MSVC. - bool operator()(const StringPiece& s1, const StringPiece& s2) const { - return s1 < s2; - } - static const size_t bucket_size = 4; // These are required by MSVC - static const size_t min_buckets = 8; // 4 and 8 are defaults. +template <> +struct GoodFastHash { + size_t operator()(StringPiece s) const { return HashStringThoroughly(s.data(), s.size()); } + // Less than operator, for MSVC. + bool operator()(const StringPiece& s1, const StringPiece& s2) const { return s1 < s2; } + static const size_t bucket_size = 4; // These are required by MSVC + static const size_t min_buckets = 8; // 4 and 8 are defaults. 
}; #endif // allow StringPiece to be logged extern ostream& operator<<(ostream& o, StringPiece piece); - -#endif // STRINGS_STRINGPIECE_H__ +#endif // STRINGS_STRINGPIECE_H__ diff --git a/be/src/gutil/strings/strip.cc b/be/src/gutil/strings/strip.cc index ff7449b769bbdc..05b54e357a1e35 100644 --- a/be/src/gutil/strings/strip.cc +++ b/be/src/gutil/strings/strip.cc @@ -8,6 +8,7 @@ #include #include + #include using std::copy; using std::max; @@ -22,33 +23,27 @@ using std::string; #include "gutil/strings/stringpiece.h" string StripPrefixString(StringPiece str, const StringPiece& prefix) { - if (str.starts_with(prefix)) - str.remove_prefix(prefix.length()); - return str.as_string(); + if (str.starts_with(prefix)) str.remove_prefix(prefix.length()); + return str.as_string(); } -bool TryStripPrefixString(StringPiece str, const StringPiece& prefix, - string* result) { - const bool has_prefix = str.starts_with(prefix); - if (has_prefix) - str.remove_prefix(prefix.length()); - str.as_string().swap(*result); - return has_prefix; +bool TryStripPrefixString(StringPiece str, const StringPiece& prefix, string* result) { + const bool has_prefix = str.starts_with(prefix); + if (has_prefix) str.remove_prefix(prefix.length()); + str.as_string().swap(*result); + return has_prefix; } string StripSuffixString(StringPiece str, const StringPiece& suffix) { - if (str.ends_with(suffix)) - str.remove_suffix(suffix.length()); - return str.as_string(); + if (str.ends_with(suffix)) str.remove_suffix(suffix.length()); + return str.as_string(); } -bool TryStripSuffixString(StringPiece str, const StringPiece& suffix, - string* result) { - const bool has_suffix = str.ends_with(suffix); - if (has_suffix) - str.remove_suffix(suffix.length()); - str.as_string().swap(*result); - return has_suffix; +bool TryStripSuffixString(StringPiece str, const StringPiece& suffix, string* result) { + const bool has_suffix = str.ends_with(suffix); + if (has_suffix) str.remove_suffix(suffix.length()); + 
str.as_string().swap(*result); + return has_suffix; } // ---------------------------------------------------------------------- @@ -57,195 +52,191 @@ bool TryStripSuffixString(StringPiece str, const StringPiece& suffix, // in 'remove') with the character 'replacewith'. // ---------------------------------------------------------------------- void StripString(char* str, StringPiece remove, char replacewith) { - for (; *str != '\0'; ++str) { - if (remove.find(*str) != StringPiece::npos) { - *str = replacewith; + for (; *str != '\0'; ++str) { + if (remove.find(*str) != StringPiece::npos) { + *str = replacewith; + } } - } } void StripString(char* str, int len, StringPiece remove, char replacewith) { - char* end = str + len; - for (; str < end; ++str) { - if (remove.find(*str) != StringPiece::npos) { - *str = replacewith; + char* end = str + len; + for (; str < end; ++str) { + if (remove.find(*str) != StringPiece::npos) { + *str = replacewith; + } } - } } void StripString(string* s, StringPiece remove, char replacewith) { - for (char& c : *s) { - if (remove.find(c) != StringPiece::npos) { - c = replacewith; + for (char& c : *s) { + if (remove.find(c) != StringPiece::npos) { + c = replacewith; + } } - } } // ---------------------------------------------------------------------- // StripWhiteSpace // ---------------------------------------------------------------------- void StripWhiteSpace(const char** str, int* len) { - // strip off trailing whitespace - while ((*len) > 0 && ascii_isspace((*str)[(*len)-1])) { - (*len)--; - } - - // strip off leading whitespace - while ((*len) > 0 && ascii_isspace((*str)[0])) { - (*len)--; - (*str)++; - } + // strip off trailing whitespace + while ((*len) > 0 && ascii_isspace((*str)[(*len) - 1])) { + (*len)--; + } + + // strip off leading whitespace + while ((*len) > 0 && ascii_isspace((*str)[0])) { + (*len)--; + (*str)++; + } } bool StripTrailingNewline(string* s) { - if (!s->empty() && (*s)[s->size() - 1] == '\n') { - if (s->size() > 1 
&& (*s)[s->size() - 2] == '\r') - s->resize(s->size() - 2); - else - s->resize(s->size() - 1); - return true; - } - return false; + if (!s->empty() && (*s)[s->size() - 1] == '\n') { + if (s->size() > 1 && (*s)[s->size() - 2] == '\r') + s->resize(s->size() - 2); + else + s->resize(s->size() - 1); + return true; + } + return false; } void StripWhiteSpace(string* str) { - int str_length = str->length(); - - // Strip off leading whitespace. - int first = 0; - while (first < str_length && ascii_isspace(str->at(first))) { - ++first; - } - // If entire string is white space. - if (first == str_length) { - str->clear(); - return; - } - if (first > 0) { - str->erase(0, first); - str_length -= first; - } - - // Strip off trailing whitespace. - int last = str_length - 1; - while (last >= 0 && ascii_isspace(str->at(last))) { - --last; - } - if (last != (str_length - 1) && last >= 0) { - str->erase(last + 1, string::npos); - } + int str_length = str->length(); + + // Strip off leading whitespace. + int first = 0; + while (first < str_length && ascii_isspace(str->at(first))) { + ++first; + } + // If entire string is white space. + if (first == str_length) { + str->clear(); + return; + } + if (first > 0) { + str->erase(0, first); + str_length -= first; + } + + // Strip off trailing whitespace. + int last = str_length - 1; + while (last >= 0 && ascii_isspace(str->at(last))) { + --last; + } + if (last != (str_length - 1) && last >= 0) { + str->erase(last + 1, string::npos); + } } // ---------------------------------------------------------------------- // Misc. 
stripping routines // ---------------------------------------------------------------------- void StripCurlyBraces(string* s) { - return StripBrackets('{', '}', s); + return StripBrackets('{', '}', s); } void StripBrackets(char left, char right, string* s) { - string::iterator opencurly = find(s->begin(), s->end(), left); - while (opencurly != s->end()) { - string::iterator closecurly = find(opencurly, s->end(), right); - if (closecurly == s->end()) - return; - opencurly = s->erase(opencurly, closecurly + 1); - opencurly = find(opencurly, s->end(), left); - } + string::iterator opencurly = find(s->begin(), s->end(), left); + while (opencurly != s->end()) { + string::iterator closecurly = find(opencurly, s->end(), right); + if (closecurly == s->end()) return; + opencurly = s->erase(opencurly, closecurly + 1); + opencurly = find(opencurly, s->end(), left); + } } void StripMarkupTags(string* s) { - string::iterator openbracket = find(s->begin(), s->end(), '<'); - while (openbracket != s->end()) { - string::iterator closebracket = find(openbracket, s->end(), '>'); - if (closebracket == s->end()) { - s->erase(openbracket, closebracket); - return; - } + string::iterator openbracket = find(s->begin(), s->end(), '<'); + while (openbracket != s->end()) { + string::iterator closebracket = find(openbracket, s->end(), '>'); + if (closebracket == s->end()) { + s->erase(openbracket, closebracket); + return; + } - openbracket = s->erase(openbracket, closebracket + 1); - openbracket = find(openbracket, s->end(), '<'); - } + openbracket = s->erase(openbracket, closebracket + 1); + openbracket = find(openbracket, s->end(), '<'); + } } string OutputWithMarkupTagsStripped(const string& s) { - string result(s); - StripMarkupTags(&result); - return result; + string result(s); + StripMarkupTags(&result); + return result; } - int TrimStringLeft(string* s, const StringPiece& remove) { - int i = 0; - while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) { - ++i; - } - if (i 
> 0) s->erase(0, i); - return i; + int i = 0; + while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) { + ++i; + } + if (i > 0) s->erase(0, i); + return i; } int TrimStringRight(string* s, const StringPiece& remove) { - int i = s->size(), trimmed = 0; - while (i > 0 && memchr(remove.data(), (*s)[i-1], remove.size())) { - --i; - } - if (i < s->size()) { - trimmed = s->size() - i; - s->erase(i); - } - return trimmed; + int i = s->size(), trimmed = 0; + while (i > 0 && memchr(remove.data(), (*s)[i - 1], remove.size())) { + --i; + } + if (i < s->size()) { + trimmed = s->size() - i; + s->erase(i); + } + return trimmed; } // ---------------------------------------------------------------------- // Various removal routines // ---------------------------------------------------------------------- int strrm(char* str, char c) { - char *src, *dest; - for (src = dest = str; *src != '\0'; ++src) - if (*src != c) *(dest++) = *src; - *dest = '\0'; - return dest - str; + char *src, *dest; + for (src = dest = str; *src != '\0'; ++src) + if (*src != c) *(dest++) = *src; + *dest = '\0'; + return dest - str; } int memrm(char* str, int strlen, char c) { - char *src, *dest; - for (src = dest = str; strlen-- > 0; ++src) - if (*src != c) *(dest++) = *src; - return dest - str; + char *src, *dest; + for (src = dest = str; strlen-- > 0; ++src) + if (*src != c) *(dest++) = *src; + return dest - str; } int strrmm(char* str, const char* chars) { - char *src, *dest; - for (src = dest = str; *src != '\0'; ++src) { - bool skip = false; - for (const char* c = chars; *c != '\0'; c++) { - if (*src == *c) { - skip = true; - break; - } + char *src, *dest; + for (src = dest = str; *src != '\0'; ++src) { + bool skip = false; + for (const char* c = chars; *c != '\0'; c++) { + if (*src == *c) { + skip = true; + break; + } + } + if (!skip) *(dest++) = *src; } - if (!skip) *(dest++) = *src; - } - *dest = '\0'; - return dest - str; + *dest = '\0'; + return dest - str; } int strrmm(string* 
str, const string& chars) { - size_t str_len = str->length(); - size_t in_index = str->find_first_of(chars); - if (in_index == string::npos) - return str_len; + size_t str_len = str->length(); + size_t in_index = str->find_first_of(chars); + if (in_index == string::npos) return str_len; - size_t out_index = in_index++; + size_t out_index = in_index++; - while (in_index < str_len) { - char c = (*str)[in_index++]; - if (chars.find(c) == string::npos) - (*str)[out_index++] = c; - } + while (in_index < str_len) { + char c = (*str)[in_index++]; + if (chars.find(c) == string::npos) (*str)[out_index++] = c; + } - str->resize(out_index); - return out_index; + str->resize(out_index); + return out_index; } // ---------------------------------------------------------------------- @@ -256,29 +247,27 @@ int strrmm(string* str, const string& chars) { // Return the number of characters removed // ---------------------------------------------------------------------- int StripDupCharacters(string* s, char dup_char, int start_pos) { - if (start_pos < 0) - start_pos = 0; - - // remove dups by compaction in-place - int input_pos = start_pos; // current reader position - int output_pos = start_pos; // current writer position - const int input_end = s->size(); - while (input_pos < input_end) { - // keep current character - const char curr_char = (*s)[input_pos]; - if (output_pos != input_pos) // must copy - (*s)[output_pos] = curr_char; - ++input_pos; - ++output_pos; - - if (curr_char == dup_char) { // skip subsequent dups - while ((input_pos < input_end) && ((*s)[input_pos] == dup_char)) + if (start_pos < 0) start_pos = 0; + + // remove dups by compaction in-place + int input_pos = start_pos; // current reader position + int output_pos = start_pos; // current writer position + const int input_end = s->size(); + while (input_pos < input_end) { + // keep current character + const char curr_char = (*s)[input_pos]; + if (output_pos != input_pos) // must copy + (*s)[output_pos] = 
curr_char; ++input_pos; + ++output_pos; + + if (curr_char == dup_char) { // skip subsequent dups + while ((input_pos < input_end) && ((*s)[input_pos] == dup_char)) ++input_pos; + } } - } - const int num_deleted = input_pos - output_pos; - s->resize(s->size() - num_deleted); - return num_deleted; + const int num_deleted = input_pos - output_pos; + s->resize(s->size() - num_deleted); + return num_deleted; } // ---------------------------------------------------------------------- @@ -286,58 +275,56 @@ int StripDupCharacters(string* s, char dup_char, int start_pos) { // Remove leading, trailing, and duplicate internal whitespace. // ---------------------------------------------------------------------- void RemoveExtraWhitespace(string* s) { - assert(s != nullptr); - // Empty strings clearly have no whitespace, and this code assumes that - // string length is greater than 0 - if (s->empty()) - return; - - int input_pos = 0; // current reader position - int output_pos = 0; // current writer position - const int input_end = s->size(); - // Strip off leading space - while (input_pos < input_end && ascii_isspace((*s)[input_pos])) input_pos++; - - while (input_pos < input_end - 1) { - char c = (*s)[input_pos]; - char next = (*s)[input_pos + 1]; - // Copy each non-whitespace character to the right position. - // For a block of whitespace, print the last one. 
- if (!ascii_isspace(c) || !ascii_isspace(next)) { - if (output_pos != input_pos) { // only copy if needed - (*s)[output_pos] = c; - } - output_pos++; + assert(s != nullptr); + // Empty strings clearly have no whitespace, and this code assumes that + // string length is greater than 0 + if (s->empty()) return; + + int input_pos = 0; // current reader position + int output_pos = 0; // current writer position + const int input_end = s->size(); + // Strip off leading space + while (input_pos < input_end && ascii_isspace((*s)[input_pos])) input_pos++; + + while (input_pos < input_end - 1) { + char c = (*s)[input_pos]; + char next = (*s)[input_pos + 1]; + // Copy each non-whitespace character to the right position. + // For a block of whitespace, print the last one. + if (!ascii_isspace(c) || !ascii_isspace(next)) { + if (output_pos != input_pos) { // only copy if needed + (*s)[output_pos] = c; + } + output_pos++; + } + input_pos++; } - input_pos++; - } - // Pick up the last character if needed. - char c = (*s)[input_end - 1]; - if (!ascii_isspace(c)) (*s)[output_pos++] = c; + // Pick up the last character if needed. + char c = (*s)[input_end - 1]; + if (!ascii_isspace(c)) (*s)[output_pos++] = c; - s->resize(output_pos); + s->resize(output_pos); } //------------------------------------------------------------------------ // See comment in header file for a complete description. 
//------------------------------------------------------------------------ void StripLeadingWhiteSpace(string* str) { - char const* const leading = StripLeadingWhiteSpace( - const_cast(str->c_str())); - if (leading != nullptr) { - string const tmp(leading); - str->assign(tmp); - } else { - str->assign(""); - } + char const* const leading = StripLeadingWhiteSpace(const_cast(str->c_str())); + if (leading != nullptr) { + string const tmp(leading); + str->assign(tmp); + } else { + str->assign(""); + } } void StripTrailingWhitespace(string* const s) { - string::size_type i; - for (i = s->size(); i > 0 && ascii_isspace((*s)[i - 1]); --i) { - } + string::size_type i; + for (i = s->size(); i > 0 && ascii_isspace((*s)[i - 1]); --i) { + } - s->resize(i); + s->resize(i); } // ---------------------------------------------------------------------- @@ -353,26 +340,26 @@ void StripTrailingWhitespace(string* const s) { // "first,last::(area)phone, ::zip" -> "first last area phone zip" // ---------------------------------------------------------------------- void TrimRunsInString(string* s, StringPiece remove) { - string::iterator dest = s->begin(); - string::iterator src_end = s->end(); - for (string::iterator src = s->begin(); src != src_end; ) { - if (remove.find(*src) == StringPiece::npos) { - *(dest++) = *(src++); - } else { - // Skip to the end of this run of chars that are in 'remove'. - for (++src; src != src_end; ++src) { + string::iterator dest = s->begin(); + string::iterator src_end = s->end(); + for (string::iterator src = s->begin(); src != src_end;) { if (remove.find(*src) == StringPiece::npos) { - if (dest != s->begin()) { - // This is an internal run; collapse it. - *(dest++) = remove[0]; - } - *(dest++) = *(src++); - break; + *(dest++) = *(src++); + } else { + // Skip to the end of this run of chars that are in 'remove'. 
+ for (++src; src != src_end; ++src) { + if (remove.find(*src) == StringPiece::npos) { + if (dest != s->begin()) { + // This is an internal run; collapse it. + *(dest++) = remove[0]; + } + *(dest++) = *(src++); + break; + } + } } - } } - } - s->erase(dest, src_end); + s->erase(dest, src_end); } // ---------------------------------------------------------------------- @@ -380,5 +367,5 @@ void TrimRunsInString(string* s, StringPiece remove) { // Removes any internal \0 characters from the string. // ---------------------------------------------------------------------- void RemoveNullsInString(string* s) { - s->erase(remove(s->begin(), s->end(), '\0'), s->end()); + s->erase(remove(s->begin(), s->end(), '\0'), s->end()); } diff --git a/be/src/gutil/strings/strip.h b/be/src/gutil/strings/strip.h index cdde845a4bae12..e18bea2999096b 100644 --- a/be/src/gutil/strings/strip.h +++ b/be/src/gutil/strings/strip.h @@ -8,6 +8,7 @@ #define STRINGS_STRIP_H_ #include + #include using std::string; @@ -22,20 +23,17 @@ string StripPrefixString(StringPiece str, const StringPiece& prefix); // Like StripPrefixString, but return true if the prefix was // successfully matched. Write the output to *result. // It is safe for result to point back to the input string. -bool TryStripPrefixString(StringPiece str, const StringPiece& prefix, - string* result); +bool TryStripPrefixString(StringPiece str, const StringPiece& prefix, string* result); // Given a string and a putative suffix, returns the string minus the // suffix string if the suffix matches, otherwise the original // string. string StripSuffixString(StringPiece str, const StringPiece& suffix); - // Like StripSuffixString, but return true if the suffix was // successfully matched. Write the output to *result. // It is safe for result to point back to the input string. 
-bool TryStripSuffixString(StringPiece str, const StringPiece& suffix, - string* result); +bool TryStripSuffixString(StringPiece str, const StringPiece& suffix, string* result); // ---------------------------------------------------------------------- // StripString @@ -45,10 +43,9 @@ bool TryStripSuffixString(StringPiece str, const StringPiece& suffix, // of places where they might cause a problem. // ---------------------------------------------------------------------- inline void StripString(char* str, char remove, char replacewith) { - for (; *str; str++) { - if (*str == remove) - *str = replacewith; - } + for (; *str; str++) { + if (*str == remove) *str = replacewith; + } } void StripString(char* str, StringPiece remove, char replacewith); @@ -100,18 +97,18 @@ void StripTrailingWhitespace(string* s); bool StripTrailingNewline(string* s); inline void StripWhiteSpace(char** str, int* len) { - // The "real" type for StripWhiteSpace is ForAll char types C, take - // (C, int) as input and return (C, int) as output. We're using the - // cast here to assert that we can take a char*, even though the - // function thinks it's assigning to const char*. - StripWhiteSpace(const_cast(str), len); + // The "real" type for StripWhiteSpace is ForAll char types C, take + // (C, int) as input and return (C, int) as output. We're using the + // cast here to assert that we can take a char*, even though the + // function thinks it's assigning to const char*. 
+ StripWhiteSpace(const_cast(str), len); } inline void StripWhiteSpace(StringPiece* str) { - const char* data = str->data(); - int len = str->size(); - StripWhiteSpace(&data, &len); - str->set(data, len); + const char* data = str->data(); + int len = str->size(); + StripWhiteSpace(&data, &len); + str->set(data, len); } void StripWhiteSpace(string* str); @@ -120,12 +117,11 @@ namespace strings { template inline void StripWhiteSpaceInCollection(Collection* collection) { - for (typename Collection::iterator it = collection->begin(); - it != collection->end(); ++it) - StripWhiteSpace(&(*it)); + for (typename Collection::iterator it = collection->begin(); it != collection->end(); ++it) + StripWhiteSpace(&(*it)); } -} // namespace strings +} // namespace strings // ---------------------------------------------------------------------- // StripLeadingWhiteSpace @@ -135,20 +131,18 @@ inline void StripWhiteSpaceInCollection(Collection* collection) { // ---------------------------------------------------------------------- inline const char* StripLeadingWhiteSpace(const char* line) { - // skip leading whitespace - while (ascii_isspace(*line)) - ++line; + // skip leading whitespace + while (ascii_isspace(*line)) ++line; - if ('\0' == *line) // end of line, no non-whitespace - return NULL; + if ('\0' == *line) // end of line, no non-whitespace + return NULL; - return line; + return line; } // StripLeadingWhiteSpace for non-const strings. inline char* StripLeadingWhiteSpace(char* line) { - return const_cast( - StripLeadingWhiteSpace(const_cast(line))); + return const_cast(StripLeadingWhiteSpace(const_cast(line))); } void StripLeadingWhiteSpace(string* str); @@ -156,22 +150,19 @@ void StripLeadingWhiteSpace(string* str); // Remove leading, trailing, and duplicate internal whitespace. 
void RemoveExtraWhitespace(string* s); - // ---------------------------------------------------------------------- // SkipLeadingWhiteSpace // Returns str advanced past white space characters, if any. // Never returns NULL. "str" must be terminated by a null character. // ---------------------------------------------------------------------- inline const char* SkipLeadingWhiteSpace(const char* str) { - while (ascii_isspace(*str)) - ++str; - return str; + while (ascii_isspace(*str)) ++str; + return str; } inline char* SkipLeadingWhiteSpace(char* str) { - while (ascii_isspace(*str)) - ++str; - return str; + while (ascii_isspace(*str)) ++str; + return str; } // ---------------------------------------------------------------------- @@ -187,7 +178,6 @@ inline char* SkipLeadingWhiteSpace(char* str) { void StripCurlyBraces(string* s); void StripBrackets(char left, char right, string* s); - // ---------------------------------------------------------------------- // StripMarkupTags // Strips everything enclosed in pairs of angle brackets and the angle @@ -228,7 +218,7 @@ int TrimStringRight(string* s, const StringPiece& remove); // end of the string. 
// ---------------------------------------------------------------------- inline int TrimString(string* s, const StringPiece& remove) { - return TrimStringRight(s, remove) + TrimStringLeft(s, remove); + return TrimStringRight(s, remove) + TrimStringLeft(s, remove); } // ---------------------------------------------------------------------- @@ -269,4 +259,4 @@ int memrm(char* str, int strlen, char c); int strrmm(char* str, const char* chars); int strrmm(string* str, const string& chars); -#endif // STRINGS_STRIP_H_ +#endif // STRINGS_STRIP_H_ diff --git a/be/src/gutil/strings/substitute.cc b/be/src/gutil/strings/substitute.cc index 15f4e743f85d36..dafee40c558531 100644 --- a/be/src/gutil/strings/substitute.cc +++ b/be/src/gutil/strings/substitute.cc @@ -3,10 +3,11 @@ #include "gutil/strings/substitute.h" #include + #include "gutil/macros.h" +#include "gutil/stl_util.h" #include "gutil/strings/ascii_ctype.h" #include "gutil/strings/escaping.h" -#include "gutil/stl_util.h" namespace strings { @@ -17,116 +18,109 @@ const SubstituteArg SubstituteArg::NoArg; // Returns the number of args in arg_array which were passed explicitly // to Substitute(). 
static int CountSubstituteArgs(const SubstituteArg* const* args_array) { - int count = 0; - while (args_array[count] != &SubstituteArg::NoArg) { - ++count; - } - return count; + int count = 0; + while (args_array[count] != &SubstituteArg::NoArg) { + ++count; + } + return count; } namespace internal { -int SubstitutedSize(StringPiece format, - const SubstituteArg* const* args_array) { - int size = 0; - for (int i = 0; i < format.size(); i++) { - if (format[i] == '$') { - if (i+1 >= format.size()) { - LOG(DFATAL) << "Invalid strings::Substitute() format string: \"" - << CEscape(format) << "\"."; - return 0; - } else if (ascii_isdigit(format[i+1])) { - int index = format[i+1] - '0'; - if (args_array[index]->size() == -1) { - LOG(DFATAL) - << "strings::Substitute format string invalid: asked for \"$" - << index << "\", but only " << CountSubstituteArgs(args_array) - << " args were given. Full format string was: \"" - << CEscape(format) << "\"."; - return 0; +int SubstitutedSize(StringPiece format, const SubstituteArg* const* args_array) { + int size = 0; + for (int i = 0; i < format.size(); i++) { + if (format[i] == '$') { + if (i + 1 >= format.size()) { + LOG(DFATAL) << "Invalid strings::Substitute() format string: \"" << CEscape(format) + << "\"."; + return 0; + } else if (ascii_isdigit(format[i + 1])) { + int index = format[i + 1] - '0'; + if (args_array[index]->size() == -1) { + LOG(DFATAL) << "strings::Substitute format string invalid: asked for \"$" + << index << "\", but only " << CountSubstituteArgs(args_array) + << " args were given. Full format string was: \"" + << CEscape(format) << "\"."; + return 0; + } + size += args_array[index]->size(); + ++i; // Skip next char. + } else if (format[i + 1] == '$') { + ++size; + ++i; // Skip next char. + } else { + LOG(DFATAL) << "Invalid strings::Substitute() format string: \"" << CEscape(format) + << "\"."; + return 0; + } + } else { + ++size; } - size += args_array[index]->size(); - ++i; // Skip next char. 
- } else if (format[i+1] == '$') { - ++size; - ++i; // Skip next char. - } else { - LOG(DFATAL) << "Invalid strings::Substitute() format string: \"" - << CEscape(format) << "\"."; - return 0; - } - } else { - ++size; } - } - return size; + return size; } -char* SubstituteToBuffer(StringPiece format, - const SubstituteArg* const* args_array, - char* target) { - for (int i = 0; i < format.size(); i++) { - if (format[i] == '$') { - if (ascii_isdigit(format[i+1])) { - const SubstituteArg* src = args_array[format[i+1] - '0']; - memcpy(target, src->data(), src->size()); - target += src->size(); - ++i; // Skip next char. - } else if (format[i+1] == '$') { - *target++ = '$'; - ++i; // Skip next char. - } - } else { - *target++ = format[i]; +char* SubstituteToBuffer(StringPiece format, const SubstituteArg* const* args_array, char* target) { + for (int i = 0; i < format.size(); i++) { + if (format[i] == '$') { + if (ascii_isdigit(format[i + 1])) { + const SubstituteArg* src = args_array[format[i + 1] - '0']; + memcpy(target, src->data(), src->size()); + target += src->size(); + ++i; // Skip next char. + } else if (format[i + 1] == '$') { + *target++ = '$'; + ++i; // Skip next char. + } + } else { + *target++ = format[i]; + } } - } - return target; + return target; } } // namespace internal -void SubstituteAndAppend( - string* output, StringPiece format, - const SubstituteArg& arg0, const SubstituteArg& arg1, - const SubstituteArg& arg2, const SubstituteArg& arg3, - const SubstituteArg& arg4, const SubstituteArg& arg5, - const SubstituteArg& arg6, const SubstituteArg& arg7, - const SubstituteArg& arg8, const SubstituteArg& arg9) { - const SubstituteArg* const args_array[] = { - &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, nullptr - }; - - // Determine total size needed. - int size = SubstitutedSize(format, args_array); - if (size == 0) return; - - // Build the string. 
- int original_size = output->size(); - STLStringResizeUninitialized(output, original_size + size); - char* target = string_as_array(output) + original_size; - - target = SubstituteToBuffer(format, args_array, target); - DCHECK_EQ(target - output->data(), output->size()); +void SubstituteAndAppend(string* output, StringPiece format, const SubstituteArg& arg0, + const SubstituteArg& arg1, const SubstituteArg& arg2, + const SubstituteArg& arg3, const SubstituteArg& arg4, + const SubstituteArg& arg5, const SubstituteArg& arg6, + const SubstituteArg& arg7, const SubstituteArg& arg8, + const SubstituteArg& arg9) { + const SubstituteArg* const args_array[] = {&arg0, &arg1, &arg2, &arg3, &arg4, &arg5, + &arg6, &arg7, &arg8, &arg9, nullptr}; + + // Determine total size needed. + int size = SubstitutedSize(format, args_array); + if (size == 0) return; + + // Build the string. + int original_size = output->size(); + STLStringResizeUninitialized(output, original_size + size); + char* target = string_as_array(output) + original_size; + + target = SubstituteToBuffer(format, args_array, target); + DCHECK_EQ(target - output->data(), output->size()); } SubstituteArg::SubstituteArg(const void* value) { - COMPILE_ASSERT(sizeof(scratch_) >= sizeof(value) * 2 + 2, - fix_sizeof_scratch_); - if (value == nullptr) { - text_ = "NULL"; - size_ = strlen(text_); - } else { - char* ptr = scratch_ + sizeof(scratch_); - uintptr_t num = reinterpret_cast(value); - static const char kHexDigits[] = "0123456789abcdef"; - do { - *--ptr = kHexDigits[num & 0xf]; - num >>= 4; - } while (num != 0); - *--ptr = 'x'; - *--ptr = '0'; - text_ = ptr; - size_ = scratch_ + sizeof(scratch_) - ptr; - } + COMPILE_ASSERT(sizeof(scratch_) >= sizeof(value) * 2 + 2, fix_sizeof_scratch_); + if (value == nullptr) { + text_ = "NULL"; + size_ = strlen(text_); + } else { + char* ptr = scratch_ + sizeof(scratch_); + uintptr_t num = reinterpret_cast(value); + static const char kHexDigits[] = "0123456789abcdef"; + do { + 
*--ptr = kHexDigits[num & 0xf]; + num >>= 4; + } while (num != 0); + *--ptr = 'x'; + *--ptr = '0'; + text_ = ptr; + size_ = scratch_ + sizeof(scratch_) - ptr; + } } -} // namespace strings +} // namespace strings diff --git a/be/src/gutil/strings/substitute.h b/be/src/gutil/strings/substitute.h index 84d362a48d81d8..a6bbcde6259405 100644 --- a/be/src/gutil/strings/substitute.h +++ b/be/src/gutil/strings/substitute.h @@ -1,6 +1,7 @@ // Copyright 2008 Google Inc. All rights reserved. #include + #include using std::string; @@ -8,7 +9,6 @@ using std::string; #include "gutil/strings/numbers.h" #include "gutil/strings/stringpiece.h" - #ifndef STRINGS_SUBSTITUTE_H_ #define STRINGS_SUBSTITUTE_H_ @@ -61,132 +61,123 @@ namespace strings { // large strings, it may be orders of magnitude faster. // ---------------------------------------------------------------------- -namespace internal { // Implementation details. +namespace internal { // Implementation details. // This class has implicit constructors. // Style guide exception granted: // http://goto/style-guide-exception-20978288 class SubstituteArg { - public: - // We must explicitly overload char* so that the compiler doesn't try to - // cast it to bool to construct a DynamicSubstituteArg. Might as well - // overload const string& as well, since this allows us to avoid a temporary - // object. - inline SubstituteArg(const char* value) // NOLINT(runtime/explicit) - : text_(value), size_(value == NULL ? 
0 : strlen(text_)) {} - inline SubstituteArg(const string& value) // NOLINT(runtime/explicit) - : text_(value.data()), size_(value.size()) {} - inline SubstituteArg(const StringPiece& value) // NOLINT(runtime/explicit) - : text_(value.data()), size_(value.size()) {} - - // Primitives - // We don't overload for signed and unsigned char because if people are - // explicitly declaring their chars as signed or unsigned then they are - // probably actually using them as 8-bit integers and would probably - // prefer an integer representation. But, we don't really know. So, we - // make the caller decide what to do. - inline SubstituteArg(char value) // NOLINT(runtime/explicit) - : text_(scratch_), size_(1) { scratch_[0] = value; } - inline SubstituteArg(short value) // NOLINT(runtime/explicit) - : text_(scratch_), - size_(FastInt32ToBufferLeft(value, scratch_) - scratch_) {} - inline SubstituteArg(unsigned short value) // NOLINT(runtime/explicit) - : text_(scratch_), - size_(FastUInt32ToBufferLeft(value, scratch_) - scratch_) {} - inline SubstituteArg(int value) // NOLINT(runtime/explicit) - : text_(scratch_), - size_(FastInt32ToBufferLeft(value, scratch_) - scratch_) {} - inline SubstituteArg(unsigned int value) // NOLINT(runtime/explicit) - : text_(scratch_), - size_(FastUInt32ToBufferLeft(value, scratch_) - scratch_) {} - inline SubstituteArg(long value) // NOLINT(runtime/explicit) - : text_(scratch_), - size_((sizeof(value) == 4 ? FastInt32ToBufferLeft(value, scratch_) - : FastInt64ToBufferLeft(value, scratch_)) - - scratch_) {} - inline SubstituteArg(unsigned long value) // NOLINT(runtime/explicit) - : text_(scratch_), - size_((sizeof(value) == 4 ? 
FastUInt32ToBufferLeft(value, scratch_) - : FastUInt64ToBufferLeft(value, scratch_)) - - scratch_) {} - inline SubstituteArg(long long value) // NOLINT(runtime/explicit) - : text_(scratch_), - size_(FastInt64ToBufferLeft(value, scratch_) - scratch_) {} - inline SubstituteArg(unsigned long long value) // NOLINT(runtime/explicit) - : text_(scratch_), - size_(FastUInt64ToBufferLeft(value, scratch_) - scratch_) {} - inline SubstituteArg(float value) // NOLINT(runtime/explicit) - : text_(FloatToBuffer(value, scratch_)), size_(strlen(text_)) {} - inline SubstituteArg(double value) // NOLINT(runtime/explicit) - : text_(DoubleToBuffer(value, scratch_)), size_(strlen(text_)) {} - inline SubstituteArg(bool value) // NOLINT(runtime/explicit) - : text_(value ? "true" : "false"), size_(strlen(text_)) {} - // void* values, with the exception of char*, are printed as - // StringPrintf with format "%p" would ("0x"), with the - // exception of NULL, which is printed as "NULL". - SubstituteArg(const void* value); // NOLINT(runtime/explicit) - - inline const char* data() const { return text_; } - inline int size() const { return size_; } - - // Indicates that no argument was given. - static const SubstituteArg NoArg; - - private: - inline SubstituteArg() : text_(NULL), size_(-1) {} - - const char* text_; - int size_; - char scratch_[kFastToBufferSize]; +public: + // We must explicitly overload char* so that the compiler doesn't try to + // cast it to bool to construct a DynamicSubstituteArg. Might as well + // overload const string& as well, since this allows us to avoid a temporary + // object. + inline SubstituteArg(const char* value) // NOLINT(runtime/explicit) + : text_(value), size_(value == NULL ? 
0 : strlen(text_)) {} + inline SubstituteArg(const string& value) // NOLINT(runtime/explicit) + : text_(value.data()), size_(value.size()) {} + inline SubstituteArg(const StringPiece& value) // NOLINT(runtime/explicit) + : text_(value.data()), size_(value.size()) {} + + // Primitives + // We don't overload for signed and unsigned char because if people are + // explicitly declaring their chars as signed or unsigned then they are + // probably actually using them as 8-bit integers and would probably + // prefer an integer representation. But, we don't really know. So, we + // make the caller decide what to do. + inline SubstituteArg(char value) // NOLINT(runtime/explicit) + : text_(scratch_), size_(1) { + scratch_[0] = value; + } + inline SubstituteArg(short value) // NOLINT(runtime/explicit) + : text_(scratch_), size_(FastInt32ToBufferLeft(value, scratch_) - scratch_) {} + inline SubstituteArg(unsigned short value) // NOLINT(runtime/explicit) + : text_(scratch_), size_(FastUInt32ToBufferLeft(value, scratch_) - scratch_) {} + inline SubstituteArg(int value) // NOLINT(runtime/explicit) + : text_(scratch_), size_(FastInt32ToBufferLeft(value, scratch_) - scratch_) {} + inline SubstituteArg(unsigned int value) // NOLINT(runtime/explicit) + : text_(scratch_), size_(FastUInt32ToBufferLeft(value, scratch_) - scratch_) {} + inline SubstituteArg(long value) // NOLINT(runtime/explicit) + : text_(scratch_), + size_((sizeof(value) == 4 ? FastInt32ToBufferLeft(value, scratch_) + : FastInt64ToBufferLeft(value, scratch_)) - + scratch_) {} + inline SubstituteArg(unsigned long value) // NOLINT(runtime/explicit) + : text_(scratch_), + size_((sizeof(value) == 4 ? 
FastUInt32ToBufferLeft(value, scratch_) + : FastUInt64ToBufferLeft(value, scratch_)) - + scratch_) {} + inline SubstituteArg(long long value) // NOLINT(runtime/explicit) + : text_(scratch_), size_(FastInt64ToBufferLeft(value, scratch_) - scratch_) {} + inline SubstituteArg(unsigned long long value) // NOLINT(runtime/explicit) + : text_(scratch_), size_(FastUInt64ToBufferLeft(value, scratch_) - scratch_) {} + inline SubstituteArg(float value) // NOLINT(runtime/explicit) + : text_(FloatToBuffer(value, scratch_)), size_(strlen(text_)) {} + inline SubstituteArg(double value) // NOLINT(runtime/explicit) + : text_(DoubleToBuffer(value, scratch_)), size_(strlen(text_)) {} + inline SubstituteArg(bool value) // NOLINT(runtime/explicit) + : text_(value ? "true" : "false"), size_(strlen(text_)) {} + // void* values, with the exception of char*, are printed as + // StringPrintf with format "%p" would ("0x"), with the + // exception of NULL, which is printed as "NULL". + SubstituteArg(const void* value); // NOLINT(runtime/explicit) + + inline const char* data() const { return text_; } + inline int size() const { return size_; } + + // Indicates that no argument was given. + static const SubstituteArg NoArg; + +private: + inline SubstituteArg() : text_(NULL), size_(-1) {} + + const char* text_; + int size_; + char scratch_[kFastToBufferSize]; }; // Return the length of the resulting string after performing the given // substitution. -int SubstitutedSize(StringPiece format, - const SubstituteArg* const* args_array); +int SubstitutedSize(StringPiece format, const SubstituteArg* const* args_array); // Perform the given substitution into 'target'. 'target' must have // space for the result -- use SubstitutedSize() to determine how many // bytes are required. Returns a pointer to the next byte following // the result in 'target'. 
-char* SubstituteToBuffer(StringPiece format, - const SubstituteArg* const* args_array, - char* target); - -} // namespace internal - -void SubstituteAndAppend( - string* output, StringPiece format, - const internal::SubstituteArg& arg0 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg1 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg2 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg3 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg4 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg5 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg6 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg7 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg8 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg9 = internal::SubstituteArg::NoArg); - -inline string Substitute( - StringPiece format, - const internal::SubstituteArg& arg0 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg1 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg2 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg3 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg4 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg5 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg6 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg7 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg8 = internal::SubstituteArg::NoArg, - const internal::SubstituteArg& arg9 = internal::SubstituteArg::NoArg) { - string result; - SubstituteAndAppend(&result, format, arg0, arg1, arg2, arg3, arg4, - arg5, arg6, arg7, arg8, arg9); - return result; +char* SubstituteToBuffer(StringPiece format, const SubstituteArg* const* args_array, char* target); + +} // namespace internal + +void SubstituteAndAppend(string* 
output, StringPiece format, + const internal::SubstituteArg& arg0 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg1 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg2 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg3 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg4 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg5 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg6 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg7 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg8 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg9 = internal::SubstituteArg::NoArg); + +inline string Substitute(StringPiece format, + const internal::SubstituteArg& arg0 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg1 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg2 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg3 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg4 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg5 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg6 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg7 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg8 = internal::SubstituteArg::NoArg, + const internal::SubstituteArg& arg9 = internal::SubstituteArg::NoArg) { + string result; + SubstituteAndAppend(&result, format, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, + arg9); + return result; } -} // namespace strings +} // namespace strings -#endif // STRINGS_SUBSTITUTE_H_ +#endif // STRINGS_SUBSTITUTE_H_ diff --git a/be/src/gutil/strings/util.cc b/be/src/gutil/strings/util.cc index 0c1cdc04c464ad..e567ed4ef458cb 100644 --- a/be/src/gutil/strings/util.cc +++ b/be/src/gutil/strings/util.cc @@ -11,7 +11,8 @@ #include #include 
#include -#include // for FastTimeToBuffer() +#include // for FastTimeToBuffer() + #include using std::copy; using std::max; @@ -25,14 +26,15 @@ using std::string; using std::vector; #include + +#include "gutil/stl_util.h" // for string_as_array, STLAppendToString #include "gutil/strings/ascii_ctype.h" #include "gutil/strings/numbers.h" #include "gutil/strings/stringpiece.h" -#include "gutil/stl_util.h" // for string_as_array, STLAppendToString #include "gutil/utf/utf.h" #ifdef OS_WINDOWS -#ifdef min // windows.h defines this to something silly +#ifdef min // windows.h defines this to something silly #undef min #endif #endif @@ -42,141 +44,133 @@ using std::vector; // TODO(user): Probably belongs in //base:time_support.{cc|h}. static struct tm* PortableSafeGmtime(const time_t* timep, struct tm* result) { #ifdef OS_WINDOWS - return gmtime_s(result, timep) == 0 ? result : NULL; + return gmtime_s(result, timep) == 0 ? result : NULL; #else - return gmtime_r(timep, result); -#endif // OS_WINDOWS + return gmtime_r(timep, result); +#endif // OS_WINDOWS } -char* strnstr(const char* haystack, const char* needle, - size_t haystack_len) { - if (*needle == '\0') { - return const_cast(haystack); - } - size_t needle_len = strlen(needle); - char* where; - while ((where = strnchr(haystack, *needle, haystack_len)) != nullptr) { - if (where - haystack + needle_len > haystack_len) { - return nullptr; +char* strnstr(const char* haystack, const char* needle, size_t haystack_len) { + if (*needle == '\0') { + return const_cast(haystack); } - if (strncmp(where, needle, needle_len) == 0) { - return where; + size_t needle_len = strlen(needle); + char* where; + while ((where = strnchr(haystack, *needle, haystack_len)) != nullptr) { + if (where - haystack + needle_len > haystack_len) { + return nullptr; + } + if (strncmp(where, needle, needle_len) == 0) { + return where; + } + haystack_len -= where + 1 - haystack; + haystack = where + 1; } - haystack_len -= where + 1 - haystack; - haystack = 
where + 1; - } - return nullptr; + return nullptr; } -const char* strnprefix(const char* haystack, int haystack_size, - const char* needle, int needle_size) { - if (needle_size > haystack_size) { - return nullptr; - } else { - if (strncmp(haystack, needle, needle_size) == 0) { - return haystack + needle_size; +const char* strnprefix(const char* haystack, int haystack_size, const char* needle, + int needle_size) { + if (needle_size > haystack_size) { + return nullptr; } else { - return nullptr; + if (strncmp(haystack, needle, needle_size) == 0) { + return haystack + needle_size; + } else { + return nullptr; + } } - } } -const char* strncaseprefix(const char* haystack, int haystack_size, - const char* needle, int needle_size) { - if (needle_size > haystack_size) { - return nullptr; - } else { - if (strncasecmp(haystack, needle, needle_size) == 0) { - return haystack + needle_size; +const char* strncaseprefix(const char* haystack, int haystack_size, const char* needle, + int needle_size) { + if (needle_size > haystack_size) { + return nullptr; } else { - return nullptr; + if (strncasecmp(haystack, needle, needle_size) == 0) { + return haystack + needle_size; + } else { + return nullptr; + } } - } } char* strcasesuffix(char* str, const char* suffix) { - const int lenstr = strlen(str); - const int lensuffix = strlen(suffix); - char* strbeginningoftheend = str + lenstr - lensuffix; - - if (lenstr >= lensuffix && 0 == strcasecmp(strbeginningoftheend, suffix)) { - return (strbeginningoftheend); - } else { - return (nullptr); - } + const int lenstr = strlen(str); + const int lensuffix = strlen(suffix); + char* strbeginningoftheend = str + lenstr - lensuffix; + + if (lenstr >= lensuffix && 0 == strcasecmp(strbeginningoftheend, suffix)) { + return (strbeginningoftheend); + } else { + return (nullptr); + } } -const char* strnsuffix(const char* haystack, int haystack_size, - const char* needle, int needle_size) { - if (needle_size > haystack_size) { - return nullptr; - } else { 
- const char* start = haystack + haystack_size - needle_size; - if (strncmp(start, needle, needle_size) == 0) { - return start; +const char* strnsuffix(const char* haystack, int haystack_size, const char* needle, + int needle_size) { + if (needle_size > haystack_size) { + return nullptr; } else { - return nullptr; + const char* start = haystack + haystack_size - needle_size; + if (strncmp(start, needle, needle_size) == 0) { + return start; + } else { + return nullptr; + } } - } } -const char* strncasesuffix(const char* haystack, int haystack_size, - const char* needle, int needle_size) { - if (needle_size > haystack_size) { - return nullptr; - } else { - const char* start = haystack + haystack_size - needle_size; - if (strncasecmp(start, needle, needle_size) == 0) { - return start; +const char* strncasesuffix(const char* haystack, int haystack_size, const char* needle, + int needle_size) { + if (needle_size > haystack_size) { + return nullptr; } else { - return nullptr; + const char* start = haystack + haystack_size - needle_size; + if (strncasecmp(start, needle, needle_size) == 0) { + return start; + } else { + return nullptr; + } } - } } char* strchrnth(const char* str, const char& c, int n) { - if (str == nullptr) - return nullptr; - if (n <= 0) - return const_cast(str); - const char* sp; - int k = 0; - for (sp = str; *sp != '\0'; sp ++) { - if (*sp == c) { - ++k; - if (k >= n) - break; + if (str == nullptr) return nullptr; + if (n <= 0) return const_cast(str); + const char* sp; + int k = 0; + for (sp = str; *sp != '\0'; sp++) { + if (*sp == c) { + ++k; + if (k >= n) break; + } } - } - return (k < n) ? nullptr : const_cast(sp); + return (k < n) ? 
nullptr : const_cast(sp); } char* AdjustedLastPos(const char* str, char separator, int n) { - if ( str == nullptr ) - return nullptr; - const char* pos = nullptr; - if ( n > 0 ) - pos = strchrnth(str, separator, n); - - // if n <= 0 or separator appears fewer than n times, get the last occurrence - if ( pos == nullptr) - pos = strrchr(str, separator); - return const_cast(pos); -} + if (str == nullptr) return nullptr; + const char* pos = nullptr; + if (n > 0) pos = strchrnth(str, separator, n); + // if n <= 0 or separator appears fewer than n times, get the last occurrence + if (pos == nullptr) pos = strrchr(str, separator); + return const_cast(pos); +} // ---------------------------------------------------------------------- // Misc. routines // ---------------------------------------------------------------------- bool IsAscii(const char* str, int len) { - const char* end = str + len; - while (str < end) { - if (!ascii_isascii(*str++)) { - return false; + const char* end = str + len; + while (str < end) { + if (!ascii_isascii(*str++)) { + return false; + } } - } - return true; + return true; } // ---------------------------------------------------------------------- @@ -188,14 +182,13 @@ bool IsAscii(const char* str, int len) { // happened or not. 
// ---------------------------------------------------------------------- -string StringReplace(const StringPiece& s, const StringPiece& oldsub, - const StringPiece& newsub, bool replace_all) { - string ret; - StringReplace(s, oldsub, newsub, replace_all, &ret); - return ret; +string StringReplace(const StringPiece& s, const StringPiece& oldsub, const StringPiece& newsub, + bool replace_all) { + string ret; + StringReplace(s, oldsub, newsub, replace_all, &ret); + return ret; } - // ---------------------------------------------------------------------- // StringReplace() // Replace the "old" pattern with the "new" pattern in a string, @@ -203,27 +196,26 @@ string StringReplace(const StringPiece& s, const StringPiece& oldsub, // it only replaces the first instance of "old." // ---------------------------------------------------------------------- -void StringReplace(const StringPiece& s, const StringPiece& oldsub, - const StringPiece& newsub, bool replace_all, - string* res) { - if (oldsub.empty()) { - res->append(s.data(), s.length()); // If empty, append the given string. - return; - } - - StringPiece::size_type start_pos = 0; - StringPiece::size_type pos; - do { - pos = s.find(oldsub, start_pos); - if (pos == StringPiece::npos) { - break; +void StringReplace(const StringPiece& s, const StringPiece& oldsub, const StringPiece& newsub, + bool replace_all, string* res) { + if (oldsub.empty()) { + res->append(s.data(), s.length()); // If empty, append the given string. + return; } - res->append(s.data() + start_pos, pos - start_pos); - res->append(newsub.data(), newsub.length()); - // Start searching again after the "old". 
- start_pos = pos + oldsub.length(); - } while (replace_all); - res->append(s.data() + start_pos, s.length() - start_pos); + + StringPiece::size_type start_pos = 0; + StringPiece::size_type pos; + do { + pos = s.find(oldsub, start_pos); + if (pos == StringPiece::npos) { + break; + } + res->append(s.data() + start_pos, pos - start_pos); + res->append(newsub.data(), newsub.length()); + // Start searching again after the "old". + start_pos = pos + oldsub.length(); + } while (replace_all); + res->append(s.data() + start_pos, s.length() - start_pos); } // ---------------------------------------------------------------------- @@ -234,32 +226,29 @@ void StringReplace(const StringPiece& s, const StringPiece& oldsub, // NOTE: The string pieces must not overlap s. // ---------------------------------------------------------------------- -int GlobalReplaceSubstring(const StringPiece& substring, - const StringPiece& replacement, +int GlobalReplaceSubstring(const StringPiece& substring, const StringPiece& replacement, string* s) { - CHECK(s != nullptr); - if (s->empty() || substring.empty()) - return 0; - string tmp; - int num_replacements = 0; - size_t pos = 0; - for (size_t match_pos = s->find(substring.data(), pos, substring.length()); - match_pos != string::npos; - pos = match_pos + substring.length(), - match_pos = s->find(substring.data(), pos, substring.length())) { - ++num_replacements; - // Append the original content before the match. - tmp.append(*s, pos, match_pos - pos); - // Append the replacement for the match. - tmp.append(replacement.begin(), replacement.end()); - } - // Append the content after the last match. If no replacements were made, the - // original string is left untouched. 
- if (num_replacements > 0) { - tmp.append(*s, pos, s->length() - pos); - s->swap(tmp); - } - return num_replacements; + CHECK(s != nullptr); + if (s->empty() || substring.empty()) return 0; + string tmp; + int num_replacements = 0; + size_t pos = 0; + for (size_t match_pos = s->find(substring.data(), pos, substring.length()); + match_pos != string::npos; pos = match_pos + substring.length(), + match_pos = s->find(substring.data(), pos, substring.length())) { + ++num_replacements; + // Append the original content before the match. + tmp.append(*s, pos, match_pos - pos); + // Append the replacement for the match. + tmp.append(replacement.begin(), replacement.end()); + } + // Append the content after the last match. If no replacements were made, the + // original string is left untouched. + if (num_replacements > 0) { + tmp.append(*s, pos, s->length() - pos); + s->swap(tmp); + } + return num_replacements; } //--------------------------------------------------------------------------- @@ -269,22 +258,22 @@ int GlobalReplaceSubstring(const StringPiece& substring, // Order of v is *not* preserved. 
//--------------------------------------------------------------------------- void RemoveStrings(vector* v, const vector& indices) { - assert(v); - assert(indices.size() <= v->size()); - // go from largest index to smallest so that smaller indices aren't - // invalidated - for (int lcv = indices.size() - 1; lcv >= 0; --lcv) { + assert(v); + assert(indices.size() <= v->size()); + // go from largest index to smallest so that smaller indices aren't + // invalidated + for (int lcv = indices.size() - 1; lcv >= 0; --lcv) { #ifndef NDEBUG - // verify that indices is sorted least->greatest - if (indices.size() >= 2 && lcv > 0) - // use LT and not LE because we should never see repeat indices - CHECK_LT(indices[lcv-1], indices[lcv]); + // verify that indices is sorted least->greatest + if (indices.size() >= 2 && lcv > 0) + // use LT and not LE because we should never see repeat indices + CHECK_LT(indices[lcv - 1], indices[lcv]); #endif - assert(indices[lcv] >= 0); - assert(indices[lcv] < v->size()); - swap((*v)[indices[lcv]], v->back()); - v->pop_back(); - } + assert(indices[lcv] >= 0); + assert(indices[lcv] < v->size()); + swap((*v)[indices[lcv]], v->back()); + v->pop_back(); + } } // ---------------------------------------------------------------------- @@ -295,23 +284,22 @@ void RemoveStrings(vector* v, const vector& indices) { // This function uses ascii_tolower() instead of tolower(), for speed. // ---------------------------------------------------------------------- -char *gstrcasestr(const char* haystack, const char* needle) { - char c, sc; - size_t len; - - if ((c = *needle++) != 0) { - c = ascii_tolower(c); - len = strlen(needle); - do { - do { - if ((sc = *haystack++) == 0) - return nullptr; - } while (ascii_tolower(sc) != c); - } while (strncasecmp(haystack, needle, len) != 0); - haystack--; - } - // This is a const violation but strstr() also returns a char*. 
- return const_cast(haystack); +char* gstrcasestr(const char* haystack, const char* needle) { + char c, sc; + size_t len; + + if ((c = *needle++) != 0) { + c = ascii_tolower(c); + len = strlen(needle); + do { + do { + if ((sc = *haystack++) == 0) return nullptr; + } while (ascii_tolower(sc) != c); + } while (strncasecmp(haystack, needle, len) != 0); + haystack--; + } + // This is a const violation but strstr() also returns a char*. + return const_cast(haystack); } // ---------------------------------------------------------------------- @@ -322,22 +310,20 @@ char *gstrcasestr(const char* haystack, const char* needle) { // // This function uses ascii_tolower() instead of tolower(), for speed. // ---------------------------------------------------------------------- -const char *gstrncasestr(const char* haystack, const char* needle, size_t len) { - char c, sc; - - if ((c = *needle++) != 0) { - c = ascii_tolower(c); - size_t needle_len = strlen(needle); - do { - do { - if (len-- <= needle_len - || 0 == (sc = *haystack++)) - return nullptr; - } while (ascii_tolower(sc) != c); - } while (strncasecmp(haystack, needle, needle_len) != 0); - haystack--; - } - return haystack; +const char* gstrncasestr(const char* haystack, const char* needle, size_t len) { + char c, sc; + + if ((c = *needle++) != 0) { + c = ascii_tolower(c); + size_t needle_len = strlen(needle); + do { + do { + if (len-- <= needle_len || 0 == (sc = *haystack++)) return nullptr; + } while (ascii_tolower(sc) != c); + } while (strncasecmp(haystack, needle, needle_len) != 0); + haystack--; + } + return haystack; } // ---------------------------------------------------------------------- @@ -348,45 +334,41 @@ const char *gstrncasestr(const char* haystack, const char* needle, size_t len) { // // This function uses ascii_tolower() instead of tolower(), for speed. 
// ---------------------------------------------------------------------- -char *gstrncasestr(char* haystack, const char* needle, size_t len) { - return const_cast(gstrncasestr(static_cast(haystack), - needle, len)); +char* gstrncasestr(char* haystack, const char* needle, size_t len) { + return const_cast(gstrncasestr(static_cast(haystack), needle, len)); } // ---------------------------------------------------------------------- // gstrncasestr_split performs a case insensitive search // on (prefix, non_alpha, suffix). // ---------------------------------------------------------------------- -char *gstrncasestr_split(const char* str, - const char* prefix, char non_alpha, - const char* suffix, +char* gstrncasestr_split(const char* str, const char* prefix, char non_alpha, const char* suffix, size_t n) { - int prelen = prefix == nullptr ? 0 : strlen(prefix); - int suflen = suffix == nullptr ? 0 : strlen(suffix); - - // adjust the string and its length to avoid unnessary searching. - // an added benefit is to avoid unnecessary range checks in the if - // statement in the inner loop. - if (suflen + prelen >= n) return nullptr; - str += prelen; - n -= prelen; - n -= suflen; - - const char* where = nullptr; - - // for every occurance of non_alpha in the string ... - while ((where = static_cast( - memchr(str, non_alpha, n))) != nullptr) { - // ... test whether it is followed by suffix and preceded by prefix - if ((!suflen || strncasecmp(where + 1, suffix, suflen) == 0) && - (!prelen || strncasecmp(where - prelen, prefix, prelen) == 0)) { - return const_cast(where - prelen); + int prelen = prefix == nullptr ? 0 : strlen(prefix); + int suflen = suffix == nullptr ? 0 : strlen(suffix); + + // adjust the string and its length to avoid unnessary searching. + // an added benefit is to avoid unnecessary range checks in the if + // statement in the inner loop. 
+ if (suflen + prelen >= n) return nullptr; + str += prelen; + n -= prelen; + n -= suflen; + + const char* where = nullptr; + + // for every occurance of non_alpha in the string ... + while ((where = static_cast(memchr(str, non_alpha, n))) != nullptr) { + // ... test whether it is followed by suffix and preceded by prefix + if ((!suflen || strncasecmp(where + 1, suffix, suflen) == 0) && + (!prelen || strncasecmp(where - prelen, prefix, prelen) == 0)) { + return const_cast(where - prelen); + } + // if not, advance the pointer, and adjust the length according + n -= (where + 1) - str; + str = where + 1; } - // if not, advance the pointer, and adjust the length according - n -= (where + 1) - str; - str = where + 1; - } - return nullptr; + return nullptr; } // ---------------------------------------------------------------------- @@ -400,50 +382,44 @@ char *gstrncasestr_split(const char* str, // E.g. strcasestr_alnum("i use google all the time", " !!Google!! ") // returns pointer to "google all the time" // ---------------------------------------------------------------------- -char *strcasestr_alnum(const char *haystack, const char *needle) { - const char *haystack_ptr; - const char *needle_ptr; - - // Skip non-alnums at beginning - while ( !ascii_isalnum(*needle) ) - if ( *needle++ == '\0' ) - return const_cast(haystack); - needle_ptr = needle; - - // Skip non-alnums at beginning - while ( !ascii_isalnum(*haystack) ) - if ( *haystack++ == '\0' ) - return nullptr; - haystack_ptr = haystack; - - while ( *needle_ptr != '\0' ) { - // Non-alnums - advance - while ( !ascii_isalnum(*needle_ptr) ) - if ( *needle_ptr++ == '\0' ) - return const_cast(haystack); - - while ( !ascii_isalnum(*haystack_ptr) ) - if ( *haystack_ptr++ == '\0' ) - return nullptr; - - if ( ascii_tolower(*needle_ptr) == ascii_tolower(*haystack_ptr) ) { - // Case-insensitive match - advance - needle_ptr++; - haystack_ptr++; - } else { - // No match - rollback to next start point in haystack - haystack++; - 
while ( !ascii_isalnum(*haystack) ) - if ( *haystack++ == '\0' ) - return nullptr; - haystack_ptr = haystack; - needle_ptr = needle; +char* strcasestr_alnum(const char* haystack, const char* needle) { + const char* haystack_ptr; + const char* needle_ptr; + + // Skip non-alnums at beginning + while (!ascii_isalnum(*needle)) + if (*needle++ == '\0') return const_cast(haystack); + needle_ptr = needle; + + // Skip non-alnums at beginning + while (!ascii_isalnum(*haystack)) + if (*haystack++ == '\0') return nullptr; + haystack_ptr = haystack; + + while (*needle_ptr != '\0') { + // Non-alnums - advance + while (!ascii_isalnum(*needle_ptr)) + if (*needle_ptr++ == '\0') return const_cast(haystack); + + while (!ascii_isalnum(*haystack_ptr)) + if (*haystack_ptr++ == '\0') return nullptr; + + if (ascii_tolower(*needle_ptr) == ascii_tolower(*haystack_ptr)) { + // Case-insensitive match - advance + needle_ptr++; + haystack_ptr++; + } else { + // No match - rollback to next start point in haystack + haystack++; + while (!ascii_isalnum(*haystack)) + if (*haystack++ == '\0') return nullptr; + haystack_ptr = haystack; + needle_ptr = needle; + } } - } - return const_cast(haystack); + return const_cast(haystack); } - // ---------------------------------------------------------------------- // CountSubstring() // Return the number times a "substring" appears in the "text" @@ -453,15 +429,15 @@ char *strcasestr_alnum(const char *haystack, const char *needle) { // DO NOT pass in long "text". 
// ---------------------------------------------------------------------- int CountSubstring(StringPiece text, StringPiece substring) { - CHECK_GT(substring.length(), 0); - - int count = 0; - StringPiece::size_type curr = 0; - while (StringPiece::npos != (curr = text.find(substring, curr))) { - ++count; - ++curr; - } - return count; + CHECK_GT(substring.length(), 0); + + int count = 0; + StringPiece::size_type curr = 0; + while (StringPiece::npos != (curr = text.find(substring, curr))) { + ++count; + ++curr; + } + return count; } // ---------------------------------------------------------------------- @@ -473,82 +449,77 @@ int CountSubstring(StringPiece text, StringPiece substring) { // Like strstr(), returns haystack if needle is empty, or NULL if // either needle/haystack is NULL. // ---------------------------------------------------------------------- -const char* strstr_delimited(const char* haystack, - const char* needle, - char delim) { - if (!needle || !haystack) return nullptr; - if (*needle == '\0') return haystack; - - int needle_len = strlen(needle); - - while (true) { - // Skip any leading delimiters. - while (*haystack == delim) ++haystack; - - // Walk down the haystack, matching every character in the needle. - const char* this_match = haystack; - int i = 0; - for (; i < needle_len; i++) { - if (*haystack != needle[i]) { - // We ran out of haystack or found a non-matching character. - break; - } - ++haystack; - } - - // If we matched the whole needle, ensure that it's properly delimited. - if (i == needle_len && (*haystack == '\0' || *haystack == delim)) { - return this_match; - } - - // No match. Consume non-delimiter characters until we run out of them. 
- while (*haystack != delim) { - if (*haystack == '\0') return nullptr; - ++haystack; +const char* strstr_delimited(const char* haystack, const char* needle, char delim) { + if (!needle || !haystack) return nullptr; + if (*needle == '\0') return haystack; + + int needle_len = strlen(needle); + + while (true) { + // Skip any leading delimiters. + while (*haystack == delim) ++haystack; + + // Walk down the haystack, matching every character in the needle. + const char* this_match = haystack; + int i = 0; + for (; i < needle_len; i++) { + if (*haystack != needle[i]) { + // We ran out of haystack or found a non-matching character. + break; + } + ++haystack; + } + + // If we matched the whole needle, ensure that it's properly delimited. + if (i == needle_len && (*haystack == '\0' || *haystack == delim)) { + return this_match; + } + + // No match. Consume non-delimiter characters until we run out of them. + while (*haystack != delim) { + if (*haystack == '\0') return nullptr; + ++haystack; + } } - } - LOG(FATAL) << "Unreachable statement"; - return nullptr; + LOG(FATAL) << "Unreachable statement"; + return nullptr; } - // ---------------------------------------------------------------------- // Older versions of libc have a buggy strsep. 
// ---------------------------------------------------------------------- char* gstrsep(char** stringp, const char* delim) { - char *s; - const char *spanp; - int c, sc; - char *tok; - - if ((s = *stringp) == nullptr) - return nullptr; + char* s; + const char* spanp; + int c, sc; + char* tok; + + if ((s = *stringp) == nullptr) return nullptr; + + tok = s; + while (true) { + c = *s++; + spanp = delim; + do { + if ((sc = *spanp++) == c) { + if (c == 0) + s = nullptr; + else + s[-1] = 0; + *stringp = s; + return tok; + } + } while (sc != 0); + } - tok = s; - while (true) { - c = *s++; - spanp = delim; - do { - if ((sc = *spanp++) == c) { - if (c == 0) - s = nullptr; - else - s[-1] = 0; - *stringp = s; - return tok; - } - } while (sc != 0); - } - - return nullptr; /* should not happen */ + return nullptr; /* should not happen */ } void FastStringAppend(string* s, const char* data, int len) { - STLAppendToString(s, data, len); + STLAppendToString(s, data, len); } - // TODO(user): add a microbenchmark and revisit // the optimizations done here. 
// @@ -557,110 +528,146 @@ void FastStringAppend(string* s, const char* data, int len) { extern const char two_ASCII_digits[100][2]; const char two_ASCII_digits[100][2] = { - {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, - {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, - {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'}, - {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'}, - {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, - {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'}, - {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, - {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, - {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'}, - {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'}, - {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, - {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, - {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, - {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, - {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, - {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'}, - {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'}, - {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, - {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, - {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'} -}; + {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, {'0', '6'}, + {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, + {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'}, {'2', '0'}, + {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, + {'2', '8'}, {'2', '9'}, {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, + {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'}, + {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', 
'7'}, {'4', '8'}, + {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, {'5', '5'}, + {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, {'6', '0'}, {'6', '1'}, {'6', '2'}, + {'6', '3'}, {'6', '4'}, {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, + {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, + {'7', '7'}, {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, + {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, {'9', '0'}, + {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, {'9', '6'}, {'9', '7'}, + {'9', '8'}, {'9', '9'}}; static inline void PutTwoDigits(int i, char* p) { - DCHECK_GE(i, 0); - DCHECK_LT(i, 100); - p[0] = two_ASCII_digits[i][0]; - p[1] = two_ASCII_digits[i][1]; + DCHECK_GE(i, 0); + DCHECK_LT(i, 100); + p[0] = two_ASCII_digits[i][0]; + p[1] = two_ASCII_digits[i][1]; } char* FastTimeToBuffer(time_t s, char* buffer) { - if (s == 0) { - time(&s); - } - - struct tm tm; - if (PortableSafeGmtime(&s, &tm) == nullptr) { - // Error message must fit in 30-char buffer. - memcpy(buffer, "Invalid:", sizeof("Invalid:")); - FastInt64ToBufferLeft(s, buffer+strlen(buffer)); + if (s == 0) { + time(&s); + } + + struct tm tm; + if (PortableSafeGmtime(&s, &tm) == nullptr) { + // Error message must fit in 30-char buffer. 
+ memcpy(buffer, "Invalid:", sizeof("Invalid:")); + FastInt64ToBufferLeft(s, buffer + strlen(buffer)); + return buffer; + } + + // strftime format: "%a, %d %b %Y %H:%M:%S GMT", + // but strftime does locale stuff which we do not want + // plus strftime takes > 10x the time of hard code + + const char* weekday_name = "Xxx"; + switch (tm.tm_wday) { + default: { + DLOG(FATAL) << "tm.tm_wday: " << tm.tm_wday; + } break; + case 0: + weekday_name = "Sun"; + break; + case 1: + weekday_name = "Mon"; + break; + case 2: + weekday_name = "Tue"; + break; + case 3: + weekday_name = "Wed"; + break; + case 4: + weekday_name = "Thu"; + break; + case 5: + weekday_name = "Fri"; + break; + case 6: + weekday_name = "Sat"; + break; + } + + const char* month_name = "Xxx"; + switch (tm.tm_mon) { + default: { + DLOG(FATAL) << "tm.tm_mon: " << tm.tm_mon; + } break; + case 0: + month_name = "Jan"; + break; + case 1: + month_name = "Feb"; + break; + case 2: + month_name = "Mar"; + break; + case 3: + month_name = "Apr"; + break; + case 4: + month_name = "May"; + break; + case 5: + month_name = "Jun"; + break; + case 6: + month_name = "Jul"; + break; + case 7: + month_name = "Aug"; + break; + case 8: + month_name = "Sep"; + break; + case 9: + month_name = "Oct"; + break; + case 10: + month_name = "Nov"; + break; + case 11: + month_name = "Dec"; + break; + } + + // Write out the buffer. 
+ + memcpy(buffer + 0, weekday_name, 3); + buffer[3] = ','; + buffer[4] = ' '; + + PutTwoDigits(tm.tm_mday, buffer + 5); + buffer[7] = ' '; + + memcpy(buffer + 8, month_name, 3); + buffer[11] = ' '; + + int32 year = tm.tm_year + 1900; + PutTwoDigits(year / 100, buffer + 12); + PutTwoDigits(year % 100, buffer + 14); + buffer[16] = ' '; + + PutTwoDigits(tm.tm_hour, buffer + 17); + buffer[19] = ':'; + + PutTwoDigits(tm.tm_min, buffer + 20); + buffer[22] = ':'; + + PutTwoDigits(tm.tm_sec, buffer + 23); + + // includes ending NUL + memcpy(buffer + 25, " GMT", 5); + return buffer; - } - - // strftime format: "%a, %d %b %Y %H:%M:%S GMT", - // but strftime does locale stuff which we do not want - // plus strftime takes > 10x the time of hard code - - const char* weekday_name = "Xxx"; - switch (tm.tm_wday) { - default: { DLOG(FATAL) << "tm.tm_wday: " << tm.tm_wday; } break; - case 0: weekday_name = "Sun"; break; - case 1: weekday_name = "Mon"; break; - case 2: weekday_name = "Tue"; break; - case 3: weekday_name = "Wed"; break; - case 4: weekday_name = "Thu"; break; - case 5: weekday_name = "Fri"; break; - case 6: weekday_name = "Sat"; break; - } - - const char* month_name = "Xxx"; - switch (tm.tm_mon) { - default: { DLOG(FATAL) << "tm.tm_mon: " << tm.tm_mon; } break; - case 0: month_name = "Jan"; break; - case 1: month_name = "Feb"; break; - case 2: month_name = "Mar"; break; - case 3: month_name = "Apr"; break; - case 4: month_name = "May"; break; - case 5: month_name = "Jun"; break; - case 6: month_name = "Jul"; break; - case 7: month_name = "Aug"; break; - case 8: month_name = "Sep"; break; - case 9: month_name = "Oct"; break; - case 10: month_name = "Nov"; break; - case 11: month_name = "Dec"; break; - } - - // Write out the buffer. 
- - memcpy(buffer+0, weekday_name, 3); - buffer[3] = ','; - buffer[4] = ' '; - - PutTwoDigits(tm.tm_mday, buffer+5); - buffer[7] = ' '; - - memcpy(buffer+8, month_name, 3); - buffer[11] = ' '; - - int32 year = tm.tm_year + 1900; - PutTwoDigits(year/100, buffer+12); - PutTwoDigits(year%100, buffer+14); - buffer[16] = ' '; - - PutTwoDigits(tm.tm_hour, buffer+17); - buffer[19] = ':'; - - PutTwoDigits(tm.tm_min, buffer+20); - buffer[22] = ':'; - - PutTwoDigits(tm.tm_sec, buffer+23); - - // includes ending NUL - memcpy(buffer+25, " GMT", 5); - - return buffer; } // ---------------------------------------------------------------------- @@ -677,24 +684,20 @@ char* FastTimeToBuffer(time_t s, char* buffer) { // and didn't want to (or cannot) modify the string // ---------------------------------------------------------------------- char* strdup_with_new(const char* the_string) { - if (the_string == nullptr) - return nullptr; - else - return strndup_with_new(the_string, strlen(the_string)); + if (the_string == nullptr) + return nullptr; + else + return strndup_with_new(the_string, strlen(the_string)); } char* strndup_with_new(const char* the_string, int max_length) { - if (the_string == nullptr) - return nullptr; + if (the_string == nullptr) return nullptr; - auto result = new char[max_length + 1]; - result[max_length] = '\0'; // terminate the string because strncpy might not - return strncpy(result, the_string, max_length); + auto result = new char[max_length + 1]; + result[max_length] = '\0'; // terminate the string because strncpy might not + return strncpy(result, the_string, max_length); } - - - // ---------------------------------------------------------------------- // ScanForFirstWord() // This function finds the first word in the string "the_string" given. 
@@ -709,27 +712,26 @@ char* strndup_with_new(const char* the_string, int max_length) { // Precondition: (end_ptr != NULL) // ---------------------------------------------------------------------- const char* ScanForFirstWord(const char* the_string, const char** end_ptr) { - CHECK(end_ptr != nullptr) << ": precondition violated"; + CHECK(end_ptr != nullptr) << ": precondition violated"; - if (the_string == nullptr) // empty string - return nullptr; + if (the_string == nullptr) // empty string + return nullptr; - const char* curr = the_string; - while ((*curr != '\0') && ascii_isspace(*curr)) // skip initial spaces - ++curr; + const char* curr = the_string; + while ((*curr != '\0') && ascii_isspace(*curr)) // skip initial spaces + ++curr; - if (*curr == '\0') // no valid word found - return nullptr; + if (*curr == '\0') // no valid word found + return nullptr; - // else has a valid word - const char* first_word = curr; + // else has a valid word + const char* first_word = curr; - // now locate the end of the word - while ((*curr != '\0') && !ascii_isspace(*curr)) - ++curr; + // now locate the end of the word + while ((*curr != '\0') && !ascii_isspace(*curr)) ++curr; - *end_ptr = curr; - return first_word; + *end_ptr = curr; + return first_word; } // ---------------------------------------------------------------------- @@ -739,174 +741,156 @@ const char* ScanForFirstWord(const char* the_string, const char** end_ptr) { // one. A C-style identifier begins with an ASCII letter or underscore // and continues with ASCII letters, digits, or underscores. // ---------------------------------------------------------------------- -const char *AdvanceIdentifier(const char *str) { - // Not using isalpha and isalnum so as not to rely on the locale. - // We could have used ascii_isalpha and ascii_isalnum. 
- char ch = *str++; - if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_')) - return nullptr; - while (true) { - ch = *str; - if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || - (ch >= '0' && ch <= '9') || ch == '_')) - return str; - str++; - } +const char* AdvanceIdentifier(const char* str) { + // Not using isalpha and isalnum so as not to rely on the locale. + // We could have used ascii_isalpha and ascii_isalnum. + char ch = *str++; + if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_')) return nullptr; + while (true) { + ch = *str; + if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || + ch == '_')) + return str; + str++; + } } - // ---------------------------------------------------------------------- // IsIdentifier() // This function returns true if str is a C-style identifier. // A C-style identifier begins with an ASCII letter or underscore // and continues with ASCII letters, digits, or underscores. // ---------------------------------------------------------------------- -bool IsIdentifier(const char *str) { - const char *end = AdvanceIdentifier(str); - return end && *end == '\0'; +bool IsIdentifier(const char* str) { + const char* end = AdvanceIdentifier(str); + return end && *end == '\0'; } static bool IsWildcard(Rune character) { - return character == '*' || character == '?'; + return character == '*' || character == '?'; } // Move the strings pointers to the point where they start to differ. template -static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, - const CHAR** string, const CHAR* string_end, - NEXT next) { - const CHAR* escape = nullptr; - while (*pattern != pattern_end && *string != string_end) { - if (!escape && IsWildcard(**pattern)) { - // We don't want to match wildcard here, except if it's escaped. 
- return; +static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, const CHAR** string, + const CHAR* string_end, NEXT next) { + const CHAR* escape = nullptr; + while (*pattern != pattern_end && *string != string_end) { + if (!escape && IsWildcard(**pattern)) { + // We don't want to match wildcard here, except if it's escaped. + return; + } + + // Check if the escapement char is found. If so, skip it and move to the + // next character. + if (!escape && **pattern == '\\') { + escape = *pattern; + next(pattern, pattern_end); + continue; + } + + // Check if the chars match, if so, increment the ptrs. + const CHAR* pattern_next = *pattern; + const CHAR* string_next = *string; + Rune pattern_char = next(&pattern_next, pattern_end); + if (pattern_char == next(&string_next, string_end) && pattern_char != Runeerror && + pattern_char <= Runemax) { + *pattern = pattern_next; + *string = string_next; + } else { + // Uh ho, it did not match, we are done. If the last char was an + // escapement, that means that it was an error to advance the ptr here, + // let's put it back where it was. This also mean that the MatchPattern + // function will return false because if we can't match an escape char + // here, then no one will. + if (escape) { + *pattern = escape; + } + return; + } + + escape = nullptr; } - - // Check if the escapement char is found. If so, skip it and move to the - // next character. - if (!escape && **pattern == '\\') { - escape = *pattern; - next(pattern, pattern_end); - continue; - } - - // Check if the chars match, if so, increment the ptrs. - const CHAR* pattern_next = *pattern; - const CHAR* string_next = *string; - Rune pattern_char = next(&pattern_next, pattern_end); - if (pattern_char == next(&string_next, string_end) && - pattern_char != Runeerror && - pattern_char <= Runemax) { - *pattern = pattern_next; - *string = string_next; - } else { - // Uh ho, it did not match, we are done. 
If the last char was an - // escapement, that means that it was an error to advance the ptr here, - // let's put it back where it was. This also mean that the MatchPattern - // function will return false because if we can't match an escape char - // here, then no one will. - if (escape) { - *pattern = escape; - } - return; - } - - escape = nullptr; - } } template static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) { - while (*pattern != end) { - if (!IsWildcard(**pattern)) - return; - next(pattern, end); - } + while (*pattern != end) { + if (!IsWildcard(**pattern)) return; + next(pattern, end); + } } template -static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end, - const CHAR* pattern, const CHAR* pattern_end, - int depth, - NEXT next) { - const int kMaxDepth = 16; - if (depth > kMaxDepth) - return false; +static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end, const CHAR* pattern, + const CHAR* pattern_end, int depth, NEXT next) { + const int kMaxDepth = 16; + if (depth > kMaxDepth) return false; - // Eat all the matching chars. - EatSameChars(&pattern, pattern_end, &eval, eval_end, next); + // Eat all the matching chars. + EatSameChars(&pattern, pattern_end, &eval, eval_end, next); - // If the string is empty, then the pattern must be empty too, or contains - // only wildcards. - if (eval == eval_end) { - EatWildcard(&pattern, pattern_end, next); - return pattern == pattern_end; - } - - // Pattern is empty but not string, this is not a match. - if (pattern == pattern_end) - return false; + // If the string is empty, then the pattern must be empty too, or contains + // only wildcards. + if (eval == eval_end) { + EatWildcard(&pattern, pattern_end, next); + return pattern == pattern_end; + } - // If this is a question mark, then we need to compare the rest with - // the current string or the string with one character eaten. 
- const CHAR* next_pattern = pattern; - next(&next_pattern, pattern_end); - if (pattern[0] == '?') { - if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, - depth + 1, next)) - return true; - const CHAR* next_eval = eval; - next(&next_eval, eval_end); - if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, - depth + 1, next)) - return true; - } - - // This is a *, try to match all the possible substrings with the remainder - // of the pattern. - if (pattern[0] == '*') { - // Collapse duplicate wild cards (********** into *) so that the - // method does not recurse unnecessarily. http://crbug.com/52839 - EatWildcard(&next_pattern, pattern_end, next); - - while (eval != eval_end) { - if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, - depth + 1, next)) - return true; - eval++; + // Pattern is empty but not string, this is not a match. + if (pattern == pattern_end) return false; + + // If this is a question mark, then we need to compare the rest with + // the current string or the string with one character eaten. + const CHAR* next_pattern = pattern; + next(&next_pattern, pattern_end); + if (pattern[0] == '?') { + if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1, next)) return true; + const CHAR* next_eval = eval; + next(&next_eval, eval_end); + if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, depth + 1, next)) + return true; } - // We reached the end of the string, let see if the pattern contains only - // wildcards. - if (eval == eval_end) { - EatWildcard(&pattern, pattern_end, next); - if (pattern != pattern_end) - return false; - return true; + // This is a *, try to match all the possible substrings with the remainder + // of the pattern. + if (pattern[0] == '*') { + // Collapse duplicate wild cards (********** into *) so that the + // method does not recurse unnecessarily. 
http://crbug.com/52839 + EatWildcard(&next_pattern, pattern_end, next); + + while (eval != eval_end) { + if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1, next)) + return true; + eval++; + } + + // We reached the end of the string, let see if the pattern contains only + // wildcards. + if (eval == eval_end) { + EatWildcard(&pattern, pattern_end, next); + if (pattern != pattern_end) return false; + return true; + } } - } - return false; + return false; } struct NextCharUTF8 { - Rune operator()(const char** p, const char* end) { - Rune c; - int offset = charntorune(&c, *p, static_cast(end - *p)); - *p += offset; - return c; - } + Rune operator()(const char** p, const char* end) { + Rune c; + int offset = charntorune(&c, *p, static_cast(end - *p)); + *p += offset; + return c; + } }; -bool MatchPattern(const StringPiece& eval, - const StringPiece& pattern) { - return MatchPatternT(eval.data(), eval.data() + eval.size(), - pattern.data(), pattern.data() + pattern.size(), - 0, NextCharUTF8()); +bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) { + return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(), + pattern.data() + pattern.size(), 0, NextCharUTF8()); } - - // ---------------------------------------------------------------------- // FindTagValuePair // Given a string of the form @@ -918,103 +902,98 @@ bool MatchPattern(const StringPiece& eval, // and "tag_len" and "value_len" are set to the respective lengths. // ---------------------------------------------------------------------- -bool FindTagValuePair(const char* arg_str, char tag_value_separator, - char attribute_separator, char string_terminal, - char **tag, int *tag_len, - char **value, int *value_len) { - char* in_str = const_cast(arg_str); // For msvc8. 
- if (in_str == nullptr) - return false; - char tv_sep_or_term[3] = {tag_value_separator, string_terminal, '\0'}; - char attr_sep_or_term[3] = {attribute_separator, string_terminal, '\0'}; - - // Look for beginning of tag - *tag = strpbrk(in_str, attr_sep_or_term); - // If string_terminal is '\0', strpbrk won't find it but return null. - if (*tag == nullptr || **tag == string_terminal) - *tag = in_str; - else - (*tag)++; // Move past separator - // Now look for value... - char *tv_sep_pos = strpbrk(*tag, tv_sep_or_term); - if (tv_sep_pos == nullptr || *tv_sep_pos == string_terminal) - return false; - // ...and end of value - char *attr_sep_pos = strpbrk(tv_sep_pos, attr_sep_or_term); - - *tag_len = tv_sep_pos - *tag; - *value = tv_sep_pos + 1; - if (attr_sep_pos != nullptr) - *value_len = attr_sep_pos - *value; - else - *value_len = strlen(*value); - return true; +bool FindTagValuePair(const char* arg_str, char tag_value_separator, char attribute_separator, + char string_terminal, char** tag, int* tag_len, char** value, + int* value_len) { + char* in_str = const_cast(arg_str); // For msvc8. + if (in_str == nullptr) return false; + char tv_sep_or_term[3] = {tag_value_separator, string_terminal, '\0'}; + char attr_sep_or_term[3] = {attribute_separator, string_terminal, '\0'}; + + // Look for beginning of tag + *tag = strpbrk(in_str, attr_sep_or_term); + // If string_terminal is '\0', strpbrk won't find it but return null. + if (*tag == nullptr || **tag == string_terminal) + *tag = in_str; + else + (*tag)++; // Move past separator + // Now look for value... 
+ char* tv_sep_pos = strpbrk(*tag, tv_sep_or_term); + if (tv_sep_pos == nullptr || *tv_sep_pos == string_terminal) return false; + // ...and end of value + char* attr_sep_pos = strpbrk(tv_sep_pos, attr_sep_or_term); + + *tag_len = tv_sep_pos - *tag; + *value = tv_sep_pos + 1; + if (attr_sep_pos != nullptr) + *value_len = attr_sep_pos - *value; + else + *value_len = strlen(*value); + return true; } void UniformInsertString(string* s, int interval, const char* separator) { - const size_t separator_len = strlen(separator); + const size_t separator_len = strlen(separator); - if (interval < 1 || // invalid interval - s->empty() || // nothing to do - separator_len == 0) // invalid separator - return; + if (interval < 1 || // invalid interval + s->empty() || // nothing to do + separator_len == 0) // invalid separator + return; - int num_inserts = (s->size() - 1) / interval; // -1 to avoid appending at end - if (num_inserts == 0) // nothing to do - return; + int num_inserts = (s->size() - 1) / interval; // -1 to avoid appending at end + if (num_inserts == 0) // nothing to do + return; - string tmp; - tmp.reserve(s->size() + num_inserts * separator_len + 1); + string tmp; + tmp.reserve(s->size() + num_inserts * separator_len + 1); - for (int i = 0; i < num_inserts ; ++i) { - // append this interval - tmp.append(*s, i * interval, interval); - // append a separator - tmp.append(separator, separator_len); - } + for (int i = 0; i < num_inserts; ++i) { + // append this interval + tmp.append(*s, i * interval, interval); + // append a separator + tmp.append(separator, separator_len); + } - // append the tail - const size_t tail_pos = num_inserts * interval; - tmp.append(*s, tail_pos, s->size() - tail_pos); + // append the tail + const size_t tail_pos = num_inserts * interval; + tmp.append(*s, tail_pos, s->size() - tail_pos); - s->swap(tmp); + s->swap(tmp); } -void InsertString(string *const s, - const vector &indices, - char const *const separator) { - const unsigned 
num_indices(indices.size()); - if (num_indices == 0) { - return; // nothing to do... - } - - const unsigned separator_len(strlen(separator)); - if (separator_len == 0) { - return; // still nothing to do... - } - - string tmp; - const unsigned s_len(s->size()); - tmp.reserve(s_len + separator_len * num_indices); - - vector::const_iterator const ind_end(indices.end()); - auto ind_pos(indices.begin()); - - uint32 last_pos(0); - while (ind_pos != ind_end) { - const uint32 pos(*ind_pos); - DCHECK_GE(pos, last_pos); - DCHECK_LE(pos, s_len); - - tmp.append(s->substr(last_pos, pos - last_pos)); - tmp.append(separator); - - last_pos = pos; - ++ind_pos; - } - tmp.append(s->substr(last_pos)); - - s->swap(tmp); +void InsertString(string* const s, const vector& indices, char const* const separator) { + const unsigned num_indices(indices.size()); + if (num_indices == 0) { + return; // nothing to do... + } + + const unsigned separator_len(strlen(separator)); + if (separator_len == 0) { + return; // still nothing to do... 
+ } + + string tmp; + const unsigned s_len(s->size()); + tmp.reserve(s_len + separator_len * num_indices); + + vector::const_iterator const ind_end(indices.end()); + auto ind_pos(indices.begin()); + + uint32 last_pos(0); + while (ind_pos != ind_end) { + const uint32 pos(*ind_pos); + DCHECK_GE(pos, last_pos); + DCHECK_LE(pos, s_len); + + tmp.append(s->substr(last_pos, pos - last_pos)); + tmp.append(separator); + + last_pos = pos; + ++ind_pos; + } + tmp.append(s->substr(last_pos)); + + s->swap(tmp); } //------------------------------------------------------------------------ @@ -1024,15 +1003,15 @@ void InsertString(string *const s, // (returns string::npos = -1 if n <= 0) //------------------------------------------------------------------------ int FindNth(StringPiece s, char c, int n) { - size_t pos = string::npos; + size_t pos = string::npos; - for ( int i = 0; i < n; ++i ) { - pos = s.find_first_of(c, pos + 1); - if ( pos == StringPiece::npos ) { - break; + for (int i = 0; i < n; ++i) { + pos = s.find_first_of(c, pos + 1); + if (pos == StringPiece::npos) { + break; + } } - } - return pos; + return pos; } //------------------------------------------------------------------------ @@ -1042,25 +1021,25 @@ int FindNth(StringPiece s, char c, int n) { // (returns string::npos if n <= 0) //------------------------------------------------------------------------ int ReverseFindNth(StringPiece s, char c, int n) { - if ( n <= 0 ) { - return static_cast(StringPiece::npos); - } - - size_t pos = s.size(); - - for ( int i = 0; i < n; ++i ) { - // If pos == 0, we return StringPiece::npos right away. Otherwise, - // the following find_last_of call would take (pos - 1) as string::npos, - // which means it would again search the entire input string. 
- if (pos == 0) { - return static_cast(StringPiece::npos); + if (n <= 0) { + return static_cast(StringPiece::npos); } - pos = s.find_last_of(c, pos - 1); - if ( pos == string::npos ) { - break; + + size_t pos = s.size(); + + for (int i = 0; i < n; ++i) { + // If pos == 0, we return StringPiece::npos right away. Otherwise, + // the following find_last_of call would take (pos - 1) as string::npos, + // which means it would again search the entire input string. + if (pos == 0) { + return static_cast(StringPiece::npos); + } + pos = s.find_last_of(c, pos - 1); + if (pos == string::npos) { + break; + } } - } - return pos; + return pos; } namespace strings { @@ -1069,149 +1048,145 @@ namespace strings { // Returns the location of the next end-of-line sequence. StringPiece FindEol(StringPiece s) { - for (size_t i = 0; i < s.length(); ++i) { - if (s[i] == '\n') { - return StringPiece(s.data() + i, 1); - } - if (s[i] == '\r') { - if (i+1 < s.length() && s[i+1] == '\n') { - return StringPiece(s.data() + i, 2); - } else { - return StringPiece(s.data() + i, 1); - } + for (size_t i = 0; i < s.length(); ++i) { + if (s[i] == '\n') { + return StringPiece(s.data() + i, 1); + } + if (s[i] == '\r') { + if (i + 1 < s.length() && s[i + 1] == '\n') { + return StringPiece(s.data() + i, 2); + } else { + return StringPiece(s.data() + i, 1); + } + } } - } - return StringPiece(s.data() + s.length(), 0); + return StringPiece(s.data() + s.length(), 0); } -} // namespace strings +} // namespace strings //------------------------------------------------------------------------ // OnlyWhitespace() // return true if string s contains only whitespace characters //------------------------------------------------------------------------ bool OnlyWhitespace(const StringPiece& s) { - for (const auto& c : s) { - if ( !ascii_isspace(c) ) return false; - } - return true; + for (const auto& c : s) { + if (!ascii_isspace(c)) return false; + } + return true; } string PrefixSuccessor(const StringPiece& prefix) 
{ - // We can increment the last character in the string and be done - // unless that character is 255, in which case we have to erase the - // last character and increment the previous character, unless that - // is 255, etc. If the string is empty or consists entirely of - // 255's, we just return the empty string. - bool done = false; - string limit(prefix.data(), prefix.size()); - int index = limit.length() - 1; - while (!done && index >= 0) { - if (limit[index] == 255) { - limit.erase(index); - index--; + // We can increment the last character in the string and be done + // unless that character is 255, in which case we have to erase the + // last character and increment the previous character, unless that + // is 255, etc. If the string is empty or consists entirely of + // 255's, we just return the empty string. + bool done = false; + string limit(prefix.data(), prefix.size()); + int index = limit.length() - 1; + while (!done && index >= 0) { + if (limit[index] == 255) { + limit.erase(index); + index--; + } else { + limit[index]++; + done = true; + } + } + if (!done) { + return ""; } else { - limit[index]++; - done = true; + return limit; } - } - if (!done) { - return ""; - } else { - return limit; - } } string ImmediateSuccessor(const StringPiece& s) { - // Return the input string, with an additional NUL byte appended. - string out; - out.reserve(s.size() + 1); - out.append(s.data(), s.size()); - out.push_back('\0'); - return out; + // Return the input string, with an additional NUL byte appended. 
+ string out; + out.reserve(s.size() + 1); + out.append(s.data(), s.size()); + out.push_back('\0'); + return out; } -void FindShortestSeparator(const StringPiece& start, - const StringPiece& limit, - string* separator) { - // Find length of common prefix - size_t min_length = min(start.size(), limit.size()); - size_t diff_index = 0; - while ((diff_index < min_length) && - (start[diff_index] == limit[diff_index])) { - diff_index++; - } - - if (diff_index >= min_length) { - // Handle the case where either string is a prefix of the other - // string, or both strings are identical. - start.CopyToString(separator); - return; - } - - if (diff_index+1 == start.size()) { - // If the first difference is in the last character, do not bother - // incrementing that character since the separator will be no - // shorter than "start". - start.CopyToString(separator); - return; - } - - if (start[diff_index] == 0xff) { - // Avoid overflow when incrementing start[diff_index] - start.CopyToString(separator); - return; - } - - separator->assign(start.data(), diff_index); - separator->push_back(start[diff_index] + 1); - if (*separator >= limit) { - // Never pick a separator that causes confusion with "limit" - start.CopyToString(separator); - } +void FindShortestSeparator(const StringPiece& start, const StringPiece& limit, string* separator) { + // Find length of common prefix + size_t min_length = min(start.size(), limit.size()); + size_t diff_index = 0; + while ((diff_index < min_length) && (start[diff_index] == limit[diff_index])) { + diff_index++; + } + + if (diff_index >= min_length) { + // Handle the case where either string is a prefix of the other + // string, or both strings are identical. + start.CopyToString(separator); + return; + } + + if (diff_index + 1 == start.size()) { + // If the first difference is in the last character, do not bother + // incrementing that character since the separator will be no + // shorter than "start". 
+ start.CopyToString(separator); + return; + } + + if (start[diff_index] == 0xff) { + // Avoid overflow when incrementing start[diff_index] + start.CopyToString(separator); + return; + } + + separator->assign(start.data(), diff_index); + separator->push_back(start[diff_index] + 1); + if (*separator >= limit) { + // Never pick a separator that causes confusion with "limit" + start.CopyToString(separator); + } } -int SafeSnprintf(char *str, size_t size, const char *format, ...) { - va_list printargs; - va_start(printargs, format); - int ncw = vsnprintf(str, size, format, printargs); - va_end(printargs); - return (ncw < size && ncw >= 0) ? ncw : 0; +int SafeSnprintf(char* str, size_t size, const char* format, ...) { + va_list printargs; + va_start(printargs, format); + int ncw = vsnprintf(str, size, format, printargs); + va_end(printargs); + return (ncw < size && ncw >= 0) ? ncw : 0; } bool GetlineFromStdioFile(FILE* file, string* str, char delim) { - str->erase(); - while (true) { - if (feof(file) || ferror(file)) { - return false; + str->erase(); + while (true) { + if (feof(file) || ferror(file)) { + return false; + } + int c = getc(file); + if (c == EOF) return false; + if (c == delim) return true; + str->push_back(c); } - int c = getc(file); - if (c == EOF) return false; - if (c == delim) return true; - str->push_back(c); - } } namespace { template size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { - for (size_t i = 0; i < dst_size; ++i) { - if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. - return i; - } - - // We were left off at dst_size. We over copied 1 byte. Null terminate. - if (dst_size != 0) - dst[dst_size - 1] = 0; - - // Count the rest of the |src|, and return it's length in characters. - while (src[dst_size]) ++dst_size; - return dst_size; + for (size_t i = 0; i < dst_size; ++i) { + if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. + return i; + } + + // We were left off at dst_size. 
We over copied 1 byte. Null terminate. + if (dst_size != 0) dst[dst_size - 1] = 0; + + // Count the rest of the |src|, and return it's length in characters. + while (src[dst_size]) ++dst_size; + return dst_size; } -} // namespace +} // namespace size_t strings::strlcpy(char* dst, const char* src, size_t dst_size) { - return lcpyT(dst, src, dst_size); + return lcpyT(dst, src, dst_size); } diff --git a/be/src/gutil/strings/util.h b/be/src/gutil/strings/util.h index 2090e0f35fc097..fecec84f68c8fc 100644 --- a/be/src/gutil/strings/util.h +++ b/be/src/gutil/strings/util.h @@ -31,7 +31,7 @@ #include #include #ifndef _MSC_VER -#include // for strcasecmp, but msvc does not have this header +#include // for strcasecmp, but msvc does not have this header #endif #include @@ -65,15 +65,15 @@ namespace strings { StringPiece FindEol(StringPiece sp); -} // namespace strings +} // namespace strings // Older functions. // Duplicates a non-null, non-empty char* string. Returns a pointer to the new // string, or NULL if the input is null or empty. inline char* strdup_nonempty(const char* src) { - if (src && src[0]) return strdup(src); - return NULL; + if (src && src[0]) return strdup(src); + return NULL; } // Finds the first occurrence of a character in at most a given number of bytes @@ -83,13 +83,12 @@ inline char* strdup_nonempty(const char* src) { // suitable for null-terminated strings. // WARNING: Removes const-ness of string argument! 
inline char* strnchr(const char* buf, char c, int sz) { - const char* end = buf + sz; - while (buf != end && *buf) { - if (*buf == c) - return const_cast(buf); - ++buf; - } - return NULL; + const char* end = buf + sz; + while (buf != end && *buf) { + if (*buf == c) return const_cast(buf); + ++buf; + } + return NULL; } // Finds the first occurrence of the null-terminated needle in at most the first @@ -106,18 +105,14 @@ char* strnstr(const char* haystack, const char* needle, size_t haystack_len); // The ""'s catch people who don't pass in a literal for "prefix" #ifndef strprefix #define strprefix(str, prefix) \ - (strncmp(str, prefix, sizeof("" prefix "")-1) == 0 ? \ - str + sizeof(prefix)-1 : \ - NULL) + (strncmp(str, prefix, sizeof("" prefix "") - 1) == 0 ? str + sizeof(prefix) - 1 : NULL) #endif // Same as strprefix() (immediately above), but matches a case-insensitive // prefix. #ifndef strcaseprefix #define strcaseprefix(str, prefix) \ - (strncasecmp(str, prefix, sizeof("" prefix "")-1) == 0 ? \ - str + sizeof(prefix)-1 : \ - NULL) + (strncasecmp(str, prefix, sizeof("" prefix "") - 1) == 0 ? str + sizeof(prefix) - 1 : NULL) #endif // Matches a prefix (up to the first needle_size bytes of needle) in the first @@ -133,50 +128,48 @@ char* strnstr(const char* haystack, const char* needle, size_t haystack_len); #ifdef strnprefix #undef strnprefix #endif -const char* strnprefix(const char* haystack, int haystack_size, - const char* needle, int needle_size); +const char* strnprefix(const char* haystack, int haystack_size, const char* needle, + int needle_size); // Matches a case-insensitive prefix (up to the first needle_size bytes of // needle) in the first haystack_size byte of haystack. Returns a pointer past // the prefix, or NULL if the prefix wasn't matched. // // Always returns either NULL or haystack + needle_size. 
-const char* strncaseprefix(const char* haystack, int haystack_size, - const char* needle, int needle_size); +const char* strncaseprefix(const char* haystack, int haystack_size, const char* needle, + int needle_size); // Matches a prefix; returns a pointer past the prefix, or NULL if not found. // (Like strprefix() and strcaseprefix() but not restricted to searching for // char* literals). Templated so searching a const char* returns a const char*, // and searching a non-const char* returns a non-const char*. -template +template inline CharStar var_strprefix(CharStar str, const char* prefix) { - const int len = strlen(prefix); - return strncmp(str, prefix, len) == 0 ? str + len : NULL; + const int len = strlen(prefix); + return strncmp(str, prefix, len) == 0 ? str + len : NULL; } // Same as var_strprefix() (immediately above), but matches a case-insensitive // prefix. -template +template inline CharStar var_strcaseprefix(CharStar str, const char* prefix) { - const int len = strlen(prefix); - return strncasecmp(str, prefix, len) == 0 ? str + len : NULL; + const int len = strlen(prefix); + return strncasecmp(str, prefix, len) == 0 ? str + len : NULL; } // Returns input, or "(null)" if NULL. (Useful for logging.) inline const char* GetPrintableString(const char* const in) { - return NULL == in ? "(null)" : in; + return NULL == in ? "(null)" : in; } // Returns whether str begins with prefix. -inline bool HasPrefixString(const StringPiece& str, - const StringPiece& prefix) { - return str.starts_with(prefix); +inline bool HasPrefixString(const StringPiece& str, const StringPiece& prefix) { + return str.starts_with(prefix); } // Returns whether str ends with suffix. -inline bool HasSuffixString(const StringPiece& str, - const StringPiece& suffix) { - return str.ends_with(suffix); +inline bool HasSuffixString(const StringPiece& str, const StringPiece& suffix) { + return str.ends_with(suffix); } // Returns true if the string passed in matches the pattern. 
The pattern @@ -184,72 +177,66 @@ inline bool HasSuffixString(const StringPiece& str, // The backslash character (\) is an escape character for * and ? // We limit the patterns to having a max of 16 * or ? characters. // ? matches 0 or 1 character, while * matches 0 or more characters. -bool MatchPattern(const StringPiece& string, - const StringPiece& pattern); +bool MatchPattern(const StringPiece& string, const StringPiece& pattern); // Returns where suffix begins in str, or NULL if str doesn't end with suffix. inline char* strsuffix(char* str, const char* suffix) { - const int lenstr = strlen(str); - const int lensuffix = strlen(suffix); - char* strbeginningoftheend = str + lenstr - lensuffix; - - if (lenstr >= lensuffix && 0 == strcmp(strbeginningoftheend, suffix)) { - return (strbeginningoftheend); - } else { - return (NULL); - } + const int lenstr = strlen(str); + const int lensuffix = strlen(suffix); + char* strbeginningoftheend = str + lenstr - lensuffix; + + if (lenstr >= lensuffix && 0 == strcmp(strbeginningoftheend, suffix)) { + return (strbeginningoftheend); + } else { + return (NULL); + } } inline const char* strsuffix(const char* str, const char* suffix) { - return const_cast(strsuffix(const_cast(str), suffix)); + return const_cast(strsuffix(const_cast(str), suffix)); } // Same as strsuffix() (immediately above), but matches a case-insensitive // suffix. 
char* strcasesuffix(char* str, const char* suffix); inline const char* strcasesuffix(const char* str, const char* suffix) { - return const_cast(strcasesuffix(const_cast(str), suffix)); + return const_cast(strcasesuffix(const_cast(str), suffix)); } -const char* strnsuffix(const char* haystack, int haystack_size, - const char* needle, int needle_size); -const char* strncasesuffix(const char* haystack, int haystack_size, - const char* needle, int needle_size); +const char* strnsuffix(const char* haystack, int haystack_size, const char* needle, + int needle_size); +const char* strncasesuffix(const char* haystack, int haystack_size, const char* needle, + int needle_size); // Returns the number of times a character occurs in a string for a null // terminated string. inline ptrdiff_t strcount(const char* buf, char c) { - if (buf == NULL) - return 0; - ptrdiff_t num = 0; - for (const char* bp = buf; *bp != '\0'; bp++) { - if (*bp == c) - num++; - } - return num; + if (buf == NULL) return 0; + ptrdiff_t num = 0; + for (const char* bp = buf; *bp != '\0'; bp++) { + if (*bp == c) num++; + } + return num; } // Returns the number of times a character occurs in a string for a string // defined by a pointer to the first character and a pointer just past the last // character. 
inline ptrdiff_t strcount(const char* buf_begin, const char* buf_end, char c) { - if (buf_begin == NULL) - return 0; - if (buf_end <= buf_begin) - return 0; - ptrdiff_t num = 0; - for (const char* bp = buf_begin; bp != buf_end; bp++) { - if (*bp == c) - num++; - } - return num; + if (buf_begin == NULL) return 0; + if (buf_end <= buf_begin) return 0; + ptrdiff_t num = 0; + for (const char* bp = buf_begin; bp != buf_end; bp++) { + if (*bp == c) num++; + } + return num; } // Returns the number of times a character occurs in a string for a string // defined by a pointer to the first char and a length: inline ptrdiff_t strcount(const char* buf, size_t len, char c) { - return strcount(buf, buf + len, c); + return strcount(buf, buf + len, c); } // Returns the number of times a character occurs in a string for a C++ string: inline ptrdiff_t strcount(const string& buf, char c) { - return strcount(buf.c_str(), buf.size(), c); + return strcount(buf.c_str(), buf.size(), c); } // Returns a pointer to the nth occurrence of a character in a null-terminated @@ -268,26 +255,25 @@ char* AdjustedLastPos(const char* str, char separator, int n); // equal only to another NULL). Useful in hash tables: // hash_map, streq> ht; struct streq : public binary_function { - bool operator()(const char* s1, const char* s2) const { - return ((s1 == 0 && s2 == 0) || - (s1 && s2 && *s1 == *s2 && strcmp(s1, s2) == 0)); - } + bool operator()(const char* s1, const char* s2) const { + return ((s1 == 0 && s2 == 0) || (s1 && s2 && *s1 == *s2 && strcmp(s1, s2) == 0)); + } }; // Compares two char* strings. (Works with NULL, which compares greater than any // non-NULL). 
Useful in maps: // map m; struct strlt : public binary_function { - bool operator()(const char* s1, const char* s2) const { - return (s1 != s2) && (s2 == 0 || (s1 != 0 && strcmp(s1, s2) < 0)); - } + bool operator()(const char* s1, const char* s2) const { + return (s1 != s2) && (s2 == 0 || (s1 != 0 && strcmp(s1, s2) < 0)); + } }; // Returns whether str has only Ascii characters (as defined by ascii_isascii() // in strings/ascii_ctype.h). bool IsAscii(const char* str, int len); inline bool IsAscii(const StringPiece& str) { - return IsAscii(str.data(), str.size()); + return IsAscii(str.data(), str.size()); } // Returns the smallest lexicographically larger string of equal or smaller @@ -326,23 +312,21 @@ string ImmediateSuccessor(const StringPiece& s); // FindShortestSeparator("foobar", "foxhunt", &sep) => sep == "fop" // FindShortestSeparator("abracadabra", "bacradabra", &sep) => sep == "b" // If limit is less than or equal to start, fills in *separator with start. -void FindShortestSeparator(const StringPiece& start, const StringPiece& limit, - string* separator); +void FindShortestSeparator(const StringPiece& start, const StringPiece& limit, string* separator); // Copies at most n-1 bytes from src to dest, and returns dest. If n >=1, null // terminates dest; otherwise, returns dest unchanged. Unlike strncpy(), only // puts one null character at the end of dest. 
inline char* safestrncpy(char* dest, const char* src, size_t n) { - if (n < 1) return dest; + if (n < 1) return dest; - // Avoid using non-ANSI memccpy(), which is also deprecated in MSVC - for (size_t i = 0; i < n; ++i) { - if ((dest[i] = src[i]) == '\0') - return dest; - } + // Avoid using non-ANSI memccpy(), which is also deprecated in MSVC + for (size_t i = 0; i < n; ++i) { + if ((dest[i] = src[i]) == '\0') return dest; + } - dest[n-1] = '\0'; - return dest; + dest[n - 1] = '\0'; + return dest; } namespace strings { @@ -360,19 +344,16 @@ size_t strlcpy(char* dst, const char* src, size_t dst_size); // Replaces the first occurrence (if replace_all is false) or all occurrences // (if replace_all is true) of oldsub in s with newsub. In the second version, // *res must be distinct from all the other arguments. -string StringReplace(const StringPiece& s, const StringPiece& oldsub, - const StringPiece& newsub, bool replace_all); -void StringReplace(const StringPiece& s, const StringPiece& oldsub, - const StringPiece& newsub, bool replace_all, - string* res); +string StringReplace(const StringPiece& s, const StringPiece& oldsub, const StringPiece& newsub, + bool replace_all); +void StringReplace(const StringPiece& s, const StringPiece& oldsub, const StringPiece& newsub, + bool replace_all, string* res); // Replaces all occurrences of substring in s with replacement. Returns the // number of instances replaced. s must be distinct from the other arguments. // // Less flexible, but faster, than RE::GlobalReplace(). -int GlobalReplaceSubstring(const StringPiece& substring, - const StringPiece& replacement, - string* s); +int GlobalReplaceSubstring(const StringPiece& substring, const StringPiece& replacement, string* s); // Removes v[i] for every element i in indices. Does *not* preserve the order of // v. indices must be sorted in strict increasing order (no duplicates). 
Runs in @@ -394,9 +375,7 @@ char* gstrncasestr(char* haystack, const char* needle, size_t len); // non_alpha), a token prefix and a token suffix. Returns a pointer into str of // the position of prefix, or NULL if not found. // WARNING: Removes const-ness of string argument! -char* gstrncasestr_split(const char* str, - const char* prefix, char non_alpha, - const char* suffix, +char* gstrncasestr_split(const char* str, const char* prefix, char non_alpha, const char* suffix, size_t n); // Finds (case insensitively) needle in haystack, paying attention only to @@ -415,9 +394,7 @@ int CountSubstring(StringPiece text, StringPiece substring); // Finds, in haystack (which is a list of tokens separated by delim), an token // equal to needle. Returns a pointer into haystack, or NULL if not found (or // either needle or haystack is empty). -const char* strstr_delimited(const char* haystack, - const char* needle, - char delim); +const char* strstr_delimited(const char* haystack, const char* needle, char delim); // Gets the next token from string *stringp, where tokens are strings separated // by characters from delim. @@ -440,11 +417,10 @@ char* strndup_with_new(const char* the_string, int max_length); // end_ptr must not be NULL. const char* ScanForFirstWord(const char* the_string, const char** end_ptr); inline char* ScanForFirstWord(char* the_string, char** end_ptr) { - // implicit_cast<> would be more appropriate for casting to const, - // but we save the inclusion of "base/casts.h" here by using const_cast<>. - return const_cast( - ScanForFirstWord(const_cast(the_string), - const_cast(end_ptr))); + // implicit_cast<> would be more appropriate for casting to const, + // but we save the inclusion of "base/casts.h" here by using const_cast<>. 
+ return const_cast(ScanForFirstWord(const_cast(the_string), + const_cast(end_ptr))); } // For the following functions, an "identifier" is a letter or underscore, @@ -454,9 +430,9 @@ inline char* ScanForFirstWord(char* the_string, char** end_ptr) { // str, or NULL if str doesn't start with an identifier. const char* AdvanceIdentifier(const char* str); inline char* AdvanceIdentifier(char* str) { - // implicit_cast<> would be more appropriate for casting to const, - // but we save the inclusion of "base/casts.h" here by using const_cast<>. - return const_cast(AdvanceIdentifier(const_cast(str))); + // implicit_cast<> would be more appropriate for casting to const, + // but we save the inclusion of "base/casts.h" here by using const_cast<>. + return const_cast(AdvanceIdentifier(const_cast(str))); } // Returns whether str is an "identifier" (see above). @@ -471,10 +447,8 @@ bool IsIdentifier(const char* str); // // Returns true (and populates tag, tag_len, value, and value_len) if a // tag/value pair is founds; returns false otherwise. -bool FindTagValuePair(const char* in_str, char tag_value_separator, - char attribute_separator, char string_terminal, - char** tag, int* tag_len, - char** value, int* value_len); +bool FindTagValuePair(const char* in_str, char tag_value_separator, char attribute_separator, + char string_terminal, char** tag, int* tag_len, char** value, int* value_len); // Inserts separator after every interval characters in *s (but never appends to // the end of the original *s). @@ -482,8 +456,7 @@ void UniformInsertString(string* s, int interval, const char* separator); // Inserts separator into s at each specified index. indices must be sorted in // ascending order. -void InsertString( - string* s, const vector& indices, char const* separator); +void InsertString(string* s, const vector& indices, char const* separator); // Finds the nth occurrence of c in n; returns the index in s of that // occurrence, or string::npos if fewer than n occurrences. 
@@ -503,12 +476,11 @@ bool OnlyWhitespace(const StringPiece& s); // enough space had been available.) // // A drop-in replacement for the safe_snprintf() macro. -int SafeSnprintf(char* str, size_t size, const char* format, ...) - PRINTF_ATTRIBUTE(3, 4); +int SafeSnprintf(char* str, size_t size, const char* format, ...) PRINTF_ATTRIBUTE(3, 4); // Reads a line (terminated by delim) from file into *str. Reads delim from // file, but doesn't copy it into *str. Returns true if read a delim-terminated // line, or false on end-of-file or error. bool GetlineFromStdioFile(FILE* file, string* str, char delim); -#endif // STRINGS_UTIL_H_ +#endif // STRINGS_UTIL_H_ diff --git a/be/src/gutil/strtoint.cc b/be/src/gutil/strtoint.cc index 9df29a3d2680bd..b3b711f323fb61 100644 --- a/be/src/gutil/strtoint.cc +++ b/be/src/gutil/strtoint.cc @@ -4,44 +4,44 @@ // See strtoint.h for details on how to use this component. // +#include "gutil/strtoint.h" + #include + #include "gutil/port.h" -#include "gutil/strtoint.h" // Replacement strto[u]l functions that have identical overflow and underflow // characteristics for both ILP-32 and LP-64 platforms, including errno // preservation for error-free calls. 
-int32 strto32_adapter(const char *nptr, char **endptr, int base) { - const int saved_errno = errno; - errno = 0; - const long result = strtol(nptr, endptr, base); - if (errno == ERANGE && result == LONG_MIN) { - return kint32min; - } else if (errno == ERANGE && result == LONG_MAX) { - return kint32max; - } else if (errno == 0 && result < kint32min) { - errno = ERANGE; - return kint32min; - } else if (errno == 0 && result > kint32max) { - errno = ERANGE; - return kint32max; - } - if (errno == 0) - errno = saved_errno; - return static_cast(result); +int32 strto32_adapter(const char* nptr, char** endptr, int base) { + const int saved_errno = errno; + errno = 0; + const long result = strtol(nptr, endptr, base); + if (errno == ERANGE && result == LONG_MIN) { + return kint32min; + } else if (errno == ERANGE && result == LONG_MAX) { + return kint32max; + } else if (errno == 0 && result < kint32min) { + errno = ERANGE; + return kint32min; + } else if (errno == 0 && result > kint32max) { + errno = ERANGE; + return kint32max; + } + if (errno == 0) errno = saved_errno; + return static_cast(result); } -uint32 strtou32_adapter(const char *nptr, char **endptr, int base) { - const int saved_errno = errno; - errno = 0; - const unsigned long result = strtoul(nptr, endptr, base); - if (errno == ERANGE && result == ULONG_MAX) { - return kuint32max; - } else if (errno == 0 && result > kuint32max) { - errno = ERANGE; - return kuint32max; - } - if (errno == 0) - errno = saved_errno; - return static_cast(result); +uint32 strtou32_adapter(const char* nptr, char** endptr, int base) { + const int saved_errno = errno; + errno = 0; + const unsigned long result = strtoul(nptr, endptr, base); + if (errno == ERANGE && result == ULONG_MAX) { + return kuint32max; + } else if (errno == 0 && result > kuint32max) { + errno = ERANGE; + return kuint32max; + } + if (errno == 0) errno = saved_errno; + return static_cast(result); } diff --git a/be/src/gutil/strtoint.h b/be/src/gutil/strtoint.h index 
581ebf91860e3e..fb839034df1371 100644 --- a/be/src/gutil/strtoint.h +++ b/be/src/gutil/strtoint.h @@ -31,6 +31,7 @@ #define BASE_STRTOINT_H_ #include // For strtol* functions. + #include using std::string; #include "gutil/integral_types.h" @@ -38,56 +39,55 @@ using std::string; #include "gutil/port.h" // Adapter functions for handling overflow and errno. -int32 strto32_adapter(const char *nptr, char **endptr, int base); -uint32 strtou32_adapter(const char *nptr, char **endptr, int base); +int32 strto32_adapter(const char* nptr, char** endptr, int base); +uint32 strtou32_adapter(const char* nptr, char** endptr, int base); // Conversions to a 32-bit integer can pass the call to strto[u]l on 32-bit // platforms, but need a little extra work on 64-bit platforms. -inline int32 strto32(const char *nptr, char **endptr, int base) { - if (sizeof(int32) == sizeof(long)) - return static_cast(strtol(nptr, endptr, base)); - else - return strto32_adapter(nptr, endptr, base); +inline int32 strto32(const char* nptr, char** endptr, int base) { + if (sizeof(int32) == sizeof(long)) + return static_cast(strtol(nptr, endptr, base)); + else + return strto32_adapter(nptr, endptr, base); } -inline uint32 strtou32(const char *nptr, char **endptr, int base) { - if (sizeof(uint32) == sizeof(unsigned long)) - return static_cast(strtoul(nptr, endptr, base)); - else - return strtou32_adapter(nptr, endptr, base); +inline uint32 strtou32(const char* nptr, char** endptr, int base) { + if (sizeof(uint32) == sizeof(unsigned long)) + return static_cast(strtoul(nptr, endptr, base)); + else + return strtou32_adapter(nptr, endptr, base); } // For now, long long is 64-bit on all the platforms we care about, so these // functions can simply pass the call to strto[u]ll. 
-inline int64 strto64(const char *nptr, char **endptr, int base) { - COMPILE_ASSERT(sizeof(int64) == sizeof(long long), - sizeof_int64_is_not_sizeof_long_long); - return strtoll(nptr, endptr, base); +inline int64 strto64(const char* nptr, char** endptr, int base) { + COMPILE_ASSERT(sizeof(int64) == sizeof(long long), sizeof_int64_is_not_sizeof_long_long); + return strtoll(nptr, endptr, base); } -inline uint64 strtou64(const char *nptr, char **endptr, int base) { - COMPILE_ASSERT(sizeof(uint64) == sizeof(unsigned long long), - sizeof_uint64_is_not_sizeof_long_long); - return strtoull(nptr, endptr, base); +inline uint64 strtou64(const char* nptr, char** endptr, int base) { + COMPILE_ASSERT(sizeof(uint64) == sizeof(unsigned long long), + sizeof_uint64_is_not_sizeof_long_long); + return strtoull(nptr, endptr, base); } // Although it returns an int, atoi() is implemented in terms of strtol, and // so has differing overflow and underflow behavior. atol is the same. -inline int32 atoi32(const char *nptr) { - return strto32(nptr, NULL, 10); +inline int32 atoi32(const char* nptr) { + return strto32(nptr, NULL, 10); } -inline int64 atoi64(const char *nptr) { - return strto64(nptr, NULL, 10); +inline int64 atoi64(const char* nptr) { + return strto64(nptr, NULL, 10); } // Convenience versions of the above that take a string argument. 
-inline int32 atoi32(const string &s) { - return atoi32(s.c_str()); +inline int32 atoi32(const string& s) { + return atoi32(s.c_str()); } -inline int64 atoi64(const string &s) { - return atoi64(s.c_str()); +inline int64 atoi64(const string& s) { + return atoi64(s.c_str()); } -#endif // BASE_STRTOINT_H_ +#endif // BASE_STRTOINT_H_ diff --git a/be/src/gutil/sysinfo-test.cc b/be/src/gutil/sysinfo-test.cc index 85ccd11448f7c3..3036dfa705f05c 100644 --- a/be/src/gutil/sysinfo-test.cc +++ b/be/src/gutil/sysinfo-test.cc @@ -22,50 +22,50 @@ namespace doris { TEST(SysInfoTest, ReadMaxCpuIndexTest) { - using base::ParseMaxCpuIndex; - EXPECT_EQ(0, ParseMaxCpuIndex("0\n")); - EXPECT_EQ(1, ParseMaxCpuIndex("1\n")); - EXPECT_EQ(7, ParseMaxCpuIndex("0-7\n")); - EXPECT_EQ(40, ParseMaxCpuIndex("0-7,30-40\n")); - EXPECT_EQ(143, ParseMaxCpuIndex("2,4-127,128-143\n")); - EXPECT_EQ(44, ParseMaxCpuIndex("44-44\n")); + using base::ParseMaxCpuIndex; + EXPECT_EQ(0, ParseMaxCpuIndex("0\n")); + EXPECT_EQ(1, ParseMaxCpuIndex("1\n")); + EXPECT_EQ(7, ParseMaxCpuIndex("0-7\n")); + EXPECT_EQ(40, ParseMaxCpuIndex("0-7,30-40\n")); + EXPECT_EQ(143, ParseMaxCpuIndex("2,4-127,128-143\n")); + EXPECT_EQ(44, ParseMaxCpuIndex("44-44\n")); - // Don't assume that ranges are in ascending order or non-overlapping, - // just in case. - EXPECT_EQ(8, ParseMaxCpuIndex("0-7,5-8\n")); - EXPECT_EQ(7, ParseMaxCpuIndex("0-7,5-6\n")); - EXPECT_EQ(3, ParseMaxCpuIndex("2-3,0-1\n")); - EXPECT_EQ(3, ParseMaxCpuIndex("2-3,0\n")); - EXPECT_EQ(3, ParseMaxCpuIndex("3,0-2\n")); + // Don't assume that ranges are in ascending order or non-overlapping, + // just in case. + EXPECT_EQ(8, ParseMaxCpuIndex("0-7,5-8\n")); + EXPECT_EQ(7, ParseMaxCpuIndex("0-7,5-6\n")); + EXPECT_EQ(3, ParseMaxCpuIndex("2-3,0-1\n")); + EXPECT_EQ(3, ParseMaxCpuIndex("2-3,0\n")); + EXPECT_EQ(3, ParseMaxCpuIndex("3,0-2\n")); - // Invalid inputs. 
- EXPECT_EQ(-1, ParseMaxCpuIndex("")); - EXPECT_EQ(-1, ParseMaxCpuIndex("\n")); - EXPECT_EQ(-1, ParseMaxCpuIndex(" ")); - EXPECT_EQ(-1, ParseMaxCpuIndex("a\n")); - EXPECT_EQ(-1, ParseMaxCpuIndex("0\n1\n")); - EXPECT_EQ(-1, ParseMaxCpuIndex("\n1\n")); - EXPECT_EQ(-1, ParseMaxCpuIndex("\n1")); - EXPECT_EQ(-1, ParseMaxCpuIndex("0-\n")); - EXPECT_EQ(-1, ParseMaxCpuIndex("-2\n")); - EXPECT_EQ(-1, ParseMaxCpuIndex("1-9qwerty\n")); - EXPECT_EQ(-1, ParseMaxCpuIndex("1-9,0-\n")); - EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,3-\n")); - EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,a-4\n")); - EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,3@4\n")); - EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,\n")); - EXPECT_EQ(-1, ParseMaxCpuIndex("3-2\n")); + // Invalid inputs. + EXPECT_EQ(-1, ParseMaxCpuIndex("")); + EXPECT_EQ(-1, ParseMaxCpuIndex("\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex(" ")); + EXPECT_EQ(-1, ParseMaxCpuIndex("a\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("0\n1\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("\n1\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("\n1")); + EXPECT_EQ(-1, ParseMaxCpuIndex("0-\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("-2\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1-9qwerty\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1-9,0-\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,3-\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,a-4\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,3@4\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("1,2,\n")); + EXPECT_EQ(-1, ParseMaxCpuIndex("3-2\n")); - // Overflows in various positions. 
- EXPECT_EQ(-1, ParseMaxCpuIndex("2147483648")); // 2^31 - EXPECT_EQ(-1, ParseMaxCpuIndex("18446744073709551617")); // 2^64 + 1 - EXPECT_EQ(-1, ParseMaxCpuIndex("999999999999999999999999999999999999999999999999")); - EXPECT_EQ(-1, ParseMaxCpuIndex("0-2147483648")); // 2^31 - EXPECT_EQ(-1, ParseMaxCpuIndex("0-18446744073709551617")); // 2^64 + 1 - EXPECT_EQ(-1, ParseMaxCpuIndex("0-999999999999999999999999999999999999999999999999")); - EXPECT_EQ(-1, ParseMaxCpuIndex("2147483648-1")); // 2^31 - EXPECT_EQ(-1, ParseMaxCpuIndex("18446744073709551617-1")); // 2^64 + 1 - EXPECT_EQ(-1, ParseMaxCpuIndex("999999999999999999999999999999999999999999999999-1")); + // Overflows in various positions. + EXPECT_EQ(-1, ParseMaxCpuIndex("2147483648")); // 2^31 + EXPECT_EQ(-1, ParseMaxCpuIndex("18446744073709551617")); // 2^64 + 1 + EXPECT_EQ(-1, ParseMaxCpuIndex("999999999999999999999999999999999999999999999999")); + EXPECT_EQ(-1, ParseMaxCpuIndex("0-2147483648")); // 2^31 + EXPECT_EQ(-1, ParseMaxCpuIndex("0-18446744073709551617")); // 2^64 + 1 + EXPECT_EQ(-1, ParseMaxCpuIndex("0-999999999999999999999999999999999999999999999999")); + EXPECT_EQ(-1, ParseMaxCpuIndex("2147483648-1")); // 2^31 + EXPECT_EQ(-1, ParseMaxCpuIndex("18446744073709551617-1")); // 2^64 + 1 + EXPECT_EQ(-1, ParseMaxCpuIndex("999999999999999999999999999999999999999999999999-1")); } } // namespace doris diff --git a/be/src/gutil/sysinfo.cc b/be/src/gutil/sysinfo.cc index 4365b18ff8c65c..ce43f5fa069596 100644 --- a/be/src/gutil/sysinfo.cc +++ b/be/src/gutil/sysinfo.cc @@ -29,43 +29,42 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#if (defined(_WIN32) || defined(__MINGW32__)) && !defined(__CYGWIN__) && !defined(__CYGWIN32) -# define PLATFORM_WINDOWS 1 +#define PLATFORM_WINDOWS 1 #endif #include -#include // for open() -#include // for read() +#include // for open() +#include // for read() -#if defined __MACH__ // Mac OS X, almost certainly -#include // how we figure out numcpu's on OS X +#if defined __MACH__ // Mac OS X, almost certainly +#include // how we figure out numcpu's on OS X #include #elif defined __FreeBSD__ #include -#elif defined __sun__ // Solaris -#include // for, e.g., prmap_t +#elif defined __sun__ // Solaris +#include // for, e.g., prmap_t #elif defined(PLATFORM_WINDOWS) -#include // for getpid() (actually, _getpid()) -#include // for SHGetValueA() -#include // for Module32First() +#include // for getpid() (actually, _getpid()) +#include // for SHGetValueA() +#include // for Module32First() #endif -#include "gutil/sysinfo.h" +#include #include -#include // for errno -#include // for snprintf(), sscanf() -#include // for getenv() -#include // for memmove(), memchr(), etc. +#include // for errno +#include // for snprintf(), sscanf() +#include // for getenv() +#include // for memmove(), memchr(), etc. #include #include #include -#include - -#include "gutil/dynamic_annotations.h" // for RunningOnValgrind +#include "gutil/dynamic_annotations.h" // for RunningOnValgrind #include "gutil/integral_types.h" #include "gutil/macros.h" #include "gutil/port.h" +#include "gutil/sysinfo.h" #include "gutil/walltime.h" using std::numeric_limits; @@ -83,35 +82,34 @@ namespace base { // hooks and such. 
// ---------------------------------------------------------------------- -static double cpuinfo_cycles_per_second = 1.0; // 0.0 might be dangerous -static int cpuinfo_num_cpus = 1; // Conservative guess +static double cpuinfo_cycles_per_second = 1.0; // 0.0 might be dangerous +static int cpuinfo_num_cpus = 1; // Conservative guess static int cpuinfo_max_cpu_index = -1; void SleepForNanoseconds(int64_t nanoseconds) { - // Sleep for nanosecond duration - struct timespec sleep_time; - sleep_time.tv_sec = nanoseconds / 1000 / 1000 / 1000; - sleep_time.tv_nsec = (nanoseconds % (1000 * 1000 * 1000)); - while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) - ; // Ignore signals and wait for the full interval to elapse. + // Sleep for nanosecond duration + struct timespec sleep_time; + sleep_time.tv_sec = nanoseconds / 1000 / 1000 / 1000; + sleep_time.tv_nsec = (nanoseconds % (1000 * 1000 * 1000)); + while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) + ; // Ignore signals and wait for the full interval to elapse. } void SleepForMilliseconds(int64_t milliseconds) { - SleepForNanoseconds(milliseconds * 1000 * 1000); + SleepForNanoseconds(milliseconds * 1000 * 1000); } // Helper function estimates cycles/sec by observing cycles elapsed during // sleep(). Using small sleep time decreases accuracy significantly. 
static int64 EstimateCyclesPerSecond(const int estimate_time_ms) { - CHECK(estimate_time_ms > 0); - if (estimate_time_ms <= 0) - return 1; - double multiplier = 1000.0 / (double)estimate_time_ms; // scale by this much - - const int64 start_ticks = CycleClock::Now(); - SleepForMilliseconds(estimate_time_ms); - const int64 guess = int64(multiplier * (CycleClock::Now() - start_ticks)); - return guess; + CHECK(estimate_time_ms > 0); + if (estimate_time_ms <= 0) return 1; + double multiplier = 1000.0 / (double)estimate_time_ms; // scale by this much + + const int64 start_ticks = CycleClock::Now(); + SleepForMilliseconds(estimate_time_ms); + const int64 guess = int64(multiplier * (CycleClock::Now() - start_ticks)); + return guess; } // ReadIntFromFile is only called on linux and cygwin platforms. @@ -124,101 +122,101 @@ static int64 EstimateCyclesPerSecond(const int estimate_time_ms) { // 'buflen' must be more than large enough to hold the whole file, or else this will // issue a FATAL error. 
static bool SlurpSmallTextFile(const char* file, char* buf, int buflen) { - bool ret = false; - int fd; - RETRY_ON_EINTR(fd, open(file, O_RDONLY)); - if (fd == -1) return ret; - - memset(buf, '\0', buflen); - int n; - RETRY_ON_EINTR(n, read(fd, buf, buflen - 1)); - CHECK_NE(n, buflen - 1) << "buffer of len " << buflen << " not large enough to store " - << "contents of " << file; - if (n > 0) { - ret = true; - } - - int close_ret; - RETRY_ON_EINTR(close_ret, close(fd)); - if (PREDICT_FALSE(close_ret != 0)) { - PLOG(WARNING) << "Failed to close fd " << fd; - } - - return ret; + bool ret = false; + int fd; + RETRY_ON_EINTR(fd, open(file, O_RDONLY)); + if (fd == -1) return ret; + + memset(buf, '\0', buflen); + int n; + RETRY_ON_EINTR(n, read(fd, buf, buflen - 1)); + CHECK_NE(n, buflen - 1) << "buffer of len " << buflen << " not large enough to store " + << "contents of " << file; + if (n > 0) { + ret = true; + } + + int close_ret; + RETRY_ON_EINTR(close_ret, close(fd)); + if (PREDICT_FALSE(close_ret != 0)) { + PLOG(WARNING) << "Failed to close fd " << fd; + } + + return ret; } // Helper function for reading an int from a file. Returns true if successful // and the memory location pointed to by value is set to the value read. 
-static bool ReadIntFromFile(const char *file, int *value) { - char line[1024]; - if (!SlurpSmallTextFile(file, line, arraysize(line))) { +static bool ReadIntFromFile(const char* file, int* value) { + char line[1024]; + if (!SlurpSmallTextFile(file, line, arraysize(line))) { + return false; + } + char* err; + const int temp_value = strtol(line, &err, 10); + if (line[0] != '\0' && (*err == '\n' || *err == '\0')) { + *value = temp_value; + return true; + } return false; - } - char* err; - const int temp_value = strtol(line, &err, 10); - if (line[0] != '\0' && (*err == '\n' || *err == '\0')) { - *value = temp_value; - return true; - } - return false; } static int ReadMaxCPUIndex() { - char buf[1024]; - // TODO(tarmstrong): KUDU-2730: 'present' doesn't include CPUs that could be hotplugged - // in the future. 'possible' does, but using it instead could result in a blow-up in the - // number of per-CPU data structures. - CHECK(SlurpSmallTextFile("/sys/devices/system/cpu/present", buf, arraysize(buf))); - int max_idx = ParseMaxCpuIndex(buf); - CHECK_GE(max_idx, 0) << "unable to parse max CPU index from: " << buf; - return max_idx; + char buf[1024]; + // TODO(tarmstrong): KUDU-2730: 'present' doesn't include CPUs that could be hotplugged + // in the future. 'possible' does, but using it instead could result in a blow-up in the + // number of per-CPU data structures. + CHECK(SlurpSmallTextFile("/sys/devices/system/cpu/present", buf, arraysize(buf))); + int max_idx = ParseMaxCpuIndex(buf); + CHECK_GE(max_idx, 0) << "unable to parse max CPU index from: " << buf; + return max_idx; } int ParseMaxCpuIndex(const char* str) { - DCHECK(str != nullptr); - const char* pos = str; - // Initialize max_idx to invalid so we can just return if we find zero ranges. 
- int max_idx = -1; - - while (true) { - const char* range_start = pos; - const char* dash = nullptr; - // Scan forward until we find the separator indicating end of range, which is always a - // newline or comma if the input is valid. - for (; *pos != ',' && *pos != '\n'; pos++) { - // Check for early end of string - bail here to avoid advancing past end. - if (*pos == '\0') return -1; - if (*pos == '-') { - // Multiple dashes in range is invalid. - if (dash != nullptr) return -1; - dash = pos; - } else if (!isdigit(*pos)) { - return -1; - } - } + DCHECK(str != nullptr); + const char* pos = str; + // Initialize max_idx to invalid so we can just return if we find zero ranges. + int max_idx = -1; + + while (true) { + const char* range_start = pos; + const char* dash = nullptr; + // Scan forward until we find the separator indicating end of range, which is always a + // newline or comma if the input is valid. + for (; *pos != ',' && *pos != '\n'; pos++) { + // Check for early end of string - bail here to avoid advancing past end. + if (*pos == '\0') return -1; + if (*pos == '-') { + // Multiple dashes in range is invalid. + if (dash != nullptr) return -1; + dash = pos; + } else if (!isdigit(*pos)) { + return -1; + } + } - // At this point we found a range [range_start, pos) comprised of digits and an - // optional dash. - const char* num_start = dash == nullptr ? range_start : dash + 1; - // Check for ranges with missing numbers, e.g. "", "3-", "-3". - if (num_start == pos || dash == range_start) return -1; - // The numbers are comprised only of digits, so it can only fail if it is out of - // range of int (the return type of this function). 
- unsigned long start_idx = strtoul(range_start, nullptr, 10); - if (start_idx > numeric_limits::max()) return -1; - unsigned long end_idx = strtoul(num_start, nullptr, 10); - if (end_idx > numeric_limits::max() || start_idx > end_idx) { - return -1; + // At this point we found a range [range_start, pos) comprised of digits and an + // optional dash. + const char* num_start = dash == nullptr ? range_start : dash + 1; + // Check for ranges with missing numbers, e.g. "", "3-", "-3". + if (num_start == pos || dash == range_start) return -1; + // The numbers are comprised only of digits, so it can only fail if it is out of + // range of int (the return type of this function). + unsigned long start_idx = strtoul(range_start, nullptr, 10); + if (start_idx > numeric_limits::max()) return -1; + unsigned long end_idx = strtoul(num_start, nullptr, 10); + if (end_idx > numeric_limits::max() || start_idx > end_idx) { + return -1; + } + // Keep track of the max index we've seen so far. + max_idx = std::max(static_cast(end_idx), max_idx); + // End of line, expect no more input. + if (*pos == '\n') break; + ++pos; } - // Keep track of the max index we've seen so far. - max_idx = std::max(static_cast(end_idx), max_idx); - // End of line, expect no more input. - if (*pos == '\n') break; - ++pos; - } - // String must have a single newline at the very end. - if (*pos != '\n' || *(pos + 1) != '\0') return -1; - return max_idx; + // String must have a single newline at the very end. + if (*pos != '\n' || *(pos + 1) != '\0') return -1; + return max_idx; } #endif @@ -231,244 +229,237 @@ int ParseMaxCpuIndex(const char* str) { // memory. 
static void InitializeSystemInfo() { - static bool already_called = false; // safe if we run before threads - if (already_called) return; - already_called = true; + static bool already_called = false; // safe if we run before threads + if (already_called) return; + already_called = true; + + bool saw_mhz = false; + + if (RunningOnValgrind()) { + // Valgrind may slow the progress of time artificially (--scale-time=N + // option). We thus can't rely on CPU Mhz info stored in /sys or /proc + // files. Thus, actually measure the cps. + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(100); + saw_mhz = true; + } - bool saw_mhz = false; +#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__) + char line[1024]; + char* err; + int freq; + + // If the kernel is exporting the tsc frequency use that. There are issues + // where cpuinfo_max_freq cannot be relied on because the BIOS may be + // exporintg an invalid p-state (on x86) or p-states may be used to put the + // processor in a new mode (turbo mode). Essentially, those frequencies + // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as + // well. + if (!saw_mhz && ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) { + // The value is in kHz (as the file name suggests). For example, on a + // 2GHz warpstation, the file contains the value "2000000". + cpuinfo_cycles_per_second = freq * 1000.0; + saw_mhz = true; + } - if (RunningOnValgrind()) { - // Valgrind may slow the progress of time artificially (--scale-time=N - // option). We thus can't rely on CPU Mhz info stored in /sys or /proc - // files. Thus, actually measure the cps. - cpuinfo_cycles_per_second = EstimateCyclesPerSecond(100); - saw_mhz = true; - } + // If CPU scaling is in effect, we want to use the *maximum* frequency, + // not whatever CPU speed some random processor happens to be using now. 
+ if (!saw_mhz && + ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", &freq)) { + // The value is in kHz. For example, on a 2GHz machine, the file + // contains the value "2000000". + cpuinfo_cycles_per_second = freq * 1000.0; + saw_mhz = true; + } -#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__) - char line[1024]; - char* err; - int freq; - - // If the kernel is exporting the tsc frequency use that. There are issues - // where cpuinfo_max_freq cannot be relied on because the BIOS may be - // exporintg an invalid p-state (on x86) or p-states may be used to put the - // processor in a new mode (turbo mode). Essentially, those frequencies - // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as - // well. - if (!saw_mhz && - ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) { - // The value is in kHz (as the file name suggests). For example, on a - // 2GHz warpstation, the file contains the value "2000000". - cpuinfo_cycles_per_second = freq * 1000.0; - saw_mhz = true; - } - - // If CPU scaling is in effect, we want to use the *maximum* frequency, - // not whatever CPU speed some random processor happens to be using now. - if (!saw_mhz && - ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", - &freq)) { - // The value is in kHz. For example, on a 2GHz machine, the file - // contains the value "2000000". - cpuinfo_cycles_per_second = freq * 1000.0; - saw_mhz = true; - } - - // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq. - const char* pname = "/proc/cpuinfo"; - int fd; - RETRY_ON_EINTR(fd, open(pname, O_RDONLY)); - if (fd == -1) { - PLOG(FATAL) << "Unable to read CPU info from /proc. 
procfs must be mounted."; - } - - double bogo_clock = 1.0; - bool saw_bogo = false; - int num_cpus = 0; - line[0] = line[1] = '\0'; - int chars_read = 0; - do { // we'll exit when the last read didn't read anything - // Move the next line to the beginning of the buffer - const int oldlinelen = strlen(line); - if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line - line[0] = '\0'; - else // still other lines left to save - memmove(line, line + oldlinelen+1, sizeof(line) - (oldlinelen+1)); - // Terminate the new line, reading more if we can't find the newline - char* newline = strchr(line, '\n'); - if (newline == NULL) { - const int linelen = strlen(line); - const int bytes_to_read = sizeof(line)-1 - linelen; - CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes - RETRY_ON_EINTR(chars_read, read(fd, line + linelen, bytes_to_read)); - line[linelen + chars_read] = '\0'; - newline = strchr(line, '\n'); + // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq. + const char* pname = "/proc/cpuinfo"; + int fd; + RETRY_ON_EINTR(fd, open(pname, O_RDONLY)); + if (fd == -1) { + PLOG(FATAL) << "Unable to read CPU info from /proc. 
procfs must be mounted."; } - if (newline != NULL) - *newline = '\0'; -#if defined(__powerpc__) || defined(__ppc__) - // PowerPC cpus report the frequency in "clock" line - if (strncasecmp(line, "clock", sizeof("clock")-1) == 0) { - const char* freqstr = strchr(line, ':'); - if (freqstr) { - // PowerPC frequencies are only reported as MHz (check 'show_cpuinfo' - // function at arch/powerpc/kernel/setup-common.c) - char *endp = strstr(line, "MHz"); - if (endp) { - *endp = 0; - cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0; - if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) - saw_mhz = true; + double bogo_clock = 1.0; + bool saw_bogo = false; + int num_cpus = 0; + line[0] = line[1] = '\0'; + int chars_read = 0; + do { // we'll exit when the last read didn't read anything + // Move the next line to the beginning of the buffer + const int oldlinelen = strlen(line); + if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line + line[0] = '\0'; + else // still other lines left to save + memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1)); + // Terminate the new line, reading more if we can't find the newline + char* newline = strchr(line, '\n'); + if (newline == NULL) { + const int linelen = strlen(line); + const int bytes_to_read = sizeof(line) - 1 - linelen; + CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes + RETRY_ON_EINTR(chars_read, read(fd, line + linelen, bytes_to_read)); + line[linelen + chars_read] = '\0'; + newline = strchr(line, '\n'); } - } + if (newline != NULL) *newline = '\0'; + +#if defined(__powerpc__) || defined(__ppc__) + // PowerPC cpus report the frequency in "clock" line + if (strncasecmp(line, "clock", sizeof("clock") - 1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) { + // PowerPC frequencies are only reported as MHz (check 'show_cpuinfo' + // function at arch/powerpc/kernel/setup-common.c) + char* endp = strstr(line, "MHz"); + if 
(endp) { + *endp = 0; + cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0; + if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) + saw_mhz = true; + } + } #else - // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only - // accept postive values. Some environments (virtual machines) report zero, - // which would cause infinite looping in WallTime_Init. - if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) { - const char* freqstr = strchr(line, ':'); - if (freqstr) { - cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0; - if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) - saw_mhz = true; - } - } else if (strncasecmp(line, "bogomips", sizeof("bogomips")-1) == 0) { - const char* freqstr = strchr(line, ':'); - if (freqstr) { - bogo_clock = strtod(freqstr+1, &err) * 1000000.0; - if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0) - saw_bogo = true; - } + // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only + // accept postive values. Some environments (virtual machines) report zero, + // which would cause infinite looping in WallTime_Init. 
+ if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz") - 1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) { + cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0; + if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) + saw_mhz = true; + } + } else if (strncasecmp(line, "bogomips", sizeof("bogomips") - 1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) { + bogo_clock = strtod(freqstr + 1, &err) * 1000000.0; + if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0) saw_bogo = true; + } #endif - } else if (strncasecmp(line, "processor", sizeof("processor")-1) == 0) { - num_cpus++; // count up every time we see an "processor :" entry + } else if (strncasecmp(line, "processor", sizeof("processor") - 1) == 0) { + num_cpus++; // count up every time we see an "processor :" entry + } + } while (chars_read > 0); + int ret; + RETRY_ON_EINTR(ret, close(fd)); + if (PREDICT_FALSE(ret != 0)) { + PLOG(WARNING) << "Failed to close fd " << fd; } - } while (chars_read > 0); - int ret; - RETRY_ON_EINTR(ret, close(fd)); - if (PREDICT_FALSE(ret != 0)) { - PLOG(WARNING) << "Failed to close fd " << fd; - } - - if (!saw_mhz) { - if (saw_bogo) { - // If we didn't find anything better, we'll use bogomips, but - // we're not happy about it. - cpuinfo_cycles_per_second = bogo_clock; - } else { - // If we don't even have bogomips, we'll use the slow estimation. - cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + + if (!saw_mhz) { + if (saw_bogo) { + // If we didn't find anything better, we'll use bogomips, but + // we're not happy about it. + cpuinfo_cycles_per_second = bogo_clock; + } else { + // If we don't even have bogomips, we'll use the slow estimation. 
+ cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + } + } + if (cpuinfo_cycles_per_second == 0.0) { + cpuinfo_cycles_per_second = 1.0; // maybe unnecessary, but safe } - } - if (cpuinfo_cycles_per_second == 0.0) { - cpuinfo_cycles_per_second = 1.0; // maybe unnecessary, but safe - } - if (num_cpus > 0) { - cpuinfo_num_cpus = num_cpus; - } - cpuinfo_max_cpu_index = ReadMaxCPUIndex(); + if (num_cpus > 0) { + cpuinfo_num_cpus = num_cpus; + } + cpuinfo_max_cpu_index = ReadMaxCPUIndex(); #elif defined __FreeBSD__ - // For this sysctl to work, the machine must be configured without - // SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0 - // and later. Before that, it's a 32-bit quantity (and gives the - // wrong answer on machines faster than 2^32 Hz). See - // http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html - // But also compare FreeBSD 7.0: - // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223 - // 231 error = sysctl_handle_quad(oidp, &freq, 0, req); - // To FreeBSD 6.3 (it's the same in 6-STABLE): - // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131 - // 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); + // For this sysctl to work, the machine must be configured without + // SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0 + // and later. Before that, it's a 32-bit quantity (and gives the + // wrong answer on machines faster than 2^32 Hz). 
See + // http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html + // But also compare FreeBSD 7.0: + // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223 + // 231 error = sysctl_handle_quad(oidp, &freq, 0, req); + // To FreeBSD 6.3 (it's the same in 6-STABLE): + // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131 + // 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); #if __FreeBSD__ >= 7 - uint64_t hz = 0; + uint64_t hz = 0; #else - unsigned int hz = 0; + unsigned int hz = 0; #endif - size_t sz = sizeof(hz); - const char *sysctl_path = "machdep.tsc_freq"; - if ( sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0 ) { - fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", - sysctl_path, strerror(errno)); - cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); - } else { - cpuinfo_cycles_per_second = hz; - } - // TODO(csilvers): also figure out cpuinfo_num_cpus + size_t sz = sizeof(hz); + const char* sysctl_path = "machdep.tsc_freq"; + if (sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0) { + fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", sysctl_path, + strerror(errno)); + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + } else { + cpuinfo_cycles_per_second = hz; + } + // TODO(csilvers): also figure out cpuinfo_num_cpus #elif defined(PLATFORM_WINDOWS) -# pragma comment(lib, "shlwapi.lib") // for SHGetValue() - // In NT, read MHz from the registry. If we fail to do so or we're in win9x - // then make a crude estimate. 
- OSVERSIONINFO os; - os.dwOSVersionInfoSize = sizeof(os); - DWORD data, data_size = sizeof(data); - if (GetVersionEx(&os) && - os.dwPlatformId == VER_PLATFORM_WIN32_NT && - SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE, - "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", - "~MHz", NULL, &data, &data_size))) - cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz - else - cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500? - - // Get the number of processors. - SYSTEM_INFO info; - GetSystemInfo(&info); - cpuinfo_num_cpus = info.dwNumberOfProcessors; +#pragma comment(lib, "shlwapi.lib") // for SHGetValue() + // In NT, read MHz from the registry. If we fail to do so or we're in win9x + // then make a crude estimate. + OSVERSIONINFO os; + os.dwOSVersionInfoSize = sizeof(os); + DWORD data, data_size = sizeof(data); + if (GetVersionEx(&os) && os.dwPlatformId == VER_PLATFORM_WIN32_NT && + SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE, + "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", "~MHz", NULL, + &data, &data_size))) + cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz + else + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500? + + // Get the number of processors. + SYSTEM_INFO info; + GetSystemInfo(&info); + cpuinfo_num_cpus = info.dwNumberOfProcessors; #elif defined(__MACH__) && defined(__APPLE__) - // returning "mach time units" per second. the current number of elapsed - // mach time units can be found by calling uint64 mach_absolute_time(); - // while not as precise as actual CPU cycles, it is accurate in the face - // of CPU frequency scaling and multi-cpu/core machines. - // Our mac users have these types of machines, and accuracy - // (i.e. correctness) trumps precision. - // See cycleclock.h: CycleClock::Now(), which returns number of mach time - // units on Mac OS X. 
- mach_timebase_info_data_t timebase_info; - mach_timebase_info(&timebase_info); - double mach_time_units_per_nanosecond = - static_cast(timebase_info.denom) / - static_cast(timebase_info.numer); - cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9; - - int num_cpus = 0; - size_t size = sizeof(num_cpus); - int numcpus_name[] = { CTL_HW, HW_NCPU }; - if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, nullptr, 0) - == 0 - && (size == sizeof(num_cpus))) - cpuinfo_num_cpus = num_cpus; + // returning "mach time units" per second. the current number of elapsed + // mach time units can be found by calling uint64 mach_absolute_time(); + // while not as precise as actual CPU cycles, it is accurate in the face + // of CPU frequency scaling and multi-cpu/core machines. + // Our mac users have these types of machines, and accuracy + // (i.e. correctness) trumps precision. + // See cycleclock.h: CycleClock::Now(), which returns number of mach time + // units on Mac OS X. + mach_timebase_info_data_t timebase_info; + mach_timebase_info(&timebase_info); + double mach_time_units_per_nanosecond = + static_cast(timebase_info.denom) / static_cast(timebase_info.numer); + cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9; + + int num_cpus = 0; + size_t size = sizeof(num_cpus); + int numcpus_name[] = {CTL_HW, HW_NCPU}; + if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, nullptr, 0) == 0 && + (size == sizeof(num_cpus))) + cpuinfo_num_cpus = num_cpus; #else - // Generic cycles per second counter - cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + // Generic cycles per second counter + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); #endif - // On platforms where we can't determine the max CPU index, just use the - // number of CPUs. This might break if CPUs are taken offline, but - // better than a wild guess. 
- if (cpuinfo_max_cpu_index < 0) { - cpuinfo_max_cpu_index = cpuinfo_num_cpus - 1; - } + // On platforms where we can't determine the max CPU index, just use the + // number of CPUs. This might break if CPUs are taken offline, but + // better than a wild guess. + if (cpuinfo_max_cpu_index < 0) { + cpuinfo_max_cpu_index = cpuinfo_num_cpus - 1; + } } double CyclesPerSecond(void) { - InitializeSystemInfo(); - return cpuinfo_cycles_per_second; + InitializeSystemInfo(); + return cpuinfo_cycles_per_second; } int NumCPUs(void) { - InitializeSystemInfo(); - return cpuinfo_num_cpus; + InitializeSystemInfo(); + return cpuinfo_num_cpus; } int MaxCPUIndex(void) { - InitializeSystemInfo(); - return cpuinfo_max_cpu_index; + InitializeSystemInfo(); + return cpuinfo_max_cpu_index; } } // namespace base diff --git a/be/src/gutil/sysinfo.h b/be/src/gutil/sysinfo.h index d46cfe55049404..49069398d65d5f 100644 --- a/be/src/gutil/sysinfo.h +++ b/be/src/gutil/sysinfo.h @@ -1,11 +1,11 @@ // -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- // Copyright (c) 2006, Google Inc. // All rights reserved. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above @@ -15,7 +15,7 @@ // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. 
-// +// // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -66,4 +66,4 @@ extern double CyclesPerSecond(void); extern int ParseMaxCpuIndex(const char* str); } // namespace base -#endif /* #ifndef _SYSINFO_H_ */ +#endif /* #ifndef _SYSINFO_H_ */ diff --git a/be/src/gutil/template_util.h b/be/src/gutil/template_util.h index 007f84d2290e66..fa245110721bb7 100644 --- a/be/src/gutil/template_util.h +++ b/be/src/gutil/template_util.h @@ -56,7 +56,7 @@ namespace base { typedef char small_; struct big_ { - char dummy[2]; + char dummy[2]; }; // Types YesType and NoType are guaranteed such that sizeof(YesType) < @@ -67,7 +67,7 @@ typedef big_ NoType; // Identity metafunction. template struct identity_ { - typedef T type; + typedef T type; }; // integral_constant, defined in tr1, is a wrapper for an integer @@ -75,90 +75,88 @@ struct identity_ { // with hardcoding the integer type to bool. We use the fully // general integer_constant for compatibility with tr1. -template +template struct integral_constant { - static const T value = v; - typedef T value_type; - typedef integral_constant type; + static const T value = v; + typedef T value_type; + typedef integral_constant type; }; -template const T integral_constant::value; - +template +const T integral_constant::value; // Abbreviations: true_type and false_type are structs that represent boolean // true and false values. Also define the boost::mpl versions of those names, // true_ and false_. 
-typedef integral_constant true_type; +typedef integral_constant true_type; typedef integral_constant false_type; -typedef true_type true_; +typedef true_type true_; typedef false_type false_; -template struct is_non_const_reference : false_type {}; -template struct is_non_const_reference : true_type {}; -template struct is_non_const_reference : false_type {}; +template +struct is_non_const_reference : false_type {}; +template +struct is_non_const_reference : true_type {}; +template +struct is_non_const_reference : false_type {}; -template struct is_const : false_type {}; -template struct is_const : true_type {}; +template +struct is_const : false_type {}; +template +struct is_const : true_type {}; -template struct is_void : false_type {}; -template <> struct is_void : true_type {}; +template +struct is_void : false_type {}; +template <> +struct is_void : true_type {}; // if_ is a templatize conditional statement. // if_ is a compile time evaluation of cond. // if_<>::type contains A if cond is true, B otherwise. -template -struct if_{ - typedef A type; +template +struct if_ { + typedef A type; }; -template +template struct if_ { - typedef B type; + typedef B type; }; - // type_equals_ is a template type comparator, similar to Loki IsSameType. // type_equals_::value is true iff "A" is the same type as "B". // // New code should prefer base::is_same, defined in base/type_traits.h. // It is functionally identical, but is_same is the standard spelling. -template -struct type_equals_ : public false_ { -}; +template +struct type_equals_ : public false_ {}; -template -struct type_equals_ : public true_ { -}; +template +struct type_equals_ : public true_ {}; // and_ is a template && operator. // and_::value evaluates "A::value && B::value". -template -struct and_ : public integral_constant { -}; +template +struct and_ : public integral_constant {}; // or_ is a template || operator. // or_::value evaluates "A::value || B::value". 
-template -struct or_ : public integral_constant { -}; +template +struct or_ : public integral_constant {}; // Used to determine if a type is a struct/union/class. Inspired by Boost's // is_class type_trait implementation. struct IsClassHelper { - template - static YesType Test(void(C::*)(void)); + template + static YesType Test(void (C::*)(void)); - template - static NoType Test(...); + template + static NoType Test(...); }; template -struct is_class - : integral_constant(0)) == - sizeof(YesType)> { -}; +struct is_class : integral_constant(0)) == sizeof(YesType)> {}; -} +} // namespace base -#endif // BASE_TEMPLATE_UTIL_H_ +#endif // BASE_TEMPLATE_UTIL_H_ diff --git a/be/src/gutil/threading/thread_collision_warner.cc b/be/src/gutil/threading/thread_collision_warner.cc index 26329cfb9c2e03..1a2991a14947d3 100644 --- a/be/src/gutil/threading/thread_collision_warner.cc +++ b/be/src/gutil/threading/thread_collision_warner.cc @@ -10,18 +10,17 @@ #include #endif +#include #include #include #include -#include - namespace base { void DCheckAsserter::warn(int64_t previous_thread_id, int64_t current_thread_id) { - LOG(FATAL) << "Thread Collision! Previous thread id: " << previous_thread_id - << ", current thread id: " << current_thread_id; + LOG(FATAL) << "Thread Collision! 
Previous thread id: " << previous_thread_id + << ", current thread id: " << current_thread_id; } #if 0 @@ -42,52 +41,50 @@ static subtle::Atomic32 CurrentThread() { static subtle::Atomic64 CurrentThread() { #if defined(__APPLE__) - uint64_t tid; - CHECK_EQ(0, pthread_threadid_np(NULL, &tid)); - return tid; + uint64_t tid; + CHECK_EQ(0, pthread_threadid_np(NULL, &tid)); + return tid; #elif defined(__linux__) - return syscall(__NR_gettid); + return syscall(__NR_gettid); #endif } #endif void ThreadCollisionWarner::EnterSelf() { - // If the active thread is 0 then I'll write the current thread ID - // if two or more threads arrive here only one will succeed to - // write on valid_thread_id_ the current thread ID. - subtle::Atomic64 current_thread_id = CurrentThread(); - - int64_t previous_thread_id = subtle::NoBarrier_CompareAndSwap(&valid_thread_id_, - 0, - current_thread_id); - if (previous_thread_id != 0 && previous_thread_id != current_thread_id) { - // gotcha! a thread is trying to use the same class and that is - // not current thread. - asserter_->warn(previous_thread_id, current_thread_id); - } - - subtle::NoBarrier_AtomicIncrement(&counter_, 1); + // If the active thread is 0 then I'll write the current thread ID + // if two or more threads arrive here only one will succeed to + // write on valid_thread_id_ the current thread ID. + subtle::Atomic64 current_thread_id = CurrentThread(); + + int64_t previous_thread_id = + subtle::NoBarrier_CompareAndSwap(&valid_thread_id_, 0, current_thread_id); + if (previous_thread_id != 0 && previous_thread_id != current_thread_id) { + // gotcha! a thread is trying to use the same class and that is + // not current thread. 
+ asserter_->warn(previous_thread_id, current_thread_id); + } + + subtle::NoBarrier_AtomicIncrement(&counter_, 1); } void ThreadCollisionWarner::Enter() { - subtle::Atomic64 current_thread_id = CurrentThread(); + subtle::Atomic64 current_thread_id = CurrentThread(); - int64_t previous_thread_id = subtle::NoBarrier_CompareAndSwap(&valid_thread_id_, - 0, - current_thread_id); - if (previous_thread_id != 0) { - // gotcha! another thread is trying to use the same class. - asserter_->warn(previous_thread_id, current_thread_id); - } + int64_t previous_thread_id = + subtle::NoBarrier_CompareAndSwap(&valid_thread_id_, 0, current_thread_id); + if (previous_thread_id != 0) { + // gotcha! another thread is trying to use the same class. + asserter_->warn(previous_thread_id, current_thread_id); + } - subtle::NoBarrier_AtomicIncrement(&counter_, 1); + subtle::NoBarrier_AtomicIncrement(&counter_, 1); } void ThreadCollisionWarner::Leave() { - if (subtle::Barrier_AtomicIncrement(&counter_, -1) == 0) { - subtle::NoBarrier_Store(&valid_thread_id_, 0); - } + if (subtle::Barrier_AtomicIncrement(&counter_, -1) == 0) { + subtle::NoBarrier_Store(&valid_thread_id_, 0); + } } -} // namespace base +} // namespace base diff --git a/be/src/gutil/threading/thread_collision_warner.h b/be/src/gutil/threading/thread_collision_warner.h index 5d9a9ac6921457..1007b7538d0f8f 100644 --- a/be/src/gutil/threading/thread_collision_warner.h +++ b/be/src/gutil/threading/thread_collision_warner.h @@ -100,24 +100,20 @@ // DFAKE_MUTEX(shareable_section_); // }; - #if !defined(NDEBUG) // Defines a class member that acts like a mutex. It is used only as a // verification tool. -#define DFAKE_MUTEX(obj) \ - mutable base::ThreadCollisionWarner obj +#define DFAKE_MUTEX(obj) mutable base::ThreadCollisionWarner obj // Asserts the call is never called simultaneously in two threads. Used at // member function scope. 
-#define DFAKE_SCOPED_LOCK(obj) \ - base::ThreadCollisionWarner::ScopedCheck s_check_##obj(&obj) +#define DFAKE_SCOPED_LOCK(obj) base::ThreadCollisionWarner::ScopedCheck s_check_##obj(&obj) // Asserts the call is never called simultaneously in two threads. Used at // member function scope. Same as DFAKE_SCOPED_LOCK but allows recursive locks. #define DFAKE_SCOPED_RECURSIVE_LOCK(obj) \ - base::ThreadCollisionWarner::ScopedRecursiveCheck sr_check_##obj(&obj) + base::ThreadCollisionWarner::ScopedRecursiveCheck sr_check_##obj(&obj) // Asserts the code is always executed in the same thread. -#define DFAKE_SCOPED_LOCK_THREAD_LOCKED(obj) \ - base::ThreadCollisionWarner::Check check_##obj(&obj) +#define DFAKE_SCOPED_LOCK_THREAD_LOCKED(obj) base::ThreadCollisionWarner::Check check_##obj(&obj) #else @@ -135,113 +131,98 @@ namespace base { // used. During the unit tests is used another class that doesn't "DCHECK" // in case of collision (check thread_collision_warner_unittests.cc) struct BASE_EXPORT AsserterBase { - virtual ~AsserterBase() {} - virtual void warn(int64_t previous_thread_id, int64_t current_thread_id) = 0; + virtual ~AsserterBase() {} + virtual void warn(int64_t previous_thread_id, int64_t current_thread_id) = 0; }; struct BASE_EXPORT DCheckAsserter : public AsserterBase { - virtual ~DCheckAsserter() {} - void warn(int64_t previous_thread_id, int64_t current_thread_id) override; + virtual ~DCheckAsserter() {} + void warn(int64_t previous_thread_id, int64_t current_thread_id) override; }; class BASE_EXPORT ThreadCollisionWarner { - public: - // The parameter asserter is there only for test purpose - explicit ThreadCollisionWarner(AsserterBase* asserter = new DCheckAsserter()) - : valid_thread_id_(0), - counter_(0), - asserter_(asserter) {} - - ~ThreadCollisionWarner() { - delete asserter_; - } - - // This class is meant to be used through the macro - // DFAKE_SCOPED_LOCK_THREAD_LOCKED - // it doesn't leave the critical section, as opposed to ScopedCheck, - // 
because the critical section being pinned is allowed to be used only - // from one thread - class BASE_EXPORT Check { - public: - explicit Check(ThreadCollisionWarner* warner) - : warner_(warner) { - warner_->EnterSelf(); - } - - ~Check() {} - - private: - ThreadCollisionWarner* warner_; - - DISALLOW_COPY_AND_ASSIGN(Check); - }; - - // This class is meant to be used through the macro - // DFAKE_SCOPED_LOCK - class BASE_EXPORT ScopedCheck { - public: - explicit ScopedCheck(ThreadCollisionWarner* warner) - : warner_(warner) { - warner_->Enter(); - } - - ~ScopedCheck() { - warner_->Leave(); - } - - private: - ThreadCollisionWarner* warner_; - - DISALLOW_COPY_AND_ASSIGN(ScopedCheck); - }; - - // This class is meant to be used through the macro - // DFAKE_SCOPED_RECURSIVE_LOCK - class BASE_EXPORT ScopedRecursiveCheck { - public: - explicit ScopedRecursiveCheck(ThreadCollisionWarner* warner) - : warner_(warner) { - warner_->EnterSelf(); - } - - ~ScopedRecursiveCheck() { - warner_->Leave(); - } - - private: - ThreadCollisionWarner* warner_; - - DISALLOW_COPY_AND_ASSIGN(ScopedRecursiveCheck); - }; - - private: - // This method stores the current thread identifier and does a DCHECK - // if a another thread has already done it, it is safe if same thread - // calls this multiple time (recursion allowed). - void EnterSelf(); - - // Same as EnterSelf but recursion is not allowed. - void Enter(); - - // Removes the thread_id stored in order to allow other threads to - // call EnterSelf or Enter. - void Leave(); - - // This stores the thread id that is inside the critical section, if the - // value is 0 then no thread is inside. - volatile subtle::Atomic64 valid_thread_id_; - - // Counter to trace how many time a critical section was "pinned" - // (when allowed) in order to unpin it when counter_ reaches 0. - volatile subtle::Atomic64 counter_; - - // Here only for class unit tests purpose, during the test I need to not - // DCHECK but notify the collision with something else. 
- AsserterBase* asserter_; - - DISALLOW_COPY_AND_ASSIGN(ThreadCollisionWarner); +public: + // The parameter asserter is there only for test purpose + explicit ThreadCollisionWarner(AsserterBase* asserter = new DCheckAsserter()) + : valid_thread_id_(0), counter_(0), asserter_(asserter) {} + + ~ThreadCollisionWarner() { delete asserter_; } + + // This class is meant to be used through the macro + // DFAKE_SCOPED_LOCK_THREAD_LOCKED + // it doesn't leave the critical section, as opposed to ScopedCheck, + // because the critical section being pinned is allowed to be used only + // from one thread + class BASE_EXPORT Check { + public: + explicit Check(ThreadCollisionWarner* warner) : warner_(warner) { warner_->EnterSelf(); } + + ~Check() {} + + private: + ThreadCollisionWarner* warner_; + + DISALLOW_COPY_AND_ASSIGN(Check); + }; + + // This class is meant to be used through the macro + // DFAKE_SCOPED_LOCK + class BASE_EXPORT ScopedCheck { + public: + explicit ScopedCheck(ThreadCollisionWarner* warner) : warner_(warner) { warner_->Enter(); } + + ~ScopedCheck() { warner_->Leave(); } + + private: + ThreadCollisionWarner* warner_; + + DISALLOW_COPY_AND_ASSIGN(ScopedCheck); + }; + + // This class is meant to be used through the macro + // DFAKE_SCOPED_RECURSIVE_LOCK + class BASE_EXPORT ScopedRecursiveCheck { + public: + explicit ScopedRecursiveCheck(ThreadCollisionWarner* warner) : warner_(warner) { + warner_->EnterSelf(); + } + + ~ScopedRecursiveCheck() { warner_->Leave(); } + + private: + ThreadCollisionWarner* warner_; + + DISALLOW_COPY_AND_ASSIGN(ScopedRecursiveCheck); + }; + +private: + // This method stores the current thread identifier and does a DCHECK + // if a another thread has already done it, it is safe if same thread + // calls this multiple time (recursion allowed). + void EnterSelf(); + + // Same as EnterSelf but recursion is not allowed. + void Enter(); + + // Removes the thread_id stored in order to allow other threads to + // call EnterSelf or Enter. 
+ void Leave(); + + // This stores the thread id that is inside the critical section, if the + // value is 0 then no thread is inside. + volatile subtle::Atomic64 valid_thread_id_; + + // Counter to trace how many time a critical section was "pinned" + // (when allowed) in order to unpin it when counter_ reaches 0. + volatile subtle::Atomic64 counter_; + + // Here only for class unit tests purpose, during the test I need to not + // DCHECK but notify the collision with something else. + AsserterBase* asserter_; + + DISALLOW_COPY_AND_ASSIGN(ThreadCollisionWarner); }; -} // namespace base +} // namespace base -#endif // BASE_THREADING_THREAD_COLLISION_WARNER_H_ +#endif // BASE_THREADING_THREAD_COLLISION_WARNER_H_ diff --git a/be/src/gutil/type_traits.h b/be/src/gutil/type_traits.h index b0810d3f4029e9..0a1b42c6ff1e67 100644 --- a/be/src/gutil/type_traits.h +++ b/be/src/gutil/type_traits.h @@ -61,124 +61,183 @@ #include using std::make_pair; -using std::pair; // For pair +using std::pair; // For pair -#include "gutil/template_util.h" // For true_type and false_type +#include "gutil/template_util.h" // For true_type and false_type namespace base { -template struct enable_if; -template struct is_integral; -template struct is_floating_point; -template struct is_pointer; -template struct is_array; +template +struct enable_if; +template +struct is_integral; +template +struct is_floating_point; +template +struct is_pointer; +template +struct is_array; // MSVC can't compile this correctly, and neither can gcc 3.3.5 (at least) #if !defined(_MSC_VER) && !(defined(__GNUC__) && __GNUC__ <= 3) // is_enum uses is_convertible, which is not available on MSVC. 
-template struct is_enum; +template +struct is_enum; #endif -template struct is_reference; -template struct is_pod; -template struct has_trivial_constructor; -template struct has_trivial_copy; -template struct has_trivial_assign; -template struct has_trivial_destructor; -template struct remove_const; -template struct remove_volatile; -template struct remove_cv; -template struct remove_reference; -template struct add_reference; -template struct remove_pointer; -template struct is_same; +template +struct is_reference; +template +struct is_pod; +template +struct has_trivial_constructor; +template +struct has_trivial_copy; +template +struct has_trivial_assign; +template +struct has_trivial_destructor; +template +struct remove_const; +template +struct remove_volatile; +template +struct remove_cv; +template +struct remove_reference; +template +struct add_reference; +template +struct remove_pointer; +template +struct is_same; #if !defined(_MSC_VER) && !(defined(__GNUC__) && __GNUC__ <= 3) -template struct is_convertible; +template +struct is_convertible; #endif // enable_if, equivalent semantics to c++11 std::enable_if, specifically: // "If B is true, the member typedef type shall equal T; otherwise, there // shall be no member typedef type." // Specified by 20.9.7.6 [Other transformations] -template struct enable_if { typedef T type; }; -template struct enable_if {}; +template +struct enable_if { + typedef T type; +}; +template +struct enable_if {}; // is_integral is false except for the built-in integer types. A // cv-qualified type is integral if and only if the underlying type is. 
-template struct is_integral : false_type { }; -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; +template +struct is_integral : false_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; #if defined(_MSC_VER) // wchar_t is not by default a distinct type from unsigned short in // Microsoft C. // See http://msdn2.microsoft.com/en-us/library/dh8che7s(VS.80).aspx -template<> struct is_integral<__wchar_t> : true_type { }; +template <> +struct is_integral<__wchar_t> : true_type {}; #else -template<> struct is_integral : true_type { }; +template <> +struct is_integral : true_type {}; #endif #if defined(__APPLE__) -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; #endif -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; #ifdef HAVE_LONG_LONG -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; #endif -template struct is_integral : is_integral { }; -template struct is_integral : 
is_integral { }; -template struct is_integral : is_integral { }; +template +struct is_integral : is_integral {}; +template +struct is_integral : is_integral {}; +template +struct is_integral : is_integral {}; // is_floating_point is false except for the built-in floating-point types. // A cv-qualified type is integral if and only if the underlying type is. -template struct is_floating_point : false_type { }; -template<> struct is_floating_point : true_type { }; -template<> struct is_floating_point : true_type { }; -template<> struct is_floating_point : true_type { }; -template struct is_floating_point - : is_floating_point { }; -template struct is_floating_point - : is_floating_point { }; -template struct is_floating_point - : is_floating_point { }; +template +struct is_floating_point : false_type {}; +template <> +struct is_floating_point : true_type {}; +template <> +struct is_floating_point : true_type {}; +template <> +struct is_floating_point : true_type {}; +template +struct is_floating_point : is_floating_point {}; +template +struct is_floating_point : is_floating_point {}; +template +struct is_floating_point : is_floating_point {}; // is_pointer is false except for pointer types. A cv-qualified type (e.g. // "int* const", as opposed to "int const*") is cv-qualified if and only if // the underlying type is. 
-template struct is_pointer : false_type { }; -template struct is_pointer : true_type { }; -template struct is_pointer : is_pointer { }; -template struct is_pointer : is_pointer { }; -template struct is_pointer : is_pointer { }; - - -template struct is_array : public false_type {}; -template struct is_array : public true_type {}; -template struct is_array : public true_type {}; +template +struct is_pointer : false_type {}; +template +struct is_pointer : true_type {}; +template +struct is_pointer : is_pointer {}; +template +struct is_pointer : is_pointer {}; +template +struct is_pointer : is_pointer {}; + +template +struct is_array : public false_type {}; +template +struct is_array : public true_type {}; +template +struct is_array : public true_type {}; #if !defined(_MSC_VER) && !(defined(__GNUC__) && __GNUC__ <= 3) namespace internal { -template struct is_class_or_union { - template static small_ tester(void (U::*)()); - template static big_ tester(...); - static const bool value = sizeof(tester(0)) == sizeof(small_); +template +struct is_class_or_union { + template + static small_ tester(void (U::*)()); + template + static big_ tester(...); + static const bool value = sizeof(tester(0)) == sizeof(small_); }; // is_convertible chokes if the first argument is an array. That's why // we use add_reference here. -template struct is_enum_impl - : is_convertible::type, int> { }; +template +struct is_enum_impl : is_convertible::type, int> {}; -template struct is_enum_impl : false_type { }; +template +struct is_enum_impl : false_type {}; -} // namespace internal +} // namespace internal // Specified by TR1 [4.5.1] primary type categories. @@ -195,127 +254,175 @@ template struct is_enum_impl : false_type { }; // Is-convertible-to-int check is done only if all other checks pass, // because it can't be used with some types (e.g. void or classes with // inaccessible conversion operators). 
-template struct is_enum - : internal::is_enum_impl< - is_same::value || - is_integral::value || - is_floating_point::value || - is_reference::value || - internal::is_class_or_union::value, - T> { }; - -template struct is_enum : is_enum { }; -template struct is_enum : is_enum { }; -template struct is_enum : is_enum { }; +template +struct is_enum + : internal::is_enum_impl::value || is_integral::value || + is_floating_point::value || is_reference::value || + internal::is_class_or_union::value, + T> {}; + +template +struct is_enum : is_enum {}; +template +struct is_enum : is_enum {}; +template +struct is_enum : is_enum {}; #endif // is_reference is false except for reference types. -template struct is_reference : false_type {}; -template struct is_reference : true_type {}; - +template +struct is_reference : false_type {}; +template +struct is_reference : true_type {}; // We can't get is_pod right without compiler help, so fail conservatively. // We will assume it's false except for arithmetic types, enumerations, // pointers and cv-qualified versions thereof. Note that std::pair // is not a POD even if T and U are PODs. -template struct is_pod - : integral_constant::value || - is_floating_point::value || +template +struct is_pod : integral_constant::value || is_floating_point::value || #if !defined(_MSC_VER) && !(defined(__GNUC__) && __GNUC__ <= 3) - // is_enum is not available on MSVC. - is_enum::value || + // is_enum is not available on MSVC. + is_enum::value || #endif - is_pointer::value)> { }; -template struct is_pod : is_pod { }; -template struct is_pod : is_pod { }; -template struct is_pod : is_pod { }; - + is_pointer::value)> { +}; +template +struct is_pod : is_pod {}; +template +struct is_pod : is_pod {}; +template +struct is_pod : is_pod {}; // We can't get has_trivial_constructor right without compiler help, so // fail conservatively. We will assume it's false except for: (1) types // for which is_pod is true. 
(2) std::pair of types with trivial // constructors. (3) array of a type with a trivial constructor. // (4) const versions thereof. -template struct has_trivial_constructor : is_pod { }; -template struct has_trivial_constructor> - : integral_constant::value && - has_trivial_constructor::value)> { }; -template struct has_trivial_constructor - : has_trivial_constructor { }; -template struct has_trivial_constructor - : has_trivial_constructor { }; +template +struct has_trivial_constructor : is_pod {}; +template +struct has_trivial_constructor> + : integral_constant::value && + has_trivial_constructor::value)> {}; +template +struct has_trivial_constructor : has_trivial_constructor {}; +template +struct has_trivial_constructor : has_trivial_constructor {}; // We can't get has_trivial_copy right without compiler help, so fail // conservatively. We will assume it's false except for: (1) types // for which is_pod is true. (2) std::pair of types with trivial copy // constructors. (3) array of a type with a trivial copy constructor. // (4) const versions thereof. -template struct has_trivial_copy : is_pod { }; -template struct has_trivial_copy> - : integral_constant::value && - has_trivial_copy::value)> { }; -template struct has_trivial_copy - : has_trivial_copy { }; -template struct has_trivial_copy : has_trivial_copy { }; +template +struct has_trivial_copy : is_pod {}; +template +struct has_trivial_copy> + : integral_constant::value && has_trivial_copy::value)> {}; +template +struct has_trivial_copy : has_trivial_copy {}; +template +struct has_trivial_copy : has_trivial_copy {}; // We can't get has_trivial_assign right without compiler help, so fail // conservatively. We will assume it's false except for: (1) types // for which is_pod is true. (2) std::pair of types with trivial copy // constructors. (3) array of a type with a trivial assign constructor. 
-template struct has_trivial_assign : is_pod { }; -template struct has_trivial_assign> - : integral_constant::value && - has_trivial_assign::value)> { }; -template struct has_trivial_assign - : has_trivial_assign { }; +template +struct has_trivial_assign : is_pod {}; +template +struct has_trivial_assign> + : integral_constant::value && has_trivial_assign::value)> { +}; +template +struct has_trivial_assign : has_trivial_assign {}; // We can't get has_trivial_destructor right without compiler help, so // fail conservatively. We will assume it's false except for: (1) types // for which is_pod is true. (2) std::pair of types with trivial // destructors. (3) array of a type with a trivial destructor. // (4) const versions thereof. -template struct has_trivial_destructor : is_pod { }; -template struct has_trivial_destructor> - : integral_constant::value && - has_trivial_destructor::value)> { }; -template struct has_trivial_destructor - : has_trivial_destructor { }; -template struct has_trivial_destructor - : has_trivial_destructor { }; +template +struct has_trivial_destructor : is_pod {}; +template +struct has_trivial_destructor> + : integral_constant::value && + has_trivial_destructor::value)> {}; +template +struct has_trivial_destructor : has_trivial_destructor {}; +template +struct has_trivial_destructor : has_trivial_destructor {}; // Specified by TR1 [4.7.1] -template struct remove_const { typedef T type; }; -template struct remove_const { typedef T type; }; -template struct remove_volatile { typedef T type; }; -template struct remove_volatile { typedef T type; }; -template struct remove_cv { - typedef typename remove_const::type>::type type; +template +struct remove_const { + typedef T type; +}; +template +struct remove_const { + typedef T type; +}; +template +struct remove_volatile { + typedef T type; +}; +template +struct remove_volatile { + typedef T type; +}; +template +struct remove_cv { + typedef typename remove_const::type>::type type; }; - // Specified by 
TR1 [4.7.2] Reference modifications. -template struct remove_reference { typedef T type; }; -template struct remove_reference { typedef T type; }; +template +struct remove_reference { + typedef T type; +}; +template +struct remove_reference { + typedef T type; +}; -template struct add_reference { typedef T& type; }; -template struct add_reference { typedef T& type; }; +template +struct add_reference { + typedef T& type; +}; +template +struct add_reference { + typedef T& type; +}; // Specified by TR1 [4.7.4] Pointer modifications. -template struct remove_pointer { typedef T type; }; -template struct remove_pointer { typedef T type; }; -template struct remove_pointer { typedef T type; }; -template struct remove_pointer { typedef T type; }; -template struct remove_pointer { - typedef T type; }; +template +struct remove_pointer { + typedef T type; +}; +template +struct remove_pointer { + typedef T type; +}; +template +struct remove_pointer { + typedef T type; +}; +template +struct remove_pointer { + typedef T type; +}; +template +struct remove_pointer { + typedef T type; +}; // Specified by TR1 [4.6] Relationships between types -template struct is_same : public false_type { }; -template struct is_same : public true_type { }; +template +struct is_same : public false_type {}; +template +struct is_same : public true_type {}; // Specified by TR1 [4.6] Relationships between types #if !defined(_MSC_VER) && !(defined(__GNUC__) && __GNUC__ <= 3) @@ -332,23 +439,21 @@ namespace internal { template struct ConvertHelper { - static small_ Test(To); - static big_ Test(...); - static From Create(); + static small_ Test(To); + static big_ Test(...); + static From Create(); }; -} // namespace internal +} // namespace internal // Inherits from true_type if From is convertible to To, false_type otherwise. 
template struct is_convertible - : integral_constant::Test( - internal::ConvertHelper::Create())) - == sizeof(small_)> { -}; + : integral_constant::Test( + internal::ConvertHelper::Create())) == + sizeof(small_)> {}; #endif -} +} // namespace base // Right now these macros are no-ops, and mostly just document the fact // these types are PODs, for human use. They may be made more contentful @@ -356,8 +461,8 @@ struct is_convertible // these macros. #define DECLARE_POD(TypeName) typedef int Dummy_Type_For_DECLARE_POD ATTRIBUTE_UNUSED #define DECLARE_NESTED_POD(TypeName) DECLARE_POD(TypeName) -#define PROPAGATE_POD_FROM_TEMPLATE_ARGUMENT(TemplateName) \ +#define PROPAGATE_POD_FROM_TEMPLATE_ARGUMENT(TemplateName) \ typedef int Dummy_Type_For_PROPAGATE_POD_FROM_TEMPLATE_ARGUMENT ATTRIBUTE_UNUSED #define ENFORCE_POD(TypeName) typedef int Dummy_Type_For_ENFORCE_POD ATTRIBUTE_UNUSED -#endif // BASE_TYPE_TRAITS_H_ +#endif // BASE_TYPE_TRAITS_H_ diff --git a/be/src/gutil/utf/utf.h b/be/src/gutil/utf/utf.h index 02ba472aef6db2..b9165efd5cb6aa 100644 --- a/be/src/gutil/utf/utf.h +++ b/be/src/gutil/utf/utf.h @@ -3,18 +3,17 @@ #include -typedef signed int Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/ - -enum -{ - UTFmax = 4, /* maximum bytes per rune */ - Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ - Runeself = 0x80, /* rune and UTF sequences are the same (<) */ - Runeerror = 0xFFFD, /* decoding error in UTF */ - Runemax = 0x10FFFF, /* maximum rune value */ +typedef signed int Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/ + +enum { + UTFmax = 4, /* maximum bytes per rune */ + Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ + Runeself = 0x80, /* rune and UTF sequences are the same (<) */ + Runeerror = 0xFFFD, /* decoding error in UTF */ + Runemax = 0x10FFFF, /* maximum rune value */ }; -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -35,7 +34,6 @@ extern "C" { int runetochar(char* s, const 
Rune* r); - // chartorune copies (decodes) at most UTFmax bytes starting at s to // one rune, pointed to by r, and returns the number of bytes consumed. // If the input is not exactly in UTF format, chartorune will set *r @@ -48,12 +46,11 @@ int runetochar(char* s, const Rune* r); int chartorune(Rune* r, const char* s); - // charntorune is like chartorune, except that it will access at most // n bytes of s. If the UTF sequence is incomplete within n bytes, // charntorune will set *r to Runeerror and return 0. If it is complete // but not in UTF format, it will set *r to Runeerror and return 1. -// +// // Added 2004-09-24 by Wei-Hwa Huang int charntorune(Rune* r, const char* s, int n); @@ -69,13 +66,11 @@ int isvalidcharntorune(const char* str, int n, Rune* r, int* consumed); int runelen(Rune r); - // runenlen returns the number of bytes required to convert the n // runes pointed to by r into UTF. int runenlen(const Rune* r, int n); - // fullrune returns 1 if the string s of length n is long enough to be // decoded by chartorune, and 0 otherwise. This does not guarantee // that the string contains a legal UTF encoding. This routine is used @@ -93,7 +88,6 @@ int fullrune(const char* s, int n); int utflen(const char* s); - // utfnlen returns the number of complete runes that are represented // by the first n bytes of the UTF string s. If the last few bytes of // the string contain an incompletely coded rune, utfnlen will not @@ -102,7 +96,6 @@ int utflen(const char* s); int utfnlen(const char* s, long n); - // utfrune returns a pointer to the first occurrence of rune r in the // UTF string s, or 0 if r does not occur in the string. The NULL // byte terminating a string is considered to be part of the string s. @@ -110,7 +103,6 @@ int utfnlen(const char* s, long n); const char* utfrune(const char* s, Rune r); - // utfrrune returns a pointer to the last occurrence of rune r in the // UTF string s, or 0 if r does not occur in the string. 
The NULL // byte terminating a string is considered to be part of the string s. @@ -118,26 +110,22 @@ const char* utfrune(const char* s, Rune r); const char* utfrrune(const char* s, Rune r); - // utfutf returns a pointer to the first occurrence of the UTF string // s2 as a UTF substring of s1, or 0 if there is none. If s2 is the // null string, utfutf returns s1. (cf. strstr) const char* utfutf(const char* s1, const char* s2); - // utfecpy copies UTF sequences until a null sequence has been copied, // but writes no sequences beyond es1. If any sequences are copied, // s1 is terminated by a null sequence, and a pointer to that sequence // is returned. Otherwise, the original s1 is returned. (cf. strecpy) -char* utfecpy(char *s1, char *es1, const char *s2); - - +char* utfecpy(char* s1, char* es1, const char* s2); // These functions are rune-string analogues of the corresponding // functions in strcat (3). -// +// // These routines first appeared in Plan 9. // SEE ALSO // memmove (3) @@ -164,8 +152,6 @@ const Rune* runestrrchr(const Rune* s, Rune c); long runestrlen(const Rune* s); const Rune* runestrstr(const Rune* s1, const Rune* s2); - - // The following routines test types and modify cases for Unicode // characters. Unicode defines some characters as letters and // specifies three cases: upper, lower, and title. Mappings among the @@ -187,46 +173,40 @@ Rune toupperrune(Rune r); Rune tolowerrune(Rune r); Rune totitlerune(Rune r); - // isupperrune tests for upper case characters, including Unicode // upper case letters and targets of the toupper mapping. islowerrune -// and istitlerune are defined analogously. - +// and istitlerune are defined analogously. + int isupperrune(Rune r); int islowerrune(Rune r); int istitlerune(Rune r); - // isalpharune tests for Unicode letters; this includes ideographs in // addition to alphabetic characters. int isalpharune(Rune r); - // isdigitrune tests for digits. Non-digit numbers, such as Roman // numerals, are not included. 
int isdigitrune(Rune r); - // isideographicrune tests for ideographic characters and numbers, as // defined by the Unicode standard. int isideographicrune(Rune r); - // isspacerune tests for whitespace characters, including "C" locale // whitespace, Unicode defined whitespace, and the "zero-width // non-break space" character. int isspacerune(Rune r); - // (The comments in this file were copied from the manpage files rune.3, // isalpharune.3, and runestrcat.3. Some formatting changes were also made // to conform to Google style. /JRM 11/11/05) -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/be/src/gutil/utf/utfdef.h b/be/src/gutil/utf/utfdef.h index 4b58ae87e42b5f..a2272fa036fcd4 100644 --- a/be/src/gutil/utf/utfdef.h +++ b/be/src/gutil/utf/utfdef.h @@ -5,10 +5,10 @@ #define vlong _utfvlong #define uvlong _utfuvlong -typedef unsigned char uchar; -typedef unsigned short ushort; -typedef unsigned int uint; -typedef unsigned long ulong; +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; -#define nelem(x) (sizeof(x)/sizeof((x)[0])) +#define nelem(x) (sizeof(x) / sizeof((x)[0])) #define nil ((void*)0) diff --git a/be/src/gutil/valgrind.h b/be/src/gutil/valgrind.h index 577c59ab0cd02a..ecce0944b3ec97 100644 --- a/be/src/gutil/valgrind.h +++ b/be/src/gutil/valgrind.h @@ -55,7 +55,6 @@ ---------------------------------------------------------------- */ - /* This file is for inclusion into client (your!) code. 
You can use these macros to manipulate and query Valgrind's @@ -93,30 +92,27 @@ #undef PLAT_ppc64_aix5 #if !defined(_AIX) && defined(__i386__) -# define PLAT_x86_linux 1 +#define PLAT_x86_linux 1 #elif !defined(_AIX) && defined(__x86_64__) -# define PLAT_amd64_linux 1 +#define PLAT_amd64_linux 1 #elif !defined(_AIX) && defined(__powerpc__) && !defined(__powerpc64__) -# define PLAT_ppc32_linux 1 +#define PLAT_ppc32_linux 1 #elif !defined(_AIX) && defined(__powerpc__) && defined(__powerpc64__) -# define PLAT_ppc64_linux 1 +#define PLAT_ppc64_linux 1 #elif defined(_AIX) && defined(__64BIT__) -# define PLAT_ppc64_aix5 1 +#define PLAT_ppc64_aix5 1 #elif defined(_AIX) && !defined(__64BIT__) -# define PLAT_ppc32_aix5 1 +#define PLAT_ppc32_aix5 1 #endif - /* If we're not compiling for our target platform, don't generate any inline asms. */ -#if !defined(PLAT_x86_linux) && !defined(PLAT_amd64_linux) \ - && !defined(PLAT_ppc32_linux) && !defined(PLAT_ppc64_linux) \ - && !defined(PLAT_ppc32_aix5) && !defined(PLAT_ppc64_aix5) -# if !defined(NVALGRIND) -# define NVALGRIND 1 -# endif +#if !defined(PLAT_x86_linux) && !defined(PLAT_amd64_linux) && !defined(PLAT_ppc32_linux) && \ + !defined(PLAT_ppc64_linux) && !defined(PLAT_ppc32_aix5) && !defined(PLAT_ppc64_aix5) +#if !defined(NVALGRIND) +#define NVALGRIND 1 +#endif #endif - /* ------------------------------------------------------------------ */ /* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS. 
There is nothing */ @@ -128,14 +124,11 @@ /* Define NVALGRIND to completely remove the Valgrind magic sequence from the compiled code (analogous to NDEBUG's effects on assert()) */ -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - { \ - (_zzq_rlval) = (_zzq_default); \ - } +#define VALGRIND_DO_CLIENT_REQUEST(_zzq_rlval, _zzq_default, _zzq_request, _zzq_arg1, _zzq_arg2, \ + _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { (_zzq_rlval) = (_zzq_default); } -#else /* ! NVALGRIND */ +#else /* ! NVALGRIND */ /* The following defines the magic code sequences which the JITter spots and handles magically. Don't look too closely at them as @@ -176,233 +169,214 @@ #if defined(PLAT_x86_linux) -typedef - struct { - unsigned int nraddr; /* where's the code? */ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "roll $3, %%edi ; roll $13, %%edi\n\t" \ - "roll $29, %%edi ; roll $19, %%edi\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - { volatile unsigned int _zzq_args[6]; \ - volatile unsigned int _zzq_result; \ - _zzq_args[0] = (unsigned int)(_zzq_request); \ - _zzq_args[1] = (unsigned int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned int)(_zzq_arg5); \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %EDX = client_request ( %EAX ) */ \ - "xchgl %%ebx,%%ebx" \ - : "=d" (_zzq_result) \ - : "a" (&_zzq_args[0]), "0" (_zzq_default) \ - : "cc", "memory" \ - ); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - volatile unsigned int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %EAX = guest_NRADDR */ \ - "xchgl %%ecx,%%ecx" \ - : "=a" (__addr) \ - 
: \ - : "cc", "memory" \ - ); \ - _zzq_orig->nraddr = __addr; \ - } - -#define VALGRIND_CALL_NOREDIR_EAX \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* call-noredir *%EAX */ \ - "xchgl %%edx,%%edx\n\t" +typedef struct { + unsigned int nraddr; /* where's the code? */ +} OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "roll $3, %%edi ; roll $13, %%edi\n\t" \ + "roll $29, %%edi ; roll $19, %%edi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST(_zzq_rlval, _zzq_default, _zzq_request, _zzq_arg1, _zzq_arg2, \ + _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { \ + volatile unsigned int _zzq_args[6]; \ + volatile unsigned int _zzq_result; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE /* %EDX = client_request ( %EAX ) */ \ + "xchgl %%ebx,%%ebx" \ + : "=d"(_zzq_result) \ + : "a"(&_zzq_args[0]), "0"(_zzq_default) \ + : "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { \ + volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE /* %EAX = guest_NRADDR */ \ + "xchgl %%ecx,%%ecx" \ + : "=a"(__addr) \ + : \ + : "cc", "memory"); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_EAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%EAX */ \ + "xchgl %%edx,%%edx\n\t" #endif /* PLAT_x86_linux */ /* ------------------------ amd64-linux ------------------------ */ #if defined(PLAT_amd64_linux) -typedef - struct { - unsigned long long int nraddr; /* where's the code? 
*/ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "rolq $3, %%rdi ; rolq $13, %%rdi\n\t" \ - "rolq $61, %%rdi ; rolq $51, %%rdi\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - { volatile unsigned long long int _zzq_args[6]; \ - volatile unsigned long long int _zzq_result; \ - _zzq_args[0] = (unsigned long long int)(_zzq_request); \ - _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %RDX = client_request ( %RAX ) */ \ - "xchgq %%rbx,%%rbx" \ - : "=d" (_zzq_result) \ - : "a" (&_zzq_args[0]), "0" (_zzq_default) \ - : "cc", "memory" \ - ); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - volatile unsigned long long int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %RAX = guest_NRADDR */ \ - "xchgq %%rcx,%%rcx" \ - : "=a" (__addr) \ - : \ - : "cc", "memory" \ - ); \ - _zzq_orig->nraddr = __addr; \ - } - -#define VALGRIND_CALL_NOREDIR_RAX \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* call-noredir *%RAX */ \ - "xchgq %%rdx,%%rdx\n\t" +typedef struct { + unsigned long long int nraddr; /* where's the code? 
*/ +} OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rolq $3, %%rdi ; rolq $13, %%rdi\n\t" \ + "rolq $61, %%rdi ; rolq $51, %%rdi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST(_zzq_rlval, _zzq_default, _zzq_request, _zzq_arg1, _zzq_arg2, \ + _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { \ + volatile unsigned long long int _zzq_args[6]; \ + volatile unsigned long long int _zzq_result; \ + _zzq_args[0] = (unsigned long long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE /* %RDX = client_request ( %RAX ) */ \ + "xchgq %%rbx,%%rbx" \ + : "=d"(_zzq_result) \ + : "a"(&_zzq_args[0]), "0"(_zzq_default) \ + : "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { \ + volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned long long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE /* %RAX = guest_NRADDR */ \ + "xchgq %%rcx,%%rcx" \ + : "=a"(__addr) \ + : \ + : "cc", "memory"); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_RAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%RAX */ \ + "xchgq %%rdx,%%rdx\n\t" #endif /* PLAT_amd64_linux */ /* ------------------------ ppc32-linux ------------------------ */ #if defined(PLAT_ppc32_linux) -typedef - struct { - unsigned int nraddr; /* where's the code? 
*/ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ - "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - \ - { unsigned int _zzq_args[6]; \ - unsigned int _zzq_result; \ - unsigned int* _zzq_ptr; \ - _zzq_args[0] = (unsigned int)(_zzq_request); \ - _zzq_args[1] = (unsigned int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned int)(_zzq_arg5); \ - _zzq_ptr = _zzq_args; \ - __asm__ volatile("mr 3,%1\n\t" /*default*/ \ - "mr 4,%2\n\t" /*ptr*/ \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = client_request ( %R4 ) */ \ - "or 1,1,1\n\t" \ - "mr %0,3" /*result*/ \ - : "=b" (_zzq_result) \ - : "b" (_zzq_default), "b" (_zzq_ptr) \ - : "cc", "memory", "r3", "r4"); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - unsigned int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR */ \ - "or 2,2,2\n\t" \ - "mr %0,3" \ - : "=b" (__addr) \ - : \ - : "cc", "memory", "r3" \ - ); \ - _zzq_orig->nraddr = __addr; \ - } - -#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* branch-and-link-to-noredir *%R11 */ \ - "or 3,3,3\n\t" +typedef struct { + unsigned int nraddr; /* where's the code? 
*/ +} OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ + "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST(_zzq_rlval, _zzq_default, _zzq_request, _zzq_arg1, _zzq_arg2, \ + _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { \ + unsigned int _zzq_args[6]; \ + unsigned int _zzq_result; \ + unsigned int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile( \ + "mr 3,%1\n\t" /*default*/ \ + "mr 4,%2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" /*result*/ \ + : "=b"(_zzq_result) \ + : "b"(_zzq_default), "b"(_zzq_ptr) \ + : "cc", "memory", "r3", "r4"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { \ + volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b"(__addr) \ + : \ + : "cc", "memory", "r3"); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" #endif /* PLAT_ppc32_linux */ /* ------------------------ ppc64-linux ------------------------ */ #if defined(PLAT_ppc64_linux) -typedef - struct { - unsigned long long int nraddr; /* where's the code? */ - unsigned long long int r2; /* what tocptr do we need? 
*/ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ - "rotldi 0,0,61 ; rotldi 0,0,51\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - \ - { unsigned long long int _zzq_args[6]; \ - register unsigned long long int _zzq_result __asm__("r3"); \ - register unsigned long long int* _zzq_ptr __asm__("r4"); \ - _zzq_args[0] = (unsigned long long int)(_zzq_request); \ - _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ - _zzq_ptr = _zzq_args; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = client_request ( %R4 ) */ \ - "or 1,1,1" \ - : "=r" (_zzq_result) \ - : "0" (_zzq_default), "r" (_zzq_ptr) \ - : "cc", "memory"); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - register unsigned long long int __addr __asm__("r3"); \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR */ \ - "or 2,2,2" \ - : "=r" (__addr) \ - : \ - : "cc", "memory" \ - ); \ - _zzq_orig->nraddr = __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR_GPR2 */ \ - "or 4,4,4" \ - : "=r" (__addr) \ - : \ - : "cc", "memory" \ - ); \ - _zzq_orig->r2 = __addr; \ - } - -#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* branch-and-link-to-noredir *%R11 */ \ - "or 3,3,3\n\t" +typedef struct { + unsigned long long int nraddr; /* where's the code? */ + unsigned long long int r2; /* what tocptr do we need? 
*/ +} OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ + "rotldi 0,0,61 ; rotldi 0,0,51\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST(_zzq_rlval, _zzq_default, _zzq_request, _zzq_arg1, _zzq_arg2, \ + _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { \ + unsigned long long int _zzq_args[6]; \ + register unsigned long long int _zzq_result __asm__("r3"); \ + register unsigned long long int* _zzq_ptr __asm__("r4"); \ + _zzq_args[0] = (unsigned long long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1" \ + : "=r"(_zzq_result) \ + : "0"(_zzq_default), "r"(_zzq_ptr) \ + : "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { \ + volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned long long int __addr __asm__("r3"); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE /* %R3 = guest_NRADDR */ \ + "or 2,2,2" \ + : "=r"(__addr) \ + : \ + : "cc", "memory"); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4" \ + : "=r"(__addr) \ + : \ + : "cc", "memory"); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" #endif /* PLAT_ppc64_linux */ @@ -410,71 +384,65 @@ typedef #if defined(PLAT_ppc32_aix5) -typedef - struct { - unsigned int nraddr; /* where's the code? */ - unsigned int r2; /* what tocptr do we need? 
*/ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ - "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - \ - { unsigned int _zzq_args[7]; \ - register unsigned int _zzq_result; \ - register unsigned int* _zzq_ptr; \ - _zzq_args[0] = (unsigned int)(_zzq_request); \ - _zzq_args[1] = (unsigned int)(_zzq_arg1); \ - _zzq_args[2] = (unsigned int)(_zzq_arg2); \ - _zzq_args[3] = (unsigned int)(_zzq_arg3); \ - _zzq_args[4] = (unsigned int)(_zzq_arg4); \ - _zzq_args[5] = (unsigned int)(_zzq_arg5); \ - _zzq_args[6] = (unsigned int)(_zzq_default); \ - _zzq_ptr = _zzq_args; \ - __asm__ volatile("mr 4,%1\n\t" \ - "lwz 3, 24(4)\n\t" \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = client_request ( %R4 ) */ \ - "or 1,1,1\n\t" \ - "mr %0,3" \ - : "=b" (_zzq_result) \ - : "b" (_zzq_ptr) \ - : "r3", "r4", "cc", "memory"); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - register unsigned int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR */ \ - "or 2,2,2\n\t" \ - "mr %0,3" \ - : "=b" (__addr) \ - : \ - : "r3", "cc", "memory" \ - ); \ - _zzq_orig->nraddr = __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR_GPR2 */ \ - "or 4,4,4\n\t" \ - "mr %0,3" \ - : "=b" (__addr) \ - : \ - : "r3", "cc", "memory" \ - ); \ - _zzq_orig->r2 = __addr; \ - } - -#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* branch-and-link-to-noredir *%R11 */ \ - "or 3,3,3\n\t" +typedef struct { + unsigned int nraddr; /* where's the code? */ + unsigned int r2; /* what tocptr do we need? 
*/ +} OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ + "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST(_zzq_rlval, _zzq_default, _zzq_request, _zzq_arg1, _zzq_arg2, \ + _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { \ + unsigned int _zzq_args[7]; \ + register unsigned int _zzq_result; \ + register unsigned int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + _zzq_args[6] = (unsigned int)(_zzq_default); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile( \ + "mr 4,%1\n\t" \ + "lwz 3, 24(4)\n\t" __SPECIAL_INSTRUCTION_PREAMBLE /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" \ + : "=b"(_zzq_result) \ + : "b"(_zzq_ptr) \ + : "r3", "r4", "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { \ + volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b"(__addr) \ + : \ + : "r3", "cc", "memory"); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4\n\t" \ + "mr %0,3" \ + : "=b"(__addr) \ + : \ + : "r3", "cc", "memory"); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" #endif /* PLAT_ppc32_aix5 */ @@ -482,71 +450,65 @@ typedef #if defined(PLAT_ppc64_aix5) -typedef - struct { - unsigned long long int nraddr; /* where's the code? */ - unsigned long long int r2; /* what tocptr do we need? 
*/ - } - OrigFn; - -#define __SPECIAL_INSTRUCTION_PREAMBLE \ - "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ - "rotldi 0,0,61 ; rotldi 0,0,51\n\t" - -#define VALGRIND_DO_CLIENT_REQUEST( \ - _zzq_rlval, _zzq_default, _zzq_request, \ - _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ - \ - { unsigned long long int _zzq_args[7]; \ - register unsigned long long int _zzq_result; \ - register unsigned long long int* _zzq_ptr; \ - _zzq_args[0] = (unsigned int long long)(_zzq_request); \ - _zzq_args[1] = (unsigned int long long)(_zzq_arg1); \ - _zzq_args[2] = (unsigned int long long)(_zzq_arg2); \ - _zzq_args[3] = (unsigned int long long)(_zzq_arg3); \ - _zzq_args[4] = (unsigned int long long)(_zzq_arg4); \ - _zzq_args[5] = (unsigned int long long)(_zzq_arg5); \ - _zzq_args[6] = (unsigned int long long)(_zzq_default); \ - _zzq_ptr = _zzq_args; \ - __asm__ volatile("mr 4,%1\n\t" \ - "ld 3, 48(4)\n\t" \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = client_request ( %R4 ) */ \ - "or 1,1,1\n\t" \ - "mr %0,3" \ - : "=b" (_zzq_result) \ - : "b" (_zzq_ptr) \ - : "r3", "r4", "cc", "memory"); \ - _zzq_rlval = _zzq_result; \ - } - -#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ - { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ - register unsigned long long int __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR */ \ - "or 2,2,2\n\t" \ - "mr %0,3" \ - : "=b" (__addr) \ - : \ - : "r3", "cc", "memory" \ - ); \ - _zzq_orig->nraddr = __addr; \ - __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ - /* %R3 = guest_NRADDR_GPR2 */ \ - "or 4,4,4\n\t" \ - "mr %0,3" \ - : "=b" (__addr) \ - : \ - : "r3", "cc", "memory" \ - ); \ - _zzq_orig->r2 = __addr; \ - } - -#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - __SPECIAL_INSTRUCTION_PREAMBLE \ - /* branch-and-link-to-noredir *%R11 */ \ - "or 3,3,3\n\t" +typedef struct { + unsigned long long int nraddr; /* where's the code? */ + unsigned long long int r2; /* what tocptr do we need? 
*/ +} OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ + "rotldi 0,0,61 ; rotldi 0,0,51\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST(_zzq_rlval, _zzq_default, _zzq_request, _zzq_arg1, _zzq_arg2, \ + _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { \ + unsigned long long int _zzq_args[7]; \ + register unsigned long long int _zzq_result; \ + register unsigned long long int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int long long)(_zzq_request); \ + _zzq_args[1] = (unsigned int long long)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int long long)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int long long)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int long long)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int long long)(_zzq_arg5); \ + _zzq_args[6] = (unsigned int long long)(_zzq_default); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile( \ + "mr 4,%1\n\t" \ + "ld 3, 48(4)\n\t" __SPECIAL_INSTRUCTION_PREAMBLE /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" \ + : "=b"(_zzq_result) \ + : "b"(_zzq_ptr) \ + : "r3", "r4", "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { \ + volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned long long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b"(__addr) \ + : \ + : "r3", "cc", "memory"); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4\n\t" \ + "mr %0,3" \ + : "=b"(__addr) \ + : \ + : "r3", "cc", "memory"); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" #endif /* PLAT_ppc64_aix5 */ @@ -554,7 +516,6 @@ typedef #endif /* NVALGRIND */ - /* ------------------------------------------------------------------ */ /* PLATFORM SPECIFICS for 
FUNCTION WRAPPING. This is all very */ /* ugly. It's the least-worst tradeoff I can think of. */ @@ -581,36 +542,42 @@ typedef /* Use these to write the name of your wrapper. NOTE: duplicates VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. */ -#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname) \ - _vgwZU_##soname##_##fnname +#define I_WRAP_SONAME_FNNAME_ZU(soname, fnname) _vgwZU_##soname##_##fnname -#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname) \ - _vgwZZ_##soname##_##fnname +#define I_WRAP_SONAME_FNNAME_ZZ(soname, fnname) _vgwZZ_##soname##_##fnname /* Use this macro from within a wrapper function to collect the context (address and possibly other info) of the original function. Once you have that you can then use it in one of the CALL_FN_ macros. The type of the argument _lval is OrigFn. */ -#define VALGRIND_GET_ORIG_FN(_lval) VALGRIND_GET_NR_CONTEXT(_lval) +#define VALGRIND_GET_ORIG_FN(_lval) VALGRIND_GET_NR_CONTEXT(_lval) /* Derivatives of the main macros below, for calling functions returning void. 
*/ -#define CALL_FN_v_v(fnptr) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_v(_junk,fnptr); } while (0) - -#define CALL_FN_v_W(fnptr, arg1) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_W(_junk,fnptr,arg1); } while (0) - -#define CALL_FN_v_WW(fnptr, arg1,arg2) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0) - -#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3) \ - do { volatile unsigned long _junk; \ - CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0) +#define CALL_FN_v_v(fnptr) \ + do { \ + volatile unsigned long _junk; \ + CALL_FN_W_v(_junk, fnptr); \ + } while (0) + +#define CALL_FN_v_W(fnptr, arg1) \ + do { \ + volatile unsigned long _junk; \ + CALL_FN_W_W(_junk, fnptr, arg1); \ + } while (0) + +#define CALL_FN_v_WW(fnptr, arg1, arg2) \ + do { \ + volatile unsigned long _junk; \ + CALL_FN_W_WW(_junk, fnptr, arg1, arg2); \ + } while (0) + +#define CALL_FN_v_WWW(fnptr, arg1, arg2, arg3) \ + do { \ + volatile unsigned long _junk; \ + CALL_FN_W_WWW(_junk, fnptr, arg1, arg2, arg3); \ + } while (0) /* ------------------------- x86-linux ------------------------- */ @@ -623,389 +590,357 @@ typedef /* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned long) == 4. 
*/ -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[1]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile("movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) #define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[2]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - __asm__ volatile( \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $4, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - __asm__ volatile( \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $8, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) 
\ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[4]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - __asm__ volatile( \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $12, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[5]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - __asm__ volatile( \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $16, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[6]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = 
(unsigned long)(arg5); \ - __asm__ volatile( \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $20, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[7]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - __asm__ volatile( \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $24, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[8]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - __asm__ volatile( \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 
16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $28, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[9]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - __asm__ volatile( \ - "pushl 32(%%eax)\n\t" \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $32, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[10]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ 
- _argvec[9] = (unsigned long)(arg9); \ - __asm__ volatile( \ - "pushl 36(%%eax)\n\t" \ - "pushl 32(%%eax)\n\t" \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $36, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[11]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - __asm__ volatile( \ - "pushl 40(%%eax)\n\t" \ - "pushl 36(%%eax)\n\t" \ - "pushl 32(%%eax)\n\t" \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $40, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ - arg6,arg7,arg8,arg9,arg10, \ - arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[12]; \ - volatile unsigned long _res; \ 
- _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - _argvec[11] = (unsigned long)(arg11); \ - __asm__ volatile( \ - "pushl 44(%%eax)\n\t" \ - "pushl 40(%%eax)\n\t" \ - "pushl 36(%%eax)\n\t" \ - "pushl 32(%%eax)\n\t" \ - "pushl 28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $44, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ - arg6,arg7,arg8,arg9,arg10, \ - arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[13]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - _argvec[11] = (unsigned long)(arg11); \ - _argvec[12] = (unsigned long)(arg12); \ - __asm__ volatile( \ - "pushl 48(%%eax)\n\t" \ - "pushl 44(%%eax)\n\t" \ - "pushl 40(%%eax)\n\t" \ - "pushl 36(%%eax)\n\t" \ - "pushl 32(%%eax)\n\t" \ - "pushl 
28(%%eax)\n\t" \ - "pushl 24(%%eax)\n\t" \ - "pushl 20(%%eax)\n\t" \ - "pushl 16(%%eax)\n\t" \ - "pushl 12(%%eax)\n\t" \ - "pushl 8(%%eax)\n\t" \ - "pushl 4(%%eax)\n\t" \ - "movl (%%eax), %%eax\n\t" /* target->%eax */ \ - VALGRIND_CALL_NOREDIR_EAX \ - "addl $48, %%esp\n" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX "addl $4, %%esp\n" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1, arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX "addl $8, %%esp\n" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1, arg2, arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 
4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX "addl $12, %%esp\n" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1, arg2, arg3, arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX "addl $16, %%esp\n" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1, arg2, arg3, arg4, arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX "addl $20, %%esp\n" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + 
volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX "addl $24, %%esp\n" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX "addl $28, %%esp\n" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned 
long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX "addl $32, %%esp\n" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX "addl $36, %%esp\n" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1, 
arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX "addl $40, %%esp\n" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + "pushl 44(%%eax)\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 
32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX "addl $44, %%esp\n" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, \ + arg11, arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + "pushl 48(%%eax)\n\t" \ + "pushl 44(%%eax)\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX "addl $48, %%esp\n" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) #endif /* PLAT_x86_linux */ @@ -1016,8 +951,7 @@ typedef /* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */ /* These regs are trashed by the hidden call. 
*/ -#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi", \ - "rdi", "r8", "r9", "r10", "r11" +#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11" /* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned long) == 8. */ @@ -1044,406 +978,382 @@ typedef stack. */ #define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[1]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX "addq $128,%%rsp\n\t" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) #define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[2]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - 
volatile unsigned long _argvec[3]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[4]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[5]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : 
/*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[6]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[7]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - "addq $128,%%rsp\n\t" \ - VALGRIND_CALL_NOREDIR_RAX \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - 
lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[8]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $8, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[9]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 64(%%rax)\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), 
%%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $16, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[10]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 72(%%rax)\n\t" \ - "pushq 64(%%rax)\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $24, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[11]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); 
\ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 80(%%rax)\n\t" \ - "pushq 72(%%rax)\n\t" \ - "pushq 64(%%rax)\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $32, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[12]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - _argvec[11] = (unsigned long)(arg11); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 88(%%rax)\n\t" \ - "pushq 80(%%rax)\n\t" \ - "pushq 72(%%rax)\n\t" \ - "pushq 64(%%rax)\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $40, %%rsp\n" \ - "addq 
$128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[13]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)(arg1); \ - _argvec[2] = (unsigned long)(arg2); \ - _argvec[3] = (unsigned long)(arg3); \ - _argvec[4] = (unsigned long)(arg4); \ - _argvec[5] = (unsigned long)(arg5); \ - _argvec[6] = (unsigned long)(arg6); \ - _argvec[7] = (unsigned long)(arg7); \ - _argvec[8] = (unsigned long)(arg8); \ - _argvec[9] = (unsigned long)(arg9); \ - _argvec[10] = (unsigned long)(arg10); \ - _argvec[11] = (unsigned long)(arg11); \ - _argvec[12] = (unsigned long)(arg12); \ - __asm__ volatile( \ - "subq $128,%%rsp\n\t" \ - "pushq 96(%%rax)\n\t" \ - "pushq 88(%%rax)\n\t" \ - "pushq 80(%%rax)\n\t" \ - "pushq 72(%%rax)\n\t" \ - "pushq 64(%%rax)\n\t" \ - "pushq 56(%%rax)\n\t" \ - "movq 48(%%rax), %%r9\n\t" \ - "movq 40(%%rax), %%r8\n\t" \ - "movq 32(%%rax), %%rcx\n\t" \ - "movq 24(%%rax), %%rdx\n\t" \ - "movq 16(%%rax), %%rsi\n\t" \ - "movq 8(%%rax), %%rdi\n\t" \ - "movq (%%rax), %%rax\n\t" /* target->%rax */ \ - VALGRIND_CALL_NOREDIR_RAX \ - "addq $48, %%rsp\n" \ - "addq $128,%%rsp\n\t" \ - : /*out*/ "=a" (_res) \ - : /*in*/ "a" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX "addq 
$128,%%rsp\n\t" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1, arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX "addq $128,%%rsp\n\t" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1, arg2, arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX "addq $128,%%rsp\n\t" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1, arg2, arg3, arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "subq 
$128,%%rsp\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX "addq $128,%%rsp\n\t" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1, arg2, arg3, arg4, arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX "addq $128,%%rsp\n\t" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 
8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + "addq $128,%%rsp\n\t" VALGRIND_CALL_NOREDIR_RAX \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $8, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 64(%%rax)\n\t" \ + 
"pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $16, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $24, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = 
(unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $32, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), 
%%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $40, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, \ + arg11, arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 96(%%rax)\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $48, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a"(_res) \ + : /*in*/ "a"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) #endif /* PLAT_amd64_linux */ @@ -1475,442 +1385,400 @@ typedef /* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ /* These regs are 
trashed by the hidden call. */ -#define __CALLER_SAVED_REGS \ - "lr", "ctr", "xer", \ - "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ - "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ - "r11", "r12", "r13" +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", "r0", "r2", "r3", \ + "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13" /* These CALL_FN_ macros assume that on ppc32-linux, sizeof(unsigned long) == 4. */ #define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[1]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 "mr %0,3" \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) #define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[2]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - 
); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[4]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[5]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 11,0(11)\n\t" /* 
target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[6]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[7]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = 
(__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[8]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[9]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - _argvec[8] = (unsigned long)arg8; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 10,32(11)\n\t" /* arg8->r10 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : 
/*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[10]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - _argvec[8] = (unsigned long)arg8; \ - _argvec[9] = (unsigned long)arg9; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "addi 1,1,-16\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,8(1)\n\t" \ - /* args1-8 */ \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 10,32(11)\n\t" /* arg8->r10 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "addi 1,1,16\n\t" \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[11]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - _argvec[8] = (unsigned long)arg8; \ - _argvec[9] = (unsigned long)arg9; \ - 
_argvec[10] = (unsigned long)arg10; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "addi 1,1,-16\n\t" \ - /* arg10 */ \ - "lwz 3,40(11)\n\t" \ - "stw 3,12(1)\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,8(1)\n\t" \ - /* args1-8 */ \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 10,32(11)\n\t" /* arg8->r10 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "addi 1,1,16\n\t" \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[12]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - _argvec[8] = (unsigned long)arg8; \ - _argvec[9] = (unsigned long)arg9; \ - _argvec[10] = (unsigned long)arg10; \ - _argvec[11] = (unsigned long)arg11; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "addi 1,1,-32\n\t" \ - /* arg11 */ \ - "lwz 3,44(11)\n\t" \ - "stw 3,16(1)\n\t" \ - /* arg10 */ \ - "lwz 3,40(11)\n\t" \ - "stw 3,12(1)\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,8(1)\n\t" \ - /* args1-8 */ \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 10,32(11)\n\t" /* arg8->r10 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - 
VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "addi 1,1,32\n\t" \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[13]; \ - volatile unsigned long _res; \ - _argvec[0] = (unsigned long)_orig.nraddr; \ - _argvec[1] = (unsigned long)arg1; \ - _argvec[2] = (unsigned long)arg2; \ - _argvec[3] = (unsigned long)arg3; \ - _argvec[4] = (unsigned long)arg4; \ - _argvec[5] = (unsigned long)arg5; \ - _argvec[6] = (unsigned long)arg6; \ - _argvec[7] = (unsigned long)arg7; \ - _argvec[8] = (unsigned long)arg8; \ - _argvec[9] = (unsigned long)arg9; \ - _argvec[10] = (unsigned long)arg10; \ - _argvec[11] = (unsigned long)arg11; \ - _argvec[12] = (unsigned long)arg12; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "addi 1,1,-32\n\t" \ - /* arg12 */ \ - "lwz 3,48(11)\n\t" \ - "stw 3,20(1)\n\t" \ - /* arg11 */ \ - "lwz 3,44(11)\n\t" \ - "stw 3,16(1)\n\t" \ - /* arg10 */ \ - "lwz 3,40(11)\n\t" \ - "stw 3,12(1)\n\t" \ - /* arg9 */ \ - "lwz 3,36(11)\n\t" \ - "stw 3,8(1)\n\t" \ - /* args1-8 */ \ - "lwz 3,4(11)\n\t" /* arg1->r3 */ \ - "lwz 4,8(11)\n\t" \ - "lwz 5,12(11)\n\t" \ - "lwz 6,16(11)\n\t" /* arg4->r6 */ \ - "lwz 7,20(11)\n\t" \ - "lwz 8,24(11)\n\t" \ - "lwz 9,28(11)\n\t" \ - "lwz 10,32(11)\n\t" /* arg8->r10 */ \ - "lwz 11,0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "addi 1,1,32\n\t" \ - "mr %0,3" \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[0]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = 
(unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 "mr %0,3" \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1, arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 "mr %0,3" \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1, arg2, arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 "mr %0,3" \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1, arg2, arg3, arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned 
long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 "mr %0,3" \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1, arg2, arg3, arg4, arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 "mr %0,3" \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 
7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 "mr %0,3" \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 "mr %0,3" \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + 
"lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 "mr %0,3" \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,16\n\t" \ + "mr %0,3" \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = 
(unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,16\n\t" \ + "mr %0,3" \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" 
/* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,32\n\t" \ + "mr %0,3" \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, \ + arg11, arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + _argvec[12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" /* arg12 */ \ + "lwz 3,48(11)\n\t" \ + "stw 3,20(1)\n\t" /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,32\n\t" \ + "mr %0,3" \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) #endif /* PLAT_ppc32_linux */ @@ 
-1921,520 +1789,487 @@ typedef /* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ /* These regs are trashed by the hidden call. */ -#define __CALLER_SAVED_REGS \ - "lr", "ctr", "xer", \ - "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ - "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ - "r11", "r12", "r13" +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", "r0", "r2", "r3", \ + "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13" /* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned long) == 8. */ #define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+0]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[2]) \ + : 
/*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) #define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+1]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+2]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+3]; 
\ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+4]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, 
arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+5]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+6]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 
40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+7]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+8]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - 
_argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)" /* restore tocptr */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+9]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's 
tocptr */ \ - "addi 1,1,-128\n\t" /* expand stack frame */ \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - "addi 1,1,128" /* restore frame */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+10]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "addi 1,1,-128\n\t" /* expand stack frame */ \ - /* arg10 */ \ - "ld 3,80(11)\n\t" \ - "std 3,120(1)\n\t" \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* 
arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - "addi 1,1,128" /* restore frame */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+11]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "addi 1,1,-144\n\t" /* expand stack frame */ \ - /* arg11 */ \ - "ld 3,88(11)\n\t" \ - "std 3,128(1)\n\t" \ - /* arg10 */ \ - "ld 3,80(11)\n\t" \ - "std 3,120(1)\n\t" \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 
9, 56(11)\n\t" /* arg7->r9 */ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - "addi 1,1,144" /* restore frame */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+12]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - _argvec[2+12] = (unsigned long)arg12; \ - __asm__ volatile( \ - "mr 11,%1\n\t" \ - "std 2,-16(11)\n\t" /* save tocptr */ \ - "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ - "addi 1,1,-144\n\t" /* expand stack frame */ \ - /* arg12 */ \ - "ld 3,96(11)\n\t" \ - "std 3,136(1)\n\t" \ - /* arg11 */ \ - "ld 3,88(11)\n\t" \ - "std 3,128(1)\n\t" \ - /* arg10 */ \ - "ld 3,80(11)\n\t" \ - "std 3,120(1)\n\t" \ - /* arg9 */ \ - "ld 3,72(11)\n\t" \ - "std 3,112(1)\n\t" \ - /* args1-8 */ \ - "ld 3, 8(11)\n\t" /* arg1->r3 */ \ - "ld 4, 16(11)\n\t" /* arg2->r4 */ \ - "ld 5, 24(11)\n\t" /* arg3->r5 */ \ - "ld 6, 32(11)\n\t" /* arg4->r6 */ \ - "ld 7, 40(11)\n\t" /* arg5->r7 */ \ - "ld 8, 48(11)\n\t" /* arg6->r8 */ \ - "ld 9, 56(11)\n\t" /* arg7->r9 
*/ \ - "ld 10, 64(11)\n\t" /* arg8->r10 */ \ - "ld 11, 0(11)\n\t" /* target->r11 */ \ - VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ - "mr 11,%1\n\t" \ - "mr %0,3\n\t" \ - "ld 2,-16(11)\n\t" /* restore tocptr */ \ - "addi 1,1,144" /* restore frame */ \ - : /*out*/ "=r" (_res) \ - : /*in*/ "r" (&_argvec[2]) \ - : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1, arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r"(_res) \ 
+ : /*in*/ "r"(&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1, arg2, arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1, arg2, arg3, arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + 
VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1, arg2, arg3, arg4, arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned 
long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[2]) \ + : /*trash*/ 
"cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + _argvec[2 + 8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned 
long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + _argvec[2 + 8] = (unsigned long)arg8; \ + _argvec[2 + 9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,128" /* restore frame */ \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + _argvec[2 + 8] = (unsigned long)arg8; \ + _argvec[2 + 9] = (unsigned long)arg9; \ + _argvec[2 + 10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 
11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,128" /* restore frame */ \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + _argvec[2 + 8] = (unsigned long)arg8; \ + _argvec[2 + 9] = (unsigned long)arg9; \ + _argvec[2 + 10] = (unsigned long)arg10; \ + _argvec[2 + 11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ /* arg11 */ \ + "ld 3,88(11)\n\t" 
\ + "std 3,128(1)\n\t" /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,144" /* restore frame */ \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, \ + arg11, arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + _argvec[2 + 8] = (unsigned long)arg8; \ + _argvec[2 + 9] = (unsigned long)arg9; \ + _argvec[2 + 10] = (unsigned long)arg10; \ + _argvec[2 + 11] = (unsigned long)arg11; \ + _argvec[2 + 12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ /* arg12 */ \ + "ld 3,96(11)\n\t" \ + "std 3,136(1)\n\t" /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 
3,128(1)\n\t" /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,144" /* restore frame */ \ + : /*out*/ "=r"(_res) \ + : /*in*/ "r"(&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS); \ + lval = (__typeof__(lval))_res; \ + } while (0) #endif /* PLAT_ppc64_linux */ @@ -2445,35 +2280,34 @@ typedef /* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ /* These regs are trashed by the hidden call. */ -#define __CALLER_SAVED_REGS \ - "lr", "ctr", "xer", \ - "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ - "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ - "r11", "r12", "r13" +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", "r0", "r2", "r3", \ + "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13" /* Expand the stack frame, copying enough info that unwinding still works. Trashes r3. */ -#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ - "addi 1,1,-" #_n_fr "\n\t" \ - "lwz 3," #_n_fr "(1)\n\t" \ - "stw 3,0(1)\n\t" +#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ + "addi 1,1,-" #_n_fr \ + "\n\t" \ + "lwz 3," #_n_fr \ + "(1)\n\t" \ + "stw 3,0(1)\n\t" -#define VG_CONTRACT_FRAME_BY(_n_fr) \ - "addi 1,1," #_n_fr "\n\t" +#define VG_CONTRACT_FRAME_BY(_n_fr) "addi 1,1," #_n_fr "\n\t" /* These CALL_FN_ macros assume that on ppc32-aix5, sizeof(unsigned long) == 4. 
*/ -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+0]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "stw 2,-8(11)\n\t" /* save tocptr */ \ @@ -2487,20 +2321,20 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+1]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "stw 2,-8(11)\n\t" /* save tocptr */ \ @@ -2515,21 +2349,21 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while 
(0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+2]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1, arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "stw 2,-8(11)\n\t" /* save tocptr */ \ @@ -2545,22 +2379,22 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+3]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1, arg2, arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned 
long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "stw 2,-8(11)\n\t" /* save tocptr */ \ @@ -2577,23 +2411,23 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+4]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1, arg2, arg3, arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "stw 2,-8(11)\n\t" /* save tocptr */ \ @@ -2611,24 +2445,24 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+5]; \ - volatile unsigned long _res; \ 
- /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1, arg2, arg3, arg4, arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "stw 2,-8(11)\n\t" /* save tocptr */ \ @@ -2647,25 +2481,25 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+6]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6) \ + do { \ 
+ volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "stw 2,-8(11)\n\t" /* save tocptr */ \ @@ -2685,27 +2519,26 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+7]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = 
(unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "stw 2,-8(11)\n\t" /* save tocptr */ \ @@ -2726,28 +2559,27 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+8]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + _argvec[2 + 8] = (unsigned long)arg8; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "stw 2,-8(11)\n\t" /* save tocptr */ \ 
@@ -2769,29 +2601,28 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+9]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + _argvec[2 + 8] = (unsigned long)arg8; \ + _argvec[2 + 9] = (unsigned long)arg9; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "stw 2,-8(11)\n\t" /* save tocptr */ \ @@ -2819,30 +2650,29 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) 
_res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+10]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + _argvec[2 + 8] = (unsigned long)arg8; \ + _argvec[2 + 9] = (unsigned long)arg9; \ + _argvec[2 + 10] = (unsigned long)arg10; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "stw 2,-8(11)\n\t" /* save tocptr */ \ @@ -2873,31 +2703,31 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_11W(lval, orig, 
arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+11]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + _argvec[2 + 8] = (unsigned long)arg8; \ + _argvec[2 + 9] = (unsigned long)arg9; \ + _argvec[2 + 10] = (unsigned long)arg10; \ + _argvec[2 + 11] = (unsigned long)arg11; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "stw 2,-8(11)\n\t" /* save tocptr */ \ @@ -2931,32 +2761,32 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - 
-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+12]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - _argvec[2+12] = (unsigned long)arg12; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, \ + arg11, arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + _argvec[2 + 8] = (unsigned long)arg8; \ + _argvec[2 + 9] = (unsigned long)arg9; \ + _argvec[2 + 10] = (unsigned long)arg10; \ + _argvec[2 + 11] = (unsigned long)arg11; \ + _argvec[2 + 12] = (unsigned long)arg12; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "stw 2,-8(11)\n\t" /* save tocptr */ \ @@ -2993,9 +2823,9 @@ typedef : /*out*/ "=r" (_res) \ : 
/*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) #endif /* PLAT_ppc32_aix5 */ @@ -3006,35 +2836,34 @@ typedef /* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ /* These regs are trashed by the hidden call. */ -#define __CALLER_SAVED_REGS \ - "lr", "ctr", "xer", \ - "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ - "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ - "r11", "r12", "r13" +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", "r0", "r2", "r3", \ + "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13" /* Expand the stack frame, copying enough info that unwinding still works. Trashes r3. */ -#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ - "addi 1,1,-" #_n_fr "\n\t" \ - "ld 3," #_n_fr "(1)\n\t" \ - "std 3,0(1)\n\t" +#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ + "addi 1,1,-" #_n_fr \ + "\n\t" \ + "ld 3," #_n_fr \ + "(1)\n\t" \ + "std 3,0(1)\n\t" -#define VG_CONTRACT_FRAME_BY(_n_fr) \ - "addi 1,1," #_n_fr "\n\t" +#define VG_CONTRACT_FRAME_BY(_n_fr) "addi 1,1," #_n_fr "\n\t" /* These CALL_FN_ macros assume that on ppc64-aix5, sizeof(unsigned long) == 8. 
*/ -#define CALL_FN_W_v(lval, orig) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+0]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - __asm__ volatile( \ +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "std 2,-16(11)\n\t" /* save tocptr */ \ @@ -3048,20 +2877,20 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_W(lval, orig, arg1) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+1]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "std 2,-16(11)\n\t" /* save tocptr */ \ @@ -3076,21 +2905,21 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } 
while (0) - -#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+2]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1, arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "std 2,-16(11)\n\t" /* save tocptr */ \ @@ -3106,22 +2935,22 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+3]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1, arg2, arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned 
long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "std 2,-16(11)\n\t" /* save tocptr */ \ @@ -3138,23 +2967,23 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+4]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1, arg2, arg3, arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "std 2,-16(11)\n\t" /* save tocptr */ \ @@ -3172,24 +3001,24 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+5]; \ - volatile unsigned long _res; 
\ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1, arg2, arg3, arg4, arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "std 2,-16(11)\n\t" /* save tocptr */ \ @@ -3208,25 +3037,25 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+6]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6) \ + do { 
\ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "std 2,-16(11)\n\t" /* save tocptr */ \ @@ -3246,27 +3075,26 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+7]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] 
= (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "std 2,-16(11)\n\t" /* save tocptr */ \ @@ -3287,28 +3115,27 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+8]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + _argvec[2 + 8] = (unsigned long)arg8; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "std 2,-16(11)\n\t" /* save tocptr 
*/ \ @@ -3330,29 +3157,28 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+9]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + _argvec[2 + 8] = (unsigned long)arg8; \ + _argvec[2 + 9] = (unsigned long)arg9; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "std 2,-16(11)\n\t" /* save tocptr */ \ @@ -3380,30 +3206,29 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = 
(__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+10]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + _argvec[2 + 8] = (unsigned long)arg8; \ + _argvec[2 + 9] = (unsigned long)arg9; \ + _argvec[2 + 10] = (unsigned long)arg10; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "std 2,-16(11)\n\t" /* save tocptr */ \ @@ -3434,31 +3259,31 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) - -#define 
CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+11]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + _argvec[2 + 8] = (unsigned long)arg8; \ + _argvec[2 + 9] = (unsigned long)arg9; \ + _argvec[2 + 10] = (unsigned long)arg10; \ + _argvec[2 + 11] = (unsigned long)arg11; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "std 2,-16(11)\n\t" /* save tocptr */ \ @@ -3492,32 +3317,32 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = 
(__typeof__(lval)) _res; \ - } while (0) - -#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ - arg7,arg8,arg9,arg10,arg11,arg12) \ - do { \ - volatile OrigFn _orig = (orig); \ - volatile unsigned long _argvec[3+12]; \ - volatile unsigned long _res; \ - /* _argvec[0] holds current r2 across the call */ \ - _argvec[1] = (unsigned long)_orig.r2; \ - _argvec[2] = (unsigned long)_orig.nraddr; \ - _argvec[2+1] = (unsigned long)arg1; \ - _argvec[2+2] = (unsigned long)arg2; \ - _argvec[2+3] = (unsigned long)arg3; \ - _argvec[2+4] = (unsigned long)arg4; \ - _argvec[2+5] = (unsigned long)arg5; \ - _argvec[2+6] = (unsigned long)arg6; \ - _argvec[2+7] = (unsigned long)arg7; \ - _argvec[2+8] = (unsigned long)arg8; \ - _argvec[2+9] = (unsigned long)arg9; \ - _argvec[2+10] = (unsigned long)arg10; \ - _argvec[2+11] = (unsigned long)arg11; \ - _argvec[2+12] = (unsigned long)arg12; \ - __asm__ volatile( \ + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, \ + arg11, arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3 + 12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2 + 1] = (unsigned long)arg1; \ + _argvec[2 + 2] = (unsigned long)arg2; \ + _argvec[2 + 3] = (unsigned long)arg3; \ + _argvec[2 + 4] = (unsigned long)arg4; \ + _argvec[2 + 5] = (unsigned long)arg5; \ + _argvec[2 + 6] = (unsigned long)arg6; \ + _argvec[2 + 7] = (unsigned long)arg7; \ + _argvec[2 + 8] = (unsigned long)arg8; \ + _argvec[2 + 9] = (unsigned long)arg9; \ + _argvec[2 + 10] = (unsigned long)arg10; \ + _argvec[2 + 11] = (unsigned long)arg11; \ + _argvec[2 + 12] = (unsigned long)arg12; \ + __asm__ volatile( \ "mr 11,%1\n\t" \ VG_EXPAND_FRAME_BY_trashes_r3(512) \ "std 2,-16(11)\n\t" /* save tocptr */ \ @@ -3554,13 
+3379,12 @@ typedef : /*out*/ "=r" (_res) \ : /*in*/ "r" (&_argvec[2]) \ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ - ); \ - lval = (__typeof__(lval)) _res; \ - } while (0) + ); \ + lval = (__typeof__(lval))_res; \ + } while (0) #endif /* PLAT_ppc64_aix5 */ - /* ------------------------------------------------------------------ */ /* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */ /* */ @@ -3576,132 +3400,121 @@ typedef /* These macros are used by tools -- they must be public, but don't embed them into other programs. */ -#define VG_USERREQ_TOOL_BASE(a,b) \ - ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16)) -#define VG_IS_TOOL_USERREQ(a, b, v) \ - (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000)) +#define VG_USERREQ_TOOL_BASE(a, b) ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16)) +#define VG_IS_TOOL_USERREQ(a, b, v) (VG_USERREQ_TOOL_BASE(a, b) == ((v)&0xffff0000)) /* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! This enum comprises an ABI exported by Valgrind to programs which use client requests. DO NOT CHANGE THE ORDER OF THESE ENTRIES, NOR DELETE ANY -- add new ones at the end. */ -typedef - enum { VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001, - VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002, +typedef enum { + VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001, + VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002, - /* These allow any function to be called from the simulated + /* These allow any function to be called from the simulated CPU but run on the real CPU. Nb: the first arg passed to the function is always the ThreadId of the running thread! So CLIENT_CALL0 actually requires a 1 arg function, etc. */ - VG_USERREQ__CLIENT_CALL0 = 0x1101, - VG_USERREQ__CLIENT_CALL1 = 0x1102, - VG_USERREQ__CLIENT_CALL2 = 0x1103, - VG_USERREQ__CLIENT_CALL3 = 0x1104, + VG_USERREQ__CLIENT_CALL0 = 0x1101, + VG_USERREQ__CLIENT_CALL1 = 0x1102, + VG_USERREQ__CLIENT_CALL2 = 0x1103, + VG_USERREQ__CLIENT_CALL3 = 0x1104, - /* Can be useful in regression testing suites -- eg. 
can + /* Can be useful in regression testing suites -- eg. can send Valgrind's output to /dev/null and still count errors. */ - VG_USERREQ__COUNT_ERRORS = 0x1201, + VG_USERREQ__COUNT_ERRORS = 0x1201, - /* These are useful and can be interpreted by any tool that + /* These are useful and can be interpreted by any tool that tracks malloc() et al, by using vg_replace_malloc.c. */ - VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301, - VG_USERREQ__FREELIKE_BLOCK = 0x1302, - /* Memory pool support. */ - VG_USERREQ__CREATE_MEMPOOL = 0x1303, - VG_USERREQ__DESTROY_MEMPOOL = 0x1304, - VG_USERREQ__MEMPOOL_ALLOC = 0x1305, - VG_USERREQ__MEMPOOL_FREE = 0x1306, - VG_USERREQ__MEMPOOL_TRIM = 0x1307, - VG_USERREQ__MOVE_MEMPOOL = 0x1308, - VG_USERREQ__MEMPOOL_CHANGE = 0x1309, - VG_USERREQ__MEMPOOL_EXISTS = 0x130a, - - /* Allow printfs to valgrind log. */ - VG_USERREQ__PRINTF = 0x1401, - VG_USERREQ__PRINTF_BACKTRACE = 0x1402, - - /* Stack support. */ - VG_USERREQ__STACK_REGISTER = 0x1501, - VG_USERREQ__STACK_DEREGISTER = 0x1502, - VG_USERREQ__STACK_CHANGE = 0x1503 - } Vg_ClientRequest; + VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301, + VG_USERREQ__FREELIKE_BLOCK = 0x1302, + /* Memory pool support. */ + VG_USERREQ__CREATE_MEMPOOL = 0x1303, + VG_USERREQ__DESTROY_MEMPOOL = 0x1304, + VG_USERREQ__MEMPOOL_ALLOC = 0x1305, + VG_USERREQ__MEMPOOL_FREE = 0x1306, + VG_USERREQ__MEMPOOL_TRIM = 0x1307, + VG_USERREQ__MOVE_MEMPOOL = 0x1308, + VG_USERREQ__MEMPOOL_CHANGE = 0x1309, + VG_USERREQ__MEMPOOL_EXISTS = 0x130a, + + /* Allow printfs to valgrind log. */ + VG_USERREQ__PRINTF = 0x1401, + VG_USERREQ__PRINTF_BACKTRACE = 0x1402, + + /* Stack support. */ + VG_USERREQ__STACK_REGISTER = 0x1501, + VG_USERREQ__STACK_DEREGISTER = 0x1502, + VG_USERREQ__STACK_CHANGE = 0x1503 +} Vg_ClientRequest; #if !defined(__GNUC__) -# define __extension__ /* */ +#define __extension__ /* */ #endif /* Returns the number of Valgrinds this code is running under. 
That is, 0 if running natively, 1 if running under Valgrind, 2 if running under Valgrind which is running under another Valgrind, etc. */ -#define RUNNING_ON_VALGRIND __extension__ \ - ({unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* if not */, \ - VG_USERREQ__RUNNING_ON_VALGRIND, \ - 0, 0, 0, 0, 0); \ - _qzz_res; \ - }) - +#define RUNNING_ON_VALGRIND \ + __extension__({ \ + unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* if not */, VG_USERREQ__RUNNING_ON_VALGRIND, 0, \ + 0, 0, 0, 0); \ + _qzz_res; \ + }) /* Discard translation of code in the range [_qzz_addr .. _qzz_addr + _qzz_len - 1]. Useful if you are debugging a JITter or some such, since it provides a way to make sure valgrind will retranslate the invalidated area. Returns no value. */ -#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__DISCARD_TRANSLATIONS, \ - _qzz_addr, _qzz_len, 0, 0, 0); \ - } - +#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr, _qzz_len) \ + { \ + unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__DISCARD_TRANSLATIONS, _qzz_addr, \ + _qzz_len, 0, 0, 0); \ + } /* These requests are for getting Valgrind itself to print something. Possibly with a backtrace. This is a really ugly hack. */ #if defined(NVALGRIND) -# define VALGRIND_PRINTF(...) -# define VALGRIND_PRINTF_BACKTRACE(...) +#define VALGRIND_PRINTF(...) +#define VALGRIND_PRINTF_BACKTRACE(...) #else /* NVALGRIND */ /* Modern GCC will optimize the static routine out if unused, and unused attribute will shut down warnings about it. */ -static int VALGRIND_PRINTF(const char *format, ...) - __attribute__((format(__printf__, 1, 2), __unused__)); -static int -VALGRIND_PRINTF(const char *format, ...) 
-{ - unsigned long _qzz_res; - va_list vargs; - va_start(vargs, format); - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF, - (unsigned long)format, (unsigned long)vargs, - 0, 0, 0); - va_end(vargs); - return (int)_qzz_res; +static int VALGRIND_PRINTF(const char* format, ...) + __attribute__((format(__printf__, 1, 2), __unused__)); +static int VALGRIND_PRINTF(const char* format, ...) { + unsigned long _qzz_res; + va_list vargs; + va_start(vargs, format); + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF, (unsigned long)format, + (unsigned long)vargs, 0, 0, 0); + va_end(vargs); + return (int)_qzz_res; } -static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...) - __attribute__((format(__printf__, 1, 2), __unused__)); -static int -VALGRIND_PRINTF_BACKTRACE(const char *format, ...) -{ - unsigned long _qzz_res; - va_list vargs; - va_start(vargs, format); - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF_BACKTRACE, - (unsigned long)format, (unsigned long)vargs, - 0, 0, 0); - va_end(vargs); - return (int)_qzz_res; +static int VALGRIND_PRINTF_BACKTRACE(const char* format, ...) + __attribute__((format(__printf__, 1, 2), __unused__)); +static int VALGRIND_PRINTF_BACKTRACE(const char* format, ...) { + unsigned long _qzz_res; + va_list vargs; + va_start(vargs, format); + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF_BACKTRACE, (unsigned long)format, + (unsigned long)vargs, 0, 0, 0); + va_end(vargs); + return (int)_qzz_res; } #endif /* NVALGRIND */ - /* These requests allow control to move from the simulated CPU to the real CPU, calling an arbitary function. @@ -3725,59 +3538,48 @@ VALGRIND_PRINTF_BACKTRACE(const char *format, ...) likely to have a bad outcome, for tricky reasons which we've grappled with a lot in the past. 
*/ -#define VALGRIND_NON_SIMD_CALL0(_qyy_fn) \ - __extension__ \ - ({unsigned long _qyy_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ - VG_USERREQ__CLIENT_CALL0, \ - _qyy_fn, \ - 0, 0, 0, 0); \ - _qyy_res; \ - }) - -#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1) \ - __extension__ \ - ({unsigned long _qyy_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ - VG_USERREQ__CLIENT_CALL1, \ - _qyy_fn, \ - _qyy_arg1, 0, 0, 0); \ - _qyy_res; \ - }) - -#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2) \ - __extension__ \ - ({unsigned long _qyy_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ - VG_USERREQ__CLIENT_CALL2, \ - _qyy_fn, \ - _qyy_arg1, _qyy_arg2, 0, 0); \ - _qyy_res; \ - }) - -#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \ - __extension__ \ - ({unsigned long _qyy_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ - VG_USERREQ__CLIENT_CALL3, \ - _qyy_fn, \ - _qyy_arg1, _qyy_arg2, \ - _qyy_arg3, 0); \ - _qyy_res; \ - }) - +#define VALGRIND_NON_SIMD_CALL0(_qyy_fn) \ + __extension__({ \ + unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, VG_USERREQ__CLIENT_CALL0, \ + _qyy_fn, 0, 0, 0, 0); \ + _qyy_res; \ + }) + +#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1) \ + __extension__({ \ + unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, VG_USERREQ__CLIENT_CALL1, \ + _qyy_fn, _qyy_arg1, 0, 0, 0); \ + _qyy_res; \ + }) + +#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2) \ + __extension__({ \ + unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, VG_USERREQ__CLIENT_CALL2, \ + _qyy_fn, _qyy_arg1, _qyy_arg2, 0, 0); \ + _qyy_res; \ + }) + +#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \ + __extension__({ \ + unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default 
return */, VG_USERREQ__CLIENT_CALL3, \ + _qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3, 0); \ + _qyy_res; \ + }) /* Counts the number of errors that have been recorded by a tool. Nb: the tool must record the errors with VG_(maybe_record_error)() or VG_(unique_error)() for them to be counted. */ -#define VALGRIND_COUNT_ERRORS \ - __extension__ \ - ({unsigned int _qyy_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ - VG_USERREQ__COUNT_ERRORS, \ - 0, 0, 0, 0, 0); \ - _qyy_res; \ - }) +#define VALGRIND_COUNT_ERRORS \ + __extension__({ \ + unsigned int _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, VG_USERREQ__COUNT_ERRORS, 0, \ + 0, 0, 0, 0); \ + _qyy_res; \ + }) /* Mark a block of memory as having been allocated by a malloc()-like function. `addr' is the start of the usable block (ie. after any @@ -3804,115 +3606,105 @@ VALGRIND_PRINTF_BACKTRACE(const char *format, ...) Nb: block must be freed via a free()-like function specified with VALGRIND_FREELIKE_BLOCK or mismatch errors will occur. */ -#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MALLOCLIKE_BLOCK, \ - addr, sizeB, rzB, is_zeroed, 0); \ - } +#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ + { \ + unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__MALLOCLIKE_BLOCK, addr, sizeB, rzB, \ + is_zeroed, 0); \ + } /* Mark a block of memory as having been freed by a free()-like function. `rzB' is redzone size; it must match that given to VALGRIND_MALLOCLIKE_BLOCK. Memory not freed will be detected by the leak checker. Put it immediately after the point where the block is freed. 
*/ -#define VALGRIND_FREELIKE_BLOCK(addr, rzB) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__FREELIKE_BLOCK, \ - addr, rzB, 0, 0, 0); \ - } +#define VALGRIND_FREELIKE_BLOCK(addr, rzB) \ + { \ + unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__FREELIKE_BLOCK, addr, rzB, 0, 0, 0); \ + } /* Create a memory pool. */ -#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__CREATE_MEMPOOL, \ - pool, rzB, is_zeroed, 0, 0); \ - } +#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed) \ + { \ + unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__CREATE_MEMPOOL, pool, rzB, is_zeroed, \ + 0, 0); \ + } /* Destroy a memory pool. */ -#define VALGRIND_DESTROY_MEMPOOL(pool) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__DESTROY_MEMPOOL, \ - pool, 0, 0, 0, 0); \ - } +#define VALGRIND_DESTROY_MEMPOOL(pool) \ + { \ + unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__DESTROY_MEMPOOL, pool, 0, 0, 0, 0); \ + } /* Associate a piece of memory with a memory pool. */ -#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MEMPOOL_ALLOC, \ - pool, addr, size, 0, 0); \ - } +#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size) \ + { \ + unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__MEMPOOL_ALLOC, pool, addr, size, 0, \ + 0); \ + } /* Disassociate a piece of memory from a memory pool. 
*/ -#define VALGRIND_MEMPOOL_FREE(pool, addr) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MEMPOOL_FREE, \ - pool, addr, 0, 0, 0); \ - } +#define VALGRIND_MEMPOOL_FREE(pool, addr) \ + { \ + unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__MEMPOOL_FREE, pool, addr, 0, 0, 0); \ + } /* Disassociate any pieces outside a particular range. */ -#define VALGRIND_MEMPOOL_TRIM(pool, addr, size) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MEMPOOL_TRIM, \ - pool, addr, size, 0, 0); \ - } +#define VALGRIND_MEMPOOL_TRIM(pool, addr, size) \ + { \ + unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__MEMPOOL_TRIM, pool, addr, size, 0, 0); \ + } /* Resize and/or move a piece associated with a memory pool. */ -#define VALGRIND_MOVE_MEMPOOL(poolA, poolB) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MOVE_MEMPOOL, \ - poolA, poolB, 0, 0, 0); \ - } +#define VALGRIND_MOVE_MEMPOOL(poolA, poolB) \ + { \ + unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__MOVE_MEMPOOL, poolA, poolB, 0, 0, 0); \ + } /* Resize and/or move a piece associated with a memory pool. */ -#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MEMPOOL_CHANGE, \ - pool, addrA, addrB, size, 0); \ - } +#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size) \ + { \ + unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__MEMPOOL_CHANGE, pool, addrA, addrB, \ + size, 0); \ + } /* Return 1 if a mempool exists, else 0. 
*/ -#define VALGRIND_MEMPOOL_EXISTS(pool) \ - ({unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__MEMPOOL_EXISTS, \ - pool, 0, 0, 0, 0); \ - _qzz_res; \ - }) +#define VALGRIND_MEMPOOL_EXISTS(pool) \ + ({ \ + unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__MEMPOOL_EXISTS, pool, 0, 0, 0, 0); \ + _qzz_res; \ + }) /* Mark a piece of memory as being a stack. Returns a stack id. */ -#define VALGRIND_STACK_REGISTER(start, end) \ - ({unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__STACK_REGISTER, \ - start, end, 0, 0, 0); \ - _qzz_res; \ - }) +#define VALGRIND_STACK_REGISTER(start, end) \ + ({ \ + unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__STACK_REGISTER, start, end, 0, 0, 0); \ + _qzz_res; \ + }) /* Unmark the piece of memory associated with a stack id as being a stack. */ -#define VALGRIND_STACK_DEREGISTER(id) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__STACK_DEREGISTER, \ - id, 0, 0, 0, 0); \ - } +#define VALGRIND_STACK_DEREGISTER(id) \ + { \ + unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__STACK_DEREGISTER, id, 0, 0, 0, 0); \ + } /* Change the start and end address of the stack id. */ -#define VALGRIND_STACK_CHANGE(id, start, end) \ - {unsigned int _qzz_res; \ - VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ - VG_USERREQ__STACK_CHANGE, \ - id, start, end, 0, 0); \ - } - +#define VALGRIND_STACK_CHANGE(id, start, end) \ + { \ + unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__STACK_CHANGE, id, start, end, 0, 0); \ + } #undef PLAT_x86_linux #undef PLAT_amd64_linux @@ -3921,4 +3713,4 @@ VALGRIND_PRINTF_BACKTRACE(const char *format, ...) 
#undef PLAT_ppc32_aix5 #undef PLAT_ppc64_aix5 -#endif /* __VALGRIND_H */ +#endif /* __VALGRIND_H */ diff --git a/be/src/gutil/walltime.h b/be/src/gutil/walltime.h index 337535f4493249..5358af0333c274 100644 --- a/be/src/gutil/walltime.h +++ b/be/src/gutil/walltime.h @@ -25,14 +25,13 @@ #include #if defined(__APPLE__) +#include #include #include #include -#include - #include "gutil/once.h" -#endif // #if defined(__APPLE__) +#endif // #if defined(__APPLE__) #include "gutil/integral_types.h" @@ -40,10 +39,7 @@ typedef double WallTime; // Append result to a supplied string. // If an error occurs during conversion 'dst' is not modified. -void StringAppendStrftime(std::string* dst, - const char* format, - time_t when, - bool local); +void StringAppendStrftime(std::string* dst, const char* format, time_t when, bool local); // Return the given timestamp (in seconds since the epoch) as a string suitable // for user display in the current timezone. @@ -56,11 +52,8 @@ std::string LocalTimeAsString(); // argument specifying if the time_spec is in local time or UTC // time. If local is set to true, the same exact result as // WallTime_Parse is returned. -bool WallTime_Parse_Timezone(const char* time_spec, - const char* format, - const struct tm* default_time, - bool local, - WallTime* result); +bool WallTime_Parse_Timezone(const char* time_spec, const char* format, + const struct tm* default_time, bool local, WallTime* result); // Return current time in seconds as a WallTime. 
WallTime WallTime_Now(); @@ -76,76 +69,74 @@ extern mach_timebase_info_data_t timebase_info; extern void InitializeTimebaseInfo(); inline void GetCurrentTime(mach_timespec_t* ts) { - clock_serv_t cclock; - host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); - CHECK_EQ(KERN_SUCCESS, clock_get_time(cclock, ts)); - mach_port_deallocate(mach_task_self(), cclock); + clock_serv_t cclock; + host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); + CHECK_EQ(KERN_SUCCESS, clock_get_time(cclock, ts)); + mach_port_deallocate(mach_task_self(), cclock); } inline MicrosecondsInt64 GetCurrentTimeMicros() { - mach_timespec_t ts; - GetCurrentTime(&ts); - // 'tv_sec' is just 4 bytes on macOS, need to be careful not - // to convert to nanos until we've moved to a larger int. - MicrosecondsInt64 micros_from_secs = ts.tv_sec; - micros_from_secs *= 1000 * 1000; - micros_from_secs += ts.tv_nsec / 1000; - return micros_from_secs; + mach_timespec_t ts; + GetCurrentTime(&ts); + // 'tv_sec' is just 4 bytes on macOS, need to be careful not + // to convert to nanos until we've moved to a larger int. + MicrosecondsInt64 micros_from_secs = ts.tv_sec; + micros_from_secs *= 1000 * 1000; + micros_from_secs += ts.tv_nsec / 1000; + return micros_from_secs; } inline int64_t GetMonoTimeNanos() { - // See Apple Technical Q&A QA1398 for further detail on mono time in OS X. - GoogleOnceInit(&timebase_info_once, &InitializeTimebaseInfo); + // See Apple Technical Q&A QA1398 for further detail on mono time in OS X. + GoogleOnceInit(&timebase_info_once, &InitializeTimebaseInfo); - uint64_t time = mach_absolute_time(); + uint64_t time = mach_absolute_time(); - // mach_absolute_time returns ticks, which need to be scaled by the timebase - // info to get nanoseconds. - return time * timebase_info.numer / timebase_info.denom; + // mach_absolute_time returns ticks, which need to be scaled by the timebase + // info to get nanoseconds. 
+ return time * timebase_info.numer / timebase_info.denom; } inline MicrosecondsInt64 GetMonoTimeMicros() { - return GetMonoTimeNanos() / 1000; + return GetMonoTimeNanos() / 1000; } inline MicrosecondsInt64 GetThreadCpuTimeMicros() { - // See https://www.gnu.org/software/hurd/gnumach-doc/Thread-Information.html - // and Chromium base/time/time_mac.cc. - task_t thread = mach_thread_self(); - if (thread == MACH_PORT_NULL) { - LOG(WARNING) << "Failed to get mach_thread_self()"; - return 0; - } - - mach_msg_type_number_t thread_info_count = THREAD_BASIC_INFO_COUNT; - thread_basic_info_data_t thread_info_data; - - kern_return_t result = thread_info( - thread, - THREAD_BASIC_INFO, - reinterpret_cast(&thread_info_data), - &thread_info_count); - - if (result != KERN_SUCCESS) { - LOG(WARNING) << "Failed to get thread_info()"; - return 0; - } - - return thread_info_data.user_time.seconds * 1000000 + thread_info_data.user_time.microseconds; + // See https://www.gnu.org/software/hurd/gnumach-doc/Thread-Information.html + // and Chromium base/time/time_mac.cc. + task_t thread = mach_thread_self(); + if (thread == MACH_PORT_NULL) { + LOG(WARNING) << "Failed to get mach_thread_self()"; + return 0; + } + + mach_msg_type_number_t thread_info_count = THREAD_BASIC_INFO_COUNT; + thread_basic_info_data_t thread_info_data; + + kern_return_t result = + thread_info(thread, THREAD_BASIC_INFO, + reinterpret_cast(&thread_info_data), &thread_info_count); + + if (result != KERN_SUCCESS) { + LOG(WARNING) << "Failed to get thread_info()"; + return 0; + } + + return thread_info_data.user_time.seconds * 1000000 + thread_info_data.user_time.microseconds; } #else inline MicrosecondsInt64 GetClockTimeMicros(clockid_t clock) { - timespec ts; - clock_gettime(clock, &ts); - // 'tv_sec' is usually 8 bytes, but the spec says it only - // needs to be 'a signed int'. Moved to a 64 bit var before - // converting to micros to be safe. 
- MicrosecondsInt64 micros_from_secs = ts.tv_sec; - micros_from_secs *= 1000 * 1000; - micros_from_secs += ts.tv_nsec / 1000; - return micros_from_secs; + timespec ts; + clock_gettime(clock, &ts); + // 'tv_sec' is usually 8 bytes, but the spec says it only + // needs to be 'a signed int'. Moved to a 64 bit var before + // converting to micros to be safe. + MicrosecondsInt64 micros_from_secs = ts.tv_sec; + micros_from_secs *= 1000 * 1000; + micros_from_secs += ts.tv_nsec / 1000; + return micros_from_secs; } #endif // defined(__APPLE__) @@ -155,10 +146,10 @@ inline MicrosecondsInt64 GetClockTimeMicros(clockid_t clock) { // Returns the time since the Epoch measured in microseconds. inline MicrosecondsInt64 GetCurrentTimeMicros() { #if defined(__APPLE__) - return walltime_internal::GetCurrentTimeMicros(); + return walltime_internal::GetCurrentTimeMicros(); #else - return walltime_internal::GetClockTimeMicros(CLOCK_REALTIME); -#endif // defined(__APPLE__) + return walltime_internal::GetClockTimeMicros(CLOCK_REALTIME); +#endif // defined(__APPLE__) } // Returns the time since some arbitrary reference point, measured in microseconds. @@ -169,15 +160,15 @@ inline MicrosecondsInt64 GetCurrentTimeMicros() { // of the desired result frequency, etc. inline MicrosecondsInt64 GetMonoTimeMicros() { #if defined(__APPLE__) - // In fact, walltime_internal::GetMonoTimeMicros() is implemented via - // mach_absolute_time() which is not actually affected by adjtime() - // or the NTP discipline. On Darwin 16.0 and newer (macOS 10.12 and newer), - // it's the same as clock_gettime(CLOCK_UPTIME_RAW); see 'man clock_gettime' - // on macOS 10.12 and newer. - return walltime_internal::GetMonoTimeMicros(); + // In fact, walltime_internal::GetMonoTimeMicros() is implemented via + // mach_absolute_time() which is not actually affected by adjtime() + // or the NTP discipline. 
On Darwin 16.0 and newer (macOS 10.12 and newer), + // it's the same as clock_gettime(CLOCK_UPTIME_RAW); see 'man clock_gettime' + // on macOS 10.12 and newer. + return walltime_internal::GetMonoTimeMicros(); #else - return walltime_internal::GetClockTimeMicros(CLOCK_MONOTONIC); -#endif // defined(__APPLE__) + return walltime_internal::GetClockTimeMicros(CLOCK_MONOTONIC); +#endif // defined(__APPLE__) } // Returns the time since some arbitrary reference point, measured in microseconds. @@ -185,32 +176,32 @@ inline MicrosecondsInt64 GetMonoTimeMicros() { // adjustments such as adjtime() or the kernel's NTP discipline. inline MicrosecondsInt64 GetMonoTimeMicrosRaw() { #if defined(__APPLE__) - return walltime_internal::GetMonoTimeMicros(); + return walltime_internal::GetMonoTimeMicros(); #else - return walltime_internal::GetClockTimeMicros(CLOCK_MONOTONIC_RAW); -#endif // defined(__APPLE__) + return walltime_internal::GetClockTimeMicros(CLOCK_MONOTONIC_RAW); +#endif // defined(__APPLE__) } // Returns the time spent in user CPU on the current thread, measured in microseconds. inline MicrosecondsInt64 GetThreadCpuTimeMicros() { #if defined(__APPLE__) - return walltime_internal::GetThreadCpuTimeMicros(); + return walltime_internal::GetThreadCpuTimeMicros(); #else - return walltime_internal::GetClockTimeMicros(CLOCK_THREAD_CPUTIME_ID); -#endif // defined(__APPLE__) + return walltime_internal::GetClockTimeMicros(CLOCK_THREAD_CPUTIME_ID); +#endif // defined(__APPLE__) } // A CycleClock yields the value of a cycle counter that increments at a rate // that is approximately constant. class CycleClock { - public: - // Return the value of the counter. - static inline int64 Now(); +public: + // Return the value of the counter. 
+ static inline int64 Now(); - private: - CycleClock(); +private: + CycleClock(); }; // inline method bodies -#include "gutil/cycleclock-inl.h" // IWYU pragma: export -#endif // GUTIL_WALLTIME_H_ +#include "gutil/cycleclock-inl.h" // IWYU pragma: export +#endif // GUTIL_WALLTIME_H_ diff --git a/be/src/http/action/checksum_action.cpp b/be/src/http/action/checksum_action.cpp index 603512e673583d..f65a86a36d54d7 100644 --- a/be/src/http/action/checksum_action.cpp +++ b/be/src/http/action/checksum_action.cpp @@ -17,13 +17,12 @@ #include "http/action/checksum_action.h" -#include #include +#include +#include "agent/cgroups_mgr.h" #include "boost/lexical_cast.hpp" - #include "common/logging.h" -#include "agent/cgroups_mgr.h" #include "http/http_channel.h" #include "http/http_headers.h" #include "http/http_request.h" @@ -43,11 +42,9 @@ const std::string TABLET_VERSION = "version"; const std::string VERSION_HASH = "version_hash"; const std::string SCHEMA_HASH = "schema_hash"; -ChecksumAction::ChecksumAction(ExecEnv* exec_env) : - _exec_env(exec_env) { -} +ChecksumAction::ChecksumAction(ExecEnv* exec_env) : _exec_env(exec_env) {} -void ChecksumAction::handle(HttpRequest *req) { +void ChecksumAction::handle(HttpRequest* req) { LOG(INFO) << "accept one request " << req->debug_string(); // add tid to cgroup in order to limit read bandwidth @@ -55,8 +52,7 @@ void ChecksumAction::handle(HttpRequest *req) { // Get tablet id const std::string& tablet_id_str = req->param(TABLET_ID); if (tablet_id_str.empty()) { - std::string error_msg = std::string( - "parameter " + TABLET_ID + " not specified in url."); + std::string error_msg = std::string("parameter " + TABLET_ID + " not specified in url."); HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, error_msg); return; @@ -65,8 +61,8 @@ void ChecksumAction::handle(HttpRequest *req) { // Get version const std::string& version_str = req->param(TABLET_VERSION); if (version_str.empty()) { - std::string error_msg = std::string( - 
"parameter " + TABLET_VERSION + " not specified in url."); + std::string error_msg = + std::string("parameter " + TABLET_VERSION + " not specified in url."); HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, error_msg); return; } @@ -74,8 +70,7 @@ void ChecksumAction::handle(HttpRequest *req) { // Get version hash const std::string& version_hash_str = req->param(VERSION_HASH); if (version_hash_str.empty()) { - std::string error_msg = std::string( - "parameter " + VERSION_HASH + " not specified in url."); + std::string error_msg = std::string("parameter " + VERSION_HASH + " not specified in url."); HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, error_msg); return; } @@ -83,8 +78,7 @@ void ChecksumAction::handle(HttpRequest *req) { // Get schema hash const std::string& schema_hash_str = req->param(SCHEMA_HASH); if (schema_hash_str.empty()) { - std::string error_msg = std::string( - "parameter " + SCHEMA_HASH + " not specified in url."); + std::string error_msg = std::string("parameter " + SCHEMA_HASH + " not specified in url."); HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, error_msg); return; } @@ -105,8 +99,8 @@ void ChecksumAction::handle(HttpRequest *req) { return; } - VLOG_ROW << "get checksum tablet info: " << tablet_id << "-" - << version << "-" << version_hash << "-" << schema_hash; + VLOG_ROW << "get checksum tablet info: " << tablet_id << "-" << version << "-" << version_hash + << "-" << schema_hash; int64_t checksum = do_checksum(tablet_id, version, version_hash, schema_hash, req); if (checksum == -1L) { @@ -124,22 +118,20 @@ void ChecksumAction::handle(HttpRequest *req) { } int64_t ChecksumAction::do_checksum(int64_t tablet_id, int64_t version, int64_t version_hash, - int32_t schema_hash, HttpRequest *req) { - + int32_t schema_hash, HttpRequest* req) { OLAPStatus res = OLAP_SUCCESS; uint32_t checksum; EngineChecksumTask engine_task(tablet_id, schema_hash, version, version_hash, &checksum); res = engine_task.execute(); if (res != 
OLAP_SUCCESS) { - LOG(WARNING) << "checksum failed. status: " << res - << ", signature: " << tablet_id; + LOG(WARNING) << "checksum failed. status: " << res << ", signature: " << tablet_id; return -1L; } else { - LOG(INFO) << "checksum success. status: " << res - << ", signature: " << tablet_id << ". checksum: " << checksum; + LOG(INFO) << "checksum success. status: " << res << ", signature: " << tablet_id + << ". checksum: " << checksum; } return static_cast(checksum); -} +} } // end namespace doris diff --git a/be/src/http/action/checksum_action.h b/be/src/http/action/checksum_action.h index ca7af1f0792ecf..8e342e120690f7 100644 --- a/be/src/http/action/checksum_action.h +++ b/be/src/http/action/checksum_action.h @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_HTTP_CHECKSUM_ACTION_H -#define DORIS_BE_SRC_HTTP_CHECKSUM_ACTION_H +#ifndef DORIS_BE_SRC_HTTP_CHECKSUM_ACTION_H +#define DORIS_BE_SRC_HTTP_CHECKSUM_ACTION_H #include @@ -30,12 +30,13 @@ class ChecksumAction : public HttpHandler { public: explicit ChecksumAction(ExecEnv* exec_env); - virtual ~ChecksumAction() { } + virtual ~ChecksumAction() {} + + void handle(HttpRequest* req) override; - void handle(HttpRequest *req) override; private: int64_t do_checksum(int64_t tablet_id, int64_t version, int64_t version_hash, - int32_t schema_hash, HttpRequest *req); + int32_t schema_hash, HttpRequest* req); ExecEnv* _exec_env; @@ -43,4 +44,3 @@ class ChecksumAction : public HttpHandler { } // end namespace doris #endif // DORIS_BE_SRC_COMMON_UTIL_DOWNLOAD_ACTION_H - diff --git a/be/src/http/action/compaction_action.cpp b/be/src/http/action/compaction_action.cpp index a2a3460c99fb2e..45050a29ae0ab3 100644 --- a/be/src/http/action/compaction_action.cpp +++ b/be/src/http/action/compaction_action.cpp @@ -17,22 +17,22 @@ #include "http/action/compaction_action.h" +#include + #include #include -#include +#include "common/logging.h" +#include 
"gutil/strings/substitute.h" #include "http/http_channel.h" #include "http/http_headers.h" #include "http/http_request.h" #include "http/http_response.h" #include "http/http_status.h" - -#include "common/logging.h" -#include "gutil/strings/substitute.h" -#include "olap/olap_define.h" -#include "olap/storage_engine.h" #include "olap/base_compaction.h" #include "olap/cumulative_compaction.h" +#include "olap/olap_define.h" +#include "olap/storage_engine.h" #include "util/json_util.h" namespace doris { @@ -42,8 +42,8 @@ const static std::string HEADER_JSON = "application/json"; bool CompactionAction::_is_compaction_running = false; std::mutex CompactionAction::_compaction_running_mutex; -Status CompactionAction::_check_param(HttpRequest* req, uint64_t* tablet_id, uint32_t* schema_hash) { - +Status CompactionAction::_check_param(HttpRequest* req, uint64_t* tablet_id, + uint32_t* schema_hash) { std::string req_tablet_id = req->param(TABLET_ID_KEY); std::string req_schema_hash = req->param(TABLET_SCHEMA_HASH_KEY); if (req_tablet_id == "" && req_schema_hash == "") { @@ -65,10 +65,9 @@ Status CompactionAction::_check_param(HttpRequest* req, uint64_t* tablet_id, uin // for viewing the compaction status Status CompactionAction::_handle_show_compaction(HttpRequest* req, std::string* json_result) { - uint64_t tablet_id = 0; uint32_t schema_hash = 0; - + Status status = _check_param(req, &tablet_id, &schema_hash); RETURN_IF_ERROR(status); @@ -82,19 +81,19 @@ Status CompactionAction::_handle_show_compaction(HttpRequest* req, std::string* return Status::OK(); } -Status CompactionAction::_handle_run_compaction(HttpRequest *req, std::string* json_result) { - +Status CompactionAction::_handle_run_compaction(HttpRequest* req, std::string* json_result) { // 1. 
param check uint64_t tablet_id = 0; uint32_t schema_hash = 0; - + // check req_tablet_id and req_schema_hash is not empty Status check_status = _check_param(req, &tablet_id, &schema_hash); RETURN_IF_ERROR(check_status); std::string compaction_type = req->param(PARAM_COMPACTION_TYPE); // check compaction_type is not empty and equals base or cumulative - if (compaction_type == "" && !(compaction_type == PARAM_COMPACTION_BASE || compaction_type == PARAM_COMPACTION_CUMULATIVE)) { + if (compaction_type == "" && !(compaction_type == PARAM_COMPACTION_BASE || + compaction_type == PARAM_COMPACTION_CUMULATIVE)) { return Status::NotSupported("The compaction type is not supported"); } @@ -110,8 +109,8 @@ Status CompactionAction::_handle_run_compaction(HttpRequest *req, std::string* j } // 3. execute compaction task - std::packaged_task task([this, tablet, compaction_type]() { - return _execute_compaction_callback(tablet, compaction_type); + std::packaged_task task([this, tablet, compaction_type]() { + return _execute_compaction_callback(tablet, compaction_type); }); std::future future_obj = task.get_future(); @@ -121,7 +120,7 @@ Status CompactionAction::_handle_run_compaction(HttpRequest *req, std::string* j if (_is_compaction_running) { return Status::TooManyTasks("Manual compaction task is running"); } else { - // 3.2 execute the compaction task and set compaction task running + // 3.2 execute the compaction task and set compaction task running _is_compaction_running = true; std::thread(std::move(task)).detach(); } @@ -137,20 +136,21 @@ Status CompactionAction::_handle_run_compaction(HttpRequest *req, std::string* j strings::Substitute("fail to execute compaction, error = $0", olap_status)); } } else { - LOG(INFO) << "Manual compaction task is timeout for waiting " << (status == std::future_status::timeout); + LOG(INFO) << "Manual compaction task is timeout for waiting " + << (status == std::future_status::timeout); } - + LOG(INFO) << "Manual compaction task is successfully 
triggered"; - *json_result = "{\"status\": \"Success\", \"msg\": \"compaction task is successfully triggered.\"}"; + *json_result = + "{\"status\": \"Success\", \"msg\": \"compaction task is successfully triggered.\"}"; return Status::OK(); } -Status CompactionAction::_handle_run_status_compaction(HttpRequest *req, std::string* json_result) { - +Status CompactionAction::_handle_run_status_compaction(HttpRequest* req, std::string* json_result) { uint64_t tablet_id = 0; uint32_t schema_hash = 0; - + // check req_tablet_id and req_schema_hash is not empty Status check_status = _check_param(req, &tablet_id, &schema_hash); RETURN_IF_ERROR(check_status); @@ -186,8 +186,8 @@ Status CompactionAction::_handle_run_status_compaction(HttpRequest *req, std::st msg = "this tablet_id is running"; compaction_type = "cumulative"; run_status = 1; - *json_result = strings::Substitute(json_template, run_status, msg, tablet_id, schema_hash, - compaction_type); + *json_result = strings::Substitute(json_template, run_status, msg, tablet_id, + schema_hash, compaction_type); return Status::OK(); } } @@ -199,19 +199,19 @@ Status CompactionAction::_handle_run_status_compaction(HttpRequest *req, std::st msg = "this tablet_id is running"; compaction_type = "base"; run_status = 1; - *json_result = strings::Substitute(json_template, run_status, msg, tablet_id, schema_hash, - compaction_type); + *json_result = strings::Substitute(json_template, run_status, msg, tablet_id, + schema_hash, compaction_type); return Status::OK(); } } // not running any compaction *json_result = strings::Substitute(json_template, run_status, msg, tablet_id, schema_hash, - compaction_type); + compaction_type); return Status::OK(); } OLAPStatus CompactionAction::_execute_compaction_callback(TabletSharedPtr tablet, - const std::string& compaction_type) { + const std::string& compaction_type) { OLAPStatus status = OLAP_SUCCESS; if (compaction_type == PARAM_COMPACTION_BASE) { std::string tracker_label = "base compaction " + 
std::to_string(syscall(__NR_gettid)); @@ -221,7 +221,7 @@ OLAPStatus CompactionAction::_execute_compaction_callback(TabletSharedPtr tablet if (res != OLAP_ERR_BE_NO_SUITABLE_VERSION) { DorisMetrics::instance()->base_compaction_request_failed->increment(1); LOG(WARNING) << "failed to init base compaction. res=" << res - << ", table=" << tablet->full_name(); + << ", table=" << tablet->full_name(); } } status = res; @@ -234,12 +234,12 @@ OLAPStatus CompactionAction::_execute_compaction_callback(TabletSharedPtr tablet if (res != OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSIONS) { DorisMetrics::instance()->cumulative_compaction_request_failed->increment(1); LOG(WARNING) << "failed to do cumulative compaction. res=" << res - << ", table=" << tablet->full_name(); + << ", table=" << tablet->full_name(); } } status = res; } - + LOG(INFO) << "Manual compaction task finish, status = " << status; std::lock_guard lock(_compaction_running_mutex); _is_compaction_running = false; @@ -275,6 +275,5 @@ void CompactionAction::handle(HttpRequest* req) { HttpChannel::send_reply(req, HttpStatus::OK, json_result); } } - } } // end namespace doris diff --git a/be/src/http/action/compaction_action.h b/be/src/http/action/compaction_action.h index 1a287bd4e49e8a..1c67113cf080d0 100644 --- a/be/src/http/action/compaction_action.h +++ b/be/src/http/action/compaction_action.h @@ -17,15 +17,14 @@ #pragma once -#include "http/http_handler.h" #include "common/status.h" -#include "olap/storage_engine.h" +#include "http/http_handler.h" #include "olap/base_compaction.h" +#include "olap/storage_engine.h" #include "olap/tablet.h" namespace doris { - enum CompactionActionType { SHOW_INFO = 1, RUN_COMPACTION = 2, @@ -47,17 +46,18 @@ class CompactionAction : public HttpHandler { virtual ~CompactionAction() {} - void handle(HttpRequest *req) override; + void handle(HttpRequest* req) override; private: - Status _handle_show_compaction(HttpRequest *req, std::string* json_result); + Status 
_handle_show_compaction(HttpRequest* req, std::string* json_result); /// execute compaction request to run compaction task /// param compact_type in req to distinguish the task type, base or cumulative - Status _handle_run_compaction(HttpRequest *req, std::string* json_result); + Status _handle_run_compaction(HttpRequest* req, std::string* json_result); /// thread callback function for the tablet to do compaction - OLAPStatus _execute_compaction_callback(TabletSharedPtr tablet, const std::string& compaction_type); + OLAPStatus _execute_compaction_callback(TabletSharedPtr tablet, + const std::string& compaction_type); /// fetch compaction running status Status _handle_run_status_compaction(HttpRequest* req, std::string* json_result); @@ -77,4 +77,3 @@ class CompactionAction : public HttpHandler { }; } // end namespace doris - diff --git a/be/src/http/action/health_action.cpp b/be/src/http/action/health_action.cpp index 5eaedb932289a7..92546954f69fff 100644 --- a/be/src/http/action/health_action.cpp +++ b/be/src/http/action/health_action.cpp @@ -21,20 +21,18 @@ #include #include "http/http_channel.h" +#include "http/http_headers.h" #include "http/http_request.h" #include "http/http_response.h" -#include "http/http_headers.h" #include "http/http_status.h" namespace doris { const static std::string HEADER_JSON = "application/json"; -HealthAction::HealthAction(ExecEnv* exec_env) : - _exec_env(exec_env) { -} +HealthAction::HealthAction(ExecEnv* exec_env) : _exec_env(exec_env) {} -void HealthAction::handle(HttpRequest *req) { +void HealthAction::handle(HttpRequest* req) { std::stringstream ss; ss << "{"; ss << "\"status\": \"OK\","; @@ -51,4 +49,3 @@ void HealthAction::handle(HttpRequest *req) { } } // end namespace doris - diff --git a/be/src/http/action/health_action.h b/be/src/http/action/health_action.h index 4c2effbb373943..fd2bd596e5caf6 100644 --- a/be/src/http/action/health_action.h +++ b/be/src/http/action/health_action.h @@ -15,8 +15,8 @@ // specific language 
governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_HTTP_ACTION_HEALTH_ACTION_H -#define DORIS_BE_SRC_HTTP_ACTION_HEALTH_ACTION_H +#ifndef DORIS_BE_SRC_HTTP_ACTION_HEALTH_ACTION_H +#define DORIS_BE_SRC_HTTP_ACTION_HEALTH_ACTION_H #include "http/http_handler.h" @@ -29,9 +29,9 @@ class HealthAction : public HttpHandler { public: HealthAction(ExecEnv* exec_env); - virtual ~HealthAction() {}; + virtual ~HealthAction(){}; - void handle(HttpRequest *req) override; + void handle(HttpRequest* req) override; private: ExecEnv* _exec_env; @@ -40,4 +40,3 @@ class HealthAction : public HttpHandler { } // end namespace doris #endif // DORIS_BE_SRC_HTTP_ACTION_HEALTH_ACTION_H - diff --git a/be/src/http/action/meta_action.cpp b/be/src/http/action/meta_action.cpp index 3db831627bf6d6..8935c39fcc9aee 100644 --- a/be/src/http/action/meta_action.cpp +++ b/be/src/http/action/meta_action.cpp @@ -20,14 +20,13 @@ #include #include +#include "common/logging.h" +#include "gutil/strings/substitute.h" #include "http/http_channel.h" #include "http/http_headers.h" #include "http/http_request.h" #include "http/http_response.h" #include "http/http_status.h" - -#include "common/logging.h" -#include "gutil/strings/substitute.h" #include "olap/olap_define.h" #include "olap/storage_engine.h" #include "olap/tablet.h" diff --git a/be/src/http/action/meta_action.h b/be/src/http/action/meta_action.h index 446d585938307c..720b3b82cac5eb 100644 --- a/be/src/http/action/meta_action.h +++ b/be/src/http/action/meta_action.h @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef DORIS_BE_SRC_HTTP_ACTION_META_ACTION_H -#define DORIS_BE_SRC_HTTP_ACTION_META_ACTION_H +#ifndef DORIS_BE_SRC_HTTP_ACTION_META_ACTION_H +#define DORIS_BE_SRC_HTTP_ACTION_META_ACTION_H -#include "http/http_handler.h" #include "common/status.h" +#include "http/http_handler.h" namespace doris { @@ -32,14 +32,14 @@ enum META_TYPE { // Get Meta Info class MetaAction : public HttpHandler { public: - MetaAction(META_TYPE meta_type) : _meta_type(meta_type) {} + MetaAction(META_TYPE meta_type) : _meta_type(meta_type) {} virtual ~MetaAction() {} - void handle(HttpRequest *req) override; + void handle(HttpRequest* req) override; private: - Status _handle_header(HttpRequest *req, std::string* json_header); + Status _handle_header(HttpRequest* req, std::string* json_header); private: META_TYPE _meta_type; diff --git a/be/src/http/action/metrics_action.cpp b/be/src/http/action/metrics_action.cpp index 809bf14360ea92..f2bd8b43cbcd71 100644 --- a/be/src/http/action/metrics_action.cpp +++ b/be/src/http/action/metrics_action.cpp @@ -17,16 +17,17 @@ #include "http/action/metrics_action.h" +#include #include #include -#include #include + #include -#include "http/http_request.h" -#include "http/http_response.h" #include "http/http_channel.h" #include "http/http_headers.h" +#include "http/http_request.h" +#include "http/http_response.h" #include "runtime/exec_env.h" #include "util/metrics.h" diff --git a/be/src/http/action/metrics_action.h b/be/src/http/action/metrics_action.h index 3cbc2f010b526c..25664a98aac28f 100644 --- a/be/src/http/action/metrics_action.h +++ b/be/src/http/action/metrics_action.h @@ -28,12 +28,13 @@ class MetricRegistry; class MetricsAction : public HttpHandler { public: - MetricsAction(MetricRegistry* metric_registry) :_metric_registry(metric_registry) { } - virtual ~MetricsAction() { } + MetricsAction(MetricRegistry* metric_registry) : _metric_registry(metric_registry) {} + virtual ~MetricsAction() {} + + void handle(HttpRequest* req) override; - void 
handle(HttpRequest *req) override; private: MetricRegistry* _metric_registry; }; -} +} // namespace doris diff --git a/be/src/http/action/mini_load.cpp b/be/src/http/action/mini_load.cpp index 943f8dbe35d5c9..e7a9f3eaa7dbae 100644 --- a/be/src/http/action/mini_load.cpp +++ b/be/src/http/action/mini_load.cpp @@ -17,58 +17,55 @@ #include "http/action/mini_load.h" -#include +#include +#include +#include #include -#include -#include - -#include -#include -#include - -#include +#include #include #include +#include +#include +#include +#include #include - -#include -#include -#include -#include +#include +#include +#include #include "agent/cgroups_mgr.h" #include "common/status.h" -#include "http/http_request.h" -#include "http/http_status.h" -#include "http/http_headers.h" -#include "http/http_response.h" +#include "gen_cpp/FrontendService.h" +#include "gen_cpp/FrontendService_types.h" +#include "gen_cpp/HeartbeatService_types.h" +#include "gen_cpp/MasterService_types.h" #include "http/http_channel.h" +#include "http/http_headers.h" #include "http/http_parser.h" +#include "http/http_request.h" +#include "http/http_response.h" +#include "http/http_status.h" #include "http/utils.h" #include "olap/file_helper.h" -#include "service/backend_options.h" -#include "util/url_coding.h" -#include "util/file_utils.h" -#include "util/thrift_rpc_helper.h" -#include "util/json_util.h" -#include "util/time.h" -#include "util/string_parser.hpp" +#include "runtime/client_cache.h" #include "runtime/exec_env.h" #include "runtime/fragment_mgr.h" #include "runtime/load_path_mgr.h" -#include "runtime/client_cache.h" #include "runtime/stream_load/stream_load_context.h" -#include "gen_cpp/MasterService_types.h" -#include "gen_cpp/HeartbeatService_types.h" -#include "gen_cpp/FrontendService.h" -#include "gen_cpp/FrontendService_types.h" +#include "service/backend_options.h" +#include "util/file_utils.h" +#include "util/json_util.h" +#include "util/string_parser.hpp" +#include 
"util/thrift_rpc_helper.h" +#include "util/time.h" +#include "util/url_coding.h" namespace doris { // context used to handle mini-load in asynchronous mode struct MiniLoadAsyncCtx { - MiniLoadAsyncCtx(MiniLoadAction* handler_) : handler(handler_) { } + MiniLoadAsyncCtx(MiniLoadAction* handler_) : handler(handler_) {} ~MiniLoadAsyncCtx() { if (need_remove_handle) { handler->erase_handle(load_handle); @@ -95,7 +92,7 @@ struct MiniLoadAsyncCtx { }; struct MiniLoadCtx { - MiniLoadCtx(bool is_streaming_) : is_streaming(is_streaming_) {} + MiniLoadCtx(bool is_streaming_) : is_streaming(is_streaming_) {} bool is_streaming = false; MiniLoadAsyncCtx* mini_load_async_ctx = nullptr; @@ -116,9 +113,7 @@ const std::string STRICT_MODE_KEY = "strict_mode"; const std::string TIMEOUT_KEY = "timeout"; const char* k_100_continue = "100-continue"; -MiniLoadAction::MiniLoadAction(ExecEnv* exec_env) : - _exec_env(exec_env) { -} +MiniLoadAction::MiniLoadAction(ExecEnv* exec_env) : _exec_env(exec_env) {} static bool is_name_valid(const std::string& name) { return !name.empty(); @@ -141,8 +136,7 @@ static Status check_request(HttpRequest* req) { return Status::OK(); } -Status MiniLoadAction::data_saved_dir(const LoadHandle& desc, - const std::string& table, +Status MiniLoadAction::data_saved_dir(const LoadHandle& desc, const std::string& table, std::string* file_path) { std::string prefix; RETURN_IF_ERROR(_exec_env->load_path_mgr()->allocate_dir(desc.db, desc.label, &prefix)); @@ -155,32 +149,28 @@ Status MiniLoadAction::data_saved_dir(const LoadHandle& desc, strftime(buf, 64, "%Y%m%d%H%M%S", &tm); std::stringstream ss; - ss << prefix << "/" << table << "." << desc.sub_label - << "." << buf << "." << tv.tv_usec; + ss << prefix << "/" << table << "." << desc.sub_label << "." << buf << "." 
<< tv.tv_usec; *file_path = ss.str(); return Status::OK(); } -Status MiniLoadAction::_load( - HttpRequest* http_req, - const std::string& file_path, - const std::string& user, - const std::string& cluster) { +Status MiniLoadAction::_load(HttpRequest* http_req, const std::string& file_path, + const std::string& user, const std::string& cluster) { // Prepare request parameters. - std::map params( - http_req->query_params().begin(), http_req->query_params().end()); + std::map params(http_req->query_params().begin(), + http_req->query_params().end()); params.erase(LABEL_KEY); params.erase(SUB_LABEL_KEY); // put here to log master information const TNetworkAddress& master_address = _exec_env->master_info()->network_address; Status status; - FrontendServiceConnection client( - _exec_env->frontend_client_cache(), master_address, config::thrift_rpc_timeout_ms, &status); + FrontendServiceConnection client(_exec_env->frontend_client_cache(), master_address, + config::thrift_rpc_timeout_ms, &status); if (!status.ok()) { std::stringstream ss; - ss << "Connect master failed, with address(" - << master_address.hostname << ":" << master_address.port << ")"; + ss << "Connect master failed, with address(" << master_address.hostname << ":" + << master_address.port << ")"; LOG(WARNING) << ss.str(); return status; } @@ -209,25 +199,23 @@ Status MiniLoadAction::_load( try { client->miniLoad(res, req); } catch (apache::thrift::transport::TTransportException& e) { - LOG(WARNING) << "Retrying mini load from master(" - << master_address.hostname << ":" << master_address.port - << ") because: " << e.what(); + LOG(WARNING) << "Retrying mini load from master(" << master_address.hostname << ":" + << master_address.port << ") because: " << e.what(); status = client.reopen(config::thrift_rpc_timeout_ms); if (!status.ok()) { - LOG(WARNING) << "Client reopen failed. with address(" - << master_address.hostname << ":" << master_address.port << ")"; + LOG(WARNING) << "Client reopen failed. 
with address(" << master_address.hostname + << ":" << master_address.port << ")"; return status; } client->miniLoad(res, req); } catch (apache::thrift::TApplicationException& e) { - LOG(WARNING) << "mini load request from master(" - << master_address.hostname << ":" << master_address.port - << ") got unknown result: " << e.what(); + LOG(WARNING) << "mini load request from master(" << master_address.hostname << ":" + << master_address.port << ") got unknown result: " << e.what(); status = client.reopen(config::thrift_rpc_timeout_ms); if (!status.ok()) { - LOG(WARNING) << "Client reopen failed. with address(" - << master_address.hostname << ":" << master_address.port << ")"; + LOG(WARNING) << "Client reopen failed. with address(" << master_address.hostname + << ":" << master_address.port << ")"; return status; } client->miniLoad(res, req); @@ -237,9 +225,8 @@ Status MiniLoadAction::_load( // reopen to disable this connection client.reopen(config::thrift_rpc_timeout_ms); std::stringstream ss; - ss << "Request miniload from master(" - << master_address.hostname << ":" << master_address.port - << ") because: " << e.what(); + ss << "Request miniload from master(" << master_address.hostname << ":" + << master_address.port << ") because: " << e.what(); LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } @@ -247,8 +234,8 @@ Status MiniLoadAction::_load( return Status(res.status); } -static bool parse_auth(const std::string& auth, std::string* user, - std::string* passwd, std::string* cluster) { +static bool parse_auth(const std::string& auth, std::string* user, std::string* passwd, + std::string* cluster) { std::string decoded_auth; if (!base64_decode(auth, &decoded_auth)) { @@ -268,18 +255,17 @@ static bool parse_auth(const std::string& auth, std::string* user, return true; } -Status MiniLoadAction::check_auth( - const HttpRequest* http_req, - const TLoadCheckRequest& check_load_req) { +Status MiniLoadAction::check_auth(const HttpRequest* http_req, + const 
TLoadCheckRequest& check_load_req) { // put here to log master information const TNetworkAddress& master_address = _exec_env->master_info()->network_address; Status status; - FrontendServiceConnection client( - _exec_env->frontend_client_cache(), master_address, config::thrift_rpc_timeout_ms, &status); + FrontendServiceConnection client(_exec_env->frontend_client_cache(), master_address, + config::thrift_rpc_timeout_ms, &status); if (!status.ok()) { std::stringstream ss; - ss << "Connect master failed, with address(" - << master_address.hostname << ":" << master_address.port << ")"; + ss << "Connect master failed, with address(" << master_address.hostname << ":" + << master_address.port << ")"; LOG(WARNING) << ss.str(); return status; } @@ -289,25 +275,23 @@ Status MiniLoadAction::check_auth( try { client->loadCheck(res, check_load_req); } catch (apache::thrift::transport::TTransportException& e) { - LOG(WARNING) << "Retrying mini load from master(" - << master_address.hostname << ":" << master_address.port - << ") because: " << e.what(); + LOG(WARNING) << "Retrying mini load from master(" << master_address.hostname << ":" + << master_address.port << ") because: " << e.what(); status = client.reopen(config::thrift_rpc_timeout_ms); if (!status.ok()) { - LOG(WARNING) << "Client reopen failed. with address(" - << master_address.hostname << ":" << master_address.port << ")"; + LOG(WARNING) << "Client reopen failed. 
with address(" << master_address.hostname + << ":" << master_address.port << ")"; return status; } client->loadCheck(res, check_load_req); } catch (apache::thrift::TApplicationException& e) { - LOG(WARNING) << "load check request from master(" - << master_address.hostname << ":" << master_address.port - << ") got unknown result: " << e.what(); + LOG(WARNING) << "load check request from master(" << master_address.hostname << ":" + << master_address.port << ") got unknown result: " << e.what(); status = client.reopen(config::thrift_rpc_timeout_ms); if (!status.ok()) { - LOG(WARNING) << "Client reopen failed. with address(" - << master_address.hostname << ":" << master_address.port << ")"; + LOG(WARNING) << "Client reopen failed. with address(" << master_address.hostname + << ":" << master_address.port << ")"; return status; } client->loadCheck(res, check_load_req); @@ -317,9 +301,8 @@ Status MiniLoadAction::check_auth( // reopen to disable this connection client.reopen(config::thrift_rpc_timeout_ms); std::stringstream ss; - ss << "Request miniload from master(" - << master_address.hostname << ":" << master_address.port - << ") because: " << e.what(); + ss << "Request miniload from master(" << master_address.hostname << ":" + << master_address.port << ") because: " << e.what(); LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } @@ -339,13 +322,13 @@ int MiniLoadAction::on_header(HttpRequest* req) { HttpChannel::send_basic_challenge(req, "mini_load"); return -1; } - + Status status; MiniLoadCtx* mini_load_ctx = new MiniLoadCtx(_is_streaming(req)); - req->set_handler_ctx(mini_load_ctx); - if (((MiniLoadCtx*) req->handler_ctx())->is_streaming) { + req->set_handler_ctx(mini_load_ctx); + if (((MiniLoadCtx*)req->handler_ctx())->is_streaming) { status = _on_new_header(req); - StreamLoadContext* ctx = ((MiniLoadCtx*) req->handler_ctx())->stream_load_ctx; + StreamLoadContext* ctx = ((MiniLoadCtx*)req->handler_ctx())->stream_load_ctx; if (ctx != nullptr) { 
ctx->status = status; } @@ -359,7 +342,7 @@ int MiniLoadAction::on_header(HttpRequest* req) { return 0; } -bool MiniLoadAction::_is_streaming(HttpRequest* req) { +bool MiniLoadAction::_is_streaming(HttpRequest* req) { // multi load must be non-streaming if (!req->param(SUB_LABEL_KEY).empty()) { return false; @@ -371,23 +354,22 @@ bool MiniLoadAction::_is_streaming(HttpRequest* req) { TFeResult res; Status status = ThriftRpcHelper::rpc( master_address.hostname, master_address.port, - [&request, &res] (FrontendServiceConnection& client) { - client->isMethodSupported(res, request); + [&request, &res](FrontendServiceConnection& client) { + client->isMethodSupported(res, request); }); if (!status.ok()) { - std::stringstream ss; + std::stringstream ss; ss << "This mini load is not streaming because: " << status.get_error_msg() - << " with address(" << master_address.hostname << ":" << master_address.port << ")"; + << " with address(" << master_address.hostname << ":" << master_address.port << ")"; LOG(INFO) << ss.str(); return false; } - + status = Status(res.status); if (!status.ok()) { - std::stringstream ss; + std::stringstream ss; ss << "This streaming mini load is not be supportd because: " << status.get_error_msg() - << " with address(" << master_address.hostname << ":" << master_address.port - << ")"; + << " with address(" << master_address.hostname << ":" << master_address.port << ")"; LOG(INFO) << ss.str(); return false; } @@ -408,8 +390,7 @@ Status MiniLoadAction::_on_header(HttpRequest* req) { } } else { evhttp_connection_set_max_body_size( - evhttp_request_get_connection(req->get_evhttp_request()), - max_body_bytes); + evhttp_request_get_connection(req->get_evhttp_request()), max_body_bytes); } RETURN_IF_ERROR(check_request(req)); @@ -441,20 +422,21 @@ Status MiniLoadAction::_on_header(HttpRequest* req) { RETURN_IF_ERROR(data_saved_dir(mini_load_async_ctx->load_handle, req->param(TABLE_KEY), &mini_load_async_ctx->file_path)); // destructor will close the file 
handle, not depend on DeferOp any more - mini_load_async_ctx->fd = open(mini_load_async_ctx->file_path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0660); + mini_load_async_ctx->fd = + open(mini_load_async_ctx->file_path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0660); if (mini_load_async_ctx->fd < 0) { char buf[64]; LOG(WARNING) << "open file failed, path=" << mini_load_async_ctx->file_path - << ", errno=" << errno << ", errmsg=" << strerror_r(errno, buf, sizeof(buf)); + << ", errno=" << errno << ", errmsg=" << strerror_r(errno, buf, sizeof(buf)); return Status::InternalError("open file failed"); } - ((MiniLoadCtx*) req->handler_ctx())->mini_load_async_ctx = mini_load_async_ctx.release(); + ((MiniLoadCtx*)req->handler_ctx())->mini_load_async_ctx = mini_load_async_ctx.release(); return Status::OK(); } void MiniLoadAction::on_chunk_data(HttpRequest* http_req) { - MiniLoadCtx* ctx = (MiniLoadCtx*) http_req->handler_ctx(); + MiniLoadCtx* ctx = (MiniLoadCtx*)http_req->handler_ctx(); if (ctx->is_streaming) { _on_new_chunk_data(http_req); } else { @@ -463,7 +445,7 @@ void MiniLoadAction::on_chunk_data(HttpRequest* http_req) { } void MiniLoadAction::_on_chunk_data(HttpRequest* http_req) { - MiniLoadAsyncCtx* ctx = ((MiniLoadCtx*) http_req->handler_ctx())->mini_load_async_ctx; + MiniLoadAsyncCtx* ctx = ((MiniLoadCtx*)http_req->handler_ctx())->mini_load_async_ctx; if (ctx == nullptr) { return; } @@ -478,11 +460,10 @@ void MiniLoadAction::_on_chunk_data(HttpRequest* http_req) { auto res = write(ctx->fd, buf, n); if (res < 0) { char errbuf[64]; - LOG(WARNING) << "write file failed, path=" << ctx->file_path - << ", errno=" << errno - << ", errmsg=" << strerror_r(errno, errbuf, sizeof(errbuf)); - HttpChannel::send_reply( - http_req, HttpStatus::INTERNAL_SERVER_ERROR, "write file failed"); + LOG(WARNING) << "write file failed, path=" << ctx->file_path << ", errno=" << errno + << ", errmsg=" << strerror_r(errno, errbuf, sizeof(errbuf)); + HttpChannel::send_reply(http_req, 
HttpStatus::INTERNAL_SERVER_ERROR, + "write file failed"); delete ctx; http_req->set_handler_ctx(nullptr); return; @@ -494,7 +475,7 @@ void MiniLoadAction::_on_chunk_data(HttpRequest* http_req) { } void MiniLoadAction::_on_new_chunk_data(HttpRequest* http_req) { - StreamLoadContext* ctx = ((MiniLoadCtx*) http_req->handler_ctx())->stream_load_ctx; + StreamLoadContext* ctx = ((MiniLoadCtx*)http_req->handler_ctx())->stream_load_ctx; if (ctx == nullptr || !ctx->status.ok()) { return; } @@ -510,7 +491,7 @@ void MiniLoadAction::_on_new_chunk_data(HttpRequest* http_req) { auto st = ctx->body_sink->append(bb); if (!st.ok()) { LOG(WARNING) << "append body content failed. errmsg=" << st.get_error_msg() - << ctx->brief(); + << ctx->brief(); ctx->status = st; return; } @@ -519,13 +500,14 @@ void MiniLoadAction::_on_new_chunk_data(HttpRequest* http_req) { } void MiniLoadAction::free_handler_ctx(void* param) { - MiniLoadCtx* ctx = (MiniLoadCtx*) param; + MiniLoadCtx* ctx = (MiniLoadCtx*)param; if (ctx->is_streaming) { - StreamLoadContext* streaming_ctx = ((MiniLoadCtx*) param)->stream_load_ctx; + StreamLoadContext* streaming_ctx = ((MiniLoadCtx*)param)->stream_load_ctx; if (streaming_ctx != nullptr) { // sender is going, make receiver know it if (streaming_ctx->body_sink != nullptr) { - LOG(WARNING) << "cancel stream load " << streaming_ctx->id.to_string() << " because sender failed"; + LOG(WARNING) << "cancel stream load " << streaming_ctx->id.to_string() + << " because sender failed"; streaming_ctx->body_sink->cancel(); } if (streaming_ctx->unref()) { @@ -533,14 +515,14 @@ void MiniLoadAction::free_handler_ctx(void* param) { } } } else { - MiniLoadAsyncCtx* async_ctx = ((MiniLoadCtx*) param)->mini_load_async_ctx; + MiniLoadAsyncCtx* async_ctx = ((MiniLoadCtx*)param)->mini_load_async_ctx; delete async_ctx; } delete ctx; } -void MiniLoadAction::handle(HttpRequest *http_req) { - MiniLoadCtx* ctx = (MiniLoadCtx*) http_req->handler_ctx(); +void MiniLoadAction::handle(HttpRequest* 
http_req) { + MiniLoadCtx* ctx = (MiniLoadCtx*)http_req->handler_ctx(); if (ctx->is_streaming) { _new_handle(http_req); } else { @@ -549,33 +531,30 @@ void MiniLoadAction::handle(HttpRequest *http_req) { } void MiniLoadAction::_handle(HttpRequest* http_req) { - MiniLoadAsyncCtx* ctx = ((MiniLoadCtx*) http_req->handler_ctx())->mini_load_async_ctx; + MiniLoadAsyncCtx* ctx = ((MiniLoadCtx*)http_req->handler_ctx())->mini_load_async_ctx; if (ctx == nullptr) { // when ctx is nullptr, there must be error happened when on_chunk_data // and reply is sent, we just return with no operation LOG(WARNING) << "handler context is nullptr when MiniLoad callback execute, uri=" - << http_req->uri(); + << http_req->uri(); return; } if (ctx->body_bytes > 0 && ctx->bytes_written != ctx->body_bytes) { - LOG(WARNING) << "bytes written is not equal with body size, uri=" - << http_req->uri() - << ", body_bytes=" << ctx->body_bytes - << ", bytes_written=" << ctx->bytes_written; + LOG(WARNING) << "bytes written is not equal with body size, uri=" << http_req->uri() + << ", body_bytes=" << ctx->body_bytes + << ", bytes_written=" << ctx->bytes_written; HttpChannel::send_reply(http_req, HttpStatus::INTERNAL_SERVER_ERROR, "receipt size not equal with body size"); return; } - auto st = _load( - http_req, ctx->file_path, ctx->load_check_req.user, ctx->load_check_req.cluster); + auto st = + _load(http_req, ctx->file_path, ctx->load_check_req.user, ctx->load_check_req.cluster); std::string str = to_json(st); HttpChannel::send_reply(http_req, str); } -Status MiniLoadAction::generate_check_load_req( - const HttpRequest* http_req, - TLoadCheckRequest* check_load_req) { - +Status MiniLoadAction::generate_check_load_req(const HttpRequest* http_req, + TLoadCheckRequest* check_load_req) { const char k_basic[] = "Basic "; const std::string& auth = http_req->header(HttpHeaders::AUTHORIZATION); if (auth.compare(0, sizeof(k_basic) - 1, k_basic, sizeof(k_basic) - 1) != 0) { @@ -608,7 +587,7 @@ Status 
MiniLoadAction::generate_check_load_req( return Status::OK(); } -bool LoadHandleCmp::operator() (const LoadHandle& lhs, const LoadHandle& rhs) const { +bool LoadHandleCmp::operator()(const LoadHandle& lhs, const LoadHandle& rhs) const { int ret = lhs.label.compare(rhs.label); if (ret < 0) { return true; @@ -631,7 +610,7 @@ bool LoadHandleCmp::operator() (const LoadHandle& lhs, const LoadHandle& rhs) co return false; } -// fe will begin the txn and record the metadata of load +// fe will begin the txn and record the metadata of load Status MiniLoadAction::_begin_mini_load(StreamLoadContext* ctx) { // prepare begin mini load request params TMiniLoadBeginRequest request; @@ -655,13 +634,13 @@ Status MiniLoadAction::_begin_mini_load(StreamLoadContext* ctx) { TMiniLoadBeginResult res; RETURN_IF_ERROR(ThriftRpcHelper::rpc( master_addr.hostname, master_addr.port, - [&request, &res] (FrontendServiceConnection& client) { - client->miniLoadBegin(res, request); + [&request, &res](FrontendServiceConnection& client) { + client->miniLoadBegin(res, request); })); Status begin_status(res.status); if (!begin_status.ok()) { - LOG(INFO) << "failed to begin mini load " << ctx->label << " with error msg:" - << begin_status.get_error_msg(); + LOG(INFO) << "failed to begin mini load " << ctx->label + << " with error msg:" << begin_status.get_error_msg(); return begin_status; } ctx->txn_id = res.txn_id; @@ -681,8 +660,8 @@ Status MiniLoadAction::_process_put(HttpRequest* req, StreamLoadContext* ctx) { put_request.formatType = ctx->format; put_request.__set_loadId(ctx->id.to_thrift()); put_request.fileType = TFileType::FILE_STREAM; - std::map params( - req->query_params().begin(), req->query_params().end()); + std::map params(req->query_params().begin(), + req->query_params().end()); /* merge params of columns and hll * for example: * input: columns=c1,tmp_c2,tmp_c3\&hll=hll_c2,tmp_c2:hll_c3,tmp_c3 @@ -695,16 +674,17 @@ Status MiniLoadAction::_process_put(HttpRequest* req, 
StreamLoadContext* ctx) { if (hll_it != params.end()) { std::string hll_value = hll_it->second; if (hll_value.empty()) { - return Status::InvalidArgument("Hll value could not be empty when hll key is exists!"); + return Status::InvalidArgument( + "Hll value could not be empty when hll key is exists!"); } std::map hll_map; RETURN_IF_ERROR(StringParser::split_string_to_map(hll_value, ":", ",", &hll_map)); if (hll_map.empty()) { - return Status::InvalidArgument("Hll value could not transform to hll expr: " + hll_value); + return Status::InvalidArgument("Hll value could not transform to hll expr: " + + hll_value); } - for (auto& hll_element: hll_map) { - columns_value += "," + hll_element.first - + "=hll_hash(" + hll_element.second + ")"; + for (auto& hll_element : hll_map) { + columns_value += "," + hll_element.first + "=hll_hash(" + hll_element.second + ")"; } } put_request.__set_columns(columns_value); @@ -730,14 +710,15 @@ Status MiniLoadAction::_process_put(HttpRequest* req, StreamLoadContext* ctx) { // plan this load TNetworkAddress master_addr = _exec_env->master_info()->network_address; - RETURN_IF_ERROR(ThriftRpcHelper::rpc(master_addr.hostname, master_addr.port, - [&put_request, ctx] (FrontendServiceConnection& client) { - client->streamLoadPut(ctx->put_result, put_request); - })); + RETURN_IF_ERROR(ThriftRpcHelper::rpc( + master_addr.hostname, master_addr.port, + [&put_request, ctx](FrontendServiceConnection& client) { + client->streamLoadPut(ctx->put_result, put_request); + })); Status plan_status(ctx->put_result.status); if (!plan_status.ok()) { LOG(WARNING) << "plan streaming load failed. 
errmsg=" << plan_status.get_error_msg() - << ctx->brief(); + << ctx->brief(); return plan_status; } VLOG(3) << "params is " << apache::thrift::ThriftDebugString(ctx->put_result.params); @@ -757,15 +738,14 @@ Status MiniLoadAction::_on_new_header(HttpRequest* req) { } } else { evhttp_connection_set_max_body_size( - evhttp_request_get_connection(req->get_evhttp_request()), - max_body_bytes); + evhttp_request_get_connection(req->get_evhttp_request()), max_body_bytes); } RETURN_IF_ERROR(check_request(req)); StreamLoadContext* ctx = new StreamLoadContext(_exec_env); ctx->ref(); - ((MiniLoadCtx*) req->handler_ctx())->stream_load_ctx = ctx; + ((MiniLoadCtx*)req->handler_ctx())->stream_load_ctx = ctx; // auth information if (!parse_basic_auth(*req, &ctx->auth)) { @@ -779,12 +759,12 @@ Status MiniLoadAction::_on_new_header(HttpRequest* req) { ctx->db = req->param(DB_KEY); ctx->table = req->param(TABLE_KEY); ctx->label = req->param(LABEL_KEY); - if(!req->param(SUB_LABEL_KEY).empty()) { + if (!req->param(SUB_LABEL_KEY).empty()) { ctx->sub_label = req->param(SUB_LABEL_KEY); } ctx->format = TFileFormatType::FORMAT_CSV_PLAIN; - std::map params( - req->query_params().begin(), req->query_params().end()); + std::map params(req->query_params().begin(), + req->query_params().end()); auto max_filter_ratio_it = params.find(MAX_FILTER_RATIO_KEY); if (max_filter_ratio_it != params.end()) { ctx->max_filter_ratio = strtod(max_filter_ratio_it->second.c_str(), nullptr); @@ -797,14 +777,14 @@ Status MiniLoadAction::_on_new_header(HttpRequest* req) { return Status::InvalidArgument("Invalid timeout format"); } } - - LOG(INFO) << "new income mini load request." << ctx->brief() - << ", db: " << ctx->db << ", tbl: " << ctx->table; + + LOG(INFO) << "new income mini load request." 
<< ctx->brief() << ", db: " << ctx->db + << ", tbl: " << ctx->table; // record metadata in frontend RETURN_IF_ERROR(_begin_mini_load(ctx)); - // open sink + // open sink auto pipe = std::make_shared(); RETURN_IF_ERROR(_exec_env->load_stream_mgr()->put(ctx->id, pipe)); ctx->body_sink = pipe; @@ -817,14 +797,14 @@ Status MiniLoadAction::_on_new_header(HttpRequest* req) { } void MiniLoadAction::_new_handle(HttpRequest* req) { - StreamLoadContext* ctx = ((MiniLoadCtx*) req->handler_ctx())->stream_load_ctx; - DCHECK(ctx != nullptr); - + StreamLoadContext* ctx = ((MiniLoadCtx*)req->handler_ctx())->stream_load_ctx; + DCHECK(ctx != nullptr); + if (ctx->status.ok()) { ctx->status = _on_new_handle(ctx); if (!ctx->status.ok()) { LOG(WARNING) << "handle mini load failed, id=" << ctx->id - << ", errmsg=" << ctx->status.get_error_msg(); + << ", errmsg=" << ctx->status.get_error_msg(); } } @@ -847,12 +827,11 @@ void MiniLoadAction::_new_handle(HttpRequest* req) { Status MiniLoadAction::_on_new_handle(StreamLoadContext* ctx) { if (ctx->body_bytes > 0 && ctx->receive_bytes != ctx->body_bytes) { - LOG(WARNING) << "receive body don't equal with body bytes, body_bytes=" - << ctx->body_bytes << ", receive_bytes=" << ctx->receive_bytes - << ", id=" << ctx->id; + LOG(WARNING) << "receive body don't equal with body bytes, body_bytes=" << ctx->body_bytes + << ", receive_bytes=" << ctx->receive_bytes << ", id=" << ctx->id; return Status::InternalError("receive body don't equal with body bytes"); } - + // wait stream load sink finish RETURN_IF_ERROR(ctx->body_sink->finish()); @@ -865,4 +844,4 @@ Status MiniLoadAction::_on_new_handle(StreamLoadContext* ctx) { return Status::OK(); } -} +} // namespace doris diff --git a/be/src/http/action/mini_load.h b/be/src/http/action/mini_load.h index 064a2ad25e8b13..66350221c660c1 100644 --- a/be/src/http/action/mini_load.h +++ b/be/src/http/action/mini_load.h @@ -18,16 +18,16 @@ #ifndef DORIS_BE_SRC_COMMON_UTIL_MINI_LOAD_H #define 
DORIS_BE_SRC_COMMON_UTIL_MINI_LOAD_H -#include #include -#include +#include #include +#include #include "common/status.h" +#include "gen_cpp/FrontendService.h" #include "http/http_handler.h" -#include "util/defer_op.h" #include "runtime/stream_load/stream_load_context.h" -#include "gen_cpp/FrontendService.h" +#include "util/defer_op.h" namespace doris { @@ -39,7 +39,7 @@ struct LoadHandle { }; struct LoadHandleCmp { - bool operator() (const LoadHandle& lhs, const LoadHandle& rhs) const; + bool operator()(const LoadHandle& lhs, const LoadHandle& rhs) const; }; class TMasterResult; @@ -52,10 +52,9 @@ class MiniLoadAction : public HttpHandler { public: MiniLoadAction(ExecEnv* exec_env); - virtual ~MiniLoadAction() { - } + virtual ~MiniLoadAction() {} - void handle(HttpRequest *req) override; + void handle(HttpRequest* req) override; bool request_will_be_read_progressively() override { return true; } @@ -63,30 +62,20 @@ class MiniLoadAction : public HttpHandler { void on_chunk_data(HttpRequest* req) override; void free_handler_ctx(void* ctx) override; - - void erase_handle(const LoadHandle& handle); + void erase_handle(const LoadHandle& handle); private: - Status _load( - HttpRequest* req, - const std::string& file_path, - const std::string& user, - const std::string& cluster); + Status _load(HttpRequest* req, const std::string& file_path, const std::string& user, + const std::string& cluster); - Status data_saved_dir(const LoadHandle& desc, - const std::string& table, - std::string* file_path); + Status data_saved_dir(const LoadHandle& desc, const std::string& table, std::string* file_path); Status _on_header(HttpRequest* http_req); - Status generate_check_load_req( - const HttpRequest* http_req, - TLoadCheckRequest* load_check_req); + Status generate_check_load_req(const HttpRequest* http_req, TLoadCheckRequest* load_check_req); - Status check_auth( - const HttpRequest* http_req, - const TLoadCheckRequest& load_check_req); + Status check_auth(const HttpRequest* 
http_req, const TLoadCheckRequest& load_check_req); void _on_chunk_data(HttpRequest* http_req); @@ -98,13 +87,13 @@ class MiniLoadAction : public HttpHandler { Status _begin_mini_load(StreamLoadContext* ctx); Status _process_put(HttpRequest* req, StreamLoadContext* ctx); - + void _on_new_chunk_data(HttpRequest* http_req); void _new_handle(HttpRequest* req); - + Status _on_new_handle(StreamLoadContext* ctx); - + bool _is_streaming(HttpRequest* req); const std::string _streaming_function_name = "STREAMING_MINI_LOAD"; @@ -116,6 +105,5 @@ class MiniLoadAction : public HttpHandler { std::set _current_load; }; -} +} // namespace doris #endif - diff --git a/be/src/http/action/pprof_actions.cpp b/be/src/http/action/pprof_actions.cpp index 6e2281c30e583a..a05bf2271530c4 100644 --- a/be/src/http/action/pprof_actions.cpp +++ b/be/src/http/action/pprof_actions.cpp @@ -17,15 +17,15 @@ #include "http/action/pprof_actions.h" +#include +#include +#include + #include #include #include #include -#include -#include -#include - #include "agent/utils.h" #include "common/config.h" #include "gutil/strings/substitute.h" @@ -148,7 +148,8 @@ void ProfileAction::handle(HttpRequest* req) { if (type_str != "flamegraph") { // use pprof the sample the CPU std::ostringstream tmp_prof_file_name; - tmp_prof_file_name << config::pprof_profile_dir << "/doris_profile." << getpid() << "." << rand(); + tmp_prof_file_name << config::pprof_profile_dir << "/doris_profile." << getpid() << "." 
+ << rand(); ProfilerStart(tmp_prof_file_name.str().c_str()); sleep(seconds); ProfilerStop(); @@ -180,8 +181,10 @@ void ProfileAction::handle(HttpRequest* req) { } else { // generate flamegraph std::string svg_file_content; - std::string flamegraph_install_dir = std::string(std::getenv("DORIS_HOME")) + "/tools/FlameGraph/"; - Status st = PprofUtils::generate_flamegraph(30, flamegraph_install_dir, false, &svg_file_content); + std::string flamegraph_install_dir = + std::string(std::getenv("DORIS_HOME")) + "/tools/FlameGraph/"; + Status st = PprofUtils::generate_flamegraph(30, flamegraph_install_dir, false, + &svg_file_content); if (!st.ok()) { HttpChannel::send_reply(req, st.to_string()); } else { diff --git a/be/src/http/action/pprof_actions.h b/be/src/http/action/pprof_actions.h index 1dd1f42c23a37b..d52ff6cb109803 100644 --- a/be/src/http/action/pprof_actions.h +++ b/be/src/http/action/pprof_actions.h @@ -30,6 +30,6 @@ class PprofActions { static Status setup(ExecEnv* exec_env, EvHttpServer* http_server); }; -} +} // namespace doris #endif diff --git a/be/src/http/action/reload_tablet_action.cpp b/be/src/http/action/reload_tablet_action.cpp index fc4a216c4185f2..0e308e0be7415b 100644 --- a/be/src/http/action/reload_tablet_action.cpp +++ b/be/src/http/action/reload_tablet_action.cpp @@ -17,13 +17,12 @@ #include "http/action/reload_tablet_action.h" -#include #include +#include +#include "agent/cgroups_mgr.h" #include "boost/lexical_cast.hpp" - #include "common/logging.h" -#include "agent/cgroups_mgr.h" #include "http/http_channel.h" #include "http/http_headers.h" #include "http/http_request.h" @@ -39,11 +38,9 @@ const std::string PATH = "path"; const std::string TABLET_ID = "tablet_id"; const std::string SCHEMA_HASH = "schema_hash"; -ReloadTabletAction::ReloadTabletAction(ExecEnv* exec_env) : - _exec_env(exec_env) { -} +ReloadTabletAction::ReloadTabletAction(ExecEnv* exec_env) : _exec_env(exec_env) {} -void ReloadTabletAction::handle(HttpRequest *req) { +void 
ReloadTabletAction::handle(HttpRequest* req) { LOG(INFO) << "accept one request " << req->debug_string(); // add tid to cgroup in order to limit read bandwidth @@ -52,8 +49,7 @@ void ReloadTabletAction::handle(HttpRequest *req) { // Get path const std::string& path = req->param(PATH); if (path.empty()) { - std::string error_msg = std::string( - "parameter " + PATH + " not specified in url."); + std::string error_msg = std::string("parameter " + PATH + " not specified in url."); HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, error_msg); return; } @@ -61,8 +57,7 @@ void ReloadTabletAction::handle(HttpRequest *req) { // Get tablet id const std::string& tablet_id_str = req->param(TABLET_ID); if (tablet_id_str.empty()) { - std::string error_msg = std::string( - "parameter " + TABLET_ID + " not specified in url."); + std::string error_msg = std::string("parameter " + TABLET_ID + " not specified in url."); HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, error_msg); return; } @@ -70,8 +65,7 @@ void ReloadTabletAction::handle(HttpRequest *req) { // Get schema hash const std::string& schema_hash_str = req->param(SCHEMA_HASH); if (schema_hash_str.empty()) { - std::string error_msg = std::string( - "parameter " + SCHEMA_HASH + " not specified in url."); + std::string error_msg = std::string("parameter " + SCHEMA_HASH + " not specified in url."); HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, error_msg); return; } @@ -95,9 +89,8 @@ void ReloadTabletAction::handle(HttpRequest *req) { LOG(INFO) << "deal with reload tablet request finished! 
tablet id: " << tablet_id; } -void ReloadTabletAction::reload( - const std::string& path, int64_t tablet_id, int32_t schema_hash, HttpRequest *req) { - +void ReloadTabletAction::reload(const std::string& path, int64_t tablet_id, int32_t schema_hash, + HttpRequest* req) { TCloneReq clone_req; clone_req.__set_tablet_id(tablet_id); clone_req.__set_schema_hash(schema_hash); @@ -105,19 +98,16 @@ void ReloadTabletAction::reload( OLAPStatus res = OLAPStatus::OLAP_SUCCESS; res = _exec_env->storage_engine()->load_header(path, clone_req); if (res != OLAPStatus::OLAP_SUCCESS) { - LOG(WARNING) << "load header failed. status: " << res - << ", signature: " << tablet_id; + LOG(WARNING) << "load header failed. status: " << res << ", signature: " << tablet_id; std::string error_msg = std::string("load header failed"); HttpChannel::send_reply(req, HttpStatus::INTERNAL_SERVER_ERROR, error_msg); return; } else { - LOG(INFO) << "load header success. status: " << res - << ", signature: " << tablet_id; + LOG(INFO) << "load header success. status: " << res << ", signature: " << tablet_id; std::string result_msg = std::string("load header succeed"); HttpChannel::send_reply(req, result_msg); return; } -} +} } // end namespace doris - diff --git a/be/src/http/action/reload_tablet_action.h b/be/src/http/action/reload_tablet_action.h index d3c8f9508b2879..da3a7752082211 100644 --- a/be/src/http/action/reload_tablet_action.h +++ b/be/src/http/action/reload_tablet_action.h @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef DORIS_BE_SRC_HTTP_RELOAD_TABLET_ACTION_H -#define DORIS_BE_SRC_HTTP_RELOAD_TABLET_ACTION_H +#ifndef DORIS_BE_SRC_HTTP_RELOAD_TABLET_ACTION_H +#define DORIS_BE_SRC_HTTP_RELOAD_TABLET_ACTION_H #include -#include "http/http_handler.h" #include "gen_cpp/AgentService_types.h" +#include "http/http_handler.h" namespace doris { @@ -31,12 +31,12 @@ class ReloadTabletAction : public HttpHandler { public: ReloadTabletAction(ExecEnv* exec_env); - virtual ~ReloadTabletAction() { } + virtual ~ReloadTabletAction() {} + + void handle(HttpRequest* req) override; - void handle(HttpRequest *req) override; private: - void reload(const std::string& path, int64_t tablet_id, int32_t schema_hash, - HttpRequest *req); + void reload(const std::string& path, int64_t tablet_id, int32_t schema_hash, HttpRequest* req); ExecEnv* _exec_env; @@ -44,4 +44,3 @@ class ReloadTabletAction : public HttpHandler { } // end namespace doris #endif // DORIS_BE_SRC_COMMON_UTIL_DOWNLOAD_ACTION_H - diff --git a/be/src/http/action/restore_tablet_action.h b/be/src/http/action/restore_tablet_action.h index 9fce3110cb2477..f96bfb4d32bcb7 100644 --- a/be/src/http/action/restore_tablet_action.h +++ b/be/src/http/action/restore_tablet_action.h @@ -15,16 +15,16 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef DORIS_BE_SRC_HTTP_RESTORE_TABLET_ACTION_H -#define DORIS_BE_SRC_HTTP_RESTORE_TABLET_ACTION_H +#ifndef DORIS_BE_SRC_HTTP_RESTORE_TABLET_ACTION_H +#define DORIS_BE_SRC_HTTP_RESTORE_TABLET_ACTION_H #include #include #include -#include "http/http_handler.h" -#include "gen_cpp/AgentService_types.h" #include "common/status.h" +#include "gen_cpp/AgentService_types.h" +#include "http/http_handler.h" namespace doris { @@ -34,21 +34,24 @@ class RestoreTabletAction : public HttpHandler { public: RestoreTabletAction(ExecEnv* exec_env); - virtual ~RestoreTabletAction() { } + virtual ~RestoreTabletAction() {} + + void handle(HttpRequest* req) override; - void handle(HttpRequest *req) override; private: - Status _handle(HttpRequest *req); + Status _handle(HttpRequest* req); Status _restore(const std::string& key, int64_t tablet_id, int32_t schema_hash); - Status _reload_tablet(const std::string& key, const std::string& shard_path, int64_t tablet_id, int32_t schema_hash); + Status _reload_tablet(const std::string& key, const std::string& shard_path, int64_t tablet_id, + int32_t schema_hash); + + bool _get_latest_tablet_path_from_trash(int64_t tablet_id, int32_t schema_hash, + std::string* path); - bool _get_latest_tablet_path_from_trash(int64_t tablet_id, int32_t schema_hash, std::string* path); + bool _get_timestamp_and_count_from_schema_hash_path(const std::string& time_label, + uint64_t* timestamp, uint64_t* counter); - bool _get_timestamp_and_count_from_schema_hash_path( - const std::string& time_label, uint64_t* timestamp, uint64_t* counter); - void _clear_key(const std::string& key); Status _create_hard_link_recursive(const std::string& src, const std::string& dst); diff --git a/be/src/http/action/snapshot_action.cpp b/be/src/http/action/snapshot_action.cpp index a294eacb511fc3..18c3bd685407fb 100644 --- a/be/src/http/action/snapshot_action.cpp +++ b/be/src/http/action/snapshot_action.cpp @@ -17,10 +17,9 @@ #include "http/action/snapshot_action.h" -#include 
-#include - #include +#include +#include #include "agent/cgroups_mgr.h" #include "common/logging.h" @@ -30,21 +29,19 @@ #include "http/http_request.h" #include "http/http_response.h" #include "http/http_status.h" -#include "runtime/exec_env.h" #include "olap/olap_define.h" -#include "olap/storage_engine.h" #include "olap/snapshot_manager.h" +#include "olap/storage_engine.h" +#include "runtime/exec_env.h" namespace doris { const std::string TABLET_ID = "tablet_id"; const std::string SCHEMA_HASH = "schema_hash"; -SnapshotAction::SnapshotAction(ExecEnv* exec_env) : - _exec_env(exec_env) { -} +SnapshotAction::SnapshotAction(ExecEnv* exec_env) : _exec_env(exec_env) {} -void SnapshotAction::handle(HttpRequest *req) { +void SnapshotAction::handle(HttpRequest* req) { LOG(INFO) << "accept one request " << req->debug_string(); // add tid to cgroup in order to limit read bandwidth @@ -52,8 +49,7 @@ void SnapshotAction::handle(HttpRequest *req) { // Get tablet id const std::string& tablet_id_str = req->param(TABLET_ID); if (tablet_id_str.empty()) { - std::string error_msg = std::string( - "parameter " + TABLET_ID + " not specified in url."); + std::string error_msg = std::string("parameter " + TABLET_ID + " not specified in url."); HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, error_msg); return; @@ -62,8 +58,7 @@ void SnapshotAction::handle(HttpRequest *req) { // Get schema hash const std::string& schema_hash_str = req->param(SCHEMA_HASH); if (schema_hash_str.empty()) { - std::string error_msg = std::string( - "parameter " + SCHEMA_HASH + " not specified in url."); + std::string error_msg = std::string("parameter " + SCHEMA_HASH + " not specified in url."); HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, error_msg); return; } @@ -107,15 +102,14 @@ int64_t SnapshotAction::make_snapshot(int64_t tablet_id, int32_t schema_hash, OLAPStatus res = OLAPStatus::OLAP_SUCCESS; res = SnapshotManager::instance()->make_snapshot(request, snapshot_path); if (res != 
OLAPStatus::OLAP_SUCCESS) { - LOG(WARNING) << "make snapshot failed. status: " << res - << ", signature: " << tablet_id; + LOG(WARNING) << "make snapshot failed. status: " << res << ", signature: " << tablet_id; return -1L; } else { - LOG(INFO) << "make snapshot success. status: " << res - << ", signature: " << tablet_id << ". path: " << *snapshot_path; + LOG(INFO) << "make snapshot success. status: " << res << ", signature: " << tablet_id + << ". path: " << *snapshot_path; } return 0L; -} +} } // end namespace doris diff --git a/be/src/http/action/snapshot_action.h b/be/src/http/action/snapshot_action.h index 0d75db8540c946..c606c39762d4b2 100644 --- a/be/src/http/action/snapshot_action.h +++ b/be/src/http/action/snapshot_action.h @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_HTTP_SNAPSHOT_ACTION_H -#define DORIS_BE_SRC_HTTP_SNAPSHOT_ACTION_H +#ifndef DORIS_BE_SRC_HTTP_SNAPSHOT_ACTION_H +#define DORIS_BE_SRC_HTTP_SNAPSHOT_ACTION_H #include @@ -32,9 +32,10 @@ class SnapshotAction : public HttpHandler { public: explicit SnapshotAction(ExecEnv* exec_env); - virtual ~SnapshotAction() { } + virtual ~SnapshotAction() {} + + void handle(HttpRequest* req) override; - void handle(HttpRequest *req) override; private: int64_t make_snapshot(int64_t tablet_id, int schema_hash, std::string* snapshot_path); diff --git a/be/src/http/action/stream_load.cpp b/be/src/http/action/stream_load.cpp index 9bdaf4ba6b1832..3243019c9a9643 100644 --- a/be/src/http/action/stream_load.cpp +++ b/be/src/http/action/stream_load.cpp @@ -30,33 +30,33 @@ #include "common/logging.h" #include "common/utils.h" -#include "util/thrift_rpc_helper.h" #include "gen_cpp/FrontendService.h" #include "gen_cpp/FrontendService_types.h" #include "gen_cpp/HeartbeatService_types.h" #include "http/http_channel.h" #include "http/http_common.h" +#include "http/http_headers.h" #include "http/http_request.h" #include "http/http_response.h" 
-#include "http/http_headers.h" #include "http/utils.h" #include "runtime/client_cache.h" #include "runtime/exec_env.h" #include "runtime/fragment_mgr.h" #include "runtime/load_path_mgr.h" #include "runtime/plan_fragment_executor.h" +#include "runtime/stream_load/load_stream_mgr.h" +#include "runtime/stream_load/stream_load_context.h" #include "runtime/stream_load/stream_load_executor.h" #include "runtime/stream_load/stream_load_pipe.h" -#include "runtime/stream_load/stream_load_context.h" -#include "runtime/stream_load/load_stream_mgr.h" #include "util/byte_buffer.h" #include "util/debug_util.h" +#include "util/doris_metrics.h" #include "util/json_util.h" #include "util/metrics.h" -#include "util/doris_metrics.h" +#include "util/string_util.h" +#include "util/thrift_rpc_helper.h" #include "util/time.h" #include "util/uid_util.h" -#include "util/string_util.h" namespace doris { @@ -89,7 +89,8 @@ static bool is_format_support_streaming(TFileFormatType::type format) { } StreamLoadAction::StreamLoadAction(ExecEnv* exec_env) : _exec_env(exec_env) { - _stream_load_entity = DorisMetrics::instance()->metric_registry()->register_entity("stream_load"); + _stream_load_entity = + DorisMetrics::instance()->metric_registry()->register_entity("stream_load"); INT_COUNTER_METRIC_REGISTER(_stream_load_entity, streaming_load_requests_total); INT_COUNTER_METRIC_REGISTER(_stream_load_entity, streaming_load_bytes); INT_COUNTER_METRIC_REGISTER(_stream_load_entity, streaming_load_duration_ms); @@ -101,7 +102,7 @@ StreamLoadAction::~StreamLoadAction() { } void StreamLoadAction::handle(HttpRequest* req) { - StreamLoadContext* ctx = (StreamLoadContext*) req->handler_ctx(); + StreamLoadContext* ctx = (StreamLoadContext*)req->handler_ctx(); if (ctx == nullptr) { return; } @@ -111,7 +112,7 @@ void StreamLoadAction::handle(HttpRequest* req) { ctx->status = _handle(ctx); if (!ctx->status.ok() && ctx->status.code() != TStatusCode::PUBLISH_TIMEOUT) { LOG(WARNING) << "handle streaming load failed, 
id=" << ctx->id - << ", errmsg=" << ctx->status.get_error_msg(); + << ", errmsg=" << ctx->status.get_error_msg(); } } ctx->load_cost_nanos = MonotonicNanos() - ctx->start_nanos; @@ -138,9 +139,8 @@ void StreamLoadAction::handle(HttpRequest* req) { Status StreamLoadAction::_handle(StreamLoadContext* ctx) { if (ctx->body_bytes > 0 && ctx->receive_bytes != ctx->body_bytes) { - LOG(WARNING) << "recevie body don't equal with body bytes, body_bytes=" - << ctx->body_bytes << ", receive_bytes=" << ctx->receive_bytes - << ", id=" << ctx->id; + LOG(WARNING) << "recevie body don't equal with body bytes, body_bytes=" << ctx->body_bytes + << ", receive_bytes=" << ctx->receive_bytes << ", id=" << ctx->id; return Status::InternalError("receive body don't equal with body bytes"); } if (!ctx->use_streaming) { @@ -181,8 +181,8 @@ int StreamLoadAction::on_header(HttpRequest* req) { ctx->label = generate_uuid_string(); } - LOG(INFO) << "new income streaming load request." << ctx->brief() - << ", db=" << ctx->db << ", tbl=" << ctx->table; + LOG(INFO) << "new income streaming load request." 
<< ctx->brief() << ", db=" << ctx->db + << ", tbl=" << ctx->table; auto st = _on_header(req, ctx); if (!st.ok()) { @@ -228,12 +228,14 @@ Status StreamLoadAction::_on_header(HttpRequest* http_req, StreamLoadContext* ct if (!http_req->header(HttpHeaders::CONTENT_LENGTH).empty()) { ctx->body_bytes = std::stol(http_req->header(HttpHeaders::CONTENT_LENGTH)); // json max body size - if ((ctx->format == TFileFormatType::FORMAT_JSON) && (ctx->body_bytes > json_max_body_bytes)) { + if ((ctx->format == TFileFormatType::FORMAT_JSON) && + (ctx->body_bytes > json_max_body_bytes)) { std::stringstream ss; ss << "The size of this batch exceed the max size [" << json_max_body_bytes - << "] of json type data " << " data [ " << ctx->body_bytes << " ]"; + << "] of json type data " + << " data [ " << ctx->body_bytes << " ]"; return Status::InternalError(ss.str()); - } + } // csv max body size else if (ctx->body_bytes > csv_max_body_bytes) { LOG(WARNING) << "body exceed max size." << ctx->brief(); @@ -244,8 +246,7 @@ Status StreamLoadAction::_on_header(HttpRequest* http_req, StreamLoadContext* ct } else { #ifndef BE_TEST evhttp_connection_set_max_body_size( - evhttp_request_get_connection(http_req->get_evhttp_request()), - csv_max_body_bytes); + evhttp_request_get_connection(http_req->get_evhttp_request()), csv_max_body_bytes); #endif } @@ -284,7 +285,7 @@ void StreamLoadAction::on_chunk_data(HttpRequest* req) { auto st = ctx->body_sink->append(bb); if (!st.ok()) { LOG(WARNING) << "append body content failed. 
errmsg=" << st.get_error_msg() - << ctx->brief(); + << ctx->brief(); ctx->status = st; return; } @@ -294,7 +295,7 @@ void StreamLoadAction::on_chunk_data(HttpRequest* req) { } void StreamLoadAction::free_handler_ctx(void* param) { - StreamLoadContext* ctx = (StreamLoadContext*) param; + StreamLoadContext* ctx = (StreamLoadContext*)param; if (ctx == nullptr) { return; } @@ -320,10 +321,9 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req, StreamLoadContext* request.formatType = ctx->format; request.__set_loadId(ctx->id.to_thrift()); if (ctx->use_streaming) { - auto pipe = std::make_shared( - 1024 * 1024 /* max_buffered_bytes */, - 64 * 1024 /* min_chunk_size */, - ctx->body_bytes /* total_length */); + auto pipe = std::make_shared(1024 * 1024 /* max_buffered_bytes */, + 64 * 1024 /* min_chunk_size */, + ctx->body_bytes /* total_length */); RETURN_IF_ERROR(_exec_env->load_stream_mgr()->put(ctx->id, pipe)); request.fileType = TFileType::FILE_STREAM; ctx->body_sink = pipe; @@ -348,16 +348,18 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req, StreamLoadContext* request.__set_partitions(http_req->header(HTTP_PARTITIONS)); request.__set_isTempPartition(false); if (!http_req->header(HTTP_TEMP_PARTITIONS).empty()) { - return Status::InvalidArgument("Can not specify both partitions and temporary partitions"); + return Status::InvalidArgument( + "Can not specify both partitions and temporary partitions"); } - } + } if (!http_req->header(HTTP_TEMP_PARTITIONS).empty()) { request.__set_partitions(http_req->header(HTTP_TEMP_PARTITIONS)); request.__set_isTempPartition(true); if (!http_req->header(HTTP_PARTITIONS).empty()) { - return Status::InvalidArgument("Can not specify both partitions and temporary partitions"); + return Status::InvalidArgument( + "Can not specify both partitions and temporary partitions"); } - } + } if (!http_req->header(HTTP_NEGATIVE).empty() && http_req->header(HTTP_NEGATIVE) == "true") { request.__set_negative(true); } else { @@ 
-386,7 +388,7 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req, StreamLoadContext* request.__set_jsonpaths(http_req->header(HTTP_JSONPATHS)); } if (!http_req->header(HTTP_JSONROOT).empty()) { - request.__set_json_root(http_req->header(HTTP_JSONROOT)); + request.__set_json_root(http_req->header(HTTP_JSONROOT)); } if (!http_req->header(HTTP_STRIP_OUTER_ARRAY).empty()) { if (boost::iequals(http_req->header(HTTP_STRIP_OUTER_ARRAY), "true")) { @@ -398,7 +400,8 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req, StreamLoadContext* request.__set_strip_outer_array(false); } if (!http_req->header(HTTP_FUNCTION_COLUMN + "." + HTTP_SEQUENCE_COL).empty()) { - request.__set_sequence_col(http_req->header(HTTP_FUNCTION_COLUMN + "." + HTTP_SEQUENCE_COL)); + request.__set_sequence_col( + http_req->header(HTTP_FUNCTION_COLUMN + "." + HTTP_SEQUENCE_COL)); } if (ctx->timeout_second != -1) { @@ -406,22 +409,22 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req, StreamLoadContext* } request.__set_thrift_rpc_timeout_ms(config::thrift_rpc_timeout_ms); TMergeType::type merge_type = TMergeType::APPEND; - StringCaseMap merge_type_map = { - { "APPEND", TMergeType::APPEND }, - { "DELETE", TMergeType::DELETE }, - { "MERGE", TMergeType::MERGE } - }; + StringCaseMap merge_type_map = {{"APPEND", TMergeType::APPEND}, + {"DELETE", TMergeType::DELETE}, + {"MERGE", TMergeType::MERGE}}; if (!http_req->header(HTTP_MERGE_TYPE).empty()) { std::string merge_type_str = http_req->header(HTTP_MERGE_TYPE); - if (merge_type_map.find(merge_type_str) != merge_type_map.end() ) { + if (merge_type_map.find(merge_type_str) != merge_type_map.end()) { merge_type = merge_type_map.find(merge_type_str)->second; } else { return Status::InvalidArgument("Invalid merge type " + merge_type_str); } if (merge_type == TMergeType::MERGE && http_req->header(HTTP_DELETE_CONDITION).empty()) { return Status::InvalidArgument("Excepted DELETE ON clause when merge type is MERGE."); - } else if 
(merge_type != TMergeType::MERGE && !http_req->header(HTTP_DELETE_CONDITION).empty()) { - return Status::InvalidArgument("Not support DELETE ON clause when merge type is not MERGE."); + } else if (merge_type != TMergeType::MERGE && + !http_req->header(HTTP_DELETE_CONDITION).empty()) { + return Status::InvalidArgument( + "Not support DELETE ON clause when merge type is not MERGE."); } } request.__set_merge_type(merge_type); @@ -438,7 +441,7 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req, StreamLoadContext* int64_t stream_load_put_start_time = MonotonicNanos(); RETURN_IF_ERROR(ThriftRpcHelper::rpc( master_addr.hostname, master_addr.port, - [&request, ctx] (FrontendServiceConnection& client) { + [&request, ctx](FrontendServiceConnection& client) { client->streamLoadPut(ctx->put_result, request); })); ctx->stream_load_put_cost_nanos = MonotonicNanos() - stream_load_put_start_time; @@ -448,7 +451,7 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req, StreamLoadContext* Status plan_status(ctx->put_result.status); if (!plan_status.ok()) { LOG(WARNING) << "plan streaming load failed. 
errmsg=" << plan_status.get_error_msg() - << ctx->brief(); + << ctx->brief(); return plan_status; } VLOG(3) << "params is " << apache::thrift::ThriftDebugString(ctx->put_result.params); @@ -477,5 +480,4 @@ Status StreamLoadAction::_data_saved_path(HttpRequest* req, std::string* file_pa return Status::OK(); } -} - +} // namespace doris diff --git a/be/src/http/action/stream_load.h b/be/src/http/action/stream_load.h index 87ce0bf44461f6..9f7bc2fddc2e11 100644 --- a/be/src/http/action/stream_load.h +++ b/be/src/http/action/stream_load.h @@ -35,7 +35,7 @@ class StreamLoadAction : public HttpHandler { StreamLoadAction(ExecEnv* exec_env); ~StreamLoadAction() override; - void handle(HttpRequest *req) override; + void handle(HttpRequest* req) override; bool request_will_be_read_progressively() override { return true; } @@ -61,4 +61,4 @@ class StreamLoadAction : public HttpHandler { IntGauge* streaming_load_current_processing; }; -} +} // namespace doris diff --git a/be/src/http/action/tablets_info_action.cpp b/be/src/http/action/tablets_info_action.cpp index 49d41814f63833..56ccbc09a868f8 100644 --- a/be/src/http/action/tablets_info_action.cpp +++ b/be/src/http/action/tablets_info_action.cpp @@ -20,12 +20,12 @@ #include #include "http/http_channel.h" -#include "http/http_request.h" #include "http/http_headers.h" +#include "http/http_request.h" #include "http/http_status.h" -#include "service/backend_options.h" #include "olap/storage_engine.h" #include "olap/tablet_manager.h" +#include "service/backend_options.h" namespace doris { @@ -35,7 +35,7 @@ TabletsInfoAction::TabletsInfoAction() { _host = BackendOptions::get_localhost(); } -void TabletsInfoAction::handle(HttpRequest *req) { +void TabletsInfoAction::handle(HttpRequest* req) { const std::string& tablet_num_to_return = req->param("limit"); req->add_output_header(HttpHeaders::CONTENT_TYPE, HEADER_JSON.c_str()); HttpChannel::send_reply(req, HttpStatus::OK, get_tablets_info(tablet_num_to_return).ToString()); @@ -76,4 
+76,3 @@ EasyJson TabletsInfoAction::get_tablets_info(string tablet_num_to_return) { return tablets_info_ej; } } // namespace doris - diff --git a/be/src/http/action/tablets_info_action.h b/be/src/http/action/tablets_info_action.h index 1675a648e64ff4..f21045bfaeb57f 100644 --- a/be/src/http/action/tablets_info_action.h +++ b/be/src/http/action/tablets_info_action.h @@ -17,9 +17,10 @@ #pragma once +#include + #include "http/http_handler.h" #include "util/easy_json.h" -#include namespace doris { @@ -27,7 +28,7 @@ namespace doris { class TabletsInfoAction : public HttpHandler { public: TabletsInfoAction(); - void handle(HttpRequest *req) override; + void handle(HttpRequest* req) override; EasyJson get_tablets_info(std::string tablet_num_to_return); std::string host() { return _host; } @@ -35,4 +36,3 @@ class TabletsInfoAction : public HttpHandler { std::string _host; }; } // namespace doris - diff --git a/be/src/http/action/update_config_action.cpp b/be/src/http/action/update_config_action.cpp index f7ec29583c1cfe..d12588a8df554f 100644 --- a/be/src/http/action/update_config_action.cpp +++ b/be/src/http/action/update_config_action.cpp @@ -49,7 +49,8 @@ void UpdateConfigAction::handle(HttpRequest* req) { // So the number of query params should at most be 2. 
if (req->params()->size() > 2 || req->params()->size() < 1) { s = Status::InvalidArgument(""); - msg = "Now only support to set a single config once, via 'config_name=new_value', and with an optional parameter 'persist'."; + msg = "Now only support to set a single config once, via 'config_name=new_value', and with " + "an optional parameter 'persist'."; } else { if (req->params()->size() == 1) { const std::string& config = req->params()->begin()->first; @@ -60,12 +61,13 @@ void UpdateConfigAction::handle(HttpRequest* req) { } else { LOG(WARNING) << "set_config " << config << "=" << new_value << " failed"; msg = strings::Substitute("set $0=$1 failed, reason: $2", config, new_value, - s.to_string()); + s.to_string()); } } else if (req->params()->size() == 2) { if (req->params()->find(PERSIST_PARAM) == req->params()->end()) { s = Status::InvalidArgument(""); - msg = "Now only support to set a single config once, via 'config_name=new_value', and with an optional parameter 'persist'."; + msg = "Now only support to set a single config once, via 'config_name=new_value', " + "and with an optional parameter 'persist'."; } else { bool need_persist = false; if (req->params()->find(PERSIST_PARAM)->second.compare("true") == 0) { @@ -77,11 +79,13 @@ void UpdateConfigAction::handle(HttpRequest* req) { } s = config::set_config(iter.first, iter.second, need_persist); if (s.ok()) { - LOG(INFO) << "set_config " << iter.first << "=" << iter.second << " success. persist: " << need_persist; + LOG(INFO) << "set_config " << iter.first << "=" << iter.second + << " success. 
persist: " << need_persist; } else { - LOG(WARNING) << "set_config " << iter.first << "=" << iter.second << " failed"; - msg = strings::Substitute("set $0=$1 failed, reason: $2", iter.first, iter.second, - s.to_string()); + LOG(WARNING) + << "set_config " << iter.first << "=" << iter.second << " failed"; + msg = strings::Substitute("set $0=$1 failed, reason: $2", iter.first, + iter.second, s.to_string()); } } } diff --git a/be/src/http/default_path_handlers.cpp b/be/src/http/default_path_handlers.cpp index 20dd3b09d0db12..80755e247203cb 100644 --- a/be/src/http/default_path_handlers.cpp +++ b/be/src/http/default_path_handlers.cpp @@ -125,33 +125,32 @@ void display_tablets_callback(const WebPageHandler::ArgumentMap& args, EasyJson* void mem_tracker_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* output) { (*output) << "

Memory usage by subsystem

\n"; (*output) << "\n"; + " data-pagination='true' " + " data-search='true' " + " class='table table-striped'>\n"; (*output) << "" - "" - "" - "" - "" - ""; + "" + "" + "" + "" + ""; (*output) << "\n"; std::vector> trackers; MemTracker::ListTrackers(&trackers); for (const shared_ptr& tracker : trackers) { string parent = tracker->parent() == nullptr ? "none" : tracker->parent()->label(); - string limit_str = tracker->limit() == -1 ? "none" : - ItoaKMGT(tracker->limit()); + string limit_str = tracker->limit() == -1 ? "none" : ItoaKMGT(tracker->limit()); string current_consumption_str = ItoaKMGT(tracker->consumption()); string peak_consumption_str = ItoaKMGT(tracker->peak_consumption()); - (*output) << strings::Substitute("" // id, parent, limit - "\n", // current, peak - tracker->label(), parent, limit_str, current_consumption_str, - peak_consumption_str); + (*output) << strings::Substitute( + "" // id, parent, limit + "\n", // current, peak + tracker->label(), parent, limit_str, current_consumption_str, peak_consumption_str); } (*output) << "
IdParentLimitCurrent ConsumptionPeak ConsumptionIdParentLimitCurrent ConsumptionPeak Consumption
$0$1$2$3$4
$0$1$2$3$4
\n"; } @@ -167,26 +166,36 @@ void heap_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* ou #else (*output) << "
" << std::endl;
-    (*output) << "Heap profiling will use pprof tool to sample and get heap profile. It will take 30 seconds" << std::endl;
+    (*output) << "Heap profiling will use pprof tool to sample and get heap profile. It will take "
+                 "30 seconds"
+              << std::endl;
     (*output) << "(Only one thread can obtain profile at the same time)" << std::endl;
     (*output) << std::endl;
-    (*output) << "If you want to get the Heap profile, you need to install gperftools-2.0 on the host machine," << std::endl;
-    (*output) << "and make sure there is a 'pprof' executable file in the system PATH or 'be/tools/bin/' directory." << std::endl;
+    (*output) << "If you want to get the Heap profile, you need to install gperftools-2.0 on the "
+                 "host machine,"
+              << std::endl;
+    (*output) << "and make sure there is a 'pprof' executable file in the system PATH or "
+                 "'be/tools/bin/' directory."
+              << std::endl;
     (*output) << "Doris will obtain Profile in the following ways:" << std::endl;
     (*output) << std::endl;
-    (*output) << "    curl http://localhost:" << config::webserver_port << "/pprof/heap?seconds=30 > perf.data" << std::endl;
+    (*output) << "    curl http://localhost:" << config::webserver_port
+              << "/pprof/heap?seconds=30 > perf.data" << std::endl;
     (*output) << "    pprof --text be/lib/palo_be perf.data" << std::endl;
     (*output) << std::endl;
     (*output) << "
" << std::endl; (*output) << "
" << std::endl; - (*output) << "
" << std::endl; + (*output) << "
" + << std::endl; (*output) << "
" << std::endl; (*output) << "
" << std::endl; (*output) << "
" << std::endl; (*output) << "