diff --git a/cloud/src/common/bvars.cpp b/cloud/src/common/bvars.cpp
index 3ca961afffb725..1e9e7c4ede4a01 100644
--- a/cloud/src/common/bvars.cpp
+++ b/cloud/src/common/bvars.cpp
@@ -211,6 +211,8 @@ bvar::Status g_bvar_fdb_workload_transactions_started_hz("fdb_workload_
 bvar::Status g_bvar_fdb_workload_transactions_committed_hz("fdb_workload_transactions_committed_hz", BVAR_FDB_INVALID_VALUE);
 bvar::Status g_bvar_fdb_workload_transactions_rejected_hz("fdb_workload_transactions_rejected_hz", BVAR_FDB_INVALID_VALUE);
 bvar::Status g_bvar_fdb_client_thread_busyness_percent("fdb_client_thread_busyness_percent", BVAR_FDB_INVALID_VALUE);
+mBvarStatus g_bvar_fdb_process_status_int("fdb_process_status_int", {"process_id", "component", "metric"});
+mBvarStatus g_bvar_fdb_process_status_float("fdb_process_status_float", {"process_id", "component", "metric"});
 
 // checker's bvars
 BvarStatusWithTag g_bvar_checker_num_scanned("checker", "num_scanned");
diff --git a/cloud/src/common/bvars.h b/cloud/src/common/bvars.h
index 6034afe7112e32..d9dfb544d1ae33 100644
--- a/cloud/src/common/bvars.h
+++ b/cloud/src/common/bvars.h
@@ -348,6 +348,8 @@ extern bvar::Status g_bvar_fdb_workload_transactions_started_hz;
 extern bvar::Status g_bvar_fdb_workload_transactions_committed_hz;
 extern bvar::Status g_bvar_fdb_workload_transactions_rejected_hz;
 extern bvar::Status g_bvar_fdb_client_thread_busyness_percent;
+extern mBvarStatus g_bvar_fdb_process_status_int;
+extern mBvarStatus g_bvar_fdb_process_status_float;
 
 // checker
 extern BvarStatusWithTag g_bvar_checker_num_scanned;
diff --git a/cloud/src/common/metric.cpp b/cloud/src/common/metric.cpp
index a9b91c6c853ccd..124a5f26a063af 100644
--- a/cloud/src/common/metric.cpp
+++ b/cloud/src/common/metric.cpp
@@ -17,10 +17,12 @@
 
 #include "metric.h"
 
+#include
 #include
 #include
 #include
 
+#include
 #include
 #include
 #include
@@ -28,6 +30,7 @@
 #include
 
 #include "common/bvars.h"
+#include "common/logging.h"
 #include "meta-store/txn_kv.h"
 #include "meta-store/txn_kv_error.h"
 
@@ -134,6 +137,73 @@ static void export_fdb_status_details(const std::string& status_str) {
         DCHECK(node->value.IsDouble());
         return static_cast(node->value.GetDouble() * NANOSECONDS);
     };
+    // Exports every numeric leaf found under cluster.processes.<process_id>.<component>
+    // into g_bvar_fdb_process_status_int / g_bvar_fdb_process_status_float, keyed by
+    // {process_id, component, metric}. Nested objects are flattened by joining the key
+    // path with '_': for {"disk": {"reads": {"counter": 123, "hz": 0}}} and component
+    // "disk" the metric names are "reads_counter" and "reads_hz".
+    auto get_process_metric = [&](const std::string& component) {
+        auto cluster_node = document.FindMember("cluster");
+        if (cluster_node == document.MemberEnd() || !cluster_node->value.IsObject()) return;
+        auto processes_node = cluster_node->value.FindMember("processes");
+        if (processes_node == cluster_node->value.MemberEnd() ||
+            !processes_node->value.IsObject()) {
+            return;
+        }
+        for (auto process_node = processes_node->value.MemberBegin();
+             process_node != processes_node->value.MemberEnd(); ++process_node) {
+            // A process without this component must not hide the metrics of the
+            // processes that follow it, so skip it instead of returning.
+            if (!process_node->value.IsObject()) continue;
+            auto component_node = process_node->value.FindMember(component.c_str());
+            if (component_node == process_node->value.MemberEnd() ||
+                !component_node->value.IsObject()) {
+                continue;
+            }
+            const char* process_id = process_node->name.GetString();
+            // Joins a parent metric name and a child key into one flattened name.
+            auto recursive_name_helper = [](const std::string& origin_name,
+                                            const char* next_level_name) -> std::string {
+                return origin_name + '_' + next_level_name;
+            };
+            // Stores an int64 or double leaf into the matching bvar; any other JSON
+            // type is logged and skipped.
+            auto set_bvar_value = [&process_id, &component](
+                                          const std::string& name,
+                                          const decltype(process_node)& temp_node) -> void {
+                if (temp_node->value.IsInt64()) {
+                    g_bvar_fdb_process_status_int.put({process_id, component, name},
+                                                      temp_node->value.GetInt64());
+                    return;
+                }
+                if (temp_node->value.IsDouble()) {
+                    g_bvar_fdb_process_status_float.put({process_id, component, name},
+                                                        temp_node->value.GetDouble());
+                    return;
+                }
+                LOG(WARNING) << fmt::format(
+                        "Get process metrics set_bvar_value input a wrong type node {}", name);
+            };
+            // Depth-first walk: objects recurse into their members, leaves are exported.
+            // The lambda is handed to itself as `self` so that it can recurse.
+            auto object_recursive = [&set_bvar_value, &recursive_name_helper](
+                                            auto&& self, const std::string& name,
+                                            decltype(process_node) temp_node) -> void {
+                if (temp_node->value.IsObject()) {
+                    for (auto iter = temp_node->value.MemberBegin();
+                         iter != temp_node->value.MemberEnd(); ++iter) {
+                        self(self, recursive_name_helper(name, iter->name.GetString()), iter);
+                    }
+                    return;
+                }
+                set_bvar_value(name, temp_node);
+            };
+            for (auto metric_node = component_node->value.MemberBegin();
+                 metric_node != component_node->value.MemberEnd(); ++metric_node) {
+                object_recursive(object_recursive, metric_node->name.GetString(), metric_node);
+            }
+        }
+    };
     // Configuration
     g_bvar_fdb_configuration_coordinators_count.set_value(
             get_value({"configuration", "coordinators_count"}));
@@ -226,6 +296,11 @@ static void export_fdb_status_details(const std::string& status_str) {
             }
         }
     }
+
+    // Process Status
+    get_process_metric("cpu");
+    get_process_metric("disk");
+    get_process_metric("memory");
 }
 
 void FdbMetricExporter::export_fdb_metrics(TxnKv* txn_kv) {
diff --git a/cloud/test/metric_test.cpp b/cloud/test/metric_test.cpp
index 31a2b7b3c5821f..81174c73924de9 100644
--- a/cloud/test/metric_test.cpp
+++
b/cloud/test/metric_test.cpp
@@ -172,4 +172,127 @@ TEST(MetricTest, FdbMetricExporterTest) {
         ASSERT_EQ(g_bvar_fdb_machines_count.get_value(), BVAR_FDB_INVALID_VALUE);
         ASSERT_EQ(g_bvar_fdb_client_count.get_value(), BVAR_FDB_INVALID_VALUE);
     }
+
+    // process status
+    // Feeds the FDB status JSON fixture through the exporter and checks that the
+    // cpu/disk/memory metrics of two processes land in the labelled bvars.
+    {
+        g_bvar_fdb_machines_count.set_value(BVAR_FDB_INVALID_VALUE);
+        g_bvar_fdb_client_count.set_value(BVAR_FDB_INVALID_VALUE);
+
+        std::string fdb_metric_example = "./fdb_metric_example.json";
+        std::ifstream inFile(fdb_metric_example);
+
+        ASSERT_TRUE(inFile.is_open());
+        std::string fileContent((std::istreambuf_iterator(inFile)),
+                                std::istreambuf_iterator());
+
+        std::shared_ptr txn_kv = std::make_shared();
+        std::unique_ptr txn;
+        ASSERT_EQ(txn_kv->create_txn(&txn), TxnErrorCode::TXN_OK);
+        txn->put("\xff\xff/status/json", fileContent);
+        ASSERT_EQ(txn->commit(), TxnErrorCode::TXN_OK);
+
+        FdbMetricExporter fdb_metric_exporter(txn_kv);
+        fdb_metric_exporter.sleep_interval_ms_ = 1;
+        fdb_metric_exporter.start();
+        // NOTE(review): relies on the exporter running at least once within 10 ms;
+        // this may be flaky on a loaded machine -- confirm or poll for completion.
+        std::this_thread::sleep_for(std::chrono::milliseconds(10));
+        fdb_metric_exporter.stop();
+        // Floating-point metrics use ASSERT_DOUBLE_EQ: the value goes through JSON
+        // parsing, so bit-exact equality with a source literal is not guaranteed.
+        ASSERT_DOUBLE_EQ(g_bvar_fdb_process_status_float.get(
+                                 {"09ca90b9f3f413e5816b2610ed8b465d", "cpu", "usage_cores"}),
+                         0.0012292);
+        ASSERT_DOUBLE_EQ(g_bvar_fdb_process_status_float.get(
+                                 {"09ca90b9f3f413e5816b2610ed8b465d", "disk", "busy"}),
+                         0.0085999800000000001);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"09ca90b9f3f413e5816b2610ed8b465d", "disk", "free_bytes"}),
+                  490412584960);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"09ca90b9f3f413e5816b2610ed8b465d", "disk", "reads_counter"}),
+                  854857);
+        ASSERT_DOUBLE_EQ(g_bvar_fdb_process_status_float.get(
+                                 {"09ca90b9f3f413e5816b2610ed8b465d", "disk", "reads_hz"}),
+                         0.0);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"09ca90b9f3f413e5816b2610ed8b465d", "disk", "reads_sectors"}),
+                  0);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"09ca90b9f3f413e5816b2610ed8b465d", "disk", "total_bytes"}),
+                  527295578112);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"09ca90b9f3f413e5816b2610ed8b465d", "disk", "writes_counter"}),
+                  73765457);
+        ASSERT_DOUBLE_EQ(g_bvar_fdb_process_status_float.get(
+                                 {"09ca90b9f3f413e5816b2610ed8b465d", "disk", "writes_hz"}),
+                         26.1999);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"09ca90b9f3f413e5816b2610ed8b465d", "disk", "writes_sectors"}),
+                  1336);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"09ca90b9f3f413e5816b2610ed8b465d", "memory", "available_bytes"}),
+                  3065090867);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"09ca90b9f3f413e5816b2610ed8b465d", "memory", "limit_bytes"}),
+                  8589934592);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"09ca90b9f3f413e5816b2610ed8b465d", "memory", "rss_bytes"}),
+                  46551040);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get({"09ca90b9f3f413e5816b2610ed8b465d", "memory",
+                                                     "unused_allocated_memory"}),
+                  655360);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"09ca90b9f3f413e5816b2610ed8b465d", "memory", "used_bytes"}),
+                  122974208);
+
+        // test second process
+        ASSERT_DOUBLE_EQ(g_bvar_fdb_process_status_float.get(
+                                 {"0a456165f04e1ec1a2ade0ce523d54a8", "cpu", "usage_cores"}),
+                         0.0049765900000000004);
+        ASSERT_DOUBLE_EQ(g_bvar_fdb_process_status_float.get(
+                                 {"0a456165f04e1ec1a2ade0ce523d54a8", "disk", "busy"}),
+                         0.012200000000000001);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"0a456165f04e1ec1a2ade0ce523d54a8", "disk", "free_bytes"}),
+                  489160159232);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"0a456165f04e1ec1a2ade0ce523d54a8", "disk", "reads_counter"}),
+                  877107);
+        ASSERT_DOUBLE_EQ(g_bvar_fdb_process_status_float.get(
+                                 {"0a456165f04e1ec1a2ade0ce523d54a8", "disk", "reads_hz"}),
+                         0.0);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"0a456165f04e1ec1a2ade0ce523d54a8", "disk", "reads_sectors"}),
+                  0);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"0a456165f04e1ec1a2ade0ce523d54a8", "disk", "total_bytes"}),
+                  527295578112);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"0a456165f04e1ec1a2ade0ce523d54a8", "disk", "writes_counter"}),
+                  79316112);
+        ASSERT_DOUBLE_EQ(g_bvar_fdb_process_status_float.get(
+                                 {"0a456165f04e1ec1a2ade0ce523d54a8", "disk", "writes_hz"}),
+                         30.9999);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"0a456165f04e1ec1a2ade0ce523d54a8", "disk", "writes_sectors"}),
+                  744);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"0a456165f04e1ec1a2ade0ce523d54a8", "memory", "available_bytes"}),
+                  3076787404);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"0a456165f04e1ec1a2ade0ce523d54a8", "memory", "limit_bytes"}),
+                  8589934592);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"0a456165f04e1ec1a2ade0ce523d54a8", "memory", "rss_bytes"}),
+                  72359936);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get({"0a456165f04e1ec1a2ade0ce523d54a8", "memory",
+                                                     "unused_allocated_memory"}),
+                  393216);
+        ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+                          {"0a456165f04e1ec1a2ade0ce523d54a8", "memory", "used_bytes"}),
+                  157978624);
+    }
 }
\ No newline at end of file