From f778344f5bc4194f17a6b98f4836fbd95cb9b15e Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 15 Aug 2019 15:03:45 -0700 Subject: [PATCH 1/3] Fix Deserialization Bug --- source/code/plugin/health/cluster_health_state.rb | 8 ++++++-- source/code/plugin/health/health_monitor_state.rb | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/source/code/plugin/health/cluster_health_state.rb b/source/code/plugin/health/cluster_health_state.rb index ac7e05675..3b56dd243 100644 --- a/source/code/plugin/health/cluster_health_state.rb +++ b/source/code/plugin/health/cluster_health_state.rb @@ -16,8 +16,12 @@ def initialize(token_file_path, cert_file_path) @token = get_token end - def update_state(state) + def update_state(state) #state = hash of monitor_instance_id to HealthMonitorInstanceState struct get_request = Net::HTTP::Get.new(@uri.request_uri) + monitor_states_hash = {} + state.each {|monitor_instance_id, health_monitor_instance_state| + monitor_states_hash[monitor_instance_id] = health_monitor_instance_state.to_h + } get_request["Authorization"] = "Bearer #{@token}" @log.info "Making GET request to #{@uri.request_uri} @ #{Time.now.utc.iso8601}" @@ -37,7 +41,7 @@ def update_state(state) update_request["Authorization"] = "Bearer #{@token}" update_request_body = get_update_request_body - update_request_body["state"] = state.to_json + update_request_body["state"] = monitor_states_hash.to_json update_request.body = update_request_body.to_json update_response = @http_client.request(update_request) diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index c3df5e3a9..e6205b481 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -33,7 +33,7 @@ def to_h def initialize_state(deserialized_state) @@monitor_states = {} deserialized_state.each{|k,v| - health_monitor_instance_state_hash = JSON.parse(v) + health_monitor_instance_state_hash = v state = HealthMonitorInstanceState.new(*health_monitor_instance_state_hash.values_at(*HealthMonitorInstanceState.members)) state.prev_sent_record_time = health_monitor_instance_state_hash["prev_sent_record_time"] state.old_state = health_monitor_instance_state_hash["old_state"] From a2dee3c66cd7dcd6886026633399c3d7f0750185 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 15 Aug 2019 19:25:34 -0700 Subject: [PATCH 2/3] Fix the Capacity computation --- source/code/plugin/health/health_monitor_utils.rb | 8 ++++---- source/code/plugin/in_kube_health.rb | 13 ++++++++----- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index df47529e6..5c3b9622f 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -172,9 +172,11 @@ def get_resource_subscription(pod_inventory, metric_name, metric_capacity) return subscription end - def get_cluster_cpu_memory_capacity(log) + def get_cluster_cpu_memory_capacity(log, node_inventory: nil) begin - node_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + if node_inventory.nil? + node_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + end cluster_cpu_capacity = 0.0 cluster_memory_capacity = 0.0 if !node_inventory.empty? @@ -186,7 +188,6 @@ def get_cluster_cpu_memory_capacity(log) cluster_cpu_capacity += cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'] end end - log.info "Cluster CPU Limit #{cluster_cpu_capacity}" else log.info "Error getting cpu_capacity" end @@ -197,7 +198,6 @@ def get_cluster_cpu_memory_capacity(log) cluster_memory_capacity += memory_capacity_node['DataItems'][0]['Collections'][0]['Value'] end end - log.info "Cluster Memory Limit #{cluster_memory_capacity}" else log.info "Error getting memory_capacity" end diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index d9672da3b..045ddf7c7 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -104,9 +104,9 @@ def enumerate end if !pod_inventory.nil? - record = process_cpu_oversubscribed_monitor(pod_inventory) + record = process_cpu_oversubscribed_monitor(pod_inventory, node_inventory) health_monitor_records.push(record) if record - record = process_memory_oversubscribed_monitor(pod_inventory) + record = process_memory_oversubscribed_monitor(pod_inventory, node_inventory) health_monitor_records.push(record) if record pods_ready_hash = HealthMonitorUtils.get_pods_ready_hash(pod_inventory, deployment_inventory) @@ -146,11 +146,12 @@ def enumerate end end - def process_cpu_oversubscribed_monitor(pod_inventory) + def process_cpu_oversubscribed_monitor(pod_inventory, node_inventory) timestamp = Time.now.utc.iso8601 + @@clusterCpuCapacity = HealthMonitorUtils.get_cluster_cpu_memory_capacity(@@hmlog, node_inventory: node_inventory)[0] subscription = HealthMonitorUtils.get_resource_subscription(pod_inventory,"cpu", @@clusterCpuCapacity) + @@hmlog.info "Refreshed Cluster CPU Capacity #{@@clusterCpuCapacity}" state = subscription > @@clusterCpuCapacity ? "fail" : "pass" - #@@hmlog.debug "CPU Oversubscribed Monitor State : #{state}" #CPU monitor_id = HealthMonitorConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID @@ -171,8 +172,10 @@ def process_cpu_oversubscribed_monitor(pod_inventory) return health_record end - def process_memory_oversubscribed_monitor(pod_inventory) + def process_memory_oversubscribed_monitor(pod_inventory, node_inventory) timestamp = Time.now.utc.iso8601 + @@clusterMemoryCapacity = HealthMonitorUtils.get_cluster_cpu_memory_capacity(@@hmlog,node_inventory: node_inventory)[1] + @@hmlog.info "Refreshed Cluster Memory Capacity #{@@clusterMemoryCapacity}" subscription = HealthMonitorUtils.get_resource_subscription(pod_inventory,"memory", @@clusterMemoryCapacity) state = subscription > @@clusterMemoryCapacity ? "fail" : "pass" #@@hmlog.debug "Memory Oversubscribed Monitor State : #{state}" From e63cb8f551d93b42b724edc48d9e6473c5ba036d Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 15 Aug 2019 23:22:03 -0700 Subject: [PATCH 3/3] Added new regions, added handler for plugin start --- installer/conf/container.conf | 2 +- installer/conf/kube.conf | 4 +-- source/code/plugin/out_mdm.rb | 53 ++++++++++++++++++----------------- 3 files changed, 30 insertions(+), 29 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 6d810a0e2..4cb9e6913 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -32,7 +32,7 @@ #custom_metrics_mdm filter plugin type filter_cadvisor2mdm - custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes log_level info diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 4b4ec09ea..3cbc3ff17 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -70,14 +70,14 @@ type filter_inventory2mdm - custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral log_level info #custom_metrics_mdm filter plugin for perf data from windows nodes type filter_cadvisor2mdm - custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes log_level info diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 69ef25580..4b9d50a29 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -44,37 +44,38 @@ def start super begin file = File.read(@@azure_json_path) + @data_hash = JSON.parse(file) + aks_resource_id = ENV["AKS_RESOURCE_ID"] + aks_region = ENV["AKS_REGION"] + + if aks_resource_id.to_s.empty? + @log.info "Environment Variable AKS_RESOURCE_ID is not set.. " + @can_send_data_to_mdm = false + end + if aks_region.to_s.empty? + @log.info "Environment Variable AKS_REGION is not set.. " + @can_send_data_to_mdm = false + end + aks_region = aks_region.gsub(" ","") + + if @can_send_data_to_mdm + @log.info "MDM Metrics supported in #{aks_region} region" + @token_url = @@token_url_template % {tenant_id: @data_hash["tenantId"]} + @cached_access_token = get_access_token + @@post_request_url = @@post_request_url_template % {aks_region: aks_region, aks_resource_id: aks_resource_id} + @post_request_uri = URI.parse(@@post_request_url) + @http_client = Net::HTTP.new(@post_request_uri.host, @post_request_uri.port) + @http_client.use_ssl = true + @log.info "POST Request url: #{@@post_request_url}" + ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMPluginStart", {}) + end rescue => e - @log.info "Unable to read file #{@@azure_json_path} #{e}" + @log.info "exception when initializing out_mdm #{e}" + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "MDM"}) @can_send_data_to_mdm = false return end - # Handle the case where the file read fails. Send Telemetry and exit the plugin? - @data_hash = JSON.parse(file) - @token_url = @@token_url_template % {tenant_id: @data_hash["tenantId"]} - @cached_access_token = get_access_token - aks_resource_id = ENV["AKS_RESOURCE_ID"] - aks_region = ENV["AKS_REGION"] - - if aks_resource_id.to_s.empty? - @log.info "Environment Variable AKS_RESOURCE_ID is not set.. " - @can_send_data_to_mdm = false - return - end - if aks_region.to_s.empty? - @log.info "Environment Variable AKS_REGION is not set.. " - @can_send_data_to_mdm = false - return - end - - aks_region = aks_region.gsub(" ","") - @@post_request_url = @@post_request_url_template % {aks_region: aks_region, aks_resource_id: aks_resource_id} - @post_request_uri = URI.parse(@@post_request_url) - @http_client = Net::HTTP.new(@post_request_uri.host, @post_request_uri.port) - @http_client.use_ssl = true - @log.info "POST Request url: #{@@post_request_url}" - ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMPluginStart", {}) end # get the access token only if the time to expiry is less than 5 minutes