From 8a766d6c8aa3a72282ffdd28360409018a66e82c Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 28 May 2019 15:27:31 -0700 Subject: [PATCH 1/3] Fix Unscheduled Pod bug, remove excess telemetry --- source/code/plugin/filter_inventory2mdm.rb | 128 +++++++++++---------- source/code/plugin/out_mdm.rb | 2 +- 2 files changed, 68 insertions(+), 62 deletions(-) diff --git a/source/code/plugin/filter_inventory2mdm.rb b/source/code/plugin/filter_inventory2mdm.rb index 553c857b7..f98a3224e 100644 --- a/source/code/plugin/filter_inventory2mdm.rb +++ b/source/code/plugin/filter_inventory2mdm.rb @@ -10,11 +10,11 @@ module Fluent class Inventory2MdmFilter < Filter Fluent::Plugin.register_filter('filter_inventory2mdm', self) - + config_param :enable_log, :integer, :default => 0 config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log' config_param :custom_metrics_azure_regions, :string - + @@node_count_metric_name = 'nodesCount' @@pod_count_metric_name = 'podCount' @@pod_inventory_tag = 'mdm.kubepodinventory' @@ -23,63 +23,63 @@ class Inventory2MdmFilter < Filter @@node_status_not_ready = 'NotReady' @@node_inventory_custom_metrics_template = ' - { - "time": "%{timestamp}", - "data": { - "baseData": { - "metric": "%{metricName}", - "namespace": "insights.container/nodes", - "dimNames": [ + { + "time": "%{timestamp}", + "data": { + "baseData": { + "metric": "%{metricName}", + "namespace": "insights.container/nodes", + "dimNames": [ "status" - ], - "series": [ - { - "dimValues": [ + ], + "series": [ + { + "dimValues": [ "%{statusValue}" - ], + ], "min": %{node_status_count}, - "max": %{node_status_count}, - "sum": %{node_status_count}, + "max": %{node_status_count}, + "sum": %{node_status_count}, "count": 1 - } - ] - } - } + } + ] + } + } }' @@pod_inventory_custom_metrics_template = ' - { - "time": "%{timestamp}", - "data": { - "baseData": { - "metric": "%{metricName}", - "namespace": "insights.container/pods", - "dimNames": [ - "phase", - "Kubernetes namespace", - "node", + { + "time": "%{timestamp}", + "data": { + "baseData": { + "metric": "%{metricName}", + "namespace": "insights.container/pods", + "dimNames": [ + "phase", + "Kubernetes namespace", + "node", "controllerName" - ], - "series": [ - { - "dimValues": [ - "%{phaseDimValue}", - "%{namespaceDimValue}", - "%{nodeDimValue}", + ], + "series": [ + { + "dimValues": [ + "%{phaseDimValue}", + "%{namespaceDimValue}", + "%{nodeDimValue}", "%{controllerNameDimValue}" - ], + ], "min": %{podCountMetricValue}, - "max": %{podCountMetricValue}, - "sum": %{podCountMetricValue}, - "count": 1 - } - ] - } - } + "max": %{podCountMetricValue}, + "sum": %{podCountMetricValue}, + "count": 1 + } + ] + } + } }' - + @@pod_phase_values = ['Running', 'Pending', 'Succeeded', 'Failed', 'Unknown'] - + @process_incoming_stream = true def initialize @@ -89,7 +89,7 @@ def initialize def configure(conf) super @log = nil - + if @enable_log @log = Logger.new(@log_path, 1, 5000000) @log.debug {'Starting filter_inventory2mdm plugin'} @@ -105,15 +105,15 @@ def start def shutdown super end - + def process_node_inventory_records(es) timestamp = DateTime.now - + begin node_ready_count = 0 node_not_ready_count = 0 records = [] - + es.each{|time,record| begin timestamp = record['DataItems'][0]['CollectionTime'] @@ -129,15 +129,15 @@ def process_node_inventory_records(es) ready_record = @@node_inventory_custom_metrics_template % { timestamp: timestamp, - metricName: @@node_count_metric_name, + metricName: @@node_count_metric_name, statusValue: @@node_status_ready, node_status_count: node_ready_count } records.push(JSON.parse(ready_record)) - + not_ready_record = @@node_inventory_custom_metrics_template % { timestamp: timestamp, - metricName: @@node_count_metric_name, + metricName: @@node_count_metric_name, statusValue: @@node_status_not_ready, node_status_count: node_not_ready_count } @@ -164,7 +164,7 @@ def process_pod_inventory_records(es) record_count += 1 timestamp = record['DataItems'][0]['CollectionTime'] podUid = record['DataItems'][0]['PodUid'] - + if podUids.key?(podUid) #@log.info "pod with #{podUid} already counted" next @@ -176,6 +176,12 @@ def process_pod_inventory_records(es) podControllerNameDimValue = record['DataItems'][0]['ControllerName'] podNodeDimValue = record['DataItems'][0]['Computer'] + if podNodeDimValue.empty? && podPhaseDimValue.downcase == 'pending' + podNodeDimValue = 'unscheduled' + elsif podNodeDimValue.empty? + podNodeDimValue = 'unknown' + end + # group by distinct dimension values pod_key = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue, podPhaseDimValue].join('~~') @@ -197,7 +203,7 @@ def process_pod_inventory_records(es) pod_count = 1 pod_count_hash[pod_key] = pod_count end - + # Collect all possible combinations of dimension values other than pod phase key_without_phase_dim_value = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue].join('~~') if no_phase_dim_values_hash.key?(key_without_phase_dim_value) @@ -237,9 +243,9 @@ def process_pod_inventory_records(es) timestamp: timestamp, metricName: @@pod_count_metric_name, phaseDimValue: podPhaseDimValue, - namespaceDimValue: podNamespaceDimValue, - nodeDimValue: podNodeDimValue, - controllerNameDimValue: podControllerNameDimValue, + namespaceDimValue: podNamespaceDimValue, + nodeDimValue: podNodeDimValue, + controllerNameDimValue: podControllerNameDimValue, podCountMetricValue: value } records.push(JSON.parse(record)) @@ -265,11 +271,11 @@ def filter_stream(tag, es) elsif tag.downcase.start_with?(@@pod_inventory_tag) @log.info 'Processing POD inventory records in filter plugin to send to MDM' filtered_records, time = process_pod_inventory_records(es) - else + else filtered_records = [] end end - filtered_records.each {|filtered_record| + filtered_records.each {|filtered_record| new_es.add(time, filtered_record) if filtered_record } if filtered_records rescue => e diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 351198afe..3a4ea8d52 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -156,7 +156,7 @@ def send_to_mdm(post_body) response = @http_client.request(request) response.value # this throws for non 200 HTTP response code @log.info "HTTP Post Response Code : #{response.code}" - ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMSendSuccessful", {}) + #ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMSendSuccessful", {}) rescue Net::HTTPServerException => e @log.info "Failed to Post Metrics to MDM : #{e} Response: #{response}" @log.debug_backtrace(e.backtrace) From b1fbc378ffc06f16ca0f24f7a4cdf4253ca09e91 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 28 May 2019 23:29:11 -0700 Subject: [PATCH 2/3] Send Success Telemetry only once after startup for a node in a cluster for MDM Post --- source/code/plugin/out_mdm.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 3a4ea8d52..225f4d5d2 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -31,6 +31,7 @@ def initialize @last_post_attempt_time = Time.now @first_post_attempt_made = false @can_send_data_to_mdm = true + @success_telemetry_sent = false end def configure(conf) @@ -156,7 +157,11 @@ def send_to_mdm(post_body) response = @http_client.request(request) response.value # this throws for non 200 HTTP response code @log.info "HTTP Post Response Code : #{response.code}" - #ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMSendSuccessful", {}) + if !@success_telemetry_sent + ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMSendSuccessful", {}) + @success_telemetry_sent = true + end + rescue Net::HTTPServerException => e @log.info "Failed to Post Metrics to MDM : #{e} Response: #{response}" @log.debug_backtrace(e.backtrace) From 8ab08b81da3576b16ac6b5a017ae7157650bf64b Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 30 May 2019 16:16:13 -0700 Subject: [PATCH 3/3] Sending telemetry for successful push to MDM every hour --- source/code/plugin/out_mdm.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 225f4d5d2..68c43d5da 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -31,7 +31,7 @@ def initialize @last_post_attempt_time = Time.now @first_post_attempt_made = false @can_send_data_to_mdm = true - @success_telemetry_sent = false + @last_telemetry_sent_time = nil end def configure(conf) @@ -157,9 +157,9 @@ def send_to_mdm(post_body) response = @http_client.request(request) response.value # this throws for non 200 HTTP response code @log.info "HTTP Post Response Code : #{response.code}" - if !@success_telemetry_sent + if @last_telemetry_sent_time.nil? || @last_telemetry_sent_time + 60 * 60 < Time.now ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMSendSuccessful", {}) - @success_telemetry_sent = true + @last_telemetry_sent_time = Time.now end rescue Net::HTTPServerException => e