From 6d7199f50b75b1fc95ef7e615957ceb6ee0f3c48 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 8 Feb 2019 17:52:03 -0800 Subject: [PATCH 01/90] changes --- installer/conf/container.conf | 5 + .../code/plugin/filter_health_cpu_memory.rb | 296 ++++++++++++++++++ source/code/plugin/in_cadvisor_perf.rb | 3 + 3 files changed, 304 insertions(+) create mode 100644 source/code/plugin/filter_health_cpu_memory.rb diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 091753230..e0f52ce3a 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -23,6 +23,11 @@ log_level debug + + type filter_health_cpu_memory + log_level debug + + type out_oms log_level debug diff --git a/source/code/plugin/filter_health_cpu_memory.rb b/source/code/plugin/filter_health_cpu_memory.rb new file mode 100644 index 000000000..9bddf4a16 --- /dev/null +++ b/source/code/plugin/filter_health_cpu_memory.rb @@ -0,0 +1,296 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. 
+ +# frozen_string_literal: true + +module Fluent + require "logger" + require "json" + require_relative "omslog" + + class CPUMemoryHealthFilter < Filter + Fluent::Plugin.register_filter("filter_health_cpu_memory", self) + + config_param :enable_log, :integer, :default => 0 + config_param :log_path, :string, :default => "/var/opt/microsoft/omsagent/log/filter_health_cpu_memory.log" + config_param :metrics_to_collect, :string, :default => "cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes" + + @@previousCpuHealthDetails = {} + @@previousPreviousCpuHealthDetails = {} + @@currentHealthMetrics = {} + @@nodeCpuHealthDataTimeTracker = DateTime.now.to_time.to_i + @@nodeMemoryRssDataTimeTracker = DateTime.now.to_time.to_i + + @@previousMemoryRssHealthDetails = {} + @@previousPreviousMemoryRssHealthDetails = {} + @@currentHealthMetrics = {} + @@clusterName = KubernetesApiClient.getClusterName + @@clusterId = KubernetesApiClient.getClusterId + @@clusterRegion = KubernetesApiClient.getClusterRegion + @@cpu_usage_milli_cores = "cpuUsageMilliCores" + @@cpu_usage_nano_cores = "cpuusagenanocores" + @@object_name_k8s_node = "K8SNode" + + def initialize + super + end + + def configure(conf) + super + @log = nil + + if @enable_log + @log = Logger.new(@log_path, "weekly") + @log.debug { "Starting filter_health_cpu_memory plugin" } + end + end + + def start + super + @@clusterName = KubernetesApiClient.getClusterName + @@clusterId = KubernetesApiClient.getClusterId + @@clusterRegion = KubernetesApiClient.getClusterRegion + @@cpu_limit = 0.0 + @@memory_limit = 0.0 + begin + nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + rescue Exception => e + @log.info "Error when getting nodeInventory from kube API. Exception: #{e.class} Message: #{e.message} " + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + end + if !nodeInventory.nil? 
+ cpu_limit_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") + if !cpu_limit_json.nil? + @@cpu_limit = cpu_limit_json[0]["DataItems"][0]["Collections"][0]["Value"] + @log.info "CPU Limit #{@@cpu_limit}" + else + @log.info "Error getting cpu_limit" + end + memory_limit_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes") + if !memory_limit_json.nil? + @@memory_limit = memory_limit_json[0]["DataItems"][0]["Collections"][0]["Value"] + @log.info "Memory Limit #{@@memory_limit}" + else + @log.info "Error getting memory_limit" + end + end + end + + def shutdown + super + end + + #def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, healthRecords) + def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue) + begin + @log.debug "cpuMetricValue: #{cpuMetricValue}" + @log.debug "cpuMetricPercentValue: #{cpuMetricPercentValue}" + #@log.debug "healthRecords: #{healthRecords}" + # Get node CPU usage health + updateCpuHealthState = false + cpuHealthRecord = {} + currentCpuHealthDetails = {} + cpuHealthRecord["ClusterName"] = @@clusterName + cpuHealthRecord["ClusterId"] = @@clusterId + cpuHealthRecord["ClusterRegion"] = @@clusterRegion + cpuHealthRecord["Computer"] = @@currentHealthMetrics["computer"] + cpuHealthState = "" + if cpuMetricPercentValue.to_f < 80.0 + #nodeCpuHealthState = 'Pass' + cpuHealthState = "Pass" + elsif cpuMetricPercentValue.to_f > 90.0 + cpuHealthState = "Fail" + else + cpuHealthState = "Warning" + end + currentCpuHealthDetails["State"] = cpuHealthState + currentCpuHealthDetails["Time"] = @@currentHealthMetrics["metricTime"] + currentCpuHealthDetails["CPUUsagePercentage"] = cpuMetricPercentValue + currentCpuHealthDetails["CPUUsageMillicores"] = cpuMetricValue + + currentTime = DateTime.now.to_time.to_i + timeDifference = (currentTime - @@nodeCpuHealthDataTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + + if 
(@@previousCpuHealthDetails["State"].nil? || + ((cpuHealthState == @@previousCpuHealthDetails["State"]) && (cpuHealthState == @@previousPreviousCpuHealthDetails["State"])) || + timeDifferenceInMinutes > 50) + cpuHealthRecord["NodeCpuHealthState"] = cpuHealthState + cpuHealthRecord["NodeCpuUsagePercentage"] = cpuMetricPercentValue + cpuHealthRecord["NodeCpuUsageMilliCores"] = cpuMetricValue + #healthRecord['TimeStateDetected'] = @@previousPreviousCpuHealthDetails['Time'] + cpuHealthRecord["CollectionTime"] = @@previousPreviousCpuHealthDetails["Time"] + cpuHealthRecord["PrevNodeCpuUsageDetails"] = {"Percent": @@previousCpuHealthDetails["CPUUsagePercentage"], "TimeStamp": @@previousCpuHealthDetails["Time"], "Millicores": @@previousCpuHealthDetails["CPUUsageMillicores"]} + cpuHealthRecord["PrevPrevNodeCpuUsageDetails"] = {"Percent": @@previousPreviousCpuHealthDetails["CPUUsagePercentage"], "TimeStamp": @@previousPreviousCpuHealthDetails["Time"], "Millicores": @@previousPreviousCpuHealthDetails["CPUUsageMillicores"]} + updateCpuHealthState = true + end + @@previousPreviousCpuHealthDetails = @@previousCpuHealthDetails.clone + @@previousCpuHealthDetails = currentCpuHealthDetails.clone + if updateCpuHealthState + @log.debug "cpu health record: #{cpuHealthRecord}" + #healthRecords.push(cpuHealthRecord) + @@nodeCpuHealthDataTimeTracker = DateTime.now.to_time.to_i + return cpuHealthRecord + else + return nil + end + rescue => errorStr + @log.debug "In processCpuMetrics: exception: #{errorStr}" + end + end + + #def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentValue, healthRecords) + def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentValue) + begin + @log.debug "memoryRssMetricValue: #{memoryRssMetricValue}" + @log.debug "memoryRssMetricPercentValue: #{memoryRssMetricPercentValue}" + #@log.debug "healthRecords: #{healthRecords}" + + # Get node memory RSS health + memRssHealthRecord = {} + currentMemoryRssHealthDetails = {} 
+ memRssHealthRecord["ClusterName"] = @@clusterName + memRssHealthRecord["ClusterId"] = @@clusterId + memRssHealthRecord["ClusterRegion"] = @@clusterRegion + memRssHealthRecord["Computer"] = @@currentHealthMetrics["computer"] + + memoryRssHealthState = "" + if memoryRssMetricPercentValue.to_f < 80.0 + #nodeCpuHealthState = 'Pass' + memoryRssHealthState = "Pass" + elsif memoryRssMetricPercentValue.to_f > 90.0 + memoryRssHealthState = "Fail" + else + memoryRssHealthState = "Warning" + end + currentMemoryRssHealthDetails["State"] = memoryRssHealthState + currentMemoryRssHealthDetails["Time"] = @@currentHealthMetrics["metricTime"] + currentMemoryRssHealthDetails["memoryRssPercentage"] = memoryRssMetricPercentValue + currentMemoryRssHealthDetails["memoryRssBytes"] = memoryRssMetricValue + + currentTime = DateTime.now.to_time.to_i + timeDifference = (currentTime - @@nodeMemoryRssDataTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + + if (@@previousMemoryRssHealthDetails["State"].nil? 
|| + ((memoryRssHealthState == @@previousMemoryRssHealthDetails["State"]) && (memoryRssHealthState == @@previousPreviousMemoryRssHealthDetails["State"])) || + timeDifferenceInMinutes > 50) + memRssHealthRecord["NodeMemoryRssHealthState"] = memoryRssHealthState + memRssHealthRecord["NodeMemoryRssPercentage"] = memoryRssMetricPercentValue + memRssHealthRecord["NodeMemoryRssBytes"] = memoryRssMetricValue + #healthRecord['TimeStateDetected'] = @@previousPreviousCpuHealthDetails['Time'] + memRssHealthRecord["CollectionTime"] = @@previousPreviousMemoryRssHealthDetails["Time"] + memRssHealthRecord["PrevNodeMemoryRssDetails"] = {"Percent": @@previousMemoryRssHealthDetails["memoryRssPercentage"], "TimeStamp": @@previousMemoryRssHealthDetails["Time"], "Bytes": @@previousMemoryRssHealthDetails["memoryRssBytes"]} + memRssHealthRecord["PrevPrevNodeMemoryRssDetails"] = {"Percent": @@previousPreviousMemoryRssHealthDetails["memoryRssPercentage"], "TimeStamp": @@previousPreviousMemoryRssHealthDetails["Time"], "Bytes": @@previousPreviousMemoryRssHealthDetails["memoryRssBytes"]} + updateMemoryRssHealthState = true + end + @@previousPreviousMemoryRssHealthDetails = @@previousMemoryRssHealthDetails.clone + @@previousMemoryRssHealthDetails = currentMemoryRssHealthDetails.clone + if updateMemoryRssHealthState + @log.debug "memory health record: #{memRssHealthRecord}" + # healthRecords.push(memRssHealthRecord) + @@nodeMemoryRssDataTimeTracker = currentTime + return memRssHealthRecord + else + return nil + end + rescue => errorStr + @log.debug "In processMemoryRssMetrics: exception: #{errorStr}" + end + end + + def processHealthMetrics() + healthRecords = [] + cpuMetricPercentValue = @@currentHealthMetrics["cpuUsageNanoCoresPercentage"] + cpuMetricValue = @@currentHealthMetrics["cpuUsageNanoCores"] + memoryRssMetricPercentValue = @@currentHealthMetrics["memoryRssBytesPercentage"] + memoryRssMetricValue = @@currentHealthMetrics["memoryRssBytes"] + processCpuMetrics(cpuMetricValue, 
cpuMetricPercentValue, healthRecords) + processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentValue, healthRecords) + return healthRecords + end + + #def filter(tag, time, record) + # Reading all the records to populate a hash for CPU and memory utilization percentages and values + # @@currentHealthMetrics[record['data']['baseData']['metric']] = record['data']['baseData']['series'][0]['min'] + # if !(@@currentHealthMetrics.has_key?("metricTime")) + # @@currentHealthMetrics['metricTime'] = record['time'] + # end + # if !(@@currentHealthMetrics.has_key?("computer")) + # @@currentHealthMetrics['computer'] = record['data']['baseData']['series'][0]['dimValues'][0] + # end + # return nil + #end + + def filter(tag, time, record) + object_name = record["DataItems"][0]["ObjectName"] + counter_name = record["DataItems"][0]["Collections"][0]["CounterName"] + if object_name == @@object_name_k8s_node && @metrics_to_collect_hash.key?(counter_name.downcase) + percentage_metric_value = 0.0 + + # Compute and send % CPU and Memory + begin + metric_value = record["DataItems"][0]["Collections"][0]["Value"] + if counter_name.downcase == @@cpu_usage_nano_cores + metric_name = @@cpu_usage_milli_cores + metric_value = metric_value / 1000000 + if @cpu_limit != 0.0 + percentage_metric_value = (metric_value * 1000000) * 100 / @cpu_limit + end + return processCpuMetrics(metric_value, percentage_metric_value) + end + + if counter_name.start_with?("memory") + metric_name = counter_name + if @memory_limit != 0.0 + percentage_metric_value = metric_value * 100 / @memory_limit + end + return processMemoryRssMetrics(metric_value, percentage_metric_value) + end + #return get_metric_records(record, metric_name, metric_value, percentage_metric_value) + rescue Exception => e + @log.info "Error parsing cadvisor record Exception: #{e.class} Message: #{e.message}" + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + return [] + end + else + return [] + end + end + + #def 
filter_stream(tag, es) + #health_es = MultiEventStream.new + #timeFromEventStream = DateTime.now.to_time.to_i + #begin + #es.each { |time, record| + # filter(tag, time, record) + # if !timeFromEventStream.nil? + # timeFromEventStream = time + # end + #} + #healthRecords = processHealthMetrics + #healthRecords.each {|healthRecord| + # health_es.add(timeFromEventStream, healthRecord) if healthRecord + # router.emit_stream('oms.rashmi', health_es) if health_es + #} if healthRecords + #rescue => e + # @log.debug "exception: #{e}" + #end + # es + # end + + def filter_stream(tag, es) + new_es = MultiEventStream.new + es.each { |time, record| + begin + filtered_record = filter(tag, time, record) + # filtered_records.each { |filtered_record| + new_es.add(time, filtered_record) if filtered_record + # } if filtered_records + rescue => e + router.emit_error_event(tag, time, record, e) + end + } + new_es + end + end +end diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index 5b551f74e..1a50d1c64 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -18,6 +18,7 @@ def initialize config_param :run_interval, :time, :default => '1m' config_param :tag, :string, :default => "oms.api.cadvisorperf" + config_param :healthtag, :string, :default => "oms.health.cadvisorperf" def configure (conf) super @@ -55,6 +56,8 @@ def enumerate() end router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@healthtag, eventStream) if eventStream + @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp('true') == 0 && eventStream.count > 0) $log.info("cAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") From 3048d137869a085e27716ff2adf65409828fb126 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 8 Feb 2019 19:27:42 -0800 Subject: [PATCH 02/90] changes --- .../code/plugin/filter_health_cpu_memory.rb | 97 ++++++++++++------- 1 file changed, 61 insertions(+), 36 deletions(-) diff --git a/source/code/plugin/filter_health_cpu_memory.rb b/source/code/plugin/filter_health_cpu_memory.rb index 9bddf4a16..ab6a1c204 100644 --- a/source/code/plugin/filter_health_cpu_memory.rb +++ b/source/code/plugin/filter_health_cpu_memory.rb @@ -16,20 +16,23 @@ class CPUMemoryHealthFilter < Filter @@previousCpuHealthDetails = {} @@previousPreviousCpuHealthDetails = {} - @@currentHealthMetrics = {} + @@previousCpuHealthStateSent = "" @@nodeCpuHealthDataTimeTracker = DateTime.now.to_time.to_i @@nodeMemoryRssDataTimeTracker = DateTime.now.to_time.to_i @@previousMemoryRssHealthDetails = {} @@previousPreviousMemoryRssHealthDetails = {} - @@currentHealthMetrics = {} + @@previousMemoryRssHealthStateSent = "" @@clusterName = KubernetesApiClient.getClusterName @@clusterId = KubernetesApiClient.getClusterId @@clusterRegion = KubernetesApiClient.getClusterRegion - @@cpu_usage_milli_cores = "cpuUsageMilliCores" + # @@cpu_usage_milli_cores = "cpuUsageMilliCores" @@cpu_usage_nano_cores = "cpuusagenanocores" + @@memory_rss_bytes = "memoryrssbytes" @@object_name_k8s_node = "K8SNode" + @metrics_to_collect_hash = {} + def initialize super end @@ -46,6 +49,7 @@ def configure(conf) def start super + @metrics_to_collect_hash = build_metrics_hash @@clusterName = KubernetesApiClient.getClusterName @@clusterId = KubernetesApiClient.getClusterId @@clusterRegion = KubernetesApiClient.getClusterRegion @@ -79,8 +83,16 @@ def shutdown super end + def build_metrics_hash + @log.debug "Building Hash of Metrics to Collect" + metrics_to_collect_arr = @metrics_to_collect.split(",").map(&:strip) 
+ metrics_hash = metrics_to_collect_arr.map { |x| [x.downcase, true] }.to_h + @log.info "Metrics Collected : #{metrics_hash}" + return metrics_hash + end + #def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, healthRecords) - def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue) + def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, host, timeStamp) begin @log.debug "cpuMetricValue: #{cpuMetricValue}" @log.debug "cpuMetricPercentValue: #{cpuMetricPercentValue}" @@ -92,7 +104,7 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue) cpuHealthRecord["ClusterName"] = @@clusterName cpuHealthRecord["ClusterId"] = @@clusterId cpuHealthRecord["ClusterRegion"] = @@clusterRegion - cpuHealthRecord["Computer"] = @@currentHealthMetrics["computer"] + cpuHealthRecord["Computer"] = host cpuHealthState = "" if cpuMetricPercentValue.to_f < 80.0 #nodeCpuHealthState = 'Pass' @@ -103,7 +115,7 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue) cpuHealthState = "Warning" end currentCpuHealthDetails["State"] = cpuHealthState - currentCpuHealthDetails["Time"] = @@currentHealthMetrics["metricTime"] + currentCpuHealthDetails["Time"] = timeStamp currentCpuHealthDetails["CPUUsagePercentage"] = cpuMetricPercentValue currentCpuHealthDetails["CPUUsageMillicores"] = cpuMetricValue @@ -111,9 +123,12 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue) timeDifference = (currentTime - @@nodeCpuHealthDataTimeTracker).abs timeDifferenceInMinutes = timeDifference / 60 - if (@@previousCpuHealthDetails["State"].nil? || - ((cpuHealthState == @@previousCpuHealthDetails["State"]) && (cpuHealthState == @@previousPreviousCpuHealthDetails["State"])) || - timeDifferenceInMinutes > 50) + @log.debug "processing cpu metrics" + if ((cpuHealthState != @@previousCpuHealthStateSent && + #@@previousCpuHealthDetails["State"].nil? 
|| + ((cpuHealthState == @@previousCpuHealthDetails["State"]) && (cpuHealthState == @@previousPreviousCpuHealthDetails["State"]))) || + timeDifferenceInMinutes > 5) + @log.debug "cpu conditions met." cpuHealthRecord["NodeCpuHealthState"] = cpuHealthState cpuHealthRecord["NodeCpuUsagePercentage"] = cpuMetricPercentValue cpuHealthRecord["NodeCpuUsageMilliCores"] = cpuMetricValue @@ -122,6 +137,7 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue) cpuHealthRecord["PrevNodeCpuUsageDetails"] = {"Percent": @@previousCpuHealthDetails["CPUUsagePercentage"], "TimeStamp": @@previousCpuHealthDetails["Time"], "Millicores": @@previousCpuHealthDetails["CPUUsageMillicores"]} cpuHealthRecord["PrevPrevNodeCpuUsageDetails"] = {"Percent": @@previousPreviousCpuHealthDetails["CPUUsagePercentage"], "TimeStamp": @@previousPreviousCpuHealthDetails["Time"], "Millicores": @@previousPreviousCpuHealthDetails["CPUUsageMillicores"]} updateCpuHealthState = true + @@previousCpuHealthStateSent = cpuHealthState end @@previousPreviousCpuHealthDetails = @@previousCpuHealthDetails.clone @@previousCpuHealthDetails = currentCpuHealthDetails.clone @@ -129,6 +145,7 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue) @log.debug "cpu health record: #{cpuHealthRecord}" #healthRecords.push(cpuHealthRecord) @@nodeCpuHealthDataTimeTracker = DateTime.now.to_time.to_i + @log.debug "cpu record sent" return cpuHealthRecord else return nil @@ -139,7 +156,7 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue) end #def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentValue, healthRecords) - def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentValue) + def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentValue, host, timeStamp) begin @log.debug "memoryRssMetricValue: #{memoryRssMetricValue}" @log.debug "memoryRssMetricPercentValue: #{memoryRssMetricPercentValue}" @@ -151,7 +168,7 @@ def 
processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentVa memRssHealthRecord["ClusterName"] = @@clusterName memRssHealthRecord["ClusterId"] = @@clusterId memRssHealthRecord["ClusterRegion"] = @@clusterRegion - memRssHealthRecord["Computer"] = @@currentHealthMetrics["computer"] + memRssHealthRecord["Computer"] = host memoryRssHealthState = "" if memoryRssMetricPercentValue.to_f < 80.0 @@ -163,17 +180,21 @@ def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentVa memoryRssHealthState = "Warning" end currentMemoryRssHealthDetails["State"] = memoryRssHealthState - currentMemoryRssHealthDetails["Time"] = @@currentHealthMetrics["metricTime"] + currentMemoryRssHealthDetails["Time"] = timeStamp currentMemoryRssHealthDetails["memoryRssPercentage"] = memoryRssMetricPercentValue currentMemoryRssHealthDetails["memoryRssBytes"] = memoryRssMetricValue + updateMemoryRssHealthState = false currentTime = DateTime.now.to_time.to_i timeDifference = (currentTime - @@nodeMemoryRssDataTimeTracker).abs timeDifferenceInMinutes = timeDifference / 60 + @log.debug "processing memory metrics" - if (@@previousMemoryRssHealthDetails["State"].nil? || - ((memoryRssHealthState == @@previousMemoryRssHealthDetails["State"]) && (memoryRssHealthState == @@previousPreviousMemoryRssHealthDetails["State"])) || - timeDifferenceInMinutes > 50) + if ((memoryRssHealthState != @@previousMemoryRssHealthStateSent && + # @@previousMemoryRssHealthDetails["State"].nil? 
|| + ((memoryRssHealthState == @@previousMemoryRssHealthDetails["State"]) && (memoryRssHealthState == @@previousPreviousMemoryRssHealthDetails["State"]))) || + timeDifferenceInMinutes > 5) + @log.debug "memory conditions met" memRssHealthRecord["NodeMemoryRssHealthState"] = memoryRssHealthState memRssHealthRecord["NodeMemoryRssPercentage"] = memoryRssMetricPercentValue memRssHealthRecord["NodeMemoryRssBytes"] = memoryRssMetricValue @@ -182,6 +203,7 @@ def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentVa memRssHealthRecord["PrevNodeMemoryRssDetails"] = {"Percent": @@previousMemoryRssHealthDetails["memoryRssPercentage"], "TimeStamp": @@previousMemoryRssHealthDetails["Time"], "Bytes": @@previousMemoryRssHealthDetails["memoryRssBytes"]} memRssHealthRecord["PrevPrevNodeMemoryRssDetails"] = {"Percent": @@previousPreviousMemoryRssHealthDetails["memoryRssPercentage"], "TimeStamp": @@previousPreviousMemoryRssHealthDetails["Time"], "Bytes": @@previousPreviousMemoryRssHealthDetails["memoryRssBytes"]} updateMemoryRssHealthState = true + @@previousMemoryRssHealthStateSent = memoryRssHealthState end @@previousPreviousMemoryRssHealthDetails = @@previousMemoryRssHealthDetails.clone @@previousMemoryRssHealthDetails = currentMemoryRssHealthDetails.clone @@ -189,6 +211,7 @@ def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentVa @log.debug "memory health record: #{memRssHealthRecord}" # healthRecords.push(memRssHealthRecord) @@nodeMemoryRssDataTimeTracker = currentTime + @log.debug "memory record sent" return memRssHealthRecord else return nil @@ -198,16 +221,16 @@ def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentVa end end - def processHealthMetrics() - healthRecords = [] - cpuMetricPercentValue = @@currentHealthMetrics["cpuUsageNanoCoresPercentage"] - cpuMetricValue = @@currentHealthMetrics["cpuUsageNanoCores"] - memoryRssMetricPercentValue = @@currentHealthMetrics["memoryRssBytesPercentage"] - 
memoryRssMetricValue = @@currentHealthMetrics["memoryRssBytes"] - processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, healthRecords) - processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentValue, healthRecords) - return healthRecords - end + # def processHealthMetrics() + # healthRecords = [] + # cpuMetricPercentValue = @@currentHealthMetrics["cpuUsageNanoCoresPercentage"] + # cpuMetricValue = @@currentHealthMetrics["cpuUsageNanoCores"] + # memoryRssMetricPercentValue = @@currentHealthMetrics["memoryRssBytesPercentage"] + # memoryRssMetricValue = @@currentHealthMetrics["memoryRssBytes"] + # processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, healthRecords) + # processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentValue, healthRecords) + # return healthRecords + # end #def filter(tag, time, record) # Reading all the records to populate a hash for CPU and memory utilization percentages and values @@ -224,6 +247,8 @@ def processHealthMetrics() def filter(tag, time, record) object_name = record["DataItems"][0]["ObjectName"] counter_name = record["DataItems"][0]["Collections"][0]["CounterName"] + host = record["DataItems"][0]["Host"] + timeStamp = record["DataItems"][0]["Timestamp"] if object_name == @@object_name_k8s_node && @metrics_to_collect_hash.key?(counter_name.downcase) percentage_metric_value = 0.0 @@ -231,29 +256,29 @@ def filter(tag, time, record) begin metric_value = record["DataItems"][0]["Collections"][0]["Value"] if counter_name.downcase == @@cpu_usage_nano_cores - metric_name = @@cpu_usage_milli_cores + # metric_name = @@cpu_usage_milli_cores metric_value = metric_value / 1000000 - if @cpu_limit != 0.0 - percentage_metric_value = (metric_value * 1000000) * 100 / @cpu_limit + if @@cpu_limit != 0.0 + percentage_metric_value = (metric_value * 1000000) * 100 / @@cpu_limit end - return processCpuMetrics(metric_value, percentage_metric_value) + return processCpuMetrics(metric_value, percentage_metric_value, 
host, timeStamp) end - if counter_name.start_with?("memory") - metric_name = counter_name - if @memory_limit != 0.0 - percentage_metric_value = metric_value * 100 / @memory_limit + if counter_name.downcase == @@memory_rss_bytes + # metric_name = counter_name + if @@memory_limit != 0.0 + percentage_metric_value = metric_value * 100 / @@memory_limit end - return processMemoryRssMetrics(metric_value, percentage_metric_value) + return processMemoryRssHealthMetrics(metric_value, percentage_metric_value, host, timeStamp) end #return get_metric_records(record, metric_name, metric_value, percentage_metric_value) rescue Exception => e @log.info "Error parsing cadvisor record Exception: #{e.class} Message: #{e.message}" ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) - return [] + return nil end else - return [] + return nil end end From 7bd5eac432e5c4f66524aef960f2625efebe80f9 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 8 Feb 2019 19:35:34 -0800 Subject: [PATCH 03/90] changes --- source/code/plugin/filter_health_cpu_memory.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/code/plugin/filter_health_cpu_memory.rb b/source/code/plugin/filter_health_cpu_memory.rb index ab6a1c204..8249c2ef2 100644 --- a/source/code/plugin/filter_health_cpu_memory.rb +++ b/source/code/plugin/filter_health_cpu_memory.rb @@ -127,7 +127,7 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, host, timeStamp) if ((cpuHealthState != @@previousCpuHealthStateSent && #@@previousCpuHealthDetails["State"].nil? || ((cpuHealthState == @@previousCpuHealthDetails["State"]) && (cpuHealthState == @@previousPreviousCpuHealthDetails["State"]))) || - timeDifferenceInMinutes > 5) + timeDifferenceInMinutes > 50) @log.debug "cpu conditions met." 
cpuHealthRecord["NodeCpuHealthState"] = cpuHealthState cpuHealthRecord["NodeCpuUsagePercentage"] = cpuMetricPercentValue @@ -144,7 +144,7 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, host, timeStamp) if updateCpuHealthState @log.debug "cpu health record: #{cpuHealthRecord}" #healthRecords.push(cpuHealthRecord) - @@nodeCpuHealthDataTimeTracker = DateTime.now.to_time.to_i + @@nodeCpuHealthDataTimeTracker = currentTime @log.debug "cpu record sent" return cpuHealthRecord else @@ -193,7 +193,7 @@ def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentVa if ((memoryRssHealthState != @@previousMemoryRssHealthStateSent && # @@previousMemoryRssHealthDetails["State"].nil? || ((memoryRssHealthState == @@previousMemoryRssHealthDetails["State"]) && (memoryRssHealthState == @@previousPreviousMemoryRssHealthDetails["State"]))) || - timeDifferenceInMinutes > 5) + timeDifferenceInMinutes > 50) @log.debug "memory conditions met" memRssHealthRecord["NodeMemoryRssHealthState"] = memoryRssHealthState memRssHealthRecord["NodeMemoryRssPercentage"] = memoryRssMetricPercentValue From 6f62c6c8394588b8b6e9649e73aa8de6587e9a01 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 8 Feb 2019 20:03:16 -0800 Subject: [PATCH 04/90] changes --- installer/conf/container.conf | 14 ++- source/code/plugin/in_cadvisor_perf.rb | 152 ++++++++++++------------- 2 files changed, 88 insertions(+), 78 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index e0f52ce3a..4f6cfa7e1 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -23,11 +23,23 @@ log_level debug - + type filter_health_cpu_memory log_level debug + + type out_oms_api + log_level debug + buffer_chunk_limit 10m + buffer_type file + buffer_path /var/opt/microsoft/omsagent/b539903f-ee4d-424f-b48f-a6410acd9025/state/out_oms_api_health_perf*.buffer + buffer_queue_limit 10 + flush_interval 20s + retry_limit 10 + retry_wait 30s + + type out_oms 
log_level debug diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index 1a50d1c64..22c887709 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -2,91 +2,89 @@ # frozen_string_literal: true module Fluent - - class CAdvisor_Perf_Input < Input - Plugin.register_input('cadvisorperf', self) - - def initialize - super - require 'yaml' - require 'json' - - require_relative 'CAdvisorMetricsAPIClient' - require_relative 'oms_common' - require_relative 'omslog' - end - - config_param :run_interval, :time, :default => '1m' - config_param :tag, :string, :default => "oms.api.cadvisorperf" - config_param :healthtag, :string, :default => "oms.health.cadvisorperf" - - def configure (conf) - super + class CAdvisor_Perf_Input < Input + Plugin.register_input("cadvisorperf", self) + + def initialize + super + require "yaml" + require "json" + + require_relative "CAdvisorMetricsAPIClient" + require_relative "oms_common" + require_relative "omslog" + end + + config_param :run_interval, :time, :default => "1m" + config_param :tag, :string, :default => "oms.api.cadvisorperf" + config_param :healthtag, :string, :default => "oms.api.health_perf" + + def configure(conf) + super + end + + def start + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) end - - def start - if @run_interval - @finished = false - @condition = ConditionVariable.new - @mutex = Mutex.new - @thread = Thread.new(&method(:run_periodic)) - end + end + + def shutdown + if @run_interval + @mutex.synchronize { + @finished = true + @condition.signal + } + @thread.join end - - def shutdown - if @run_interval - @mutex.synchronize { - @finished = true - @condition.signal - } - @thread.join + end + + def enumerate() + time = Time.now.to_f + begin + eventStream = MultiEventStream.new + metricData = CAdvisorMetricsAPIClient.getMetrics() + 
metricData.each do |record| + record["DataType"] = "LINUX_PERF_BLOB" + record["IPName"] = "LogManagement" + eventStream.add(time, record) if record + #router.emit(@tag, time, record) if record end - end - - def enumerate() - time = Time.now.to_f - begin - eventStream = MultiEventStream.new - metricData = CAdvisorMetricsAPIClient.getMetrics() - metricData.each do |record| - record['DataType'] = "LINUX_PERF_BLOB" - record['IPName'] = "LogManagement" - eventStream.add(time, record) if record - #router.emit(@tag, time, record) if record - end - - router.emit_stream(@tag, eventStream) if eventStream - router.emit_stream(@healthtag, eventStream) if eventStream - @@istestvar = ENV['ISTEST'] - if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) - $log.info("cAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") - end - rescue => errorStr - $log.warn "Failed to retrieve cadvisor metric data: #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) + router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@healthtag, eventStream) if eventStream + + @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && eventStream.count > 0) + $log.info("cAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") end + rescue => errorStr + $log.warn "Failed to retrieve cadvisor metric data: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) end - - def run_periodic - @mutex.lock + end + + def run_periodic + @mutex.lock + done = @finished + until done + @condition.wait(@mutex, @run_interval) done = @finished - until done - @condition.wait(@mutex, @run_interval) - done = @finished - @mutex.unlock - if !done - begin - $log.info("in_cadvisor_perf::run_periodic @ #{Time.now.utc.iso8601}") - enumerate - rescue => errorStr - $log.warn "in_cadvisor_perf::run_periodic: enumerate Failed to retrieve cadvisor perf metrics: #{errorStr}" - end + @mutex.unlock + if !done + begin + $log.info("in_cadvisor_perf::run_periodic @ #{Time.now.utc.iso8601}") + enumerate + rescue => errorStr + $log.warn "in_cadvisor_perf::run_periodic: enumerate Failed to retrieve cadvisor perf metrics: #{errorStr}" end - @mutex.lock end - @mutex.unlock + @mutex.lock end - end # CAdvisor_Perf_Input + @mutex.unlock + end + end # CAdvisor_Perf_Input end # module - From 109ee9da22195d5beb0d99c46c5545cd5c4ef4a7 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 8 Feb 2019 20:18:11 -0800 Subject: [PATCH 05/90] changes --- installer/conf/container.conf | 6 +++--- source/code/plugin/in_cadvisor_perf.rb | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 4f6cfa7e1..351176923 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -23,17 +23,17 @@ log_level debug - + type filter_health_cpu_memory log_level debug - + type out_oms_api log_level debug buffer_chunk_limit 10m buffer_type file - buffer_path /var/opt/microsoft/omsagent/b539903f-ee4d-424f-b48f-a6410acd9025/state/out_oms_api_health_perf*.buffer + buffer_path 
/var/opt/microsoft/omsagent/b539903f-ee4d-424f-b48f-a6410acd9025/state/out_oms_api_ci_health_perf*.buffer buffer_queue_limit 10 flush_interval 20s retry_limit 10 diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index 22c887709..4678d8bcd 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -17,7 +17,7 @@ def initialize config_param :run_interval, :time, :default => "1m" config_param :tag, :string, :default => "oms.api.cadvisorperf" - config_param :healthtag, :string, :default => "oms.api.health_perf" + config_param :healthtag, :string, :default => "oms.api.ci_health_perf" def configure(conf) super From 8d585fd06372ad6c190d3487295c8b44838d0519 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 8 Feb 2019 20:37:22 -0800 Subject: [PATCH 06/90] changes --- installer/conf/container.conf | 6 +++--- source/code/plugin/in_cadvisor_perf.rb | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 351176923..749fdb18e 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -23,17 +23,17 @@ log_level debug - + type filter_health_cpu_memory log_level debug - + type out_oms_api log_level debug buffer_chunk_limit 10m buffer_type file - buffer_path /var/opt/microsoft/omsagent/b539903f-ee4d-424f-b48f-a6410acd9025/state/out_oms_api_ci_health_perf*.buffer + buffer_path /var/opt/microsoft/omsagent/b539903f-ee4d-424f-b48f-a6410acd9025/state/out_oms_api_CIHealthPerf*.buffer buffer_queue_limit 10 flush_interval 20s retry_limit 10 diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index 4678d8bcd..c6b9bc2ee 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -17,7 +17,7 @@ def initialize config_param :run_interval, :time, :default => "1m" config_param :tag, :string, :default => "oms.api.cadvisorperf" - 
config_param :healthtag, :string, :default => "oms.api.ci_health_perf" + config_param :healthtag, :string, :default => "oms.api.CIHealthPerf" def configure(conf) super From f6b1e023fe0613f2f3d49f23b084fd1d5658da11 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 11 Feb 2019 17:15:49 -0800 Subject: [PATCH 07/90] changes --- .../code/plugin/filter_health_cpu_memory.rb | 104 +++++++----------- 1 file changed, 38 insertions(+), 66 deletions(-) diff --git a/source/code/plugin/filter_health_cpu_memory.rb b/source/code/plugin/filter_health_cpu_memory.rb index 8249c2ef2..4be2f3e7a 100644 --- a/source/code/plugin/filter_health_cpu_memory.rb +++ b/source/code/plugin/filter_health_cpu_memory.rb @@ -14,6 +14,12 @@ class CPUMemoryHealthFilter < Filter config_param :log_path, :string, :default => "/var/opt/microsoft/omsagent/log/filter_health_cpu_memory.log" config_param :metrics_to_collect, :string, :default => "cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes" + @@HealthConfigFile = "/var/opt/microsoft/docker-cimprov/healthConfig/CpuMemory/config" + # Setting the memory and cpu pass and fail percentages to default values + @@memoryPassPercentage = 80.0 + @@memoryFailPercentage = 90.0 + @@cpuPassPercentage = 80.0 + @@cpuFailPercentage = 90.0 @@previousCpuHealthDetails = {} @@previousPreviousCpuHealthDetails = {} @@previousCpuHealthStateSent = "" @@ -26,7 +32,6 @@ class CPUMemoryHealthFilter < Filter @@clusterName = KubernetesApiClient.getClusterName @@clusterId = KubernetesApiClient.getClusterId @@clusterRegion = KubernetesApiClient.getClusterRegion - # @@cpu_usage_milli_cores = "cpuUsageMilliCores" @@cpu_usage_nano_cores = "cpuusagenanocores" @@memory_rss_bytes = "memoryrssbytes" @@object_name_k8s_node = "K8SNode" @@ -77,6 +82,34 @@ def start @log.info "Error getting memory_limit" end end + # Read config information for cpu and memory limits. + begin + healthConfigObject = nil + file = File.open(@@HealthConfigFile, "r") + if !file.nil? 
+ fileContents = file.read + healthConfigObject = JSON.parse(fileContents) + file.close + if !healthConfigObject.nil? + if !healthConfigObject.memoryPassPercentage.nil? && healthConfigObject.memoryPassPercentage.is_a?(Numeric) + @@memoryPassPercentage = healthConfigObject.memoryPassPercentage + end + if !healthConfigObject.memoryFailPercentage.nil? && healthConfigObject.memoryFailPercentage.is_a?(Numeric) + @@memoryFailPercentage = healthConfigObject.memoryFailPercentage + end + if !healthConfigObject.cpuPassPercentage.nil? && healthConfigObject.cpuPassPercentage.is_a?(Numeric) + @@cpuPassPercentage = healthConfigObject.cpuPassPercentage + end + if !healthConfigObject.cpuFailPercentage.nil? && healthConfigObject.cpuFailPercentage.is_a?(Numeric) + @@cpuFailPercentage = healthConfigObject.cpuFailPercentage + end + end + else + @log.warn "Failed to open file at location #{@@HealthConfigFile} to read health config, using defaults" + end + rescue => errorStr + @log.debug "Exception occured while reading config file at location #{@@HealthConfigFile}, error: #{errorStr}" + end end def shutdown @@ -91,12 +124,10 @@ def build_metrics_hash return metrics_hash end - #def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, healthRecords) def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, host, timeStamp) begin @log.debug "cpuMetricValue: #{cpuMetricValue}" @log.debug "cpuMetricPercentValue: #{cpuMetricPercentValue}" - #@log.debug "healthRecords: #{healthRecords}" # Get node CPU usage health updateCpuHealthState = false cpuHealthRecord = {} @@ -106,10 +137,9 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, host, timeStamp) cpuHealthRecord["ClusterRegion"] = @@clusterRegion cpuHealthRecord["Computer"] = host cpuHealthState = "" - if cpuMetricPercentValue.to_f < 80.0 - #nodeCpuHealthState = 'Pass' + if cpuMetricPercentValue.to_f < @@cpuPassPercentage cpuHealthState = "Pass" - elsif cpuMetricPercentValue.to_f > 90.0 + elsif cpuMetricPercentValue.to_f 
> @@cpuFailPercentage cpuHealthState = "Fail" else cpuHealthState = "Warning" @@ -125,14 +155,12 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, host, timeStamp) @log.debug "processing cpu metrics" if ((cpuHealthState != @@previousCpuHealthStateSent && - #@@previousCpuHealthDetails["State"].nil? || ((cpuHealthState == @@previousCpuHealthDetails["State"]) && (cpuHealthState == @@previousPreviousCpuHealthDetails["State"]))) || timeDifferenceInMinutes > 50) @log.debug "cpu conditions met." cpuHealthRecord["NodeCpuHealthState"] = cpuHealthState cpuHealthRecord["NodeCpuUsagePercentage"] = cpuMetricPercentValue cpuHealthRecord["NodeCpuUsageMilliCores"] = cpuMetricValue - #healthRecord['TimeStateDetected'] = @@previousPreviousCpuHealthDetails['Time'] cpuHealthRecord["CollectionTime"] = @@previousPreviousCpuHealthDetails["Time"] cpuHealthRecord["PrevNodeCpuUsageDetails"] = {"Percent": @@previousCpuHealthDetails["CPUUsagePercentage"], "TimeStamp": @@previousCpuHealthDetails["Time"], "Millicores": @@previousCpuHealthDetails["CPUUsageMillicores"]} cpuHealthRecord["PrevPrevNodeCpuUsageDetails"] = {"Percent": @@previousPreviousCpuHealthDetails["CPUUsagePercentage"], "TimeStamp": @@previousPreviousCpuHealthDetails["Time"], "Millicores": @@previousPreviousCpuHealthDetails["CPUUsageMillicores"]} @@ -143,7 +171,6 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, host, timeStamp) @@previousCpuHealthDetails = currentCpuHealthDetails.clone if updateCpuHealthState @log.debug "cpu health record: #{cpuHealthRecord}" - #healthRecords.push(cpuHealthRecord) @@nodeCpuHealthDataTimeTracker = currentTime @log.debug "cpu record sent" return cpuHealthRecord @@ -155,12 +182,10 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, host, timeStamp) end end - #def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentValue, healthRecords) def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentValue, host, timeStamp) 
begin @log.debug "memoryRssMetricValue: #{memoryRssMetricValue}" @log.debug "memoryRssMetricPercentValue: #{memoryRssMetricPercentValue}" - #@log.debug "healthRecords: #{healthRecords}" # Get node memory RSS health memRssHealthRecord = {} @@ -171,10 +196,9 @@ def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentVa memRssHealthRecord["Computer"] = host memoryRssHealthState = "" - if memoryRssMetricPercentValue.to_f < 80.0 - #nodeCpuHealthState = 'Pass' + if memoryRssMetricPercentValue.to_f < @@memoryPassPercentage memoryRssHealthState = "Pass" - elsif memoryRssMetricPercentValue.to_f > 90.0 + elsif memoryRssMetricPercentValue.to_f > @@memoryFailPercentage memoryRssHealthState = "Fail" else memoryRssHealthState = "Warning" @@ -191,14 +215,12 @@ def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentVa @log.debug "processing memory metrics" if ((memoryRssHealthState != @@previousMemoryRssHealthStateSent && - # @@previousMemoryRssHealthDetails["State"].nil? 
|| ((memoryRssHealthState == @@previousMemoryRssHealthDetails["State"]) && (memoryRssHealthState == @@previousPreviousMemoryRssHealthDetails["State"]))) || timeDifferenceInMinutes > 50) @log.debug "memory conditions met" memRssHealthRecord["NodeMemoryRssHealthState"] = memoryRssHealthState memRssHealthRecord["NodeMemoryRssPercentage"] = memoryRssMetricPercentValue memRssHealthRecord["NodeMemoryRssBytes"] = memoryRssMetricValue - #healthRecord['TimeStateDetected'] = @@previousPreviousCpuHealthDetails['Time'] memRssHealthRecord["CollectionTime"] = @@previousPreviousMemoryRssHealthDetails["Time"] memRssHealthRecord["PrevNodeMemoryRssDetails"] = {"Percent": @@previousMemoryRssHealthDetails["memoryRssPercentage"], "TimeStamp": @@previousMemoryRssHealthDetails["Time"], "Bytes": @@previousMemoryRssHealthDetails["memoryRssBytes"]} memRssHealthRecord["PrevPrevNodeMemoryRssDetails"] = {"Percent": @@previousPreviousMemoryRssHealthDetails["memoryRssPercentage"], "TimeStamp": @@previousPreviousMemoryRssHealthDetails["Time"], "Bytes": @@previousPreviousMemoryRssHealthDetails["memoryRssBytes"]} @@ -209,7 +231,6 @@ def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentVa @@previousMemoryRssHealthDetails = currentMemoryRssHealthDetails.clone if updateMemoryRssHealthState @log.debug "memory health record: #{memRssHealthRecord}" - # healthRecords.push(memRssHealthRecord) @@nodeMemoryRssDataTimeTracker = currentTime @log.debug "memory record sent" return memRssHealthRecord @@ -221,29 +242,6 @@ def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentVa end end - # def processHealthMetrics() - # healthRecords = [] - # cpuMetricPercentValue = @@currentHealthMetrics["cpuUsageNanoCoresPercentage"] - # cpuMetricValue = @@currentHealthMetrics["cpuUsageNanoCores"] - # memoryRssMetricPercentValue = @@currentHealthMetrics["memoryRssBytesPercentage"] - # memoryRssMetricValue = @@currentHealthMetrics["memoryRssBytes"] - # 
processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, healthRecords) - # processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentValue, healthRecords) - # return healthRecords - # end - - #def filter(tag, time, record) - # Reading all the records to populate a hash for CPU and memory utilization percentages and values - # @@currentHealthMetrics[record['data']['baseData']['metric']] = record['data']['baseData']['series'][0]['min'] - # if !(@@currentHealthMetrics.has_key?("metricTime")) - # @@currentHealthMetrics['metricTime'] = record['time'] - # end - # if !(@@currentHealthMetrics.has_key?("computer")) - # @@currentHealthMetrics['computer'] = record['data']['baseData']['series'][0]['dimValues'][0] - # end - # return nil - #end - def filter(tag, time, record) object_name = record["DataItems"][0]["ObjectName"] counter_name = record["DataItems"][0]["Collections"][0]["CounterName"] @@ -256,7 +254,6 @@ def filter(tag, time, record) begin metric_value = record["DataItems"][0]["Collections"][0]["Value"] if counter_name.downcase == @@cpu_usage_nano_cores - # metric_name = @@cpu_usage_milli_cores metric_value = metric_value / 1000000 if @@cpu_limit != 0.0 percentage_metric_value = (metric_value * 1000000) * 100 / @@cpu_limit @@ -265,13 +262,11 @@ def filter(tag, time, record) end if counter_name.downcase == @@memory_rss_bytes - # metric_name = counter_name if @@memory_limit != 0.0 percentage_metric_value = metric_value * 100 / @@memory_limit end return processMemoryRssHealthMetrics(metric_value, percentage_metric_value, host, timeStamp) end - #return get_metric_records(record, metric_name, metric_value, percentage_metric_value) rescue Exception => e @log.info "Error parsing cadvisor record Exception: #{e.class} Message: #{e.message}" ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) @@ -282,35 +277,12 @@ def filter(tag, time, record) end end - #def filter_stream(tag, es) - #health_es = MultiEventStream.new - #timeFromEventStream = 
DateTime.now.to_time.to_i - #begin - #es.each { |time, record| - # filter(tag, time, record) - # if !timeFromEventStream.nil? - # timeFromEventStream = time - # end - #} - #healthRecords = processHealthMetrics - #healthRecords.each {|healthRecord| - # health_es.add(timeFromEventStream, healthRecord) if healthRecord - # router.emit_stream('oms.rashmi', health_es) if health_es - #} if healthRecords - #rescue => e - # @log.debug "exception: #{e}" - #end - # es - # end - def filter_stream(tag, es) new_es = MultiEventStream.new es.each { |time, record| begin filtered_record = filter(tag, time, record) - # filtered_records.each { |filtered_record| new_es.add(time, filtered_record) if filtered_record - # } if filtered_records rescue => e router.emit_error_event(tag, time, record, e) end From c19eb1d9fdec135de10be8436bffad6a879dbd85 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 11 Feb 2019 17:38:16 -0800 Subject: [PATCH 08/90] changes --- .../code/plugin/filter_health_cpu_memory.rb | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/source/code/plugin/filter_health_cpu_memory.rb b/source/code/plugin/filter_health_cpu_memory.rb index 4be2f3e7a..ae5320d9f 100644 --- a/source/code/plugin/filter_health_cpu_memory.rb +++ b/source/code/plugin/filter_health_cpu_memory.rb @@ -91,18 +91,24 @@ def start healthConfigObject = JSON.parse(fileContents) file.close if !healthConfigObject.nil? - if !healthConfigObject.memoryPassPercentage.nil? && healthConfigObject.memoryPassPercentage.is_a?(Numeric) - @@memoryPassPercentage = healthConfigObject.memoryPassPercentage + memPassPercent = healthConfigObject["memoryPassPercentage"] + memFailPercent = healthConfigObject["memoryFailPercentage"] + cpuPassPercent = healthConfigObject["cpuPassPercentage"] + cpuFailPercent = healthConfigObject["cpuFailPercentage"] + + if !memPassPercent.nil? 
&& memPassPercent.is_a?(Numeric) + @@memoryPassPercentage = memPassPercent end - if !healthConfigObject.memoryFailPercentage.nil? && healthConfigObject.memoryFailPercentage.is_a?(Numeric) - @@memoryFailPercentage = healthConfigObject.memoryFailPercentage + if !memFailPercent.nil? && memFailPercent.is_a?(Numeric) + @@memoryFailPercentage = memFailPercent end - if !healthConfigObject.cpuPassPercentage.nil? && healthConfigObject.cpuPassPercentage.is_a?(Numeric) - @@cpuPassPercentage = healthConfigObject.cpuPassPercentage + if !cpuPassPercent.nil? && cpuPassPercent.is_a?(Numeric) + @@cpuPassPercentage = cpuPassPercent end - if !healthConfigObject.cpuFailPercentage.nil? && healthConfigObject.cpuFailPercentage.is_a?(Numeric) - @@cpuFailPercentage = healthConfigObject.cpuFailPercentage + if !cpuFailPercent.nil? && cpuFailPercent.is_a?(Numeric) + @@cpuFailPercentage = cpuFailPercent end + @log.info "Successfully read config values from file, using values for cpu and memory health." end else @log.warn "Failed to open file at location #{@@HealthConfigFile} to read health config, using defaults" From fd3cfb563b50a8e40a185840cf3e2d3337a72f52 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 11 Feb 2019 21:37:34 -0800 Subject: [PATCH 09/90] changes --- installer/conf/container.conf | 21 +- installer/conf/kube.conf | 20 + source/code/plugin/KubernetesApiClient.rb | 906 +++++++++++----------- source/code/plugin/in_health_docker.rb | 117 +++ source/code/plugin/in_health_kubelet.rb | 141 ++++ 5 files changed, 754 insertions(+), 451 deletions(-) create mode 100644 source/code/plugin/in_health_docker.rb create mode 100644 source/code/plugin/in_health_kubelet.rb diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 749fdb18e..08dc3bd0c 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -23,6 +23,13 @@ log_level debug + + type dockerhealth + tag oms.api.DockerHealth + run_interval 60s + log_level debug + + type 
filter_health_cpu_memory log_level debug @@ -33,7 +40,7 @@ log_level debug buffer_chunk_limit 10m buffer_type file - buffer_path /var/opt/microsoft/omsagent/b539903f-ee4d-424f-b48f-a6410acd9025/state/out_oms_api_CIHealthPerf*.buffer + buffer_path %STATE_DIR_WS%/out_oms_api_CIHealthPerf*.buffer buffer_queue_limit 10 flush_interval 20s retry_limit 10 @@ -69,3 +76,15 @@ retry_wait 30s max_retry_wait 9m + + + type out_oms_api + log_level debug + buffer_chunk_limit 10m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_api_docker_health*.buffer + buffer_queue_limit 10 + flush_interval 20s + retry_limit 10 + retry_wait 30s + diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index d0ef0517d..2b7689669 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -47,6 +47,14 @@ log_level debug + #Kubelet health + + type kubelethealth + tag oms.api.KubeletHealth + run_interval 60s + log_level debug + + type out_oms log_level debug @@ -145,4 +153,16 @@ retry_limit 10 retry_wait 30s max_retry_wait 9m + + + + type out_oms_api + log_level debug + buffer_chunk_limit 10m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_api_kubelet_health*.buffer + buffer_queue_limit 10 + flush_interval 20s + retry_limit 10 + retry_wait 30s \ No newline at end of file diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index a1e143b15..2cd8cc697 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -2,474 +2,480 @@ # frozen_string_literal: true class KubernetesApiClient + require "json" + require "logger" + require "net/http" + require "net/https" + require "uri" + require "time" - require 'json' - require 'logger' - require 'net/http' - require 'net/https' - require 'uri' - require 'time' - - require_relative 'oms_common' - - @@ApiVersion = "v1" - @@CaFile = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - @@ClusterName = nil - @@ClusterId = nil - 
@@IsNodeMaster = nil - #@@IsValidRunningNode = nil - #@@IsLinuxCluster = nil - @@KubeSystemNamespace = "kube-system" - @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt" - @Log = Logger.new(@LogPath, 2, 10*1048576) #keep last 2 files, max log file size = 10M - @@TokenFileName = "/var/run/secrets/kubernetes.io/serviceaccount/token" - @@TokenStr = nil - @@NodeMetrics = Hash.new - - def initialize + require_relative "oms_common" + + @@ApiVersion = "v1" + @@CaFile = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + @@ClusterName = nil + @@ClusterId = nil + @@IsNodeMaster = nil + #@@IsValidRunningNode = nil + #@@IsLinuxCluster = nil + @@KubeSystemNamespace = "kube-system" + @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt" + @Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M + @@TokenFileName = "/var/run/secrets/kubernetes.io/serviceaccount/token" + @@TokenStr = nil + @@NodeMetrics = Hash.new + + def initialize + end + + class << self + def getKubeResourceInfo(resource) + headers = {} + response = nil + @Log.info "Getting Kube resource" + @Log.info resource + begin + resourceUri = getResourceUri(resource) + if !resourceUri.nil? 
+ uri = URI.parse(resourceUri) + http = Net::HTTP.new(uri.host, uri.port) + http.use_ssl = true + if !File.exist?(@@CaFile) + raise "#{@@CaFile} doesnt exist" + else + http.ca_file = @@CaFile if File.exist?(@@CaFile) + end + http.verify_mode = OpenSSL::SSL::VERIFY_PEER + + kubeApiRequest = Net::HTTP::Get.new(uri.request_uri) + kubeApiRequest["Authorization"] = "Bearer " + getTokenStr + @Log.info "KubernetesAPIClient::getKubeResourceInfo : Making request to #{uri.request_uri} @ #{Time.now.utc.iso8601}" + response = http.request(kubeApiRequest) + @Log.info "KubernetesAPIClient::getKubeResourceInfo : Got response of #{response.code} for #{uri.request_uri} @ #{Time.now.utc.iso8601}" end + rescue => error + @Log.warn("kubernetes api request failed: #{error} for #{resource} @ #{Time.now.utc.iso8601}") + end + if (response.body.empty?) + @Log.warn("KubernetesAPIClient::getKubeResourceInfo : Got empty response from Kube API for #{resource} @ #{Time.now.utc.iso8601}") + end + return response + end - class << self - def getKubeResourceInfo(resource) - headers = {} - response = nil - @Log.info 'Getting Kube resource' - @Log.info resource - begin - resourceUri = getResourceUri(resource) - if !resourceUri.nil? 
- uri = URI.parse(resourceUri) - http = Net::HTTP.new(uri.host, uri.port) - http.use_ssl = true - if !File.exist?(@@CaFile) - raise "#{@@CaFile} doesnt exist" - else - http.ca_file = @@CaFile if File.exist?(@@CaFile) - end - http.verify_mode = OpenSSL::SSL::VERIFY_PEER - - kubeApiRequest = Net::HTTP::Get.new(uri.request_uri) - kubeApiRequest['Authorization'] = "Bearer " + getTokenStr - @Log.info "KubernetesAPIClient::getKubeResourceInfo : Making request to #{uri.request_uri} @ #{Time.now.utc.iso8601}" - response = http.request(kubeApiRequest) - @Log.info "KubernetesAPIClient::getKubeResourceInfo : Got response of #{response.code} for #{uri.request_uri} @ #{Time.now.utc.iso8601}" - end - rescue => error - @Log.warn("kubernetes api request failed: #{error} for #{resource} @ #{Time.now.utc.iso8601}") - end - if (response.body.empty?) - @Log.warn("KubernetesAPIClient::getKubeResourceInfo : Got empty response from Kube API for #{resource} @ #{Time.now.utc.iso8601}") - end - return response - end + def getTokenStr + return @@TokenStr if !@@TokenStr.nil? + begin + if File.exist?(@@TokenFileName) && File.readable?(@@TokenFileName) + @@TokenStr = File.read(@@TokenFileName).strip + return @@TokenStr + else + @Log.warn("Unable to read token string from #{@@TokenFileName}: #{error}") + return nil + end + end + end - def getTokenStr - return @@TokenStr if !@@TokenStr.nil? - begin - if File.exist?(@@TokenFileName) && File.readable?(@@TokenFileName) - @@TokenStr = File.read(@@TokenFileName).strip - return @@TokenStr - else - @Log.warn("Unable to read token string from #{@@TokenFileName}: #{error}") - return nil - end - end - end + def getClusterRegion + if ENV["AKS_REGION"] + return ENV["AKS_REGION"] + else + @Log.warn ("Kubernetes environment variable not set AKS_REGION. 
Unable to get cluster region.") + return nil + end + end - def getResourceUri(resource) - begin - if ENV['KUBERNETES_SERVICE_HOST'] && ENV['KUBERNETES_PORT_443_TCP_PORT'] - return "https://#{ENV['KUBERNETES_SERVICE_HOST']}:#{ENV['KUBERNETES_PORT_443_TCP_PORT']}/api/" + @@ApiVersion + "/" + resource - else - @Log.warn ("Kubernetes environment variable not set KUBERNETES_SERVICE_HOST: #{ENV['KUBERNETES_SERVICE_HOST']} KUBERNETES_PORT_443_TCP_PORT: #{ENV['KUBERNETES_PORT_443_TCP_PORT']}. Unable to form resourceUri") - return nil - end - end - end + def getResourceUri(resource) + begin + if ENV["KUBERNETES_SERVICE_HOST"] && ENV["KUBERNETES_PORT_443_TCP_PORT"] + return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}/api/" + @@ApiVersion + "/" + resource + else + @Log.warn ("Kubernetes environment variable not set KUBERNETES_SERVICE_HOST: #{ENV["KUBERNETES_SERVICE_HOST"]} KUBERNETES_PORT_443_TCP_PORT: #{ENV["KUBERNETES_PORT_443_TCP_PORT"]}. Unable to form resourceUri") + return nil + end + end + end - def getClusterName - return @@ClusterName if !@@ClusterName.nil? - @@ClusterName = "None" - begin - #try getting resource ID for aks - cluster = ENV['AKS_RESOURCE_ID'] - if cluster && !cluster.nil? && !cluster.empty? - @@ClusterName = cluster.split("/").last - else - cluster = ENV['ACS_RESOURCE_NAME'] - if cluster && !cluster.nil? && !cluster.empty? - @@ClusterName = cluster - else - kubesystemResourceUri = "namespaces/" + @@KubeSystemNamespace + "/pods" - @Log.info("KubernetesApiClient::getClusterName : Getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) - @Log.info("KubernetesApiClient::getClusterName : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInfo['items'].each do |items| - if items['metadata']['name'].include? "kube-controller-manager" - items['spec']['containers'][0]['command'].each do |command| - if command.include? 
"--cluster-name" - @@ClusterName = command.split('=')[1] - end - end - end - end - end - end - rescue => error - @Log.warn("getClusterName failed: #{error}") + def getClusterName + return @@ClusterName if !@@ClusterName.nil? + @@ClusterName = "None" + begin + #try getting resource ID for aks + cluster = ENV["AKS_RESOURCE_ID"] + if cluster && !cluster.nil? && !cluster.empty? + @@ClusterName = cluster.split("/").last + else + cluster = ENV["ACS_RESOURCE_NAME"] + if cluster && !cluster.nil? && !cluster.empty? + @@ClusterName = cluster + else + kubesystemResourceUri = "namespaces/" + @@KubeSystemNamespace + "/pods" + @Log.info("KubernetesApiClient::getClusterName : Getting pods from Kube API @ #{Time.now.utc.iso8601}") + podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) + @Log.info("KubernetesApiClient::getClusterName : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") + podInfo["items"].each do |items| + if items["metadata"]["name"].include? "kube-controller-manager" + items["spec"]["containers"][0]["command"].each do |command| + if command.include? "--cluster-name" + @@ClusterName = command.split("=")[1] + end end - return @@ClusterName + end end + end + end + rescue => error + @Log.warn("getClusterName failed: #{error}") + end + return @@ClusterName + end - def getClusterId - return @@ClusterId if !@@ClusterId.nil? - #By default initialize ClusterId to ClusterName. - # In ACS/On-prem, we need to figure out how we can generate ClusterId - @@ClusterId = getClusterName - begin - cluster = ENV['AKS_RESOURCE_ID'] - if cluster && !cluster.nil? && !cluster.empty? - @@ClusterId = cluster - end - rescue => error - @Log.warn("getClusterId failed: #{error}") - end - return @@ClusterId - end + def getClusterId + return @@ClusterId if !@@ClusterId.nil? + #By default initialize ClusterId to ClusterName. 
+ # In ACS/On-prem, we need to figure out how we can generate ClusterId + @@ClusterId = getClusterName + begin + cluster = ENV["AKS_RESOURCE_ID"] + if cluster && !cluster.nil? && !cluster.empty? + @@ClusterId = cluster + end + rescue => error + @Log.warn("getClusterId failed: #{error}") + end + return @@ClusterId + end - def isNodeMaster - return @@IsNodeMaster if !@@IsNodeMaster.nil? - @@IsNodeMaster = false - begin - @Log.info("KubernetesApiClient::isNodeMaster : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - allNodesInfo = JSON.parse(getKubeResourceInfo('nodes').body) - @Log.info("KubernetesApiClient::isNodeMaster : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") - if !allNodesInfo.nil? && !allNodesInfo.empty? - thisNodeName = OMS::Common.get_hostname - allNodesInfo['items'].each do |item| - if item['metadata']['name'].casecmp(thisNodeName) == 0 - if item['metadata']['labels']["kubernetes.io/role"].to_s.include?("master") || item['metadata']['labels']["role"].to_s.include?("master") - @@IsNodeMaster = true - end - break - end - end - end - rescue => error - @Log.warn("KubernetesApiClient::isNodeMaster : node role request failed: #{error}") - end - - return @@IsNodeMaster + def isNodeMaster + return @@IsNodeMaster if !@@IsNodeMaster.nil? + @@IsNodeMaster = false + begin + @Log.info("KubernetesApiClient::isNodeMaster : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") + allNodesInfo = JSON.parse(getKubeResourceInfo("nodes").body) + @Log.info("KubernetesApiClient::isNodeMaster : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + if !allNodesInfo.nil? && !allNodesInfo.empty? 
+ thisNodeName = OMS::Common.get_hostname + allNodesInfo["items"].each do |item| + if item["metadata"]["name"].casecmp(thisNodeName) == 0 + if item["metadata"]["labels"]["kubernetes.io/role"].to_s.include?("master") || item["metadata"]["labels"]["role"].to_s.include?("master") + @@IsNodeMaster = true + end + break end + end + end + rescue => error + @Log.warn("KubernetesApiClient::isNodeMaster : node role request failed: #{error}") + end - #def isValidRunningNode - # return @@IsValidRunningNode if !@@IsValidRunningNode.nil? - # @@IsValidRunningNode = false - # begin - # thisNodeName = OMS::Common.get_hostname - # if isLinuxCluster - # # Run on agent node [0] - # @@IsValidRunningNode = !isNodeMaster && thisNodeName.to_s.split('-').last == '0' - # else - # # Run on master node [0] - # @@IsValidRunningNode = isNodeMaster && thisNodeName.to_s.split('-').last == '0' - # end - # rescue => error - # @Log.warn("Checking Node Type failed: #{error}") - # end - # if(@@IsValidRunningNode == true) - # @Log.info("Electing current node to talk to k8 api") - # else - # @Log.info("Not Electing current node to talk to k8 api") - # end - # return @@IsValidRunningNode - #end - - #def isLinuxCluster - # return @@IsLinuxCluster if !@@IsLinuxCluster.nil? - # @@IsLinuxCluster = true - # begin - # @Log.info("KubernetesApiClient::isLinuxCluster : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - # allNodesInfo = JSON.parse(getKubeResourceInfo('nodes').body) - # @Log.info("KubernetesApiClient::isLinuxCluster : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") - # if !allNodesInfo.nil? && !allNodesInfo.empty? 
- # allNodesInfo['items'].each do |item| - # if !(item['status']['nodeInfo']['operatingSystem'].casecmp('linux') == 0) - # @@IsLinuxCluster = false - # break - # end - # end - # end - # rescue => error - # @Log.warn("KubernetesApiClient::isLinuxCluster : node role request failed: #{error}") - # end - # return @@IsLinuxCluster - #end - - # returns an arry of pods (json) - def getPods(namespace) - pods = [] - begin - kubesystemResourceUri = "namespaces/" + namespace + "/pods" - podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) - podInfo['items'].each do |items| - pods.push items - end - rescue => error - @Log.warn("List pods request failed: #{error}") - end - return pods - end + return @@IsNodeMaster + end - def getContainerIDs(namespace) - containers = Hash.new - begin - kubesystemResourceUri = "namespaces/" + namespace + "/pods" - @Log.info("KubernetesApiClient::getContainerIDs : Getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) - @Log.info("KubernetesApiClient::getContainerIDs : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInfo['items'].each do |item| - if (!item['status'].nil? && !item['status'].empty? && !item['status']['containerStatuses'].nil? && !item['status']['containerStatuses'].empty?) - item['status']['containerStatuses'].each do |cntr| - containers[cntr['containerID']] = "kube-system" - end - end - end - rescue => error - @Log.warn("KubernetesApiClient::getContainerIDs : List ContainerIDs request failed: #{error}") - end - return containers - end + #def isValidRunningNode + # return @@IsValidRunningNode if !@@IsValidRunningNode.nil? 
+ # @@IsValidRunningNode = false + # begin + # thisNodeName = OMS::Common.get_hostname + # if isLinuxCluster + # # Run on agent node [0] + # @@IsValidRunningNode = !isNodeMaster && thisNodeName.to_s.split('-').last == '0' + # else + # # Run on master node [0] + # @@IsValidRunningNode = isNodeMaster && thisNodeName.to_s.split('-').last == '0' + # end + # rescue => error + # @Log.warn("Checking Node Type failed: #{error}") + # end + # if(@@IsValidRunningNode == true) + # @Log.info("Electing current node to talk to k8 api") + # else + # @Log.info("Not Electing current node to talk to k8 api") + # end + # return @@IsValidRunningNode + #end - def getContainerLogs(namespace, pod, container, showTimeStamp) - containerLogs = "" - begin - kubesystemResourceUri = "namespaces/" + namespace + "/pods/" + pod + "/log" + "?container=" + container - if showTimeStamp - kubesystemResourceUri += "×tamps=true" - end - @Log.info("KubernetesApiClient::getContainerLogs : Getting logs from Kube API @ #{Time.now.utc.iso8601}") - containerLogs = getKubeResourceInfo(kubesystemResourceUri).body - @Log.info("KubernetesApiClient::getContainerLogs : Done getting logs from Kube API @ #{Time.now.utc.iso8601}") - rescue => error - @Log.warn("Pod logs request failed: #{error}") - end - return containerLogs + #def isLinuxCluster + # return @@IsLinuxCluster if !@@IsLinuxCluster.nil? + # @@IsLinuxCluster = true + # begin + # @Log.info("KubernetesApiClient::isLinuxCluster : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") + # allNodesInfo = JSON.parse(getKubeResourceInfo('nodes').body) + # @Log.info("KubernetesApiClient::isLinuxCluster : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + # if !allNodesInfo.nil? && !allNodesInfo.empty? 
+ # allNodesInfo['items'].each do |item| + # if !(item['status']['nodeInfo']['operatingSystem'].casecmp('linux') == 0) + # @@IsLinuxCluster = false + # break + # end + # end + # end + # rescue => error + # @Log.warn("KubernetesApiClient::isLinuxCluster : node role request failed: #{error}") + # end + # return @@IsLinuxCluster + #end + + # returns an arry of pods (json) + def getPods(namespace) + pods = [] + begin + kubesystemResourceUri = "namespaces/" + namespace + "/pods" + podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) + podInfo["items"].each do |items| + pods.push items + end + rescue => error + @Log.warn("List pods request failed: #{error}") + end + return pods + end + + def getContainerIDs(namespace) + containers = Hash.new + begin + kubesystemResourceUri = "namespaces/" + namespace + "/pods" + @Log.info("KubernetesApiClient::getContainerIDs : Getting pods from Kube API @ #{Time.now.utc.iso8601}") + podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) + @Log.info("KubernetesApiClient::getContainerIDs : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") + podInfo["items"].each do |item| + if (!item["status"].nil? && !item["status"].empty? && !item["status"]["containerStatuses"].nil? && !item["status"]["containerStatuses"].empty?) 
+ item["status"]["containerStatuses"].each do |cntr| + containers[cntr["containerID"]] = "kube-system" end + end + end + rescue => error + @Log.warn("KubernetesApiClient::getContainerIDs : List ContainerIDs request failed: #{error}") + end + return containers + end + + def getContainerLogs(namespace, pod, container, showTimeStamp) + containerLogs = "" + begin + kubesystemResourceUri = "namespaces/" + namespace + "/pods/" + pod + "/log" + "?container=" + container + if showTimeStamp + kubesystemResourceUri += "×tamps=true" + end + @Log.info("KubernetesApiClient::getContainerLogs : Getting logs from Kube API @ #{Time.now.utc.iso8601}") + containerLogs = getKubeResourceInfo(kubesystemResourceUri).body + @Log.info("KubernetesApiClient::getContainerLogs : Done getting logs from Kube API @ #{Time.now.utc.iso8601}") + rescue => error + @Log.warn("Pod logs request failed: #{error}") + end + return containerLogs + end + + def getContainerLogsSinceTime(namespace, pod, container, since, showTimeStamp) + containerLogs = "" + begin + kubesystemResourceUri = "namespaces/" + namespace + "/pods/" + pod + "/log" + "?container=" + container + "&sinceTime=" + since + kubesystemResourceUri = URI.escape(kubesystemResourceUri, ":.+") # HTML URL Encoding for date + + if showTimeStamp + kubesystemResourceUri += "×tamps=true" + end + @Log.info("calling #{kubesystemResourceUri}") + @Log.info("KubernetesApiClient::getContainerLogsSinceTime : Getting logs from Kube API @ #{Time.now.utc.iso8601}") + containerLogs = getKubeResourceInfo(kubesystemResourceUri).body + @Log.info("KubernetesApiClient::getContainerLogsSinceTime : Done getting logs from Kube API @ #{Time.now.utc.iso8601}") + rescue => error + @Log.warn("Pod logs request failed: #{error}") + end + return containerLogs + end + + def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn) + metricItems = [] + begin + clusterId = getClusterId + metricInfo = metricJSON + 
metricInfo["items"].each do |pod| + podNameSpace = pod["metadata"]["namespace"] + if podNameSpace.eql?("kube-system") && !pod["metadata"].key?("ownerReferences") + # The above case seems to be the only case where you have horizontal scaling of pods + # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash + # instead of the actual poduid. Since this uid is not being surface into the UX + # its ok to use this. + # Use kubernetes.io/config.hash to be able to correlate with cadvisor data + podUid = pod["metadata"]["annotations"]["kubernetes.io/config.hash"] + else + podUid = pod["metadata"]["uid"] + end + if (!pod["spec"]["containers"].nil? && !pod["spec"]["nodeName"].nil?) + nodeName = pod["spec"]["nodeName"] + pod["spec"]["containers"].each do |container| + containerName = container["name"] + metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?) 
+ metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect]) - def getContainerLogsSinceTime(namespace, pod, container, since, showTimeStamp) - containerLogs = "" - begin - kubesystemResourceUri = "namespaces/" + namespace + "/pods/" + pod + "/log" + "?container=" + container + "&sinceTime=" + since - kubesystemResourceUri = URI.escape(kubesystemResourceUri, ":.+") # HTML URL Encoding for date - - if showTimeStamp - kubesystemResourceUri += "×tamps=true" - end - @Log.info("calling #{kubesystemResourceUri}") - @Log.info("KubernetesApiClient::getContainerLogsSinceTime : Getting logs from Kube API @ #{Time.now.utc.iso8601}") - containerLogs = getKubeResourceInfo(kubesystemResourceUri).body - @Log.info("KubernetesApiClient::getContainerLogsSinceTime : Done getting logs from Kube API @ #{Time.now.utc.iso8601}") - rescue => error - @Log.warn("Pod logs request failed: #{error}") + metricItem = {} + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = nodeName + metricProps["ObjectName"] = "K8SContainer" + metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) + #No container level limit for the given metric, so default to node level limit + else + nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect + if (metricCategory == "limits" && @@NodeMetrics.has_key?(nodeMetricsHashKey)) + metricValue = @@NodeMetrics[nodeMetricsHashKey] + #@Log.info("Limits not set for container #{clusterId + "/" + podUid + "/" + containerName} using node level limits: #{nodeMetricsHashKey}=#{metricValue} ") + metricItem = {} + 
metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = nodeName + metricProps["ObjectName"] = "K8SContainer" + metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) end - return containerLogs + end end + end + end + rescue => error + @Log.warn("getcontainerResourceRequestsAndLimits failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") + return metricItems + end + return metricItems + end #getContainerResourceRequestAndLimits - def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn) - metricItems = [] - begin - clusterId = getClusterId - metricInfo = metricJSON - metricInfo['items'].each do |pod| - podNameSpace = pod['metadata']['namespace'] - if podNameSpace.eql?("kube-system") && !pod['metadata'].key?("ownerReferences") - # The above case seems to be the only case where you have horizontal scaling of pods - # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash - # instead of the actual poduid. Since this uid is not being surface into the UX - # its ok to use this. - # Use kubernetes.io/config.hash to be able to correlate with cadvisor data - podUid = pod['metadata']['annotations']['kubernetes.io/config.hash'] - else - podUid = pod['metadata']['uid'] - end - if (!pod['spec']['containers'].nil? && !pod['spec']['nodeName'].nil?) - nodeName = pod['spec']['nodeName'] - pod['spec']['containers'].each do |container| - containerName = container['name'] - metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z - if (!container['resources'].nil? && !container['resources'].empty? 
&& !container['resources'][metricCategory].nil? && !container['resources'][metricCategory][metricNameToCollect].nil?) - metricValue = getMetricNumericValue(metricNameToCollect, container['resources'][metricCategory][metricNameToCollect]) - - metricItem = {} - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = nodeName - metricProps['ObjectName'] = "K8SContainer" - metricProps['InstanceName'] = clusterId + "/" + podUid + "/" + containerName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - metricItems.push(metricItem) - #No container level limit for the given metric, so default to node level limit - else - nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect - if (metricCategory == "limits" && @@NodeMetrics.has_key?(nodeMetricsHashKey)) - - metricValue = @@NodeMetrics[nodeMetricsHashKey] - #@Log.info("Limits not set for container #{clusterId + "/" + podUid + "/" + containerName} using node level limits: #{nodeMetricsHashKey}=#{metricValue} ") - metricItem = {} - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = nodeName - metricProps['ObjectName'] = "K8SContainer" - metricProps['InstanceName'] = clusterId + "/" + podUid + "/" + containerName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - metricItems.push(metricItem) - end - end - end - end - end - rescue => error - @Log.warn("getcontainerResourceRequestsAndLimits failed: #{error} for metric #{metricCategory} 
#{metricNameToCollect}") - return metricItems - end - return metricItems - end #getContainerResourceRequestAndLimits - - def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn) - metricItems = [] - begin - metricInfo = metricJSON - clusterId = getClusterId - #Since we are getting all node data at the same time and kubernetes doesnt specify a timestamp for the capacity and allocation metrics, - #if we are coming up with the time it should be same for all nodes - metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z - metricInfo['items'].each do |node| - if (!node['status'][metricCategory].nil?) - - # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" - metricValue = getMetricNumericValue(metricNameToCollect, node['status'][metricCategory][metricNameToCollect]) - - metricItem = {} - metricItem['DataItems'] = [] - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = node['metadata']['name'] - metricProps['ObjectName'] = "K8SNode" - metricProps['InstanceName'] = clusterId + "/" + node['metadata']['name'] - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - metricItems.push(metricItem) - #push node level metrics to a inmem hash so that we can use it looking up at container level. 
- #Currently if container level cpu & memory limits are not defined we default to node level limits - @@NodeMetrics[clusterId + "/" + node['metadata']['name'] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue - #@Log.info ("Node metric hash: #{@@NodeMetrics}") - end - end - rescue => error - @Log.warn("parseNodeLimits failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") - end - return metricItems - end #parseNodeLimits - - def getMetricNumericValue(metricName, metricVal) - metricValue = metricVal - begin - case metricName - when "memory" #convert to bytes for memory - #https://kubernetes.io/docs/tasks/configure-pod-container/assign-memory-resource/ - if (metricValue.end_with?("Ki")) - metricValue.chomp!("Ki") - metricValue = Float(metricValue) * 1024.0 ** 1 - elsif (metricValue.end_with?("Mi")) - metricValue.chomp!("Mi") - metricValue = Float(metricValue) * 1024.0 ** 2 - elsif (metricValue.end_with?("Gi")) - metricValue.chomp!("Gi") - metricValue = Float(metricValue) * 1024.0 ** 3 - elsif (metricValue.end_with?("Ti")) - metricValue.chomp!("Ti") - metricValue = Float(metricValue) * 1024.0 ** 4 - elsif (metricValue.end_with?("Pi")) - metricValue.chomp!("Pi") - metricValue = Float(metricValue) * 1024.0 ** 5 - elsif (metricValue.end_with?("Ei")) - metricValue.chomp!("Ei") - metricValue = Float(metricValue) * 1024.0 ** 6 - elsif (metricValue.end_with?("Zi")) - metricValue.chomp!("Zi") - metricValue = Float(metricValue) * 1024.0 ** 7 - elsif (metricValue.end_with?("Yi")) - metricValue.chomp!("Yi") - metricValue = Float(metricValue) * 1024.0 ** 8 - elsif (metricValue.end_with?("K")) - metricValue.chomp!("K") - metricValue = Float(metricValue) * 1000.0 ** 1 - elsif (metricValue.end_with?("M")) - metricValue.chomp!("M") - metricValue = Float(metricValue) * 1000.0 ** 2 - elsif (metricValue.end_with?("G")) - metricValue.chomp!("G") - metricValue = Float(metricValue) * 1000.0 ** 3 - elsif (metricValue.end_with?("T")) - metricValue.chomp!("T") 
- metricValue = Float(metricValue) * 1000.0 ** 4 - elsif (metricValue.end_with?("P")) - metricValue.chomp!("P") - metricValue = Float(metricValue) * 1000.0 ** 5 - elsif (metricValue.end_with?("E")) - metricValue.chomp!("E") - metricValue = Float(metricValue) * 1000.0 ** 6 - elsif (metricValue.end_with?("Z")) - metricValue.chomp!("Z") - metricValue = Float(metricValue) * 1000.0 ** 7 - elsif (metricValue.end_with?("Y")) - metricValue.chomp!("Y") - metricValue = Float(metricValue) * 1000.0 ** 8 - else #assuming there are no units specified, it is bytes (the below conversion will fail for other unsupported 'units') - metricValue = Float(metricValue) - end - when "cpu" #convert to nanocores for cpu - #https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/ - if (metricValue.end_with?("m")) - metricValue.chomp!("m") - metricValue = Float(metricValue) * 1000.0 ** 2 - else #assuming no units specified, it is cores that we are converting to nanocores (the below conversion will fail for other unsupported 'units') - metricValue = Float(metricValue) * 1000.0 ** 3 - end - else - @Log.warn("getMetricNumericValue: Unsupported metric #{metricName}. Returning 0 for metric value") - metricValue = 0 - end #case statement - rescue => error - @Log.warn("getMetricNumericValue failed: #{error} for metric #{metricName} with value #{metricVal}. Returning 0 formetric value") - return 0 - end - return metricValue - end # getMetricNumericValue + def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn) + metricItems = [] + begin + metricInfo = metricJSON + clusterId = getClusterId + #Since we are getting all node data at the same time and kubernetes doesnt specify a timestamp for the capacity and allocation metrics, + #if we are coming up with the time it should be same for all nodes + metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + metricInfo["items"].each do |node| + if (!node["status"][metricCategory].nil?) 
+ + # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" + metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect]) + + metricItem = {} + metricItem["DataItems"] = [] + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = node["metadata"]["name"] + metricProps["ObjectName"] = "K8SNode" + metricProps["InstanceName"] = clusterId + "/" + node["metadata"]["name"] + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) + #push node level metrics to a inmem hash so that we can use it looking up at container level. + #Currently if container level cpu & memory limits are not defined we default to node level limits + @@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue + #@Log.info ("Node metric hash: #{@@NodeMetrics}") + end end - end + rescue => error + @Log.warn("parseNodeLimits failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") + end + return metricItems + end #parseNodeLimits + def getMetricNumericValue(metricName, metricVal) + metricValue = metricVal + begin + case metricName + when "memory" #convert to bytes for memory + #https://kubernetes.io/docs/tasks/configure-pod-container/assign-memory-resource/ + if (metricValue.end_with?("Ki")) + metricValue.chomp!("Ki") + metricValue = Float(metricValue) * 1024.0 ** 1 + elsif (metricValue.end_with?("Mi")) + metricValue.chomp!("Mi") + metricValue = Float(metricValue) * 1024.0 ** 2 + elsif (metricValue.end_with?("Gi")) + metricValue.chomp!("Gi") + metricValue = Float(metricValue) * 1024.0 ** 3 + elsif (metricValue.end_with?("Ti")) + metricValue.chomp!("Ti") + metricValue = 
Float(metricValue) * 1024.0 ** 4 + elsif (metricValue.end_with?("Pi")) + metricValue.chomp!("Pi") + metricValue = Float(metricValue) * 1024.0 ** 5 + elsif (metricValue.end_with?("Ei")) + metricValue.chomp!("Ei") + metricValue = Float(metricValue) * 1024.0 ** 6 + elsif (metricValue.end_with?("Zi")) + metricValue.chomp!("Zi") + metricValue = Float(metricValue) * 1024.0 ** 7 + elsif (metricValue.end_with?("Yi")) + metricValue.chomp!("Yi") + metricValue = Float(metricValue) * 1024.0 ** 8 + elsif (metricValue.end_with?("K")) + metricValue.chomp!("K") + metricValue = Float(metricValue) * 1000.0 ** 1 + elsif (metricValue.end_with?("M")) + metricValue.chomp!("M") + metricValue = Float(metricValue) * 1000.0 ** 2 + elsif (metricValue.end_with?("G")) + metricValue.chomp!("G") + metricValue = Float(metricValue) * 1000.0 ** 3 + elsif (metricValue.end_with?("T")) + metricValue.chomp!("T") + metricValue = Float(metricValue) * 1000.0 ** 4 + elsif (metricValue.end_with?("P")) + metricValue.chomp!("P") + metricValue = Float(metricValue) * 1000.0 ** 5 + elsif (metricValue.end_with?("E")) + metricValue.chomp!("E") + metricValue = Float(metricValue) * 1000.0 ** 6 + elsif (metricValue.end_with?("Z")) + metricValue.chomp!("Z") + metricValue = Float(metricValue) * 1000.0 ** 7 + elsif (metricValue.end_with?("Y")) + metricValue.chomp!("Y") + metricValue = Float(metricValue) * 1000.0 ** 8 + else #assuming there are no units specified, it is bytes (the below conversion will fail for other unsupported 'units') + metricValue = Float(metricValue) + end + when "cpu" #convert to nanocores for cpu + #https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/ + if (metricValue.end_with?("m")) + metricValue.chomp!("m") + metricValue = Float(metricValue) * 1000.0 ** 2 + else #assuming no units specified, it is cores that we are converting to nanocores (the below conversion will fail for other unsupported 'units') + metricValue = Float(metricValue) * 1000.0 ** 3 + end + else + 
@Log.warn("getMetricNumericValue: Unsupported metric #{metricName}. Returning 0 for metric value") + metricValue = 0 + end #case statement + rescue => error + @Log.warn("getMetricNumericValue failed: #{error} for metric #{metricName} with value #{metricVal}. Returning 0 formetric value") + return 0 + end + return metricValue + end # getMetricNumericValue + end +end diff --git a/source/code/plugin/in_health_docker.rb b/source/code/plugin/in_health_docker.rb new file mode 100644 index 000000000..0c8410294 --- /dev/null +++ b/source/code/plugin/in_health_docker.rb @@ -0,0 +1,117 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +module Fluent + class Kubelet_Health_Input < Input + Plugin.register_input("dockerhealth", self) + + def initialize + super + require "yaml" + require "json" + + require_relative "KubernetesApiClient" + require_relative "oms_common" + require_relative "omslog" + require_relative "ApplicationInsightsUtility" + require_relative "DockerApiClient" + end + + config_param :run_interval, :time, :default => "1m" + config_param :tag, :string, :default => "oms.containerinsights.DockerHealth" + + def configure(conf) + super + end + + def start + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) + @@previousDockerState = "" + # Tracks the last time docker health data sent for each node + @@dockerHealthDataTimeTracker = DateTime.now.to_time.to_i + @@clusterName = KubernetesApiClient.getClusterName + @@clusterId = KubernetesApiClient.getClusterId + @@clusterRegion = KubernetesApiClient.getClusterRegion + end + end + + def shutdown + if @run_interval + @mutex.synchronize { + @finished = true + @condition.signal + } + @thread.join + end + end + + def enumerate + begin + currentTime = Time.now + emitTime = currentTime.to_f + batchTime = currentTime.utc.iso8601 + record = {} + eventStream = MultiEventStream.new + $log.info("in_docker_health::Making a call to get 
docker info @ #{Time.now.utc.iso8601}") + isDockerStateFlush = false + dockerInfo = DockerApiClient.dockerInfo + if (!dockerInfo.nil? && !dockerInfo.empty?) + dockerState = "Healthy" + else + dockerState = "Unhealthy" + end + currentTime = DateTime.now.to_time.to_i + timeDifference = (currentTime - @@dockerHealthDataTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + $log.info("Time difference in minutes: #{timeDifferenceInMinutes}") + if (timeDifferenceInMinutes >= 3) || + !(dockerState.casecmp(@@previousDockerState) == 0) + @@previousDockerState = dockerState + isDockerStateFlush = true + @@dockerHealthDataTimeTracker = currentTime + record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + record["DockerState"] = dockerState + hostName = (OMS::Common.get_hostname) + record["Computer"] = hostName + record["ClusterName"] = @@clusterName + record["ClusterId"] = @@clusterId + record["ClusterRegion"] = @@clusterRegion + eventStream.add(emitTime, record) if record + $log.info("record: #{record}") + end + + if isDockerStateFlush + router.emit_stream(@tag, eventStream) if eventStream + end + rescue => errorStr + $log.warn("error : #{errorStr.to_s}") + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def run_periodic + @mutex.lock + done = @finished + until done + @condition.wait(@mutex, @run_interval) + done = @finished + @mutex.unlock + if !done + begin + $log.info("in_health_docker::run_periodic @ #{Time.now.utc.iso8601}") + enumerate + rescue => errorStr + $log.warn "in_health_docker::run_periodic: enumerate Failed for docker health: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + @mutex.lock + end + @mutex.unlock + end + end # Health_Docker_Input +end # module diff --git a/source/code/plugin/in_health_kubelet.rb b/source/code/plugin/in_health_kubelet.rb new file mode 100644 index 000000000..62e243007 --- /dev/null +++ 
b/source/code/plugin/in_health_kubelet.rb @@ -0,0 +1,141 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +module Fluent + class Kubelet_Health_Input < Input + Plugin.register_input("kubelethealth", self) + + def initialize + super + require "yaml" + require "json" + + require_relative "KubernetesApiClient" + require_relative "oms_common" + require_relative "omslog" + require_relative "ApplicationInsightsUtility" + end + + config_param :run_interval, :time, :default => "1m" + config_param :tag, :string, :default => "oms.containerinsights.KubeletHealth" + + def configure(conf) + super + end + + def start + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) + @@previousNodeStatus = {} + # Tracks the last time node health data sent for each node + @@nodeHealthDataTimeTracker = {} + @@clusterName = KubernetesApiClient.getClusterName + @@clusterId = KubernetesApiClient.getClusterId + @@clusterRegion = KubernetesApiClient.getClusterRegion + end + end + + def shutdown + if @run_interval + @mutex.synchronize { + @finished = true + @condition.signal + } + @thread.join + end + end + + def enumerate + currentTime = Time.now + emitTime = currentTime.to_f + batchTime = currentTime.utc.iso8601 + $log.info("in_health_health::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") + nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + $log.info("in_health_health::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + begin + if (!nodeInventory.empty?) 
+ eventStream = MultiEventStream.new + #get node inventory + nodeInventory["items"].each do |item| + record = {} + record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + computerName = item["metadata"]["name"] + record["Computer"] = computerName + # Tracking state change in order to send node health data only in case of state change or timeout + flushRecord = false + + currentTime = DateTime.now.to_time.to_i + timeDifferenceInMinutes = 0 + if !@@nodeHealthDataTimeTracker[computerName].nil? + timeDifference = (currentTime - @@nodeHealthDataTimeTracker[computerName]).abs + timeDifferenceInMinutes = timeDifference / 60 + end + if item["status"].key?("conditions") && !item["status"]["conditions"].empty? + allNodeConditions = "" + item["status"]["conditions"].each do |condition| + conditionType = condition["type"] + conditionStatus = condition["status"] + conditionReason = condition["reason"] + if @@previousNodeStatus[computerName + conditionType].nil? || + !(conditionStatus.casecmp(@@previousNodeStatus[computerName + conditionType]) == 0) || + timeDifferenceInMinutes >= 3 + # Comparing current status with previous status and setting state change as true + flushRecord = true + @@previousNodeStatus[computerName + conditionType] = conditionStatus + if !allNodeConditions.empty? + allNodeConditions = allNodeConditions + "," + conditionType + ":" + conditionReason + else + allNodeConditions = conditionType + ":" + conditionReason + end + #end + if !allNodeConditions.empty? 
+ record["NodeStatusCondition"] = allNodeConditions + end + end + end + end + + if flushRecord + #Sending node health data the very first time without checking for state change and timeout + record["Computer"] = computerName + record["ClusterName"] = @@clusterName + record["ClusterId"] = @@clusterId + record["ClusterRegion"] = @@clusterRegion + $log.warn("recordData: #{record}") + eventStream.add(emitTime, record) if record + @@nodeHealthDataTimeTracker[computerName] = currentTime + end + end + router.emit_stream(@tag, eventStream) if eventStream + end + rescue => errorStr + $log.warn("error : #{errorStr.to_s}") + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def run_periodic + @mutex.lock + done = @finished + until done + @condition.wait(@mutex, @run_interval) + done = @finished + @mutex.unlock + if !done + begin + $log.info("in_health_kubelet::run_periodic @ #{Time.now.utc.iso8601}") + enumerate + rescue => errorStr + $log.warn "in_health_kubelet::run_periodic: enumerate Failed for kubelet health: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + @mutex.lock + end + @mutex.unlock + end + end # Health_Kubelet_Input +end # module From 8e16e32a1335771cbac38433a24a2edcfca96cc0 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 11 Feb 2019 21:53:41 -0800 Subject: [PATCH 10/90] changes --- source/code/plugin/filter_health_cpu_memory.rb | 15 +++++++++++++-- source/code/plugin/in_health_docker.rb | 12 +++++++++++- source/code/plugin/in_health_kubelet.rb | 12 +++++++++++- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/source/code/plugin/filter_health_cpu_memory.rb b/source/code/plugin/filter_health_cpu_memory.rb index ae5320d9f..e3fbbeba9 100644 --- a/source/code/plugin/filter_health_cpu_memory.rb +++ b/source/code/plugin/filter_health_cpu_memory.rb @@ -15,6 +15,7 @@ class CPUMemoryHealthFilter < Filter config_param :metrics_to_collect, :string, :default => 
"cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes" @@HealthConfigFile = "/var/opt/microsoft/docker-cimprov/healthConfig/CpuMemory/config" + @@PluginName = "filter_health_cpu_memory" # Setting the memory and cpu pass and fail percentages to default values @@memoryPassPercentage = 80.0 @@memoryFailPercentage = 90.0 @@ -115,6 +116,7 @@ def start end rescue => errorStr @log.debug "Exception occured while reading config file at location #{@@HealthConfigFile}, error: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -176,8 +178,11 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, host, timeStamp) @@previousPreviousCpuHealthDetails = @@previousCpuHealthDetails.clone @@previousCpuHealthDetails = currentCpuHealthDetails.clone if updateCpuHealthState - @log.debug "cpu health record: #{cpuHealthRecord}" @@nodeCpuHealthDataTimeTracker = currentTime + telemetryProperties = {} + telemetryProperties["Computer"] = host + telemetryProperties["NodeCpuHealthState"] = cpuHealthState + ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) @log.debug "cpu record sent" return cpuHealthRecord else @@ -185,6 +190,7 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, host, timeStamp) end rescue => errorStr @log.debug "In processCpuMetrics: exception: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -236,8 +242,11 @@ def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentVa @@previousPreviousMemoryRssHealthDetails = @@previousMemoryRssHealthDetails.clone @@previousMemoryRssHealthDetails = currentMemoryRssHealthDetails.clone if updateMemoryRssHealthState - @log.debug "memory health record: #{memRssHealthRecord}" @@nodeMemoryRssDataTimeTracker = currentTime + telemetryProperties = {} + telemetryProperties["Computer"] = host + telemetryProperties["NodeMemoryRssHealthState"] = memoryRssHealthState + 
ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) @log.debug "memory record sent" return memRssHealthRecord else @@ -245,6 +254,7 @@ def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentVa end rescue => errorStr @log.debug "In processMemoryRssMetrics: exception: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -291,6 +301,7 @@ def filter_stream(tag, es) new_es.add(time, filtered_record) if filtered_record rescue => e router.emit_error_event(tag, time, record, e) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end } new_es diff --git a/source/code/plugin/in_health_docker.rb b/source/code/plugin/in_health_docker.rb index 0c8410294..6ea0ae9dc 100644 --- a/source/code/plugin/in_health_docker.rb +++ b/source/code/plugin/in_health_docker.rb @@ -36,6 +36,8 @@ def start @@clusterName = KubernetesApiClient.getClusterName @@clusterId = KubernetesApiClient.getClusterId @@clusterRegion = KubernetesApiClient.getClusterRegion + @@telemetryTimeTracker = DateTime.now.to_time.to_i + @@PluginName = "in_health_docker" end end @@ -81,11 +83,19 @@ def enumerate record["ClusterId"] = @@clusterId record["ClusterRegion"] = @@clusterRegion eventStream.add(emitTime, record) if record - $log.info("record: #{record}") end if isDockerStateFlush router.emit_stream(@tag, eventStream) if eventStream + timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= 5) + @@telemetryTimeTracker = DateTime.now.to_time.to_i + telemetryProperties = {} + telemetryProperties["Computer"] = hostname + telemetryProperties["DockerState"] = dockerState + ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) + end end rescue => errorStr $log.warn("error : #{errorStr.to_s}") diff --git a/source/code/plugin/in_health_kubelet.rb b/source/code/plugin/in_health_kubelet.rb index 62e243007..5fd54339d 
100644 --- a/source/code/plugin/in_health_kubelet.rb +++ b/source/code/plugin/in_health_kubelet.rb @@ -35,6 +35,8 @@ def start @@clusterName = KubernetesApiClient.getClusterName @@clusterId = KubernetesApiClient.getClusterId @@clusterRegion = KubernetesApiClient.getClusterRegion + @@telemetryTimeTracker = DateTime.now.to_time.to_i + @@PluginName = "in_health_kubelet" end end @@ -104,9 +106,17 @@ def enumerate record["ClusterName"] = @@clusterName record["ClusterId"] = @@clusterId record["ClusterRegion"] = @@clusterRegion - $log.warn("recordData: #{record}") eventStream.add(emitTime, record) if record @@nodeHealthDataTimeTracker[computerName] = currentTime + timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= 5) + @@telemetryTimeTracker = DateTime.now.to_time.to_i + telemetryProperties = {} + telemetryProperties["Computer"] = computerName + telemetryProperties["NodeStatusCondition"] = allNodeConditions + ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) + end end end router.emit_stream(@tag, eventStream) if eventStream From 9453d7074711a7fc38caf226b8c5b31f134b9a7f Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 14 Feb 2019 22:38:52 -0800 Subject: [PATCH 11/90] changes --- source/code/plugin/in_health_kubelet.rb | 56 ++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/source/code/plugin/in_health_kubelet.rb b/source/code/plugin/in_health_kubelet.rb index 5fd54339d..b41efb3d2 100644 --- a/source/code/plugin/in_health_kubelet.rb +++ b/source/code/plugin/in_health_kubelet.rb @@ -30,6 +30,7 @@ def start @mutex = Mutex.new @thread = Thread.new(&method(:run_periodic)) @@previousNodeStatus = {} + @@previousNodeState = {} # Tracks the last time node health data sent for each node @@nodeHealthDataTimeTracker = {} @@clusterName = KubernetesApiClient.getClusterName @@ -77,10 +78,46 @@ def enumerate end if 
item["status"].key?("conditions") && !item["status"]["conditions"].empty? allNodeConditions = "" + nodeState = "" item["status"]["conditions"].each do |condition| conditionType = condition["type"] conditionStatus = condition["status"] conditionReason = condition["reason"] + + if !(nodeState.casecmp("Fail") == 0) + if (conditionStatus.casecmp("Unknown") || + conditionStatus.casecmp("True")) + if ((conditionType.casecmp("MemoryPressure") == 0) || + (conditionType.casecmp("DiskPressure") == 0) || + (conditionType.casecmp("PIDPressure") == 0)) + nodeState = "Warning" + else + nodeState = "Fail" + end + elsif !(nodeState.casecmp("Warning") == 0) + nodeState = "Pass" + end + end + + # if !(nodeState.casecmp("Fail") == 0) + # if ((conditionType.casecmp("MemoryPressure") == 0) || + # (conditionType.casecmp("DiskPressure") == 0) || + # (conditionType.casecmp("PIDPressure") == 0)) + # if (conditionStatus.casecmp("Unknown") || + # conditionStatus.casecmp("True")) + # nodeState = "Warning" + # else + # nodeState = "Pass" + # end + # else + # if (conditionStatus.casecmp("Unknown") || + # conditionStatus.casecmp("True")) + # nodeState = "Fail" + # else + # nodeState = "Pass" + # end + # end + # end if @@previousNodeStatus[computerName + conditionType].nil? || !(conditionStatus.casecmp(@@previousNodeStatus[computerName + conditionType]) == 0) || timeDifferenceInMinutes >= 3 @@ -93,8 +130,11 @@ def enumerate allNodeConditions = conditionType + ":" + conditionReason end #end + record["NewState"] = nodeState + record["OldState"] = @@previousNodeState[computerName] + @@previousNodeState[computerName] = nodeState if !allNodeConditions.empty? 
- record["NodeStatusCondition"] = allNodeConditions + record["Details"] = allNodeConditions end end end @@ -102,10 +142,16 @@ def enumerate if flushRecord #Sending node health data the very first time without checking for state change and timeout - record["Computer"] = computerName - record["ClusterName"] = @@clusterName - record["ClusterId"] = @@clusterId - record["ClusterRegion"] = @@clusterRegion + labelsString = "NodeName:" + ((!computerName.nil?)? computerName : "") + + "ClusterName:" + ((!@@clusterName.nil?)? @@clusterName : "") + + "ClusterId:" + ((!@@clusterId.nil?)? @@clusterId : "") + + "ClusterRegion:" + ((!@@clusterRegion.nil?)? @@clusterRegion : "") + # record["Computer"] = computerName + # record["ClusterName"] = @@clusterName + # record["ClusterId"] = @@clusterId + # record["ClusterRegion"] = @@clusterRegion + record["MonitorId"] = "KubeletHealth" + record["Labels"] = labelsString eventStream.add(emitTime, record) if record @@nodeHealthDataTimeTracker[computerName] = currentTime timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs From 4514780f887f22bc8a178bed54bd14c9eee94f9b Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 14 Feb 2019 23:20:01 -0800 Subject: [PATCH 12/90] chg --- source/code/plugin/in_health_kubelet.rb | 28 +++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/source/code/plugin/in_health_kubelet.rb b/source/code/plugin/in_health_kubelet.rb index b41efb3d2..947d012cd 100644 --- a/source/code/plugin/in_health_kubelet.rb +++ b/source/code/plugin/in_health_kubelet.rb @@ -85,17 +85,33 @@ def enumerate conditionReason = condition["reason"] if !(nodeState.casecmp("Fail") == 0) - if (conditionStatus.casecmp("Unknown") || + if ((conditionType.casecmp("MemoryPressure") == 0) || + (conditionType.casecmp("DiskPressure") == 0) || + (conditionType.casecmp("PIDPressure") == 0)) + if (conditionStatus.casecmp("Unknown") || conditionStatus.casecmp("True")) - if 
((conditionType.casecmp("MemoryPressure") == 0) || - (conditionType.casecmp("DiskPressure") == 0) || - (conditionType.casecmp("PIDPressure") == 0)) nodeState = "Warning" else + if !(nodeState.casecmp("Warning") == 0) + nodeState = "Pass" + end + # nodeState = (nodeState.casecmp("Warning") == 0)? nodeState : "Pass" + end + elsif ((conditionType.casecmp("NetworkUnavailable") == 0) || + (conditionType.casecmp("OutOfDisk") == 0)) + if (conditionStatus.casecmp("Unknown") || + conditionStatus.casecmp("True")) + nodeState = "Fail" + else + nodeState = "Pass" + end + elsif (conditionType.casecmp("Ready") == 0) + if (conditionStatus.casecmp("Unknown") || + conditionStatus.casecmp("False")) nodeState = "Fail" + else + nodeState = "Pass" end - elsif !(nodeState.casecmp("Warning") == 0) - nodeState = "Pass" end end From fc8a1ed3410f92838b3bf54066ff5876c059a926 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 15 Feb 2019 16:44:57 -0800 Subject: [PATCH 13/90] changes for kubelet health --- source/code/plugin/in_health_kubelet.rb | 72 ++++++++----------------- 1 file changed, 21 insertions(+), 51 deletions(-) diff --git a/source/code/plugin/in_health_kubelet.rb b/source/code/plugin/in_health_kubelet.rb index 947d012cd..1d6071a51 100644 --- a/source/code/plugin/in_health_kubelet.rb +++ b/source/code/plugin/in_health_kubelet.rb @@ -66,7 +66,11 @@ def enumerate record = {} record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated computerName = item["metadata"]["name"] - record["Computer"] = computerName + labels = {} + labels["ClusterName"] = @@clusterName + labels["ClusterId"] = @@clusterId + labels["ClusterRegion"] = @@clusterRegion + labels["NodeName"] = computerName # Tracking state change in order to send node health data only in case of state change or timeout flushRecord = false @@ -77,7 +81,8 @@ def enumerate timeDifferenceInMinutes = timeDifference / 60 end if item["status"].key?("conditions") && !item["status"]["conditions"].empty? 
- allNodeConditions = "" + # allNodeConditions = "" + allNodeConditions = {} nodeState = "" item["status"]["conditions"].each do |condition| conditionType = condition["type"] @@ -86,88 +91,53 @@ def enumerate if !(nodeState.casecmp("Fail") == 0) if ((conditionType.casecmp("MemoryPressure") == 0) || - (conditionType.casecmp("DiskPressure") == 0) || - (conditionType.casecmp("PIDPressure") == 0)) - if (conditionStatus.casecmp("Unknown") || - conditionStatus.casecmp("True")) + (conditionType.casecmp("DiskPressure") == 0) || + (conditionType.casecmp("PIDPressure") == 0)) + if ((conditionStatus.casecmp("Unknown") == 0) || + (conditionStatus.casecmp("True") == 0)) nodeState = "Warning" else if !(nodeState.casecmp("Warning") == 0) nodeState = "Pass" end - # nodeState = (nodeState.casecmp("Warning") == 0)? nodeState : "Pass" end elsif ((conditionType.casecmp("NetworkUnavailable") == 0) || - (conditionType.casecmp("OutOfDisk") == 0)) - if (conditionStatus.casecmp("Unknown") || - conditionStatus.casecmp("True")) + (conditionType.casecmp("OutOfDisk") == 0)) + if ((conditionStatus.casecmp("Unknown") == 0) || + (conditionStatus.casecmp("True") == 0)) nodeState = "Fail" else nodeState = "Pass" end elsif (conditionType.casecmp("Ready") == 0) - if (conditionStatus.casecmp("Unknown") || - conditionStatus.casecmp("False")) + if ((conditionStatus.casecmp("Unknown") == 0) || + (conditionStatus.casecmp("False") == 0)) nodeState = "Fail" else nodeState = "Pass" end end end - - # if !(nodeState.casecmp("Fail") == 0) - # if ((conditionType.casecmp("MemoryPressure") == 0) || - # (conditionType.casecmp("DiskPressure") == 0) || - # (conditionType.casecmp("PIDPressure") == 0)) - # if (conditionStatus.casecmp("Unknown") || - # conditionStatus.casecmp("True")) - # nodeState = "Warning" - # else - # nodeState = "Pass" - # end - # else - # if (conditionStatus.casecmp("Unknown") || - # conditionStatus.casecmp("True")) - # nodeState = "Fail" - # else - # nodeState = "Pass" - # end - # end - # end + 
if @@previousNodeStatus[computerName + conditionType].nil? || !(conditionStatus.casecmp(@@previousNodeStatus[computerName + conditionType]) == 0) || timeDifferenceInMinutes >= 3 # Comparing current status with previous status and setting state change as true flushRecord = true @@previousNodeStatus[computerName + conditionType] = conditionStatus - if !allNodeConditions.empty? - allNodeConditions = allNodeConditions + "," + conditionType + ":" + conditionReason - else - allNodeConditions = conditionType + ":" + conditionReason - end - #end + allNodeConditions[conditionType] = conditionReason record["NewState"] = nodeState record["OldState"] = @@previousNodeState[computerName] @@previousNodeState[computerName] = nodeState - if !allNodeConditions.empty? - record["Details"] = allNodeConditions - end + record["Details"] = allNodeConditions.to_json end end end if flushRecord #Sending node health data the very first time without checking for state change and timeout - labelsString = "NodeName:" + ((!computerName.nil?)? computerName : "") + - "ClusterName:" + ((!@@clusterName.nil?)? @@clusterName : "") + - "ClusterId:" + ((!@@clusterId.nil?)? @@clusterId : "") + - "ClusterRegion:" + ((!@@clusterRegion.nil?)? 
@@clusterRegion : "") - # record["Computer"] = computerName - # record["ClusterName"] = @@clusterName - # record["ClusterId"] = @@clusterId - # record["ClusterRegion"] = @@clusterRegion record["MonitorId"] = "KubeletHealth" - record["Labels"] = labelsString + record["Labels"] = labels.to_json eventStream.add(emitTime, record) if record @@nodeHealthDataTimeTracker[computerName] = currentTime timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs @@ -176,7 +146,7 @@ def enumerate @@telemetryTimeTracker = DateTime.now.to_time.to_i telemetryProperties = {} telemetryProperties["Computer"] = computerName - telemetryProperties["NodeStatusCondition"] = allNodeConditions + telemetryProperties["NodeStatusCondition"] = allNodeConditions.to_json ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) end end From 0e28a4edc1ebb5c04dc09082c0cf8586f40029dc Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 15 Feb 2019 19:20:33 -0800 Subject: [PATCH 14/90] changes --- .../code/plugin/filter_health_cpu_memory.rb | 79 +++++++++++++------ 1 file changed, 54 insertions(+), 25 deletions(-) diff --git a/source/code/plugin/filter_health_cpu_memory.rb b/source/code/plugin/filter_health_cpu_memory.rb index e3fbbeba9..03afaa3ce 100644 --- a/source/code/plugin/filter_health_cpu_memory.rb +++ b/source/code/plugin/filter_health_cpu_memory.rb @@ -14,13 +14,17 @@ class CPUMemoryHealthFilter < Filter config_param :log_path, :string, :default => "/var/opt/microsoft/omsagent/log/filter_health_cpu_memory.log" config_param :metrics_to_collect, :string, :default => "cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes" - @@HealthConfigFile = "/var/opt/microsoft/docker-cimprov/healthConfig/CpuMemory/config" + @@HealthConfigFile = "/var/opt/microsoft/docker-cimprov/healthConfig/config" @@PluginName = "filter_health_cpu_memory" + # Setting the memory and cpu pass and fail percentages to default values @@memoryPassPercentage = 80.0 @@memoryFailPercentage = 90.0 
@@cpuPassPercentage = 80.0 @@cpuFailPercentage = 90.0 + @@cpuMonitorTimeOut = 50 + @@memoryRssMonitorTimeOut = 50 + @@previousCpuHealthDetails = {} @@previousPreviousCpuHealthDetails = {} @@previousCpuHealthStateSent = "" @@ -92,10 +96,16 @@ def start healthConfigObject = JSON.parse(fileContents) file.close if !healthConfigObject.nil? - memPassPercent = healthConfigObject["memoryPassPercentage"] - memFailPercent = healthConfigObject["memoryFailPercentage"] - cpuPassPercent = healthConfigObject["cpuPassPercentage"] - cpuFailPercent = healthConfigObject["cpuFailPercentage"] + # memPassPercent = healthConfigObject["memoryPassPercentage"] + # memFailPercent = healthConfigObject["memoryFailPercentage"] + # cpuPassPercent = healthConfigObject["cpuPassPercentage"] + # cpuFailPercent = healthConfigObject["cpuFailPercentage"] + cpuPassPercent = healthConfigObject["NodeCpuMonitor"]["PassPercentage"] + cpuFailPercent = healthConfigObject["NodeCpuMonitor"]["FailPercentage"] + memPassPercent = healthConfigObject["NodeMemoryRssMonitor"]["PassPercentage"] + memFailPercent = healthConfigObject["NodeMemoryRssMonitor"]["FailPercentage"] + @@cpuMonitorTimeOut = healthConfigObject["NodeCpuMonitor"]["MonitorTimeOut"] + @@memoryRssMonitorTimeOut = healthConfigObject["NodeMemoryRssMonitor"]["MonitorTimeOut"] if !memPassPercent.nil? 
&& memPassPercent.is_a?(Numeric) @@memoryPassPercentage = memPassPercent @@ -140,10 +150,13 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, host, timeStamp) updateCpuHealthState = false cpuHealthRecord = {} currentCpuHealthDetails = {} - cpuHealthRecord["ClusterName"] = @@clusterName - cpuHealthRecord["ClusterId"] = @@clusterId - cpuHealthRecord["ClusterRegion"] = @@clusterRegion - cpuHealthRecord["Computer"] = host + labels = {} + labels["ClusterName"] = @@clusterName + labels["ClusterId"] = @@clusterId + labels["ClusterRegion"] = @@clusterRegion + labels["NodeName"] = host + cpuHealthRecord["Labels"] = labels.to_json + cpuHealthRecord["MonitorId"] = "NodeCpuMonitor" cpuHealthState = "" if cpuMetricPercentValue.to_f < @@cpuPassPercentage cpuHealthState = "Pass" @@ -164,14 +177,20 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, host, timeStamp) @log.debug "processing cpu metrics" if ((cpuHealthState != @@previousCpuHealthStateSent && ((cpuHealthState == @@previousCpuHealthDetails["State"]) && (cpuHealthState == @@previousPreviousCpuHealthDetails["State"]))) || - timeDifferenceInMinutes > 50) + timeDifferenceInMinutes > @@cpuMonitorTimeOut) @log.debug "cpu conditions met." 
- cpuHealthRecord["NodeCpuHealthState"] = cpuHealthState - cpuHealthRecord["NodeCpuUsagePercentage"] = cpuMetricPercentValue - cpuHealthRecord["NodeCpuUsageMilliCores"] = cpuMetricValue + cpuHealthRecord["NewState"] = cpuHealthState + cpuHealthRecord["OldState"] = @@previousCpuHealthStateSent + + details = {} + details["NodeCpuUsagePercentage"] = cpuMetricPercentValue + details["NodeCpuUsageMilliCores"] = cpuMetricValue + details["PrevNodeCpuUsageDetails"] = { "Percent": @@previousCpuHealthDetails["CPUUsagePercentage"], "TimeStamp": @@previousCpuHealthDetails["Time"], "Millicores": @@previousCpuHealthDetails["CPUUsageMillicores"] } + details["PrevPrevNodeCpuUsageDetails"] = { "Percent": @@previousPreviousCpuHealthDetails["CPUUsagePercentage"], "TimeStamp": @@previousPreviousCpuHealthDetails["Time"], "Millicores": @@previousPreviousCpuHealthDetails["CPUUsageMillicores"] } + cpuHealthRecord["Details"] = details.to_json + + #Sendind this data as collection time because this is overridden in custom log type. This will be mapped to TimeGenerated with fixed type. 
cpuHealthRecord["CollectionTime"] = @@previousPreviousCpuHealthDetails["Time"] - cpuHealthRecord["PrevNodeCpuUsageDetails"] = {"Percent": @@previousCpuHealthDetails["CPUUsagePercentage"], "TimeStamp": @@previousCpuHealthDetails["Time"], "Millicores": @@previousCpuHealthDetails["CPUUsageMillicores"]} - cpuHealthRecord["PrevPrevNodeCpuUsageDetails"] = {"Percent": @@previousPreviousCpuHealthDetails["CPUUsagePercentage"], "TimeStamp": @@previousPreviousCpuHealthDetails["Time"], "Millicores": @@previousPreviousCpuHealthDetails["CPUUsageMillicores"]} updateCpuHealthState = true @@previousCpuHealthStateSent = cpuHealthState end @@ -179,6 +198,7 @@ def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, host, timeStamp) @@previousCpuHealthDetails = currentCpuHealthDetails.clone if updateCpuHealthState @@nodeCpuHealthDataTimeTracker = currentTime + cpuHealthRecord["TimeObserved"] = Time.now.utc.iso8601 telemetryProperties = {} telemetryProperties["Computer"] = host telemetryProperties["NodeCpuHealthState"] = cpuHealthState @@ -202,10 +222,14 @@ def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentVa # Get node memory RSS health memRssHealthRecord = {} currentMemoryRssHealthDetails = {} - memRssHealthRecord["ClusterName"] = @@clusterName - memRssHealthRecord["ClusterId"] = @@clusterId - memRssHealthRecord["ClusterRegion"] = @@clusterRegion - memRssHealthRecord["Computer"] = host + + labels = {} + labels["ClusterName"] = @@clusterName + labels["ClusterId"] = @@clusterId + labels["ClusterRegion"] = @@clusterRegion + labels["NodeName"] = host + memRssHealthRecord["Labels"] = labels.to_json + memRssHealthRecord["MonitorId"] = "NodeMemoryRssMonitor" memoryRssHealthState = "" if memoryRssMetricPercentValue.to_f < @@memoryPassPercentage @@ -228,14 +252,18 @@ def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentVa if ((memoryRssHealthState != @@previousMemoryRssHealthStateSent && ((memoryRssHealthState == 
@@previousMemoryRssHealthDetails["State"]) && (memoryRssHealthState == @@previousPreviousMemoryRssHealthDetails["State"]))) || - timeDifferenceInMinutes > 50) + timeDifferenceInMinutes > @@memoryRssMonitorTimeOut) @log.debug "memory conditions met" - memRssHealthRecord["NodeMemoryRssHealthState"] = memoryRssHealthState - memRssHealthRecord["NodeMemoryRssPercentage"] = memoryRssMetricPercentValue - memRssHealthRecord["NodeMemoryRssBytes"] = memoryRssMetricValue + memRssHealthRecord["NewState"] = memoryRssHealthState + memRssHealthRecord["OldState"] = @@previousMemoryRssHealthStateSent + details = {} + details["NodeMemoryRssPercentage"] = memoryRssMetricPercentValue + details["NodeMemoryRssBytes"] = memoryRssMetricValue + details["PrevNodeMemoryRssDetails"] = { "Percent": @@previousMemoryRssHealthDetails["memoryRssPercentage"], "TimeStamp": @@previousMemoryRssHealthDetails["Time"], "Bytes": @@previousMemoryRssHealthDetails["memoryRssBytes"] } + details["PrevPrevNodeMemoryRssDetails"] = { "Percent": @@previousPreviousMemoryRssHealthDetails["memoryRssPercentage"], "TimeStamp": @@previousPreviousMemoryRssHealthDetails["Time"], "Bytes": @@previousPreviousMemoryRssHealthDetails["memoryRssBytes"] } + memRssHealthRecord["Details"] = details.to_json + #Sending this data as collection time because this is overridden in custom log type. This will be mapped to TimeGenerated with fixed type. 
memRssHealthRecord["CollectionTime"] = @@previousPreviousMemoryRssHealthDetails["Time"] - memRssHealthRecord["PrevNodeMemoryRssDetails"] = {"Percent": @@previousMemoryRssHealthDetails["memoryRssPercentage"], "TimeStamp": @@previousMemoryRssHealthDetails["Time"], "Bytes": @@previousMemoryRssHealthDetails["memoryRssBytes"]} - memRssHealthRecord["PrevPrevNodeMemoryRssDetails"] = {"Percent": @@previousPreviousMemoryRssHealthDetails["memoryRssPercentage"], "TimeStamp": @@previousPreviousMemoryRssHealthDetails["Time"], "Bytes": @@previousPreviousMemoryRssHealthDetails["memoryRssBytes"]} updateMemoryRssHealthState = true @@previousMemoryRssHealthStateSent = memoryRssHealthState end @@ -243,6 +271,7 @@ def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentVa @@previousMemoryRssHealthDetails = currentMemoryRssHealthDetails.clone if updateMemoryRssHealthState @@nodeMemoryRssDataTimeTracker = currentTime + memRssHealthRecord["TimeObserved"] = Time.now.utc.iso8601 telemetryProperties = {} telemetryProperties["Computer"] = host telemetryProperties["NodeMemoryRssHealthState"] = memoryRssHealthState From 627de84430e1ccd9b7a38ef58b9f609efa30ed5a Mon Sep 17 00:00:00 2001 From: rashmy Date: Tue, 19 Feb 2019 17:24:47 -0800 Subject: [PATCH 15/90] changes to include message --- source/code/plugin/in_health_kubelet.rb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/source/code/plugin/in_health_kubelet.rb b/source/code/plugin/in_health_kubelet.rb index 1d6071a51..c9db5727f 100644 --- a/source/code/plugin/in_health_kubelet.rb +++ b/source/code/plugin/in_health_kubelet.rb @@ -88,6 +88,7 @@ def enumerate conditionType = condition["type"] conditionStatus = condition["status"] conditionReason = condition["reason"] + conditionMessage = condition["message"] if !(nodeState.casecmp("Fail") == 0) if ((conditionType.casecmp("MemoryPressure") == 0) || @@ -118,14 +119,17 @@ def enumerate end end end - + if @@previousNodeStatus[computerName + 
conditionType].nil? || !(conditionStatus.casecmp(@@previousNodeStatus[computerName + conditionType]) == 0) || timeDifferenceInMinutes >= 3 # Comparing current status with previous status and setting state change as true flushRecord = true @@previousNodeStatus[computerName + conditionType] = conditionStatus - allNodeConditions[conditionType] = conditionReason + conditionInformation = {} + conditionInformation["Reason"] = conditionReason + conditionInformation["Message"] = conditionMessage + allNodeConditions[conditionType] = conditionInformation.to_json record["NewState"] = nodeState record["OldState"] = @@previousNodeState[computerName] @@previousNodeState[computerName] = nodeState From 14ab4468aa0c7d160860927beceb58829bb4f371 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Fri, 8 Mar 2019 15:28:14 -0800 Subject: [PATCH 16/90] First iteration of health monitor signals TODO: 1. Remove unnecessary Logging 2. Better Error Handling -- doesnt crash, continues, but need to do best effort 3. Consolidate Log Files for Health 4. Telemetry 5. 
Hook up to out_oms_api --- source/code/plugin/HealthEventTemplates.rb | 79 ++++ source/code/plugin/HealthEventUtils.rb | 368 ++++++++++++++++++ source/code/plugin/HealthEventsConstants.rb | 21 + source/code/plugin/HealthMonitorState.rb | 113 ++++++ source/code/plugin/HealthSignalReducer.rb | 138 +++++++ source/code/plugin/KubernetesApiClient.rb | 2 + source/code/plugin/filter_cadvisor2mdm.rb | 68 ++-- source/code/plugin/filter_cadvisor_health.rb | 229 +++++++++++ .../code/plugin/filter_health_cpu_memory.rb | 4 - source/code/plugin/healthconfig.json | 68 ++++ source/code/plugin/in_health_kubeapidata.rb | 248 ++++++++++++ 11 files changed, 1300 insertions(+), 38 deletions(-) create mode 100644 source/code/plugin/HealthEventTemplates.rb create mode 100644 source/code/plugin/HealthEventUtils.rb create mode 100644 source/code/plugin/HealthEventsConstants.rb create mode 100644 source/code/plugin/HealthMonitorState.rb create mode 100644 source/code/plugin/HealthSignalReducer.rb create mode 100644 source/code/plugin/filter_cadvisor_health.rb create mode 100644 source/code/plugin/healthconfig.json create mode 100644 source/code/plugin/in_health_kubeapidata.rb diff --git a/source/code/plugin/HealthEventTemplates.rb b/source/code/plugin/HealthEventTemplates.rb new file mode 100644 index 000000000..047fb64c3 --- /dev/null +++ b/source/code/plugin/HealthEventTemplates.rb @@ -0,0 +1,79 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + + +# details is an array of records +# include monitor config details in the template + +require_relative 'HealthEventsConstants' + +class HealthEventTemplates + HealthRecordTemplate = '{ + "Labels": %{labels}, + "MonitorId": "%{monitor_id}", + "MonitorInstanceId": "%{monitor_instance_id}", + "NewState": "%{new_state}", + "OldState": "%{old_state}", + "Details": %{monitor_details}, + "MonitorConfig": %{monitor_config}, + "CollectionTime": "%{collection_time}", + "TimeObserved": "%{time_observed}" + }' + + DetailsNodeMemoryTemplate = '{ + 
"NodeMemoryRssPercentage": %{memory_rss_percentage}, + "NodeMemoryRssBytes": %{memory_rss_bytes}, + "History": [%{prev_records}] + }' + + + DetailsNodeCpuTemplate = '{ + "NodeCpuUsagePercentage": %{cpu_percentage}, + "NodeCpuUsageMilliCores": %{cpu_usage}, + "PrevNodeCpuUsageDetails": %{prev_monitor_record_details}, + "PrevPrevNodeCpuUsageDetails": %{prev_prev_monitor_record_details} + }' + + DetailsWorkloadCpuOversubscribedTemplate = '{ + "ClusterCpuCapacity": %{cluster_cpu_capacity}, + "ClusterCpuRequests": %{cluster_cpu_requests} + }' + + DetailsWorkloadMemoryOversubscribedTemplate = '{ + "ClusterMemoryCapacity": %{cluster_memory_capacity}, + "ClusterMemoryRequests": %{cluster_memory_requests} + }' + + DetailsWorkloadPodsReadyStatePercentage = '{ + "TimeStamp": "%{timestamp}", + "PodsReady": %{pods_ready}, + "TotalPods": %{total_pods} + "History": [%{prev_records}] + }' + + DetailsWorkloadContainerCpuPercentage = ' + "TimeStamp": "%{timestamp}", + "CpuLimit": %{cpu_limit}, + "CpuRequest": %{cpu_request}, + "CpuPercentage": %{cpu_percentage}, + "History": [%{prev_records}] + }' + + DetailsWorkloadContainerMemoryPercentage = ' + "TimeStamp": "%{timestamp}", + "MemoryLimit": %{memory_limit}, + "MemoryRequest": %{memory_request}, + "MemoryPercentage": %{memory_percentage}, + "History": [%{prev_records}] + }' + + DETAILS_TEMPLATE_HASH = { + HealthEventsConstants::NODE_CPU_MONITOR_ID => DetailsNodeCpuTemplate, + HealthEventsConstants::NODE_MEMORY_MONITOR_ID => DetailsNodeMemoryTemplate, + HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID => DetailsWorkloadContainerCpuPercentage, + HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID => DetailsWorkloadContainerMemoryPercentage, + HealthEventsConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID => DetailsWorkloadCpuOversubscribedTemplate, + HealthEventsConstants::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID => DetailsWorkloadMemoryOversubscribedTemplate, + 
HealthEventsConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID => DetailsWorkloadPodsReadyStatePercentage, + } +end \ No newline at end of file diff --git a/source/code/plugin/HealthEventUtils.rb b/source/code/plugin/HealthEventUtils.rb new file mode 100644 index 000000000..fbca6243f --- /dev/null +++ b/source/code/plugin/HealthEventUtils.rb @@ -0,0 +1,368 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +require_relative 'KubernetesApiClient' +require_relative 'HealthEventsConstants' +require 'time' + +class HealthEventUtils + + @LogPath = "/var/opt/microsoft/docker-cimprov/log/health_monitors.log" + @log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M + @@hostName = (OMS::Common.get_hostname) + + @@containerMetadata = {} + @@controllerMapping = {} + @@podInventory = {} + @@lastRefreshTime = '2019-01-01T00:00:00Z' + @@nodeInventory = [] + + def initialize + end + + class << self + + def build_metrics_hash(metrics_to_collect) + @log.debug "Building Hash of Metrics to Collect #{metrics_to_collect}" + metrics_to_collect_arr = metrics_to_collect.split(',').map(&:strip) + metrics_hash = metrics_to_collect_arr.map {|x| [x.downcase,true]}.to_h + @log.info "Metrics Collected : #{metrics_hash}" + return metrics_hash + end + + def ensure_cpu_memory_capacity_set(cpu_capacity, memory_capacity, hostname) + + @log.info "ensure_cpu_memory_capacity_set cpu_capacity #{cpu_capacity} memory_capacity #{memory_capacity}" + if cpu_capacity != 0.0 && memory_capacity != 0.0 + @log.info "CPU And Memory Capacity are already set" + return [cpu_capacity, memory_capacity] + end + + begin + @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + rescue Exception => e + @log.info "Error when getting nodeInventory from kube API. Exception: #{e.class} Message: #{e.message} " + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + end + if !@@nodeInventory.nil? 
+ cpu_capacity_json = KubernetesApiClient.parseNodeLimits(@@nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") + if !cpu_capacity_json.nil? + cpu_capacity_json.each do |cpu_info_node| + if !cpu_info_node['DataItems'][0]['Host'].nil? && cpu_info_node['DataItems'][0]['Host'] == @@hostName + if !cpu_info_node['DataItems'][0]['Collections'][0]['Value'].nil? + cpu_capacity = cpu_info_node['DataItems'][0]['Collections'][0]['Value'] + end + end + end + @log.info "CPU Limit #{cpu_capacity}" + else + @log.info "Error getting cpu_capacity" + end + memory_capacity_json = KubernetesApiClient.parseNodeLimits(@@nodeInventory, "capacity", "memory", "memoryCapacityBytes") + if !memory_capacity_json.nil? + memory_capacity_json.each do |memory_info_node| + if !memory_info_node['DataItems'][0]['Host'].nil? && memory_info_node['DataItems'][0]['Host'] == @@hostName + if !memory_info_node['DataItems'][0]['Collections'][0]['Value'].nil? + memory_capacity = memory_info_node['DataItems'][0]['Collections'][0]['Value'] + end + end + end + @log.info "memory Limit #{memory_capacity}" + else + @log.info "Error getting memory_capacity" + end + return [cpu_capacity, memory_capacity] + end + end + + def getContainerKeyFromInstanceName(instance_name) + if instance_name.nil? 
+ return "" + end + size = instance_name.size + instance_name_elements = instance_name.split("/") + key = [instance_name_elements[9], instance_name_elements[10]].join("/") + return key + end + + def getMonitorInstanceId(log, monitor_id, args = {}) + log.debug "getMonitorInstanceId" + string_to_hash = '' + # Container Level Monitor + if args.key?("cluster_id") && args.key?("node_name") && args.key?("container_key") + string_to_hash = [args['cluster_id'], args['node_name'], args['container_key']].join("/") + elsif args.key?("cluster_id") && args.key?("node_name") + string_to_hash = [args['cluster_id'], args['node_name']].join("/") + elsif args.key?("cluster_id") && args.key?("namespace") && args.key?("controller_name") + string_to_hash = [args['cluster_id'], args['namespace'], args['controller_name']].join("/") + elsif args.key?("cluster_id") && !args.key?("namespace") && !args.key?("controller_name") && !args.key?("container_key") + string_to_hash = [args['cluster_id']].join("/") + end + @log.info "String to Hash : #{string_to_hash}" + return "#{monitor_id}-#{Digest::MD5.hexdigest(string_to_hash)}" + end + + def getClusterLabels + + labels = {} + cluster_id = KubernetesApiClient.getClusterId + region = KubernetesApiClient.getClusterRegion + labels['monitor.azure.com/ClusterId'] = cluster_id + labels['monitor.azure.com/ClusterRegion'] = region + if !cluster_id.nil? 
+ cluster_id_elements = cluster_id.split('/') + azure_sub_id = cluster_id_elements[2] + resource_group = cluster_id_elements[4] + cluster_name = cluster_id_elements[8] + labels['monitor.azure.com/SubscriptionId'] = azure_sub_id + labels['monitor.azure.com/ResourceGroup'] = resource_group + labels['monitor.azure.com/ClusterName'] = cluster_name + end + return labels + end + + def getMonitorLabels(log, monitor_id, key, controller_name, node_name) + log.debug "key : #{key} controller_name #{controller_name} monitor_id #{monitor_id} node_name #{node_name}" + monitor_labels = {} + case monitor_id + when HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthEventsConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID, HealthEventsConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID + log.debug "Getting Monitor labels for Workload/ManagedInfra Monitors #{controller_name} #{@@controllerMapping}" + if !key.nil? #container + monitor_labels['monitor.azure.com/ControllerName'] = getContainerControllerName(key) + monitor_labels['monitor.azure.com/Namespace'] = getContainerNamespace(key) + elsif !controller_name.nil? + monitor_labels['monitor.azure.com/ControllerName'] = controller_name + monitor_labels['monitor.azure.com/Namespace'] = getControllerNamespace(controller_name) + end + return monitor_labels + when HealthEventsConstants::NODE_CPU_MONITOR_ID, HealthEventsConstants::NODE_MEMORY_MONITOR_ID, HealthEventsConstants::NODE_KUBELET_HEALTH_MONITOR_ID, HealthEventsConstants::NODE_CONDITION_MONITOR_ID, HealthEventsConstants::NODE_CONTAINER_RUNTIME_MONITOR_ID + log.debug "Getting Node Labels " + + @@nodeInventory["items"].each do |node| + if !node_name.nil? && !node['metadata']['name'].nil? && node_name == node['metadata']['name'] + log.debug "Matched node name " + if !node["metadata"].nil? && !node["metadata"]["labels"].nil? 
+ monitor_labels = node["metadata"]["labels"] + end + end + end + return monitor_labels + end + end + + def refreshKubernetesApiData(log, hostName) + log.debug "refreshKubernetesApiData" + if ((Time.now.utc - Time.parse(@@lastRefreshTime)) / 60 ) < 5.0 + log.debug "Less than 5 minutes since last refresh" + return + end + + begin + + @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + + if !hostName.nil? + podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods?fieldSelector=spec.nodeName%3D#{hostName}").body) + else + podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods").body) + end + podInventory['items'].each do |pod| + controller_name = pod['metadata']['ownerReferences'][0]['name'] + namespace = pod['metadata']['namespace'] + @@controllerMapping[controller_name] = namespace + #log.debug "controller_name #{controller_name} namespace #{namespace}" + pod['spec']['containers'].each do |container| + key = [pod['metadata']['uid'], container['name']].join('/') + + if !container['resources']['limits'].nil? && !container['resources']['limits']['cpu'].nil? + cpu_limit_value = KubernetesApiClient.getMetricNumericValue('cpu', container['resources']['limits']['cpu']) + else + @log.info "CPU limit not set for container : #{container['name']}. Using Node Capacity" + cpu_limit_value = @cpu_capacity + end + + if !container['resources']['limits'].nil? && !container['resources']['limits']['memory'].nil? + @log.info "Raw Memory Value #{container['resources']['limits']['memory']}" + memory_limit_value = KubernetesApiClient.getMetricNumericValue('memory', container['resources']['limits']['memory']) + else + @log.info "Memory limit not set for container : #{container['name']}. 
Using Node Capacity" + memory_limit_value = @memory_capacity + end + + @@containerMetadata[key] = {"cpuLimit" => cpu_limit_value, "memoryLimit" => memory_limit_value, "controllerName" => controller_name, "namespace" => namespace} + end + end + rescue => e + @log.info "Error Refreshing Container Resource Limits #{e}" + end + # log.info "Controller Mapping #{@@controllerMapping}" + # log.info "Node Inventory #{@@nodeInventory}" + # log.info "Container Metadata #{@@containerMetadata}" + # log.info "------------------------------------" + @@lastRefreshTime = Time.now.utc.iso8601 + end + + def getContainerCpuLimit(key) + if @@containerMetadata.has_key?(key) + return @@containerMetadata[key]['cpuLimit'] + else + return '' + end + end + + def getContainerMemoryLimit(key) + if @@containerMetadata.has_key?(key) + return @@containerMetadata[key]['memoryLimit'] + else + return '' + end + end + + def getContainerControllerName(key) + if @@containerMetadata.has_key?(key) + return @@containerMetadata[key]['controllerName'] + else + return '' + end + end + + def getContainerNamespace(key) + if @@containerMetadata.has_key?(key) + return @@containerMetadata[key]['namespace'] + else + return '' + end + end + + def getControllerNamespace(controller_name) + if @@controllerMapping.has_key?(controller_name) + return @@controllerMapping[controller_name] + else + return '' + end + end + + def getClusterCpuMemoryCapacity + begin + node_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + cluster_cpu_capacity = 0.0 + cluster_memory_capacity = 0.0 + if !node_inventory.empty? + node_inventory['items'].each do |node| + cpu_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "cpu", "cpuCapacityNanoCores") + if !cpu_capacity_json.nil? + cpu_capacity_json.each do |cpu_capacity_node| + if !cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil? 
+ cluster_cpu_capacity += cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'] + end + end + @log.info "Cluster CPU Limit #{cluster_cpu_capacity}" + else + @log.info "Error getting cpu_capacity" + end + memory_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "memory", "memoryCapacityBytes") + if !memory_capacity_json.nil? + memory_capacity_json.each do |memory_capacity_node| + if !memory_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil? + cluster_memory_capacity += memory_capacity_node['DataItems'][0]['Collections'][0]['Value'] + end + end + @log.info "Cluster Memory Limit #{cluster_memory_capacity}" + else + @log.info "Error getting memory_capacity" + end + end + else + @log.info "Unable to get cpu and memory capacity" + return [0.0, 0.0] + end + return [cluster_cpu_capacity, cluster_memory_capacity] + rescue => e + @log.info e + end + end + + + def getResourceSubscription(pod_inventory, metric_name, metric_capacity) + subscription = 0.0 + if !pod_inventory.empty? + pod_inventory['items'].each do |pod| + pod['spec']['containers'].each do |container| + if !container['resources']['requests'].nil? && !container['resources']['requests'][metric_name].nil? + subscription += KubernetesApiClient.getMetricNumericValue(metric_name, container['resources']['requests'][metric_name]) + end + end + end + end + @log.debug "#{metric_name} Subscription #{subscription}" + return subscription + end + + def getHealthMonitorConfig + health_monitor_config = {} + begin + file = File.open('/opt/microsoft/omsagent/plugin/healthconfig.json', "r") + if !file.nil? 
+ fileContents = file.read + health_monitor_config = JSON.parse(fileContents) + file.close + end + rescue => e + @log.info "Error when opening health config file #{e}" + end + return health_monitor_config + end + + def getLogHandle + return @log + end + + def getPodsReadyHash(pod_inventory) + pods_ready_percentage_hash = {} + pod_inventory['items'].each do |pod| + controller_name = pod['metadata']['ownerReferences'][0]['name'] + namespace = pod['metadata']['namespace'] + status = pod['status']['phase'] + + if pods_ready_percentage_hash.key?(controller_name) + total_pods = pods_ready_percentage_hash[controller_name]['totalPods'] + pods_ready = pods_ready_percentage_hash[controller_name]['podsReady'] + else + total_pods = 0 + pods_ready = 0 + end + + total_pods += 1 + if status == 'Running' + pods_ready += 1 + end + pods_ready_percentage_hash[controller_name] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace} + end + + @log.debug "pods_ready_percentage_hash #{pods_ready_percentage_hash}" + return pods_ready_percentage_hash + end + + def getNodeStateFromNodeConditions(node_conditions) + pass = false + node_conditions.each do |condition| + type = condition['type'] + status = condition['status'] + + if ((type == "NetworkUnavailable" || type == "OutOfDisk") && (status == 'True' || status == 'Unknown')) + return "Fail" + elsif ((type == "DiskPressure" || type == "MemoryPressure" || type == "PIDPressure") && (status == 'True' || status == 'Unknown')) + return "Warn" + elsif type == "Ready" && status == 'True' + pass = true + end + end + + if pass + return "Pass" + else + return "Fail" + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/HealthEventsConstants.rb b/source/code/plugin/HealthEventsConstants.rb new file mode 100644 index 000000000..d7d051e41 --- /dev/null +++ b/source/code/plugin/HealthEventsConstants.rb @@ -0,0 +1,21 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +class 
HealthEventsConstants + NODE_CPU_MONITOR_ID = "node_cpu_utilization_percentage" + NODE_MEMORY_MONITOR_ID = "node_memory_utilization_percentage" + NODE_KUBELET_HEALTH_MONITOR_ID = "kubelet_running" + NODE_CONDITION_MONITOR_ID = "node_condition" + NODE_CONTAINER_RUNTIME_MONITOR_ID = "container_manager_runtime_running" + WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID = "is_oversubscribed_cpu" + WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID = "is_oversubscribed_memory" + WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID = "pods_ready_percentage" + WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID = "container_cpu_utilization_percentage" + WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID = "container_memory_utilization_percentage" + MANAGEDINFRA_KUBEAPI_AVAILABLE_MONITOR_ID = "kube_api_up" + MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID = "pods_ready_percentage" + DEFAULT_PASS_PERCENTAGE = 80.0 + DEFAULT_FAIL_PERCENTAGE = 90.0 + DEFAULT_MONITOR_TIMEOUT = 240 #4 hours + DEFAULT_SAMPLES_BEFORE_NOTIFICATION = 3 +end \ No newline at end of file diff --git a/source/code/plugin/HealthMonitorState.rb b/source/code/plugin/HealthMonitorState.rb new file mode 100644 index 000000000..bba2b5af5 --- /dev/null +++ b/source/code/plugin/HealthMonitorState.rb @@ -0,0 +1,113 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +require_relative 'HealthEventsConstants' + +HealthMonitorRecord = Struct.new(:timestamp, :state, :details) do +end + +HealthMonitorInstanceState = Struct.new(:prev_sent_record_time, :prev_sent_record_status, :prev_records) do +end + +class HealthMonitorState + @@instanceStates = {} #hash of monitor_instance_id --> health monitor instance state + @@firstMonitorRecordSent = {} + HEALTH_MONITOR_STATE = {"PASS" => "Pass", "FAIL" => "Fail", "WARNING" => "Warning"} + + class << self + def updateHealthMonitorState(log, monitor_instance_id, health_monitor_record, config) + log.debug "updateHealthMonitorState" + samples_to_keep = 1 + if config.nil? || config['SamplesBeforeNotification'].nil? 
+ samples_to_keep = HealthEventsConstants::DEFAULT_SAMPLES_BEFORE_NOTIFICATION + elsif !config.nil? && config['NotifyInstantly'] + samples_to_keep = 1 + else + samples_to_keep = config['SamplesBeforeNotification'].to_i + end + + if @@instanceStates.key?(monitor_instance_id) + health_monitor_instance_state = @@instanceStates[monitor_instance_id] + health_monitor_records = health_monitor_instance_state.prev_records #This should be an array + + if health_monitor_records.size == samples_to_keep + health_monitor_records.delete_at(0) + end + health_monitor_records.push(health_monitor_record) + health_monitor_instance_state.prev_records = health_monitor_records + @@instanceStates[monitor_instance_id] = health_monitor_instance_state + else + health_monitor_instance_state = HealthMonitorInstanceState.new(health_monitor_record.timestamp, health_monitor_record.state, [health_monitor_record]) + @@instanceStates[monitor_instance_id] = health_monitor_instance_state + end + log.debug "Health Records Count: #{health_monitor_instance_state.prev_records.size}" + end + + def getHealthMonitorState(monitor_instance_id) + return @@instanceStates[monitor_instance_id] + end + + def setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) + @@instanceStates[monitor_instance_id] = health_monitor_instance_state + end + + def getHealthMonitorStatesHash + return @@instanceStates + end + + def computeHealthMonitorState(log, monitor_id, value, config) + log.debug "computeHealthMonitorState" + #log.info "id: #{monitor_id} value: #{value} config: #{config}" + case monitor_id + when HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthEventsConstants::NODE_CPU_MONITOR_ID, HealthEventsConstants::NODE_MEMORY_MONITOR_ID + return getStateForRangeMonitor(log, value, config) + when HealthEventsConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID, 
HealthEventsConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID + getStateForInfraPodsReadyPercentage(log, value, config) + end + end + + def getStateForRangeMonitor(log, value, config) + log.debug "getStateForRangeMonitor" + pass_percentage = 0.0 + (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthEventsConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f + (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthEventsConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f + #log.info "Pass: #{pass_percentage} Fail: #{fail_percentage}" + if value.to_f < pass_percentage.to_f + return HEALTH_MONITOR_STATE['PASS'] + elsif value.to_f > fail_percentage.to_f + return HEALTH_MONITOR_STATE['FAIL'] + else + return HEALTH_MONITOR_STATE['WARNING'] + end + end + + def getStateForInfraPodsReadyPercentage(log, value, config) + log.debug "getStateForInfraPodsReadyPercentage" + log.debug "getStateForInfraPodsReadyPercentage #{config}" + (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthEventsConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f + (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthEventsConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f + log.info " getStateForInfraPodsReadyPercentage Pass: #{pass_percentage} Fail: #{fail_percentage}" + if value.to_f < pass_percentage.to_f + return HEALTH_MONITOR_STATE['FAIL'] + else + return HEALTH_MONITOR_STATE['PASS'] + end + end + + def getStateForWorkloadPodsReadyPercentage(log, value, config) + log.debug "getStateForWorkloadPodsReadyPercentage" + pass_percentage = 0.0 + (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthEventsConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f + (config.nil? || config['FailPercentage'].nil?) ? 
fail_percentage = HealthEventsConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f + log.info "getStateForWorkloadPodsReadyPercentage Pass: #{pass_percentage} Fail: #{fail_percentage}" + if value.to_f > fail_percentage.to_f && value.to_f < pass_percentage.to_f + return HEALTH_MONITOR_STATE['WARNING'] + elsif value.to_f < fail_percentage.to_f + return HEALTH_MONITOR_STATE['FAIL'] + elsif value.to_f == pass_percentage.to_f + return HEALTH_MONITOR_STATE['PASS'] + end + end + end +end + diff --git a/source/code/plugin/HealthSignalReducer.rb b/source/code/plugin/HealthSignalReducer.rb new file mode 100644 index 000000000..f1010b2d2 --- /dev/null +++ b/source/code/plugin/HealthSignalReducer.rb @@ -0,0 +1,138 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +require 'time' +require 'json' +require_relative 'HealthEventTemplates' + +class HealthSignalReducer + class << self + def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, controller_name: nil, node_name: nil) + log.debug "reduceSignal Key : #{key} controller_name: #{controller_name} node_name #{node_name}" + log.debug "monitorConfig #{monitor_config}" + + health_monitor_instance_state = HealthMonitorState.getHealthMonitorState(monitor_instance_id) + log.info "Health Monitor Instance state #{health_monitor_instance_state}" + health_monitor_records = health_monitor_instance_state.prev_records + prev_sent_status = health_monitor_instance_state.prev_sent_record_status + prev_sent_time = health_monitor_instance_state.prev_sent_record_time + monitor_config['MonitorTimeOut'].nil? ? monitor_timeout = HealthEventsConstants::DEFAULT_MONITOR_TIMEOUT : monitor_timeout = monitor_config['MonitorTimeOut'] #minutes + #log.info monitor_timeout + + + if (!monitor_config['NotifyInstantly'].nil? 
&& monitor_config['NotifyInstantly'] == true) + return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, node_name: node_name) + end + + if health_monitor_instance_state.prev_records.size == 1 + #log.info "Only One Record" + return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, controller_name: controller_name, node_name: node_name) + else + latest_record = health_monitor_records[health_monitor_records.size-1] #since we push new records to the end, and remove oldest records from the beginning + latest_record_state = latest_record.state + latest_record_time = latest_record.timestamp #string representation of time + #log.info "Latest Record #{latest_record}" + if latest_record_state.downcase == prev_sent_status.downcase + #log.info "latest_record_state.to_s.downcase == prev_sent_status.to_s.state" + time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60 + #log.info "time elapsed #{time_elapsed}" + if time_elapsed > monitor_timeout # minutes + # update record + health_monitor_instance_state.prev_sent_record_time = latest_record_time + health_monitor_instance_state.prev_sent_record_status = latest_record_state + #log.info "After Updating Monitor State #{health_monitor_instance_state}" + HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) + return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, controller_name: controller_name) + else + #log.info "Monitor timeout not reached #{time_elapsed}" + return [] # dont send anything + end + else # state change from previous sent state to latest record state + #check state of last n records to see if they are all in the same state + if (isStateChangeConsistent(log, health_monitor_records)) + return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, 
key: key, controller_name: controller_name, node_name: node_name) + else + return [] + end + end + end + return [] + end + + def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: nil, controller_name: nil, node_name: nil) + log.debug "formatRecord key:#{key} controller_name: #{controller_name} node_name #{node_name}" + + log.debug "Health Monitor Instance State #{health_monitor_instance_state}" + + labels = HealthEventUtils.getClusterLabels + log.info "Labels : #{labels}" + + monitor_labels = HealthEventUtils.getMonitorLabels(log, monitor_id, key, controller_name, node_name) + log.info "Monitor Labels : #{monitor_labels}" + + if !monitor_labels.nil? + monitor_labels.keys.each do |key| + labels[key] = monitor_labels[key] + end + end + + log.debug "Labels #{labels.to_json.to_s}" + prev_records = health_monitor_instance_state.prev_records + collection_time = prev_records[0].timestamp # the oldest collection time + new_state = health_monitor_instance_state.prev_records[0].state + old_state = health_monitor_instance_state.prev_sent_record_status + + log.debug "monitor_config #{monitor_config}" + if monitor_config.nil? 
+ monitor_config = '' + end + monitor_config = monitor_config.to_json.to_s + log.debug "monitor_config #{monitor_config}" + records = [] + + details = prev_records.each do |record| + + hash_record = { "timestamp" => record.timestamp, "state" => record.state, "details" => record.details} + #log.debug "Hash from Struct #{hash_record}" + #log.debug "monitor_config #{monitor_config}" + records.push(hash_record.to_json.to_s) + end + details = "[#{records.join(',')}]" + time_observed = Time.now.utc.iso8601 + #log.debug "Details: #{details}" + #log.debug "collection_time #{collection_time} time_observed #{time_observed} new_state #{new_state} old_state #{old_state}" + + health_monitor_record = HealthEventTemplates::HealthRecordTemplate % { + labels: labels.to_json.to_s, + monitor_id: monitor_id, + monitor_instance_id: monitor_instance_id, + new_state: new_state, + old_state: old_state, + monitor_details: details, + collection_time: collection_time, + time_observed: time_observed, + monitor_config: monitor_config + } + + log.debug "HealthMonitor Record #{health_monitor_record}" + return_val = JSON.parse(health_monitor_record) + log.debug "Parsed Health Monitor Record" + return [] + end + + def isStateChangeConsistent(log, health_monitor_records) + if health_monitor_records.nil? || health_monitor_records.size == 0 + return false + end + i = 0 + while i < health_monitor_records.size - 1 + #log.info "Prev: #{health_monitor_records[i].state} Current: #{health_monitor_records[i + 1].state}" + if health_monitor_records[i].state != health_monitor_records[i + 1].state + return false + end + i += 1 + end + return true + end + end +end \ No newline at end of file diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index 2cd8cc697..3e13f221f 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -133,6 +133,8 @@ def getClusterId return @@ClusterId if !@@ClusterId.nil? 
#By default initialize ClusterId to ClusterName. # In ACS/On-prem, we need to figure out how we can generate ClusterId + # Dilipr: Spoof the subid by generating md5 hash of cluster name, and taking some constant parts of it. + # e.g. md5 digest is 128 bits = 32 character in hex. Get first 16 and get a guid, and the next 16 to get resource id @@ClusterId = getClusterName begin cluster = ENV["AKS_RESOURCE_ID"] diff --git a/source/code/plugin/filter_cadvisor2mdm.rb b/source/code/plugin/filter_cadvisor2mdm.rb index 94f2107cc..1fd1f3e5c 100644 --- a/source/code/plugin/filter_cadvisor2mdm.rb +++ b/source/code/plugin/filter_cadvisor2mdm.rb @@ -10,45 +10,45 @@ module Fluent class CAdvisor2MdmFilter < Filter Fluent::Plugin.register_filter('filter_cadvisor2mdm', self) - + config_param :enable_log, :integer, :default => 0 config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log' config_param :custom_metrics_azure_regions, :string config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes' - + @@cpu_usage_milli_cores = 'cpuUsageMillicores' @@cpu_usage_nano_cores = 'cpuusagenanocores' @@object_name_k8s_node = 'K8SNode' @@hostName = (OMS::Common.get_hostname) @@custom_metrics_template = ' - { - "time": "%{timestamp}", - "data": { - "baseData": { - "metric": "%{metricName}", - "namespace": "Insights.Container/nodes", - "dimNames": [ + { + "time": "%{timestamp}", + "data": { + "baseData": { + "metric": "%{metricName}", + "namespace": "Insights.Container/nodes", + "dimNames": [ "host" - ], - "series": [ - { - "dimValues": [ + ], + "series": [ + { + "dimValues": [ "%{hostvalue}" - ], + ], "min": %{metricminvalue}, - "max": %{metricmaxvalue}, - "sum": %{metricsumvalue}, - "count": 1 - } - ] - } - } + "max": %{metricmaxvalue}, + "sum": %{metricsumvalue}, + "count": 1 + } + ] + } + } }' - + @@metric_name_metric_percentage_name_hash = { - @@cpu_usage_milli_cores => 
"cpuUsagePercentage", + @@cpu_usage_milli_cores => "cpuUsagePercentage", "memoryRssBytes" => "memoryRssPercentage", - "memoryWorkingSetBytes" => "memoryWorkingSetPercentage" + "memoryWorkingSetBytes" => "memoryWorkingSetPercentage" } @process_incoming_stream = true @@ -61,7 +61,7 @@ def initialize def configure(conf) super @log = nil - + if @enable_log @log = Logger.new(@log_path, 1, 5000000) @log.debug {'Starting filter_cadvisor2mdm plugin'} @@ -73,8 +73,8 @@ def start @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability(@custom_metrics_azure_regions) @metrics_to_collect_hash = build_metrics_hash @log.debug "After check_custom_metrics_availability process_incoming_stream #{@process_incoming_stream}" - - # initialize cpu and memory limit + + # initialize cpu and memory limit if @process_incoming_stream @cpu_capacity = 0.0 @memory_capacity = 0.0 @@ -117,9 +117,9 @@ def filter(tag, time, record) if @memory_capacity != 0.0 percentage_metric_value = metric_value*100/@memory_capacity end - end + end return get_metric_records(record, metric_name, metric_value, percentage_metric_value) - else + else return [] end else @@ -140,13 +140,13 @@ def ensure_cpu_memory_capacity_set return end - begin + begin nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes?fieldSelector=metadata.name%3D#{@@hostName}").body) rescue Exception => e @log.info "Error when getting nodeInventory from kube API. Exception: #{e.class} Message: #{e.message} " ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) end - if !nodeInventory.nil? + if !nodeInventory.nil? cpu_capacity_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") if !cpu_capacity_json.nil? && !cpu_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'].to_s.nil? 
@cpu_capacity = cpu_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'] @@ -163,7 +163,7 @@ def ensure_cpu_memory_capacity_set end end end - + def get_metric_records(record, metric_name, metric_value, percentage_metric_value) records = [] custommetricrecord = @@custom_metrics_template % { @@ -194,14 +194,14 @@ def get_metric_records(record, metric_name, metric_value, percentage_metric_valu return records end - + def filter_stream(tag, es) new_es = MultiEventStream.new ensure_cpu_memory_capacity_set es.each { |time, record| begin filtered_records = filter(tag, time, record) - filtered_records.each {|filtered_record| + filtered_records.each {|filtered_record| new_es.add(time, filtered_record) if filtered_record } if filtered_records rescue => e diff --git a/source/code/plugin/filter_cadvisor_health.rb b/source/code/plugin/filter_cadvisor_health.rb new file mode 100644 index 000000000..6ed53e8b6 --- /dev/null +++ b/source/code/plugin/filter_cadvisor_health.rb @@ -0,0 +1,229 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +module Fluent + require 'logger' + require 'json' + require_relative 'oms_common' + require_relative 'HealthEventUtils' + require_relative 'HealthMonitorState' + + + class CAdvisor2HealthFilter < Filter + Fluent::Plugin.register_filter('filter_cadvisor2health', self) + + config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/health_monitors.log' + config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryRssBytes' + config_param :container_resource_refresh_interval_minutes, :integer, :default => 5 + + @@object_name_k8s_node = 'K8SNode' + @@object_name_k8s_container = 'K8SContainer' + + @@counter_name_cpu = 'cpuusagenanocores' + @@counter_name_memory_rss = 'memoryrssbytes' + + @@health_monitor_config = {} + + @@hostName = (OMS::Common.get_hostname) + @@clusterName = KubernetesApiClient.getClusterName + @@clusterId = KubernetesApiClient.getClusterId + @@clusterRegion = 
KubernetesApiClient.getClusterRegion + + + def initialize + super + @cpu_capacity = 0.0 + @memory_capacity = 0.0 + @last_resource_refresh = DateTime.now.to_time.to_i + @metrics_to_collect_hash = {} + end + + def configure(conf) + super + @log = nil + @log = Logger.new(@log_path, 1, 5000000) + @log.debug {'Starting filter_cadvisor2health plugin'} + end + + def start + super + @metrics_to_collect_hash = HealthEventUtils.build_metrics_hash(@metrics_to_collect) + @log.debug "Calling ensure_cpu_memory_capacity_set cpu_capacity #{@cpu_capacity} memory_capacity #{@memory_capacity}" + node_capacity = HealthEventUtils.ensure_cpu_memory_capacity_set(@cpu_capacity, @memory_capacity) + @cpu_capacity = node_capacity[0] + @memory_capacity = node_capacity[1] + @log.info "CPU Capacity #{@cpu_capacity} Memory Capacity #{@memory_capacity}" + HealthEventUtils.refreshKubernetesApiData(@log, @@hostName) + @@health_monitor_config = HealthEventUtils.getHealthMonitorConfig + end + + def filter_stream(tag, es) + new_es = MultiEventStream.new + HealthEventUtils.refreshKubernetesApiData(@log, @hostName) + es.each { |time, record| + begin + filtered_records = filter(tag, time, record) + filtered_records.each {|filtered_record| + new_es.add(time, filtered_record) if filtered_record + } if filtered_records + rescue => e + router.emit_error_event(tag, time, record, e) + end + } + new_es + end + + def filter(tag, time, record) + begin + object_name = record['DataItems'][0]['ObjectName'] + + counter_name = record['DataItems'][0]['Collections'][0]['CounterName'].downcase + + if @metrics_to_collect_hash.key?(counter_name.downcase) + metric_value = record['DataItems'][0]['Collections'][0]['Value'] + case object_name + when @@object_name_k8s_container + case counter_name.downcase + when @@counter_name_cpu + # @log.debug "Object Name #{object_name}" + # @log.debug "Counter Name #{counter_name}" + # @log.debug "Metric Value #{metric_value}" + return process_container_cpu_record(record, metric_value) + 
when @@counter_name_memory_rss + return process_container_memory_record(record, metric_value) + end + when @@object_name_k8s_node + case counter_name.downcase + when @@counter_name_cpu + process_node_cpu_record(record, metric_value) + when @@counter_name_memory_rss + process_node_memory_record(record, metric_value) + end + end + end + rescue => e + @log.debug "Error in filter #{e}" + @log.debug "backtrace #{e.backtrace}" + return [] + end + end + + def process_container_cpu_record(record, metric_value) + # monitor_id = HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID + # @log.debug "processing container cpu record" + # if record.nil? + # return [] + # else + # instance_name = record['DataItems'][0]['InstanceName'] + # key = HealthEventUtils.getContainerKeyFromInstanceName(instance_name) + # cpu_limit = HealthEventUtils.getContainerCpuLimit(key) + + # if cpu_limit.to_s.empty? + # @log.info "CPU Limit is nil" + # cpu_limit = @cpu_capacity + # end + + # @log.info "cpu limit #{cpu_limit}" + + # percent = (metric_value.to_f/cpu_limit*100).round(2) + # @log.debug "Container #{key} | Percentage of CPU limit: #{percent}" + # state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID]) + # @log.debug "Computed State : #{state}" + # timestamp = record['DataItems'][0]['Timestamp'] + # health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}) + # @log.info health_monitor_record + + # monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) + # @log.info "Monitor Instance Id: #{monitor_instance_id}" + # HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + # return 
HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) + # end + + return [] + end + + def process_container_memory_record(record, metric_value) + # monitor_id = HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID + # @log.debug "processing container memory record" + # if record.nil? + # return [] + # else + # instance_name = record['DataItems'][0]['InstanceName'] + # key = HealthEventUtils.getContainerKeyFromInstanceName(instance_name) + # memory_limit = HealthEventUtils.getContainerMemoryLimit(key) + + # if memory_limit.to_s.empty? + # @log.info "Memory Limit is nil" + # memory_limit = @memory_capacity + # end + + # @log.info "memory limit #{memory_limit}" + + # percent = (metric_value.to_f/memory_limit*100).round(2) + # @log.debug "Container #{key} | Percentage of Memory limit: #{percent}" + # state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID]) + # @log.debug "Computed State : #{state}" + # timestamp = record['DataItems'][0]['Timestamp'] + # health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}) + # @log.info health_monitor_record + + # monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) + # @log.info "Monitor Instance Id: #{monitor_instance_id}" + # HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + # return HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) + # end + + return [] + end + + def process_node_cpu_record(record, metric_value) + monitor_id = HealthEventsConstants::NODE_CPU_MONITOR_ID + @log.debug 
"processing node cpu record" + if record.nil? + return [] + else + instance_name = record['DataItems'][0]['InstanceName'] + @log.info "CPU capacity #{@cpu_capacity}" + + percent = (metric_value.to_f/@cpu_capacity*100).round(2) + @log.debug "Percentage of CPU limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::NODE_CPU_MONITOR_ID]) + @log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}) + @log.info health_monitor_record + + monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) + @log.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + return HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) + end + return [] + end + + def process_node_memory_record(record, metric_value) + monitor_id = HealthEventsConstants::NODE_MEMORY_MONITOR_ID + @log.debug "processing node memory record" + if record.nil? 
+ return [] + else + instance_name = record['DataItems'][0]['InstanceName'] + @log.info "Memory capacity #{@memory_capacity}" + + percent = (metric_value.to_f/@memory_capacity*100).round(2) + @log.debug "Percentage of Memory limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::NODE_MEMORY_MONITOR_ID]) + @log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value/1000000.to_f, "memoryUtilizationPercentage" => percent}) + @log.info health_monitor_record + + monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) + @log.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + return HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) + end + return [] + end + end +end diff --git a/source/code/plugin/filter_health_cpu_memory.rb b/source/code/plugin/filter_health_cpu_memory.rb index 03afaa3ce..cf55d9a3a 100644 --- a/source/code/plugin/filter_health_cpu_memory.rb +++ b/source/code/plugin/filter_health_cpu_memory.rb @@ -96,10 +96,6 @@ def start healthConfigObject = JSON.parse(fileContents) file.close if !healthConfigObject.nil? 
- # memPassPercent = healthConfigObject["memoryPassPercentage"] - # memFailPercent = healthConfigObject["memoryFailPercentage"] - # cpuPassPercent = healthConfigObject["cpuPassPercentage"] - # cpuFailPercent = healthConfigObject["cpuFailPercentage"] cpuPassPercent = healthConfigObject["NodeCpuMonitor"]["PassPercentage"] cpuFailPercent = healthConfigObject["NodeCpuMonitor"]["FailPercentage"] memPassPercent = healthConfigObject["NodeMemoryRssMonitor"]["PassPercentage"] diff --git a/source/code/plugin/healthconfig.json b/source/code/plugin/healthconfig.json new file mode 100644 index 000000000..70e990760 --- /dev/null +++ b/source/code/plugin/healthconfig.json @@ -0,0 +1,68 @@ +{ + "node_cpu_utilization_percentage": { + "PassPercentage": 2.0, + "FailPercentage": 2.0, + "SamplesBeforeNotification": 3, + "NotifyInstantly" : false, + "MonitorTimeOut": 5 + }, + "node_memory_utilization_percentage": { + "PassPercentage": 2.0, + "FailPercentage": 2.0, + "SamplesBeforeNotification": 3, + "NotifyInstantly" : false, + "MonitorTimeOut": 5 + }, + "container_manager_runtime_running": { + "MonitorTimeOut": 3, + "NotifyInstantly" : true + }, + "kubelet_running": { + "MonitorTimeOut": 5, + "NotifyInstantly" : true + }, + "node_condition": { + "MonitorTimeOut": 5, + "NotifyInstantly" : true + }, + "is_oversubscribed_cpu": { + "MonitorTimeOut": 5, + "NotifyInstantly" : true + }, + "is_oversubscribed_memory": { + "MonitorTimeOut": 5, + "NotifyInstantly" : true + }, + "container_cpu_utilization_percentage": { + "PassPercentage": 2.0, + "FailPercentage": 2.0, + "SamplesBeforeNotification": 3, + "NotifyInstantly" : false, + "MonitorTimeOut": 5 + }, + "container_memory_utilization_percentage": { + "PassPercentage": 2.0, + "FailPercentage": 2.0, + "SamplesBeforeNotification": 3, + "NotifyInstantly" : false, + "MonitorTimeOut": 5 + }, + "workload_pods_ready_percentage" : { + "PassPercentage": 100.0, + "FailPercentage": 90.0, + "SamplesBeforeNotification": 2, + "NotifyInstantly" : false, + 
"MonitorTimeOut": 5 + }, + "system_pods_ready_percentage" : { + "PassPercentage": 100.0, + "FailPercentage": 90.0, + "SamplesBeforeNotification": 2, + "NotifyInstantly" : false, + "MonitorTimeOut": 5 + }, + "kube_api_up": { + "MonitorTimeOut": 3, + "NotifyInstantly" : true + } +} \ No newline at end of file diff --git a/source/code/plugin/in_health_kubeapidata.rb b/source/code/plugin/in_health_kubeapidata.rb new file mode 100644 index 000000000..0a67f249a --- /dev/null +++ b/source/code/plugin/in_health_kubeapidata.rb @@ -0,0 +1,248 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +module Fluent + class KubeApiDataHealthInput < Input + Plugin.register_input("kubeapidatahealth", self) + + @@clusterCpuCapacity = 0.0 + @@clusterMemoryCapacity = 0.0 + + def initialize + super + require "yaml" + require "json" + + require_relative "KubernetesApiClient" + require_relative "oms_common" + require_relative "omslog" + require_relative "ApplicationInsightsUtility" + require_relative "DockerApiClient" + require_relative 'HealthEventUtils' + require_relative 'HealthMonitorState' + end + + config_param :run_interval, :time, :default => "1m" + config_param :tag, :string, :default => "oms.containerinsights.KubeApiDataHealth" + + def configure(conf) + super + end + + def start + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) + + @@clusterName = KubernetesApiClient.getClusterName + @@clusterId = KubernetesApiClient.getClusterId + @@clusterRegion = KubernetesApiClient.getClusterRegion + cluster_capacity = HealthEventUtils.getClusterCpuMemoryCapacity + @@clusterCpuCapacity = cluster_capacity[0] + @@clusterMemoryCapacity = cluster_capacity[1] + @@healthMonitorConfig = HealthEventUtils.getHealthMonitorConfig + end + end + + def shutdown + if @run_interval + @mutex.synchronize { + @finished = true + @condition.signal + } + @thread.join + end + end + + def enumerate + begin + $log.info 
"Cluster CPU Capacity: #{@@clusterCpuCapacity} Memory Capacity: #{@@clusterMemoryCapacity}" + currentTime = Time.now + emitTime = currentTime.to_f + batchTime = currentTime.utc.iso8601 + record = {} + eventStream = MultiEventStream.new + + hmlog = HealthEventUtils.getLogHandle + HealthEventUtils.refreshKubernetesApiData(hmlog, nil) + # we do this so that if the call fails, we get a response code/header etc. + node_inventory_response = KubernetesApiClient.getKubeResourceInfo("nodes") + node_inventory = JSON.parse(node_inventory_response.body) + pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods") + pod_inventory = JSON.parse(pod_inventory_response.body) + + if node_inventory_response.code.to_i != 200 + #process_kube_api_up_monitor("Fail", node_inventory_response) + else + #process_kube_api_up_monitor("Pass", node_inventory_response) + end + + if !pod_inventory.nil? + #process_cpu_oversubscribed_monitor(pod_inventory) + #process_memory_oversubscribed_monitor(pod_inventory) + pods_ready_hash = HealthEventUtils.getPodsReadyHash(pod_inventory) + + system_pods = pods_ready_hash.select{|k,v| v['namespace'] == 'kube-system'} + workload_pods = pods_ready_hash.select{|k,v| v['namespace'] != 'kube-system'} + + #process_pods_ready_percentage(system_pods, "system_#{HealthEventsConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID}") + #process_pods_ready_percentage(workload_pods, "workload_#{HealthEventsConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID}") + end + + if !node_inventory.nil? 
+ #process_node_condition_monitor(node_inventory) + end + + cpu_capacity = 0.0 + memory_capacity = 0.0 + hostname = OMS::Common.get_hostname + + capacity = HealthEventUtils.ensure_cpu_memory_capacity_set(cpu_capacity, memory_capacity, hostname) + $log.info "Cpu #{capacity[0]} memory #{capacity[1]}" + + rescue => errorStr + $log.warn("error : #{errorStr.to_s}") + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def process_cpu_oversubscribed_monitor(pod_inventory) + timestamp = Time.now.utc.iso8601 + subscription = HealthEventUtils.getResourceSubscription(pod_inventory,"cpu", @@clusterCpuCapacity) + state = subscription > @@clusterCpuCapacity ? "Fail" : "Pass" + $log.debug "CPU Oversubscribed Monitor State : #{state}" + + #CPU + monitor_id = HealthEventsConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID + health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}) + hmlog = HealthEventUtils.getLogHandle + hmlog.info health_monitor_record + + monitor_instance_id = HealthEventUtils.getMonitorInstanceId(hmlog, monitor_id, {"cluster_id" => @@clusterId}) + hmlog.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) + return HealthSignalReducer.reduceSignal(hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + end + + def process_memory_oversubscribed_monitor(pod_inventory) + timestamp = Time.now.utc.iso8601 + subscription = HealthEventUtils.getResourceSubscription(pod_inventory,"memory", @@clusterMemoryCapacity) + state = subscription > @@clusterCpuCapacity ? 
"Fail" : "Pass" + $log.debug "Memory Oversubscribed Monitor State : #{state}" + + #CPU + monitor_id = HealthEventsConstants::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID + health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}) + hmlog = HealthEventUtils.getLogHandle + hmlog.info health_monitor_record + + monitor_instance_id = HealthEventUtils.getMonitorInstanceId(hmlog, monitor_id, {"cluster_id" => @@clusterId}) + hmlog.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) + return HealthSignalReducer.reduceSignal(hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + end + + def process_kube_api_up_monitor(state, response) + timestamp = Time.now.utc.iso8601 + + monitor_id = HealthEventsConstants::MANAGEDINFRA_KUBEAPI_AVAILABLE_MONITOR_ID + details = response.each_header.to_h + details['ResponseCode'] = response.code + health_monitor_record = HealthMonitorRecord.new(timestamp, state, details) + hmlog = HealthEventUtils.getLogHandle + hmlog.info health_monitor_record + + monitor_instance_id = HealthEventUtils.getMonitorInstanceId(hmlog, monitor_id, {"cluster_id" => @@clusterId}) + hmlog.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) + return HealthSignalReducer.reduceSignal(hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + end + + def process_pods_ready_percentage(pods_hash, config_monitor_id) + monitor_id = HealthEventsConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID + monitor_config = @@healthMonitorConfig[config_monitor_id] + hmlog = HealthEventUtils.getLogHandle + + records = [] + pods_hash.keys.each do |key| + controller_name = key + 
total_pods = pods_hash[controller_name]['totalPods'] + pods_ready = pods_hash[controller_name]['podsReady'] + namespace = pods_hash[controller_name]['namespace'] + percent = pods_ready / total_pods * 100 + timestamp = Time.now.utc.iso8601 + + hmlog.debug "process_pods_ready_percentage percent: #{percent}" + + if config_monitor_id.downcase.start_with?("system") + state = HealthMonitorState.getStateForInfraPodsReadyPercentage(hmlog, percent, monitor_config) + hmlog.debug "getStateForInfraPodsReadyPercentage State: #{state}" + elsif config_monitor_id.downcase.start_with?("workload") + state = HealthMonitorState.getStateForWorkloadPodsReadyPercentage(hmlog, percent, monitor_config) + hmlog.debug "getStateForWorkloadPodsReadyPercentage State: #{state}" + end + + health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"totalPods" => total_pods, "podsReady" => pods_ready, "controllerName" => controller_name}) + hmlog.info health_monitor_record + monitor_instance_id = HealthEventUtils.getMonitorInstanceId(hmlog, monitor_id, {"cluster_id" => @@clusterId, "controller_name" => controller_name, "namespace" => namespace}) + hmlog.info "Monitor Instance Id: #{monitor_instance_id}" + hmlog.info "controller_name #{controller_name}" + HealthMonitorState.updateHealthMonitorState(hmlog, monitor_instance_id, health_monitor_record, monitor_config) + record_a = HealthSignalReducer.reduceSignal(hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) + record_a.each do |r| + records.push(r) + end + end + $log.debug "records count : #{records.size}" + end + + def process_node_condition_monitor(node_inventory) + hmlog = HealthEventUtils.getLogHandle + monitor_id = HealthEventsConstants::NODE_CONDITION_MONITOR_ID + timestamp = Time.now.utc.iso8601 + monitor_config = @@healthMonitorConfig[monitor_id] + if !node_inventory.nil? 
+ node_inventory['items'].each do |node| + node_name = node['metadata']['name'] + conditions = node['status']['conditions'] + state = HealthEventUtils.getNodeStateFromNodeConditions(conditions) + hmlog.debug "Node Name = #{node_name} State = #{state}" + details = {} + conditions.each do |condition| + details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message']} + end + health_monitor_record = HealthMonitorRecord.new(timestamp, state, details) + hmlog.info health_monitor_record + monitor_instance_id = HealthEventUtils.getMonitorInstanceId(hmlog, monitor_id, {"cluster_id" => @@clusterId, "node_name" => node_name}) + hmlog.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(hmlog, monitor_instance_id, health_monitor_record, monitor_config) + return HealthSignalReducer.reduceSignal(hmlog, monitor_id, monitor_instance_id, monitor_config, node_name: node_name) + end + end + + end + + def run_periodic + @mutex.lock + done = @finished + until done + @condition.wait(@mutex, @run_interval) + done = @finished + @mutex.unlock + if !done + begin + $log.info("in_health_docker::run_periodic @ #{Time.now.utc.iso8601}") + enumerate + rescue => errorStr + $log.warn "in_health_docker::run_periodic: enumerate Failed for docker health: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + @mutex.lock + end + @mutex.unlock + end + end # Health_Docker_Input +end # module From b8dcc529666a2cf11a0d76ecd5e5a9a56f58c22e Mon Sep 17 00:00:00 2001 From: r-dilip Date: Sat, 9 Mar 2019 09:52:53 -0800 Subject: [PATCH 17/90] Fixed Bugs for NotifyInstantly Monitor --- source/code/plugin/HealthEventUtils.rb | 21 +-- source/code/plugin/HealthMonitorState.rb | 14 +- source/code/plugin/HealthSignalReducer.rb | 66 +++++++--- source/code/plugin/healthconfig.json | 24 ++-- source/code/plugin/in_health_kubeapidata.rb | 139 +++++++++++--------- 5 files changed, 154 insertions(+), 110 
deletions(-) diff --git a/source/code/plugin/HealthEventUtils.rb b/source/code/plugin/HealthEventUtils.rb index fbca6243f..122d1d73f 100644 --- a/source/code/plugin/HealthEventUtils.rb +++ b/source/code/plugin/HealthEventUtils.rb @@ -86,7 +86,7 @@ def getContainerKeyFromInstanceName(instance_name) end def getMonitorInstanceId(log, monitor_id, args = {}) - log.debug "getMonitorInstanceId" + #log.debug "getMonitorInstanceId" string_to_hash = '' # Container Level Monitor if args.key?("cluster_id") && args.key?("node_name") && args.key?("container_key") @@ -98,7 +98,7 @@ def getMonitorInstanceId(log, monitor_id, args = {}) elsif args.key?("cluster_id") && !args.key?("namespace") && !args.key?("controller_name") && !args.key?("container_key") string_to_hash = [args['cluster_id']].join("/") end - @log.info "String to Hash : #{string_to_hash}" + #@log.info "String to Hash : #{string_to_hash}" return "#{monitor_id}-#{Digest::MD5.hexdigest(string_to_hash)}" end @@ -122,11 +122,11 @@ def getClusterLabels end def getMonitorLabels(log, monitor_id, key, controller_name, node_name) - log.debug "key : #{key} controller_name #{controller_name} monitor_id #{monitor_id} node_name #{node_name}" + #log.debug "key : #{key} controller_name #{controller_name} monitor_id #{monitor_id} node_name #{node_name}" monitor_labels = {} case monitor_id when HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthEventsConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID, HealthEventsConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID - log.debug "Getting Monitor labels for Workload/ManagedInfra Monitors #{controller_name} #{@@controllerMapping}" + #log.debug "Getting Monitor labels for Workload/ManagedInfra Monitors #{controller_name} #{@@controllerMapping}" if !key.nil? 
#container monitor_labels['monitor.azure.com/ControllerName'] = getContainerControllerName(key) monitor_labels['monitor.azure.com/Namespace'] = getContainerNamespace(key) @@ -136,11 +136,11 @@ def getMonitorLabels(log, monitor_id, key, controller_name, node_name) end return monitor_labels when HealthEventsConstants::NODE_CPU_MONITOR_ID, HealthEventsConstants::NODE_MEMORY_MONITOR_ID, HealthEventsConstants::NODE_KUBELET_HEALTH_MONITOR_ID, HealthEventsConstants::NODE_CONDITION_MONITOR_ID, HealthEventsConstants::NODE_CONTAINER_RUNTIME_MONITOR_ID - log.debug "Getting Node Labels " + #log.debug "Getting Node Labels " @@nodeInventory["items"].each do |node| if !node_name.nil? && !node['metadata']['name'].nil? && node_name == node['metadata']['name'] - log.debug "Matched node name " + #log.debug "Matched node name " if !node["metadata"].nil? && !node["metadata"]["labels"].nil? monitor_labels = node["metadata"]["labels"] end @@ -151,7 +151,7 @@ def getMonitorLabels(log, monitor_id, key, controller_name, node_name) end def refreshKubernetesApiData(log, hostName) - log.debug "refreshKubernetesApiData" + #log.debug "refreshKubernetesApiData" if ((Time.now.utc - Time.parse(@@lastRefreshTime)) / 60 ) < 5.0 log.debug "Less than 5 minutes since last refresh" return @@ -178,11 +178,12 @@ def refreshKubernetesApiData(log, hostName) cpu_limit_value = KubernetesApiClient.getMetricNumericValue('cpu', container['resources']['limits']['cpu']) else @log.info "CPU limit not set for container : #{container['name']}. Using Node Capacity" + #TODO: Send warning health event cpu_limit_value = @cpu_capacity end if !container['resources']['limits'].nil? && !container['resources']['limits']['memory'].nil? 
- @log.info "Raw Memory Value #{container['resources']['limits']['memory']}" + #@log.info "Raw Memory Value #{container['resources']['limits']['memory']}" memory_limit_value = KubernetesApiClient.getMetricNumericValue('memory', container['resources']['limits']['memory']) else @log.info "Memory limit not set for container : #{container['name']}. Using Node Capacity" @@ -294,7 +295,7 @@ def getResourceSubscription(pod_inventory, metric_name, metric_capacity) end end end - @log.debug "#{metric_name} Subscription #{subscription}" + #@log.debug "#{metric_name} Subscription #{subscription}" return subscription end @@ -339,7 +340,7 @@ def getPodsReadyHash(pod_inventory) pods_ready_percentage_hash[controller_name] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace} end - @log.debug "pods_ready_percentage_hash #{pods_ready_percentage_hash}" + #@log.debug "pods_ready_percentage_hash #{pods_ready_percentage_hash}" return pods_ready_percentage_hash end diff --git a/source/code/plugin/HealthMonitorState.rb b/source/code/plugin/HealthMonitorState.rb index bba2b5af5..a73ff47de 100644 --- a/source/code/plugin/HealthMonitorState.rb +++ b/source/code/plugin/HealthMonitorState.rb @@ -16,7 +16,7 @@ class HealthMonitorState class << self def updateHealthMonitorState(log, monitor_instance_id, health_monitor_record, config) - log.debug "updateHealthMonitorState" + #log.debug "updateHealthMonitorState" samples_to_keep = 1 if config.nil? || config['SamplesBeforeNotification'].nil? 
samples_to_keep = HealthEventsConstants::DEFAULT_SAMPLES_BEFORE_NOTIFICATION @@ -40,7 +40,7 @@ def updateHealthMonitorState(log, monitor_instance_id, health_monitor_record, co health_monitor_instance_state = HealthMonitorInstanceState.new(health_monitor_record.timestamp, health_monitor_record.state, [health_monitor_record]) @@instanceStates[monitor_instance_id] = health_monitor_instance_state end - log.debug "Health Records Count: #{health_monitor_instance_state.prev_records.size}" + #log.debug "Health Records Count: #{health_monitor_instance_state.prev_records.size}" end def getHealthMonitorState(monitor_instance_id) @@ -82,11 +82,11 @@ def getStateForRangeMonitor(log, value, config) end def getStateForInfraPodsReadyPercentage(log, value, config) - log.debug "getStateForInfraPodsReadyPercentage" - log.debug "getStateForInfraPodsReadyPercentage #{config}" + # log.debug "getStateForInfraPodsReadyPercentage" + # log.debug "getStateForInfraPodsReadyPercentage #{config}" (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthEventsConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthEventsConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f - log.info " getStateForInfraPodsReadyPercentage Pass: #{pass_percentage} Fail: #{fail_percentage}" + # log.info " getStateForInfraPodsReadyPercentage Pass: #{pass_percentage} Fail: #{fail_percentage}" if value.to_f < pass_percentage.to_f return HEALTH_MONITOR_STATE['FAIL'] else @@ -95,11 +95,11 @@ def getStateForInfraPodsReadyPercentage(log, value, config) end def getStateForWorkloadPodsReadyPercentage(log, value, config) - log.debug "getStateForWorkloadPodsReadyPercentage" + # log.debug "getStateForWorkloadPodsReadyPercentage" pass_percentage = 0.0 (config.nil? || config['PassPercentage'].nil?) ? 
pass_percentage = HealthEventsConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthEventsConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f - log.info "getStateForWorkloadPodsReadyPercentage Pass: #{pass_percentage} Fail: #{fail_percentage}" + #log.info "getStateForWorkloadPodsReadyPercentage Pass: #{pass_percentage} Fail: #{fail_percentage}" if value.to_f > fail_percentage.to_f && value.to_f < pass_percentage.to_f return HEALTH_MONITOR_STATE['WARNING'] elsif value.to_f < fail_percentage.to_f diff --git a/source/code/plugin/HealthSignalReducer.rb b/source/code/plugin/HealthSignalReducer.rb index f1010b2d2..0b46d8f3f 100644 --- a/source/code/plugin/HealthSignalReducer.rb +++ b/source/code/plugin/HealthSignalReducer.rb @@ -6,13 +6,15 @@ require_relative 'HealthEventTemplates' class HealthSignalReducer + + @@firstMonitorRecordSent = {} class << self def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, controller_name: nil, node_name: nil) - log.debug "reduceSignal Key : #{key} controller_name: #{controller_name} node_name #{node_name}" - log.debug "monitorConfig #{monitor_config}" + #log.debug "reduceSignal Key : #{key} controller_name: #{controller_name} node_name #{node_name}" + #log.debug "monitorConfig #{monitor_config}" health_monitor_instance_state = HealthMonitorState.getHealthMonitorState(monitor_instance_id) - log.info "Health Monitor Instance state #{health_monitor_instance_state}" + #log.info "Health Monitor Instance state #{health_monitor_instance_state}" health_monitor_records = health_monitor_instance_state.prev_records prev_sent_status = health_monitor_instance_state.prev_sent_record_status prev_sent_time = health_monitor_instance_state.prev_sent_record_time @@ -21,7 +23,29 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, if 
(!monitor_config['NotifyInstantly'].nil? && monitor_config['NotifyInstantly'] == true) - return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, node_name: node_name) + latest_record = health_monitor_records[health_monitor_records.size-1] #since we push new records to the end, and remove oldest records from the beginning + latest_record_state = latest_record.state + latest_record_time = latest_record.timestamp #string representation of time + #log.info "Latest Record #{latest_record}" + if latest_record_state.downcase == prev_sent_status.downcase && @@firstMonitorRecordSent.key?(monitor_id) + #log.info "latest_record_state.to_s.downcase == prev_sent_status.to_s.state" + time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60 + #log.info "time elapsed #{time_elapsed}" + if time_elapsed > monitor_timeout # minutes + # update record + health_monitor_instance_state.prev_sent_record_time = latest_record_time + health_monitor_instance_state.prev_sent_record_status = latest_record_state + #log.info "After Updating Monitor State #{health_monitor_instance_state}" + HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) + return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, node_name: node_name) + else + #log.info "Monitor timeout not reached #{time_elapsed}" + #log.info "Timeout not reached for #{monitor_id}" + return nil# dont send anything + end + else + return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, node_name: node_name) + end end if health_monitor_instance_state.prev_records.size == 1 @@ -45,30 +69,33 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, controller_name: controller_name) else #log.info 
"Monitor timeout not reached #{time_elapsed}" - return [] # dont send anything + #log.info "Timeout not reached for #{monitor_id}" + return nil# dont send anything end else # state change from previous sent state to latest record state #check state of last n records to see if they are all in the same state if (isStateChangeConsistent(log, health_monitor_records)) return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, controller_name: controller_name, node_name: node_name) else - return [] + log.debug "No consistent state change for monitor #{monitor_id}" + return nil end end end - return [] + log.debug "No new information for monitor #{monitor_id}" + return nil end def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: nil, controller_name: nil, node_name: nil) - log.debug "formatRecord key:#{key} controller_name: #{controller_name} node_name #{node_name}" + #log.debug "formatRecord key:#{key} controller_name: #{controller_name} node_name #{node_name}" - log.debug "Health Monitor Instance State #{health_monitor_instance_state}" + #log.debug "Health Monitor Instance State #{health_monitor_instance_state}" labels = HealthEventUtils.getClusterLabels - log.info "Labels : #{labels}" + #log.info "Labels : #{labels}" monitor_labels = HealthEventUtils.getMonitorLabels(log, monitor_id, key, controller_name, node_name) - log.info "Monitor Labels : #{monitor_labels}" + #log.info "Monitor Labels : #{monitor_labels}" if !monitor_labels.nil? 
monitor_labels.keys.each do |key| @@ -76,18 +103,18 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s end end - log.debug "Labels #{labels.to_json.to_s}" + #log.debug "Labels #{labels.to_json.to_s}" prev_records = health_monitor_instance_state.prev_records collection_time = prev_records[0].timestamp # the oldest collection time new_state = health_monitor_instance_state.prev_records[0].state old_state = health_monitor_instance_state.prev_sent_record_status - log.debug "monitor_config #{monitor_config}" + #log.debug "monitor_config #{monitor_config}" if monitor_config.nil? monitor_config = '' end monitor_config = monitor_config.to_json.to_s - log.debug "monitor_config #{monitor_config}" + #log.debug "monitor_config #{monitor_config}" records = [] details = prev_records.each do |record| @@ -114,10 +141,15 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s monitor_config: monitor_config } - log.debug "HealthMonitor Record #{health_monitor_record}" + #log.debug "HealthMonitor Record #{health_monitor_record}" return_val = JSON.parse(health_monitor_record) - log.debug "Parsed Health Monitor Record" - return [] + #log.debug "Parsed Health Monitor Record for #{monitor_id}" + + if !@@firstMonitorRecordSent.key?(monitor_id) + @@firstMonitorRecordSent[monitor_id] = true + end + + return return_val end def isStateChangeConsistent(log, health_monitor_records) diff --git a/source/code/plugin/healthconfig.json b/source/code/plugin/healthconfig.json index 70e990760..e3ac7cb86 100644 --- a/source/code/plugin/healthconfig.json +++ b/source/code/plugin/healthconfig.json @@ -4,33 +4,33 @@ "FailPercentage": 2.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, - "MonitorTimeOut": 5 + "MonitorTimeOut": 1 }, "node_memory_utilization_percentage": { "PassPercentage": 2.0, "FailPercentage": 2.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, - "MonitorTimeOut": 5 + "MonitorTimeOut": 1 }, 
"container_manager_runtime_running": { - "MonitorTimeOut": 3, + "MonitorTimeOut": 1, "NotifyInstantly" : true }, "kubelet_running": { - "MonitorTimeOut": 5, + "MonitorTimeOut": 1, "NotifyInstantly" : true }, "node_condition": { - "MonitorTimeOut": 5, + "MonitorTimeOut": 1, "NotifyInstantly" : true }, "is_oversubscribed_cpu": { - "MonitorTimeOut": 5, + "MonitorTimeOut": 1, "NotifyInstantly" : true }, "is_oversubscribed_memory": { - "MonitorTimeOut": 5, + "MonitorTimeOut": 1, "NotifyInstantly" : true }, "container_cpu_utilization_percentage": { @@ -38,31 +38,31 @@ "FailPercentage": 2.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, - "MonitorTimeOut": 5 + "MonitorTimeOut": 1 }, "container_memory_utilization_percentage": { "PassPercentage": 2.0, "FailPercentage": 2.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, - "MonitorTimeOut": 5 + "MonitorTimeOut": 1 }, "workload_pods_ready_percentage" : { "PassPercentage": 100.0, "FailPercentage": 90.0, "SamplesBeforeNotification": 2, "NotifyInstantly" : false, - "MonitorTimeOut": 5 + "MonitorTimeOut": 1 }, "system_pods_ready_percentage" : { "PassPercentage": 100.0, "FailPercentage": 90.0, "SamplesBeforeNotification": 2, "NotifyInstantly" : false, - "MonitorTimeOut": 5 + "MonitorTimeOut": 1 }, "kube_api_up": { - "MonitorTimeOut": 3, + "MonitorTimeOut": 1, "NotifyInstantly" : true } } \ No newline at end of file diff --git a/source/code/plugin/in_health_kubeapidata.rb b/source/code/plugin/in_health_kubeapidata.rb index 0a67f249a..e95f4bf3b 100644 --- a/source/code/plugin/in_health_kubeapidata.rb +++ b/source/code/plugin/in_health_kubeapidata.rb @@ -23,7 +23,7 @@ def initialize end config_param :run_interval, :time, :default => "1m" - config_param :tag, :string, :default => "oms.containerinsights.KubeApiDataHealth" + config_param :tag, :string, :default => "oms.containerinsights.ContainerInsightsHealth" def configure(conf) super @@ -43,6 +43,8 @@ def start @@clusterCpuCapacity = cluster_capacity[0] 
@@clusterMemoryCapacity = cluster_capacity[1] @@healthMonitorConfig = HealthEventUtils.getHealthMonitorConfig + @@hmlog = HealthEventUtils.getLogHandle + @@hmlog.info "Cluster CPU Capacity: #{@@clusterCpuCapacity} Memory Capacity: #{@@clusterMemoryCapacity}" end end @@ -58,15 +60,14 @@ def shutdown def enumerate begin - $log.info "Cluster CPU Capacity: #{@@clusterCpuCapacity} Memory Capacity: #{@@clusterMemoryCapacity}" currentTime = Time.now emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 - record = {} + health_monitor_records = [] eventStream = MultiEventStream.new hmlog = HealthEventUtils.getLogHandle - HealthEventUtils.refreshKubernetesApiData(hmlog, nil) + HealthEventUtils.refreshKubernetesApiData(@@hmlog, nil) # we do this so that if the call fails, we get a response code/header etc. node_inventory_response = KubernetesApiClient.getKubeResourceInfo("nodes") node_inventory = JSON.parse(node_inventory_response.body) @@ -74,36 +75,49 @@ def enumerate pod_inventory = JSON.parse(pod_inventory_response.body) if node_inventory_response.code.to_i != 200 - #process_kube_api_up_monitor("Fail", node_inventory_response) + record = process_kube_api_up_monitor("Fail", node_inventory_response) + health_monitor_records.push(record) if record else - #process_kube_api_up_monitor("Pass", node_inventory_response) + record = process_kube_api_up_monitor("Pass", node_inventory_response) + health_monitor_records.push(record) if record end if !pod_inventory.nil? 
- #process_cpu_oversubscribed_monitor(pod_inventory) - #process_memory_oversubscribed_monitor(pod_inventory) + record = process_cpu_oversubscribed_monitor(pod_inventory) + health_monitor_records.push(record) if record + record = process_memory_oversubscribed_monitor(pod_inventory) + health_monitor_records.push(record) if record pods_ready_hash = HealthEventUtils.getPodsReadyHash(pod_inventory) system_pods = pods_ready_hash.select{|k,v| v['namespace'] == 'kube-system'} workload_pods = pods_ready_hash.select{|k,v| v['namespace'] != 'kube-system'} - #process_pods_ready_percentage(system_pods, "system_#{HealthEventsConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID}") - #process_pods_ready_percentage(workload_pods, "workload_#{HealthEventsConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID}") + system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, "system_#{HealthEventsConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID}") + system_pods_ready_percentage_records.each do |record| + health_monitor_records.push(record) if record + end + + workload_pods_ready_percentage_records = process_pods_ready_percentage(workload_pods, "workload_#{HealthEventsConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID}") + workload_pods_ready_percentage_records.each do |record| + health_monitor_records.push(record) if record + end end if !node_inventory.nil? 
- #process_node_condition_monitor(node_inventory) + node_condition_records = process_node_condition_monitor(node_inventory) + node_condition_records.each do |record| + health_monitor_records.push(record) if record + end end - cpu_capacity = 0.0 - memory_capacity = 0.0 - hostname = OMS::Common.get_hostname - - capacity = HealthEventUtils.ensure_cpu_memory_capacity_set(cpu_capacity, memory_capacity, hostname) - $log.info "Cpu #{capacity[0]} memory #{capacity[1]}" + @@hmlog.info "Health Monitor Records Size #{health_monitor_records.size}" + health_monitor_records.each do |record| + eventStream.add(emitTime, record) + end + router.emit_stream(@tag, eventStream) if eventStream rescue => errorStr - $log.warn("error : #{errorStr.to_s}") + @@hmlog.warn("error : #{errorStr.to_s}") ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -112,36 +126,37 @@ def process_cpu_oversubscribed_monitor(pod_inventory) timestamp = Time.now.utc.iso8601 subscription = HealthEventUtils.getResourceSubscription(pod_inventory,"cpu", @@clusterCpuCapacity) state = subscription > @@clusterCpuCapacity ? 
"Fail" : "Pass" - $log.debug "CPU Oversubscribed Monitor State : #{state}" + #@@hmlog.debug "CPU Oversubscribed Monitor State : #{state}" #CPU monitor_id = HealthEventsConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}) - hmlog = HealthEventUtils.getLogHandle - hmlog.info health_monitor_record - - monitor_instance_id = HealthEventUtils.getMonitorInstanceId(hmlog, monitor_id, {"cluster_id" => @@clusterId}) - hmlog.info "Monitor Instance Id: #{monitor_instance_id}" - HealthMonitorState.updateHealthMonitorState(hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) - return HealthSignalReducer.reduceSignal(hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + # @@hmlog.info health_monitor_record + + monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) + #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) + record = HealthSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + @@hmlog.info "Successfully processed process_cpu_oversubscribed_monitor" + return record.nil? ? nil : record end def process_memory_oversubscribed_monitor(pod_inventory) timestamp = Time.now.utc.iso8601 subscription = HealthEventUtils.getResourceSubscription(pod_inventory,"memory", @@clusterMemoryCapacity) - state = subscription > @@clusterCpuCapacity ? "Fail" : "Pass" - $log.debug "Memory Oversubscribed Monitor State : #{state}" + state = subscription > @@clusterMemoryCapacity ? 
"Fail" : "Pass" + #@@hmlog.debug "Memory Oversubscribed Monitor State : #{state}" #CPU monitor_id = HealthEventsConstants::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}) hmlog = HealthEventUtils.getLogHandle - hmlog.info health_monitor_record - monitor_instance_id = HealthEventUtils.getMonitorInstanceId(hmlog, monitor_id, {"cluster_id" => @@clusterId}) - hmlog.info "Monitor Instance Id: #{monitor_instance_id}" - HealthMonitorState.updateHealthMonitorState(hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) - return HealthSignalReducer.reduceSignal(hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) + HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) + record = HealthSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + @@hmlog.info "Successfully processed process_memory_oversubscribed_monitor" + return record.nil? ? 
nil : record end def process_kube_api_up_monitor(state, response) @@ -152,12 +167,14 @@ def process_kube_api_up_monitor(state, response) details['ResponseCode'] = response.code health_monitor_record = HealthMonitorRecord.new(timestamp, state, details) hmlog = HealthEventUtils.getLogHandle - hmlog.info health_monitor_record - - monitor_instance_id = HealthEventUtils.getMonitorInstanceId(hmlog, monitor_id, {"cluster_id" => @@clusterId}) - hmlog.info "Monitor Instance Id: #{monitor_instance_id}" - HealthMonitorState.updateHealthMonitorState(hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) - return HealthSignalReducer.reduceSignal(hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + #hmlog.info health_monitor_record + + monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) + #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) + record = HealthSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + @@hmlog.info "Successfully processed process_kube_api_up_monitor" + return record.nil? ? 
nil : record end def process_pods_ready_percentage(pods_hash, config_monitor_id) @@ -174,28 +191,21 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) percent = pods_ready / total_pods * 100 timestamp = Time.now.utc.iso8601 - hmlog.debug "process_pods_ready_percentage percent: #{percent}" - if config_monitor_id.downcase.start_with?("system") - state = HealthMonitorState.getStateForInfraPodsReadyPercentage(hmlog, percent, monitor_config) - hmlog.debug "getStateForInfraPodsReadyPercentage State: #{state}" + state = HealthMonitorState.getStateForInfraPodsReadyPercentage(@@hmlog, percent, monitor_config) elsif config_monitor_id.downcase.start_with?("workload") - state = HealthMonitorState.getStateForWorkloadPodsReadyPercentage(hmlog, percent, monitor_config) - hmlog.debug "getStateForWorkloadPodsReadyPercentage State: #{state}" + state = HealthMonitorState.getStateForWorkloadPodsReadyPercentage(@@hmlog, percent, monitor_config) end health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"totalPods" => total_pods, "podsReady" => pods_ready, "controllerName" => controller_name}) - hmlog.info health_monitor_record - monitor_instance_id = HealthEventUtils.getMonitorInstanceId(hmlog, monitor_id, {"cluster_id" => @@clusterId, "controller_name" => controller_name, "namespace" => namespace}) - hmlog.info "Monitor Instance Id: #{monitor_instance_id}" - hmlog.info "controller_name #{controller_name}" - HealthMonitorState.updateHealthMonitorState(hmlog, monitor_instance_id, health_monitor_record, monitor_config) - record_a = HealthSignalReducer.reduceSignal(hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) - record_a.each do |r| - records.push(r) - end + #hmlog.info health_monitor_record + monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId, "controller_name" => controller_name, "namespace" => namespace}) + 
HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) + record = HealthSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) + records.push(record) end - $log.debug "records count : #{records.size}" + @@hmlog.info "Successfully processed pods_ready_percentage for #{config_monitor_id} #{records.size}" + return records end def process_node_condition_monitor(node_inventory) @@ -203,25 +213,26 @@ def process_node_condition_monitor(node_inventory) monitor_id = HealthEventsConstants::NODE_CONDITION_MONITOR_ID timestamp = Time.now.utc.iso8601 monitor_config = @@healthMonitorConfig[monitor_id] + node_condition_monitor_records = [] if !node_inventory.nil? node_inventory['items'].each do |node| node_name = node['metadata']['name'] conditions = node['status']['conditions'] state = HealthEventUtils.getNodeStateFromNodeConditions(conditions) - hmlog.debug "Node Name = #{node_name} State = #{state}" + #hmlog.debug "Node Name = #{node_name} State = #{state}" details = {} conditions.each do |condition| details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message']} end health_monitor_record = HealthMonitorRecord.new(timestamp, state, details) - hmlog.info health_monitor_record - monitor_instance_id = HealthEventUtils.getMonitorInstanceId(hmlog, monitor_id, {"cluster_id" => @@clusterId, "node_name" => node_name}) - hmlog.info "Monitor Instance Id: #{monitor_instance_id}" - HealthMonitorState.updateHealthMonitorState(hmlog, monitor_instance_id, health_monitor_record, monitor_config) - return HealthSignalReducer.reduceSignal(hmlog, monitor_id, monitor_instance_id, monitor_config, node_name: node_name) + monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId, "node_name" => node_name}) + HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, 
health_monitor_record, monitor_config) + record = HealthSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, node_name: node_name) + node_condition_monitor_records.push(record) end end - + @@hmlog.info "Successfully processed process_node_condition_monitor #{node_condition_monitor_records.size}" + return node_condition_monitor_records end def run_periodic @@ -233,10 +244,10 @@ def run_periodic @mutex.unlock if !done begin - $log.info("in_health_docker::run_periodic @ #{Time.now.utc.iso8601}") + @@hmlog.info("in_health_kubeapidata::run_periodic @ #{Time.now.utc.iso8601}") enumerate rescue => errorStr - $log.warn "in_health_docker::run_periodic: enumerate Failed for docker health: #{errorStr}" + @@hmlog.warn "in_health_kubeapidata::run_periodic: enumerate Failed for kubeapi sourced data health: #{errorStr}" ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end From 1a2d8ce09c743af00ba93ef398cb46f31464cc55 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Sat, 9 Mar 2019 12:20:24 -0800 Subject: [PATCH 18/90] Health and Input plugins, logs cleaned up --- source/code/plugin/HealthEventUtils.rb | 6 +- source/code/plugin/HealthMonitorState.rb | 4 +- source/code/plugin/filter_cadvisor_health.rb | 196 +++++++++++-------- source/code/plugin/healthconfig.json | 24 +-- 4 files changed, 127 insertions(+), 103 deletions(-) diff --git a/source/code/plugin/HealthEventUtils.rb b/source/code/plugin/HealthEventUtils.rb index 122d1d73f..fe796d397 100644 --- a/source/code/plugin/HealthEventUtils.rb +++ b/source/code/plugin/HealthEventUtils.rb @@ -203,11 +203,11 @@ def refreshKubernetesApiData(log, hostName) @@lastRefreshTime = Time.now.utc.iso8601 end - def getContainerCpuLimit(key) + def getContainerMetadata(key) if @@containerMetadata.has_key?(key) - return @@containerMetadata[key]['cpuLimit'] + return @@containerMetadata[key] else - return '' + return nil end end diff --git a/source/code/plugin/HealthMonitorState.rb 
b/source/code/plugin/HealthMonitorState.rb index a73ff47de..99f041bd3 100644 --- a/source/code/plugin/HealthMonitorState.rb +++ b/source/code/plugin/HealthMonitorState.rb @@ -56,7 +56,7 @@ def getHealthMonitorStatesHash end def computeHealthMonitorState(log, monitor_id, value, config) - log.debug "computeHealthMonitorState" + #log.debug "computeHealthMonitorState" #log.info "id: #{monitor_id} value: #{value} config: #{config}" case monitor_id when HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthEventsConstants::NODE_CPU_MONITOR_ID, HealthEventsConstants::NODE_MEMORY_MONITOR_ID @@ -67,7 +67,7 @@ def computeHealthMonitorState(log, monitor_id, value, config) end def getStateForRangeMonitor(log, value, config) - log.debug "getStateForRangeMonitor" + #log.debug "getStateForRangeMonitor" pass_percentage = 0.0 (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthEventsConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f (config.nil? || config['FailPercentage'].nil?) ? 
fail_percentage = HealthEventsConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f diff --git a/source/code/plugin/filter_cadvisor_health.rb b/source/code/plugin/filter_cadvisor_health.rb index 6ed53e8b6..cbada4c17 100644 --- a/source/code/plugin/filter_cadvisor_health.rb +++ b/source/code/plugin/filter_cadvisor_health.rb @@ -40,8 +40,7 @@ def initialize def configure(conf) super - @log = nil - @log = Logger.new(@log_path, 1, 5000000) + @log = HealthEventUtils.getLogHandle @log.debug {'Starting filter_cadvisor2health plugin'} end @@ -49,7 +48,7 @@ def start super @metrics_to_collect_hash = HealthEventUtils.build_metrics_hash(@metrics_to_collect) @log.debug "Calling ensure_cpu_memory_capacity_set cpu_capacity #{@cpu_capacity} memory_capacity #{@memory_capacity}" - node_capacity = HealthEventUtils.ensure_cpu_memory_capacity_set(@cpu_capacity, @memory_capacity) + node_capacity = HealthEventUtils.ensure_cpu_memory_capacity_set(@cpu_capacity, @memory_capacity, @@hostName) @cpu_capacity = node_capacity[0] @memory_capacity = node_capacity[1] @log.info "CPU Capacity #{@cpu_capacity} Memory Capacity #{@memory_capacity}" @@ -60,16 +59,19 @@ def start def filter_stream(tag, es) new_es = MultiEventStream.new HealthEventUtils.refreshKubernetesApiData(@log, @hostName) + records_count = 0 es.each { |time, record| begin - filtered_records = filter(tag, time, record) - filtered_records.each {|filtered_record| - new_es.add(time, filtered_record) if filtered_record - } if filtered_records + filtered_record = filter(tag, time, record) + if !filtered_record.nil? 
+ new_es.add(time, filtered_record) + records_count += 1 + end rescue => e router.emit_error_event(tag, time, record, e) end } + @log.debug "Records Count #{records_count}" new_es end @@ -104,126 +106,148 @@ def filter(tag, time, record) rescue => e @log.debug "Error in filter #{e}" @log.debug "backtrace #{e.backtrace}" - return [] + return nil end end def process_container_cpu_record(record, metric_value) - # monitor_id = HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID - # @log.debug "processing container cpu record" - # if record.nil? - # return [] - # else - # instance_name = record['DataItems'][0]['InstanceName'] - # key = HealthEventUtils.getContainerKeyFromInstanceName(instance_name) - # cpu_limit = HealthEventUtils.getContainerCpuLimit(key) - - # if cpu_limit.to_s.empty? - # @log.info "CPU Limit is nil" - # cpu_limit = @cpu_capacity - # end - - # @log.info "cpu limit #{cpu_limit}" - - # percent = (metric_value.to_f/cpu_limit*100).round(2) - # @log.debug "Container #{key} | Percentage of CPU limit: #{percent}" - # state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID]) - # @log.debug "Computed State : #{state}" - # timestamp = record['DataItems'][0]['Timestamp'] - # health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}) - # @log.info health_monitor_record - - # monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) - # @log.info "Monitor Instance Id: #{monitor_instance_id}" - # HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - # return HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, 
@@health_monitor_config[monitor_id], key: key) - # end - - return [] + monitor_id = HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID + @log.debug "processing container cpu record" + if record.nil? + return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + key = HealthEventUtils.getContainerKeyFromInstanceName(instance_name) + container_metadata = HealthEventUtils.getContainerMetadata(key) + if !container_metadata.nil? + if container_metadata['namespace'] == 'kube-system' + return nil + end + cpu_limit = container_metadata['cpuLimit'] + end + + if cpu_limit.to_s.empty? + #@log.info "CPU Limit is nil" + cpu_limit = @cpu_capacity + end + + #@log.info "cpu limit #{cpu_limit}" + + percent = (metric_value.to_f/cpu_limit*100).round(2) + #@log.debug "Container #{key} | Percentage of CPU limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID]) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}) + #@log.info health_monitor_record + + monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) + #@log.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + record = HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) + temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] + @log.info "Processed Container CPU #{temp}" + return record + end + return nil end def process_container_memory_record(record, metric_value) - # monitor_id = HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID - # @log.debug "processing container memory record" - # if record.nil? - # return [] - # else - # instance_name = record['DataItems'][0]['InstanceName'] - # key = HealthEventUtils.getContainerKeyFromInstanceName(instance_name) - # memory_limit = HealthEventUtils.getContainerMemoryLimit(key) - - # if memory_limit.to_s.empty? - # @log.info "Memory Limit is nil" - # memory_limit = @memory_capacity - # end - - # @log.info "memory limit #{memory_limit}" - - # percent = (metric_value.to_f/memory_limit*100).round(2) - # @log.debug "Container #{key} | Percentage of Memory limit: #{percent}" - # state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID]) - # @log.debug "Computed State : #{state}" - # timestamp = record['DataItems'][0]['Timestamp'] - # health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}) - # @log.info health_monitor_record - - # monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) - # @log.info "Monitor Instance Id: #{monitor_instance_id}" - # HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - # return HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) - # end - - return [] + monitor_id = HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID + @log.debug "processing container memory record" + if record.nil? 
+ return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + key = HealthEventUtils.getContainerKeyFromInstanceName(instance_name) + container_metadata = HealthEventUtils.getContainerMetadata(key) + if !container_metadata.nil? + if container_metadata['namespace'] == 'kube-system' + return nil + end + memory_limit = container_metadata['memoryLimit'] + end + + if memory_limit.to_s.empty? + #@log.info "Memory Limit is nil" + memory_limit = @memory_capacity + end + + #@log.info "memory limit #{memory_limit}" + + percent = (metric_value.to_f/memory_limit*100).round(2) + #@log.debug "Container #{key} | Percentage of Memory limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID]) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}) + #@log.info health_monitor_record + + monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) + #@log.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + record = HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) + temp = record.nil? ? "Nil" : record["MonitorInstanceId"] + @log.info "Processed Container Memory #{temp}" + return record + end + return nil end def process_node_cpu_record(record, metric_value) monitor_id = HealthEventsConstants::NODE_CPU_MONITOR_ID @log.debug "processing node cpu record" if record.nil? 
- return [] + return nil else instance_name = record['DataItems'][0]['InstanceName'] - @log.info "CPU capacity #{@cpu_capacity}" + #@log.info "CPU capacity #{@cpu_capacity}" percent = (metric_value.to_f/@cpu_capacity*100).round(2) - @log.debug "Percentage of CPU limit: #{percent}" + #@log.debug "Percentage of CPU limit: #{percent}" state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::NODE_CPU_MONITOR_ID]) - @log.debug "Computed State : #{state}" + #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}) - @log.info health_monitor_record + #@log.info health_monitor_record monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) - @log.info "Monitor Instance Id: #{monitor_instance_id}" + #@log.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - return HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) + record = HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) + temp = record.nil? ? "Nil" : record["MonitorInstanceId"] + @log.info "Processed Node CPU #{temp}" + return record end - return [] + return nil end def process_node_memory_record(record, metric_value) monitor_id = HealthEventsConstants::NODE_MEMORY_MONITOR_ID @log.debug "processing node memory record" if record.nil? 
- return [] + return nil else instance_name = record['DataItems'][0]['InstanceName'] - @log.info "Memory capacity #{@memory_capacity}" + #@log.info "Memory capacity #{@memory_capacity}" percent = (metric_value.to_f/@memory_capacity*100).round(2) - @log.debug "Percentage of Memory limit: #{percent}" + #@log.debug "Percentage of Memory limit: #{percent}" state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::NODE_MEMORY_MONITOR_ID]) - @log.debug "Computed State : #{state}" + #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value/1000000.to_f, "memoryUtilizationPercentage" => percent}) - @log.info health_monitor_record + #@log.info health_monitor_record monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) - @log.info "Monitor Instance Id: #{monitor_instance_id}" + #@log.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - return HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) + record = HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) + temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] + @log.info "Processed Node Memory #{temp}" + return record end - return [] + return nil end end end diff --git a/source/code/plugin/healthconfig.json b/source/code/plugin/healthconfig.json index e3ac7cb86..5525b25b9 100644 --- a/source/code/plugin/healthconfig.json +++ b/source/code/plugin/healthconfig.json @@ -4,33 +4,33 @@ "FailPercentage": 2.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, - "MonitorTimeOut": 1 + "MonitorTimeOut": 5 }, "node_memory_utilization_percentage": { "PassPercentage": 2.0, "FailPercentage": 2.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, - "MonitorTimeOut": 1 + "MonitorTimeOut": 5 }, "container_manager_runtime_running": { - "MonitorTimeOut": 1, + "MonitorTimeOut": 5, "NotifyInstantly" : true }, "kubelet_running": { - "MonitorTimeOut": 1, + "MonitorTimeOut": 5, "NotifyInstantly" : true }, "node_condition": { - "MonitorTimeOut": 1, + "MonitorTimeOut": 5, "NotifyInstantly" : true }, "is_oversubscribed_cpu": { - "MonitorTimeOut": 1, + "MonitorTimeOut": 5, "NotifyInstantly" : true }, "is_oversubscribed_memory": { - "MonitorTimeOut": 1, + "MonitorTimeOut": 5, "NotifyInstantly" : true }, "container_cpu_utilization_percentage": { @@ -38,31 +38,31 @@ "FailPercentage": 2.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, - "MonitorTimeOut": 1 + "MonitorTimeOut": 5 }, "container_memory_utilization_percentage": { "PassPercentage": 2.0, "FailPercentage": 2.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, - "MonitorTimeOut": 1 + "MonitorTimeOut": 5 }, "workload_pods_ready_percentage" : { "PassPercentage": 100.0, "FailPercentage": 90.0, "SamplesBeforeNotification": 2, "NotifyInstantly" : false, - "MonitorTimeOut": 1 + "MonitorTimeOut": 5 }, "system_pods_ready_percentage" : { "PassPercentage": 100.0, "FailPercentage": 90.0, "SamplesBeforeNotification": 2, "NotifyInstantly" : false, - "MonitorTimeOut": 1 + "MonitorTimeOut": 5 }, "kube_api_up": { - "MonitorTimeOut": 
1, + "MonitorTimeOut": 5, "NotifyInstantly" : true } } \ No newline at end of file From e0c431e051b16477e49fff5f110addc7ebf2951b Mon Sep 17 00:00:00 2001 From: r-dilip Date: Mon, 11 Mar 2019 09:11:27 -0700 Subject: [PATCH 19/90] Hooking up input and filter plugins to out_oms_api plugin --- source/code/plugin/HealthMonitorState.rb | 2 +- source/code/plugin/HealthSignalReducer.rb | 78 +++++++++++++------- source/code/plugin/filter_cadvisor_health.rb | 19 +++-- source/code/plugin/in_cadvisor_perf.rb | 30 ++++---- source/code/plugin/in_health_kubeapidata.rb | 21 ++++-- 5 files changed, 91 insertions(+), 59 deletions(-) diff --git a/source/code/plugin/HealthMonitorState.rb b/source/code/plugin/HealthMonitorState.rb index 99f041bd3..e74d8ed08 100644 --- a/source/code/plugin/HealthMonitorState.rb +++ b/source/code/plugin/HealthMonitorState.rb @@ -37,7 +37,7 @@ def updateHealthMonitorState(log, monitor_instance_id, health_monitor_record, co health_monitor_instance_state.prev_records = health_monitor_records @@instanceStates[monitor_instance_id] = health_monitor_instance_state else - health_monitor_instance_state = HealthMonitorInstanceState.new(health_monitor_record.timestamp, health_monitor_record.state, [health_monitor_record]) + health_monitor_instance_state = HealthMonitorInstanceState.new(health_monitor_record["timestamp"], health_monitor_record["state"], [health_monitor_record]) @@instanceStates[monitor_instance_id] = health_monitor_instance_state end #log.debug "Health Records Count: #{health_monitor_instance_state.prev_records.size}" diff --git a/source/code/plugin/HealthSignalReducer.rb b/source/code/plugin/HealthSignalReducer.rb index 0b46d8f3f..c423b0b0b 100644 --- a/source/code/plugin/HealthSignalReducer.rb +++ b/source/code/plugin/HealthSignalReducer.rb @@ -24,8 +24,8 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, if (!monitor_config['NotifyInstantly'].nil? 
&& monitor_config['NotifyInstantly'] == true) latest_record = health_monitor_records[health_monitor_records.size-1] #since we push new records to the end, and remove oldest records from the beginning - latest_record_state = latest_record.state - latest_record_time = latest_record.timestamp #string representation of time + latest_record_state = latest_record["state"] + latest_record_time = latest_record["timestamp"] #string representation of time #log.info "Latest Record #{latest_record}" if latest_record_state.downcase == prev_sent_status.downcase && @@firstMonitorRecordSent.key?(monitor_id) #log.info "latest_record_state.to_s.downcase == prev_sent_status.to_s.state" @@ -53,8 +53,8 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, controller_name: controller_name, node_name: node_name) else latest_record = health_monitor_records[health_monitor_records.size-1] #since we push new records to the end, and remove oldest records from the beginning - latest_record_state = latest_record.state - latest_record_time = latest_record.timestamp #string representation of time + latest_record_state = latest_record["state"] + latest_record_time = latest_record["timestamp"] #string representation of time #log.info "Latest Record #{latest_record}" if latest_record_state.downcase == prev_sent_status.downcase #log.info "latest_record_state.to_s.downcase == prev_sent_status.to_s.state" @@ -105,51 +105,73 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s #log.debug "Labels #{labels.to_json.to_s}" prev_records = health_monitor_instance_state.prev_records - collection_time = prev_records[0].timestamp # the oldest collection time - new_state = health_monitor_instance_state.prev_records[0].state + collection_time = prev_records[0]["timestamp"] # the oldest collection time + new_state = 
health_monitor_instance_state.prev_records[0]["state"] old_state = health_monitor_instance_state.prev_sent_record_status #log.debug "monitor_config #{monitor_config}" if monitor_config.nil? monitor_config = '' end - monitor_config = monitor_config.to_json.to_s + monitor_config = monitor_config #log.debug "monitor_config #{monitor_config}" records = [] - details = prev_records.each do |record| + details = prev_records #.each do |record| - hash_record = { "timestamp" => record.timestamp, "state" => record.state, "details" => record.details} - #log.debug "Hash from Struct #{hash_record}" - #log.debug "monitor_config #{monitor_config}" - records.push(hash_record.to_json.to_s) - end - details = "[#{records.join(',')}]" + # hash_record = { "timestamp" => record.timestamp, "state" => record.state, "details" => record.details} + # #log.debug "Hash from Struct #{hash_record}" + # #log.debug "monitor_config #{monitor_config}" + # records.push(hash_record.to_json.to_s) + # end + # details = "[#{records.join(',')}]" time_observed = Time.now.utc.iso8601 #log.debug "Details: #{details}" #log.debug "collection_time #{collection_time} time_observed #{time_observed} new_state #{new_state} old_state #{old_state}" - health_monitor_record = HealthEventTemplates::HealthRecordTemplate % { - labels: labels.to_json.to_s, - monitor_id: monitor_id, - monitor_instance_id: monitor_instance_id, - new_state: new_state, - old_state: old_state, - monitor_details: details, - collection_time: collection_time, - time_observed: time_observed, - monitor_config: monitor_config - } + # health_monitor_record = HealthEventTemplates::HealthRecordTemplate % { + # labels: labels, + # monitor_id: monitor_id, + # monitor_instance_id: monitor_instance_id, + # new_state: new_state, + # old_state: old_state, + # monitor_details: details, + # collection_time: collection_time, + # time_observed: time_observed, + # monitor_config: monitor_config + # } + # HealthRecordTemplate = '{ + # "Labels": %{labels}, + # 
"MonitorId": "%{monitor_id}", + # "MonitorInstanceId": "%{monitor_instance_id}", + # "NewState": "%{new_state}", + # "OldState": "%{old_state}", + # "Details": %{monitor_details}, + # "MonitorConfig": %{monitor_config}, + # "CollectionTime": "%{collection_time}", + # "TimeObserved": "%{time_observed}" + # }' + health_monitor_record = {} + health_monitor_record["MonitorLabels"] = labels.to_json + health_monitor_record["MonitorId"] = monitor_id + health_monitor_record["MonitorInstanceId"] = monitor_instance_id + health_monitor_record["NewState"] = new_state + health_monitor_record["OldState"] = old_state + health_monitor_record["Details"] = details + health_monitor_record["MonitorConfig"] = monitor_config.to_json + health_monitor_record["CollectionTime"] = collection_time + health_monitor_record["TimeObserved"] = time_observed + #log.debug "HealthMonitor Record #{health_monitor_record}" - return_val = JSON.parse(health_monitor_record) + #return_val = JSON.parse(health_monitor_record) #log.debug "Parsed Health Monitor Record for #{monitor_id}" if !@@firstMonitorRecordSent.key?(monitor_id) @@firstMonitorRecordSent[monitor_id] = true end - return return_val + return health_monitor_record end def isStateChangeConsistent(log, health_monitor_records) @@ -159,7 +181,7 @@ def isStateChangeConsistent(log, health_monitor_records) i = 0 while i < health_monitor_records.size - 1 #log.info "Prev: #{health_monitor_records[i].state} Current: #{health_monitor_records[i + 1].state}" - if health_monitor_records[i].state != health_monitor_records[i + 1].state + if health_monitor_records[i]["state"] != health_monitor_records[i + 1]["state"] return false end i += 1 diff --git a/source/code/plugin/filter_cadvisor_health.rb b/source/code/plugin/filter_cadvisor_health.rb index cbada4c17..79e502241 100644 --- a/source/code/plugin/filter_cadvisor_health.rb +++ b/source/code/plugin/filter_cadvisor_health.rb @@ -71,16 +71,17 @@ def filter_stream(tag, es) router.emit_error_event(tag, time, 
record, e) end } - @log.debug "Records Count #{records_count}" + @log.debug "Filter Records Count #{records_count}" new_es end def filter(tag, time, record) begin + if record.key?("Labels") + return record + end object_name = record['DataItems'][0]['ObjectName'] - counter_name = record['DataItems'][0]['Collections'][0]['CounterName'].downcase - if @metrics_to_collect_hash.key?(counter_name.downcase) metric_value = record['DataItems'][0]['Collections'][0]['Value'] case object_name @@ -138,7 +139,8 @@ def process_container_cpu_record(record, metric_value) state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID]) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] - health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}) + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} + #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}) #@log.info health_monitor_record monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) @@ -180,7 +182,8 @@ def process_container_memory_record(record, metric_value) state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID]) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] - health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => 
percent}) + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} + #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}) #@log.info health_monitor_record monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) @@ -208,7 +211,8 @@ def process_node_cpu_record(record, metric_value) state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::NODE_CPU_MONITOR_ID]) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] - health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}) + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} + #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}) #@log.info health_monitor_record monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) @@ -236,7 +240,8 @@ def process_node_memory_record(record, metric_value) state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::NODE_MEMORY_MONITOR_ID]) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] - health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value/1000000.to_f, "memoryUtilizationPercentage" => percent}) + health_monitor_record = 
{"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} + #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value/1000000.to_f, "memoryUtilizationPercentage" => percent}) #@log.info health_monitor_record monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index b9ac31127..dd0944ae3 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -2,29 +2,29 @@ # frozen_string_literal: true module Fluent - + class CAdvisor_Perf_Input < Input Plugin.register_input('cadvisorperf', self) - + def initialize super require 'yaml' require 'json' - + require_relative 'CAdvisorMetricsAPIClient' require_relative 'oms_common' require_relative 'omslog' end - + config_param :run_interval, :time, :default => '1m' config_param :tag, :string, :default => "oms.api.cadvisorperf" config_param :mdmtag, :string, :default => "mdm.cadvisorperf" - config_param :healthtag, :string, :default => "oms.api.CIHealthPerf" - + config_param :healthtag, :string, :default => "oms.api.DiliprPerf" + def configure (conf) super end - + def start if @run_interval @finished = false @@ -33,7 +33,7 @@ def start @thread = Thread.new(&method(:run_periodic)) end end - + def shutdown if @run_interval @mutex.synchronize { @@ -43,7 +43,7 @@ def shutdown @thread.join end end - + def enumerate() time = Time.now.to_f begin @@ -53,13 +53,13 @@ def enumerate() record['DataType'] = "LINUX_PERF_BLOB" record['IPName'] = "LogManagement" eventStream.add(time, record) if record - #router.emit(@tag, time, record) if record - end - + #router.emit(@tag, time, record) if record + end + router.emit_stream(@tag, eventStream) if eventStream router.emit_stream(@mdmtag, eventStream) if eventStream 
- router.emit_stream(@healthtag, eventStream) if eventStream - + router.emit_stream(@healthtag, eventStream) if eventStream + @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) $log.info("cAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") @@ -69,7 +69,7 @@ def enumerate() $log.debug_backtrace(errorStr.backtrace) end end - + def run_periodic @mutex.lock done = @finished diff --git a/source/code/plugin/in_health_kubeapidata.rb b/source/code/plugin/in_health_kubeapidata.rb index e95f4bf3b..7f6bda690 100644 --- a/source/code/plugin/in_health_kubeapidata.rb +++ b/source/code/plugin/in_health_kubeapidata.rb @@ -23,7 +23,7 @@ def initialize end config_param :run_interval, :time, :default => "1m" - config_param :tag, :string, :default => "oms.containerinsights.ContainerInsightsHealth" + config_param :tag, :string, :default => "oms.api.DiliprPerf" def configure(conf) super @@ -117,7 +117,8 @@ def enumerate end router.emit_stream(@tag, eventStream) if eventStream rescue => errorStr - @@hmlog.warn("error : #{errorStr.to_s}") + @@hmlog.warn("error in_health_kubeapidata: #{errorStr.to_s}") + @log.debug "backtrace Input #{errorStr.backtrace}" ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -130,7 +131,8 @@ def process_cpu_oversubscribed_monitor(pod_inventory) #CPU monitor_id = HealthEventsConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID - health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}) + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}} + #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => 
subscription/1000000.to_f}) # @@hmlog.info health_monitor_record monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) @@ -149,7 +151,8 @@ def process_memory_oversubscribed_monitor(pod_inventory) #CPU monitor_id = HealthEventsConstants::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID - health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}) + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}} + #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}) hmlog = HealthEventUtils.getLogHandle monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) @@ -165,7 +168,8 @@ def process_kube_api_up_monitor(state, response) monitor_id = HealthEventsConstants::MANAGEDINFRA_KUBEAPI_AVAILABLE_MONITOR_ID details = response.each_header.to_h details['ResponseCode'] = response.code - health_monitor_record = HealthMonitorRecord.new(timestamp, state, details) + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} + #health_monitor_record = HealthMonitorRecord.new(timestamp, state, details) hmlog = HealthEventUtils.getLogHandle #hmlog.info health_monitor_record @@ -196,8 +200,8 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) elsif config_monitor_id.downcase.start_with?("workload") state = HealthMonitorState.getStateForWorkloadPodsReadyPercentage(@@hmlog, percent, monitor_config) end - - health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"totalPods" => total_pods, "podsReady" => pods_ready, "controllerName" => controller_name}) + health_monitor_record = 
{"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "controllerName" => controller_name}} + #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"totalPods" => total_pods, "podsReady" => pods_ready, "controllerName" => controller_name}) #hmlog.info health_monitor_record monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId, "controller_name" => controller_name, "namespace" => namespace}) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) @@ -224,7 +228,8 @@ def process_node_condition_monitor(node_inventory) conditions.each do |condition| details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message']} end - health_monitor_record = HealthMonitorRecord.new(timestamp, state, details) + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} + #health_monitor_record = HealthMonitorRecord.new(timestamp, state, details) monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId, "node_name" => node_name}) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) record = HealthSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, node_name: node_name) From 654c7c92dd5965aeb0ffd531b2f79f772ac72e9f Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 12 Mar 2019 09:01:40 -0700 Subject: [PATCH 20/90] 1. Tag Changes 2. Adding Health Monitor Configuration 3. 
Added Agent Collection Time(in lieu of TimeGenerated) --- installer/conf/container.conf | 23 +- .../conf/healthmonitorconfig.json | 16 +- installer/conf/kube.conf | 22 +- installer/datafiles/base_container.data | 16 +- source/code/plugin/HealthEventTemplates.rb | 79 ----- ...Constants.rb => HealthMonitorConstants.rb} | 2 +- ...ducer.rb => HealthMonitorSignalReducer.rb} | 123 +++---- source/code/plugin/HealthMonitorState.rb | 34 +- ...lthEventUtils.rb => HealthMonitorUtils.rb} | 33 +- source/code/plugin/filter_cadvisor_health.rb | 70 ++-- .../code/plugin/filter_health_cpu_memory.rb | 335 ------------------ source/code/plugin/in_cadvisor_perf.rb | 2 +- source/code/plugin/in_health_docker.rb | 127 ------- source/code/plugin/in_health_kubeapidata.rb | 86 ++--- source/code/plugin/in_health_kubelet.rb | 187 ---------- 15 files changed, 220 insertions(+), 935 deletions(-) rename source/code/plugin/healthconfig.json => installer/conf/healthmonitorconfig.json (86%) delete mode 100644 source/code/plugin/HealthEventTemplates.rb rename source/code/plugin/{HealthEventsConstants.rb => HealthMonitorConstants.rb} (97%) rename source/code/plugin/{HealthSignalReducer.rb => HealthMonitorSignalReducer.rb} (58%) rename source/code/plugin/{HealthEventUtils.rb => HealthMonitorUtils.rb} (93%) delete mode 100644 source/code/plugin/filter_health_cpu_memory.rb delete mode 100644 source/code/plugin/in_health_docker.rb delete mode 100644 source/code/plugin/in_health_kubelet.rb diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 24318b6cf..88f7929d7 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -17,30 +17,23 @@ #cadvisor perf - type cadvisorperf - tag oms.api.cadvisorperf - run_interval 60s - log_level debug - - - - type dockerhealth - tag oms.api.DockerHealth + type cadvisorperf + tag oms.api.cadvisorperf run_interval 60s log_level debug - - type filter_health_cpu_memory + + type filter_cadvisor2health log_level debug - + type 
out_oms_api log_level debug buffer_chunk_limit 10m buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_CIHealthPerf*.buffer + buffer_path %STATE_DIR_WS%/out_oms_api_KubeHealth*.buffer buffer_queue_limit 10 flush_interval 20s retry_limit 10 @@ -85,12 +78,12 @@ max_retry_wait 9m - + type out_oms_api log_level debug buffer_chunk_limit 10m buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_docker_health*.buffer + buffer_path %STATE_DIR_WS%/out_oms_api_kube_health*.buffer buffer_queue_limit 10 flush_interval 20s retry_limit 10 diff --git a/source/code/plugin/healthconfig.json b/installer/conf/healthmonitorconfig.json similarity index 86% rename from source/code/plugin/healthconfig.json rename to installer/conf/healthmonitorconfig.json index 5525b25b9..2b28f0317 100644 --- a/source/code/plugin/healthconfig.json +++ b/installer/conf/healthmonitorconfig.json @@ -1,14 +1,14 @@ { "node_cpu_utilization_percentage": { - "PassPercentage": 2.0, - "FailPercentage": 2.0, + "PassPercentage": 80.0, + "FailPercentage": 90.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, "MonitorTimeOut": 5 }, "node_memory_utilization_percentage": { - "PassPercentage": 2.0, - "FailPercentage": 2.0, + "PassPercentage": 80.0, + "FailPercentage": 90.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, "MonitorTimeOut": 5 @@ -34,15 +34,15 @@ "NotifyInstantly" : true }, "container_cpu_utilization_percentage": { - "PassPercentage": 2.0, - "FailPercentage": 2.0, + "PassPercentage": 80.0, + "FailPercentage": 90.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, "MonitorTimeOut": 5 }, "container_memory_utilization_percentage": { - "PassPercentage": 2.0, - "FailPercentage": 2.0, + "PassPercentage": 80.0, + "FailPercentage": 90.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, "MonitorTimeOut": 5 diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 454df6e91..8e8af731c 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf 
@@ -47,6 +47,14 @@ log_level debug +#cluster health + + type kubehealth + tag oms.api.KubeHealth.AgentCollectionTime + run_interval 60s + log_level debug + + type filter_inventory2mdm custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope @@ -95,6 +103,18 @@ retry_wait 30s + + type out_oms_api + log_level debug + buffer_chunk_limit 10m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_api_KubeHealth*.buffer + buffer_queue_limit 10 + flush_interval 20s + retry_limit 10 + retry_wait 30s + + type out_oms log_level debug @@ -138,7 +158,7 @@ max_retry_wait 9m - + type out_oms log_level debug num_threads 5 diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index c263aa505..075948ab8 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -49,6 +49,14 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/out_mdm.rb; source/code/plugin/out_mdm.rb; 644; root; root /opt/microsoft/omsagent/plugin/filter_cadvisor2mdm.rb; source/code/plugin/filter_cadvisor2mdm.rb; 644; root; root +/opt/microsoft/omsagent/plugin/filter_cadvisor_health.rb; source/code/plugin/filter_cadvisor_health.rb; 644; root; root +/opt/microsoft/omsagent/plugin/in_health_kubeapidata.rb; source/code/plugin/in_health_kubeapidata.rb; 644; root; root +/opt/microsoft/omsagent/plugin/HealthMonitorConstants.rb; source/code/plugin/HealthMonitorConstants.rb; 644; root; root +/opt/microsoft/omsagent/plugin/HealthMonitorSignalReducer.rb; source/code/plugin/HealthMonitorSignalReducer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/HealthMonitorState.rb; source/code/plugin/HealthMonitorState.rb; 644; root; root +/opt/microsoft/omsagent/plugin/HealthMonitorUtils.rb; source/code/plugin/HealthMonitorUtils.rb; 644; root; root +/opt/microsoft/omsagent/plugin/healthmonitorconfig.json installer/conf/healthmonitorconfig.json + 
/opt/microsoft/omsagent/plugin/lib/application_insights/version.rb; source/code/plugin/lib/application_insights/version.rb; 644; root; root /opt/microsoft/omsagent/plugin/lib/application_insights/rack/track_request.rb; source/code/plugin/lib/application_insights/rack/track_request.rb; 644; root; root /opt/microsoft/omsagent/plugin/lib/application_insights/unhandled_exception.rb; source/code/plugin/lib/application_insights/unhandled_exception.rb; 644; root; root @@ -98,6 +106,8 @@ MAINTAINER: 'Microsoft Corporation' /etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root + + %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root @@ -184,12 +194,16 @@ touch /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log chmod 666 /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log +touch /var/opt/microsoft/docker-cimprov/log/health_monitors.log +chmod 666 /var/opt/microsoft/docker-cimprov/log/health_monitors.log +chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/health_monitors.log + mv /etc/opt/microsoft/docker-cimprov/container.conf /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf chown omsagent:omsagent /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf %Postuninstall_10 # If we're an upgrade, skip all of this cleanup -if ${{PERFORMING_UPGRADE_NOT}}; then +if ${{PERFORMING_UPGRADE_NOT}}; then # Clean up installinfo.txt file (registered as "conf" file to pass rpmcheck) rm -f /etc/opt/microsoft/docker-cimprov/conf/installinfo.txt* rm -f /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt diff --git a/source/code/plugin/HealthEventTemplates.rb b/source/code/plugin/HealthEventTemplates.rb deleted file mode 100644 
index 047fb64c3..000000000 --- a/source/code/plugin/HealthEventTemplates.rb +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - - -# details is an array of records -# include monitor config details in the template - -require_relative 'HealthEventsConstants' - -class HealthEventTemplates - HealthRecordTemplate = '{ - "Labels": %{labels}, - "MonitorId": "%{monitor_id}", - "MonitorInstanceId": "%{monitor_instance_id}", - "NewState": "%{new_state}", - "OldState": "%{old_state}", - "Details": %{monitor_details}, - "MonitorConfig": %{monitor_config}, - "CollectionTime": "%{collection_time}", - "TimeObserved": "%{time_observed}" - }' - - DetailsNodeMemoryTemplate = '{ - "NodeMemoryRssPercentage": %{memory_rss_percentage}, - "NodeMemoryRssBytes": %{memory_rss_bytes}, - "History": [%{prev_records}] - }' - - - DetailsNodeCpuTemplate = '{ - "NodeCpuUsagePercentage": %{cpu_percentage}, - "NodeCpuUsageMilliCores": %{cpu_usage}, - "PrevNodeCpuUsageDetails": %{prev_monitor_record_details}, - "PrevPrevNodeCpuUsageDetails": %{prev_prev_monitor_record_details} - }' - - DetailsWorkloadCpuOversubscribedTemplate = '{ - "ClusterCpuCapacity": %{cluster_cpu_capacity}, - "ClusterCpuRequests": %{cluster_cpu_requests} - }' - - DetailsWorkloadMemoryOversubscribedTemplate = '{ - "ClusterMemoryCapacity": %{cluster_memory_capacity}, - "ClusterMemoryRequests": %{cluster_memory_requests} - }' - - DetailsWorkloadPodsReadyStatePercentage = '{ - "TimeStamp": "%{timestamp}", - "PodsReady": %{pods_ready}, - "TotalPods": %{total_pods} - "History": [%{prev_records}] - }' - - DetailsWorkloadContainerCpuPercentage = ' - "TimeStamp": "%{timestamp}", - "CpuLimit": %{cpu_limit}, - "CpuRequest": %{cpu_request}, - "CpuPercentage": %{cpu_percentage}, - "History": [%{prev_records}] - }' - - DetailsWorkloadContainerMemoryPercentage = ' - "TimeStamp": "%{timestamp}", - "MemoryLimit": %{memory_limit}, - "MemoryRequest": %{memory_request}, - "MemoryPercentage": %{memory_percentage}, 
- "History": [%{prev_records}] - }' - - DETAILS_TEMPLATE_HASH = { - HealthEventsConstants::NODE_CPU_MONITOR_ID => DetailsNodeCpuTemplate, - HealthEventsConstants::NODE_MEMORY_MONITOR_ID => DetailsNodeMemoryTemplate, - HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID => DetailsWorkloadContainerCpuPercentage, - HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID => DetailsWorkloadContainerMemoryPercentage, - HealthEventsConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID => DetailsWorkloadCpuOversubscribedTemplate, - HealthEventsConstants::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID => DetailsWorkloadMemoryOversubscribedTemplate, - HealthEventsConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID => DetailsWorkloadPodsReadyStatePercentage, - } -end \ No newline at end of file diff --git a/source/code/plugin/HealthEventsConstants.rb b/source/code/plugin/HealthMonitorConstants.rb similarity index 97% rename from source/code/plugin/HealthEventsConstants.rb rename to source/code/plugin/HealthMonitorConstants.rb index d7d051e41..1eff53f72 100644 --- a/source/code/plugin/HealthEventsConstants.rb +++ b/source/code/plugin/HealthMonitorConstants.rb @@ -1,7 +1,7 @@ #!/usr/local/bin/ruby # frozen_string_literal: true -class HealthEventsConstants +class HealthMonitorConstants NODE_CPU_MONITOR_ID = "node_cpu_utilization_percentage" NODE_MEMORY_MONITOR_ID = "node_memory_utilization_percentage" NODE_KUBELET_HEALTH_MONITOR_ID = "kubelet_running" diff --git a/source/code/plugin/HealthSignalReducer.rb b/source/code/plugin/HealthMonitorSignalReducer.rb similarity index 58% rename from source/code/plugin/HealthSignalReducer.rb rename to source/code/plugin/HealthMonitorSignalReducer.rb index c423b0b0b..6707611b2 100644 --- a/source/code/plugin/HealthSignalReducer.rb +++ b/source/code/plugin/HealthMonitorSignalReducer.rb @@ -3,9 +3,9 @@ require 'time' require 'json' -require_relative 'HealthEventTemplates' +require_relative 'KubernetesApiClient' -class 
HealthSignalReducer +class HealthMonitorSignalReducer @@firstMonitorRecordSent = {} class << self @@ -14,67 +14,80 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, #log.debug "monitorConfig #{monitor_config}" health_monitor_instance_state = HealthMonitorState.getHealthMonitorState(monitor_instance_id) - #log.info "Health Monitor Instance state #{health_monitor_instance_state}" + #log.debug "Health Monitor Instance state #{health_monitor_instance_state}" health_monitor_records = health_monitor_instance_state.prev_records - prev_sent_status = health_monitor_instance_state.prev_sent_record_status + new_state = health_monitor_instance_state.new_state prev_sent_time = health_monitor_instance_state.prev_sent_record_time - monitor_config['MonitorTimeOut'].nil? ? monitor_timeout = HealthEventsConstants::DEFAULT_MONITOR_TIMEOUT : monitor_timeout = monitor_config['MonitorTimeOut'] #minutes - #log.info monitor_timeout - + time_first_observed = health_monitor_instance_state.state_change_time + monitor_config['MonitorTimeOut'].nil? ? monitor_timeout = HealthMonitorConstants::DEFAULT_MONITOR_TIMEOUT : monitor_timeout = monitor_config['MonitorTimeOut'] #minutes + #log.debug monitor_timeout if (!monitor_config['NotifyInstantly'].nil? 
&& monitor_config['NotifyInstantly'] == true) latest_record = health_monitor_records[health_monitor_records.size-1] #since we push new records to the end, and remove oldest records from the beginning latest_record_state = latest_record["state"] latest_record_time = latest_record["timestamp"] #string representation of time - #log.info "Latest Record #{latest_record}" - if latest_record_state.downcase == prev_sent_status.downcase && @@firstMonitorRecordSent.key?(monitor_id) - #log.info "latest_record_state.to_s.downcase == prev_sent_status.to_s.state" + #log.debug "Latest Record #{latest_record}" + if latest_record_state.downcase == new_state.downcase && @@firstMonitorRecordSent.key?(monitor_id) #no state change + #log.debug "latest_record_state.to_s.downcase == prev_sent_status.to_s.state" time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60 - #log.info "time elapsed #{time_elapsed}" + #log.debug "time elapsed #{time_elapsed}" if time_elapsed > monitor_timeout # minutes - # update record + # update record for last sent record time + health_monitor_instance_state.old_state = health_monitor_instance_state.new_state + health_monitor_instance_state.new_state = latest_record_state health_monitor_instance_state.prev_sent_record_time = latest_record_time - health_monitor_instance_state.prev_sent_record_status = latest_record_state - #log.info "After Updating Monitor State #{health_monitor_instance_state}" + #log.debug "After Updating Monitor State #{health_monitor_instance_state}" HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, node_name: node_name) else - #log.info "Monitor timeout not reached #{time_elapsed}" - #log.info "Timeout not reached for #{monitor_id}" + #log.debug "Monitor timeout not reached #{time_elapsed}" + #log.debug "Timeout not reached for #{monitor_id}" return nil# dont send 
anything end else + health_monitor_instance_state.old_state = health_monitor_instance_state.new_state #initially old = new, so when state change occurs, assign old to be new, and set new to be the latest record state + health_monitor_instance_state.new_state = latest_record_state + health_monitor_instance_state.state_change_time = latest_record_time + health_monitor_instance_state.prev_sent_record_time = latest_record_time + HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, node_name: node_name) end end if health_monitor_instance_state.prev_records.size == 1 - #log.info "Only One Record" + #log.debug "Only One Record" return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, controller_name: controller_name, node_name: node_name) else + first_record = health_monitor_records[0] latest_record = health_monitor_records[health_monitor_records.size-1] #since we push new records to the end, and remove oldest records from the beginning latest_record_state = latest_record["state"] latest_record_time = latest_record["timestamp"] #string representation of time - #log.info "Latest Record #{latest_record}" - if latest_record_state.downcase == prev_sent_status.downcase - #log.info "latest_record_state.to_s.downcase == prev_sent_status.to_s.state" - time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60 - #log.info "time elapsed #{time_elapsed}" + #log.debug "Latest Record #{latest_record}" + if latest_record_state.downcase == new_state.downcase # No state change + #log.debug "latest_record_state.to_s.downcase == prev_sent_status.to_s.state" + time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60 #check if more than monitor timeout for signal + #log.debug "time elapsed #{time_elapsed}" if time_elapsed > monitor_timeout # 
minutes # update record + health_monitor_instance_state.old_state = health_monitor_instance_state.new_state + health_monitor_instance_state.new_state = latest_record_state health_monitor_instance_state.prev_sent_record_time = latest_record_time - health_monitor_instance_state.prev_sent_record_status = latest_record_state - #log.info "After Updating Monitor State #{health_monitor_instance_state}" + #log.debug "After Updating Monitor State #{health_monitor_instance_state}" HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, controller_name: controller_name) else - #log.info "Monitor timeout not reached #{time_elapsed}" - #log.info "Timeout not reached for #{monitor_id}" + #log.debug "Monitor timeout not reached #{time_elapsed}" + #log.debug "Timeout not reached for #{monitor_id}" return nil# dont send anything end else # state change from previous sent state to latest record state #check state of last n records to see if they are all in the same state if (isStateChangeConsistent(log, health_monitor_records)) + health_monitor_instance_state.old_state = health_monitor_instance_state.new_state + health_monitor_instance_state.new_state = latest_record_state + health_monitor_instance_state.prev_sent_record_time = latest_record_time + health_monitor_instance_state.state_change_time = first_record["timestamp"] + HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, controller_name: controller_name, node_name: node_name) else log.debug "No consistent state change for monitor #{monitor_id}" @@ -91,11 +104,11 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s #log.debug "Health Monitor Instance State #{health_monitor_instance_state}" - 
labels = HealthEventUtils.getClusterLabels - #log.info "Labels : #{labels}" + labels = HealthMonitorUtils.getClusterLabels + #log.debug "Labels : #{labels}" - monitor_labels = HealthEventUtils.getMonitorLabels(log, monitor_id, key, controller_name, node_name) - #log.info "Monitor Labels : #{monitor_labels}" + monitor_labels = HealthMonitorUtils.getMonitorLabels(log, monitor_id, key, controller_name, node_name) + #log.debug "Monitor Labels : #{monitor_labels}" if !monitor_labels.nil? monitor_labels.keys.each do |key| @@ -103,11 +116,11 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s end end - #log.debug "Labels #{labels.to_json.to_s}" + #log.debug "Labels after adding Monitor Labels #{labels}" prev_records = health_monitor_instance_state.prev_records - collection_time = prev_records[0]["timestamp"] # the oldest collection time - new_state = health_monitor_instance_state.prev_records[0]["state"] - old_state = health_monitor_instance_state.prev_sent_record_status + time_first_observed = health_monitor_instance_state.state_change_time # the oldest collection time + new_state = health_monitor_instance_state.new_state # this is updated before formatRecord is called + old_state = health_monitor_instance_state.old_state #log.debug "monitor_config #{monitor_config}" if monitor_config.nil? 
@@ -117,41 +130,14 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s #log.debug "monitor_config #{monitor_config}" records = [] - details = prev_records #.each do |record| + details = prev_records #.each do |record| - # hash_record = { "timestamp" => record.timestamp, "state" => record.state, "details" => record.details} - # #log.debug "Hash from Struct #{hash_record}" - # #log.debug "monitor_config #{monitor_config}" - # records.push(hash_record.to_json.to_s) - # end - # details = "[#{records.join(',')}]" time_observed = Time.now.utc.iso8601 #log.debug "Details: #{details}" - #log.debug "collection_time #{collection_time} time_observed #{time_observed} new_state #{new_state} old_state #{old_state}" - - # health_monitor_record = HealthEventTemplates::HealthRecordTemplate % { - # labels: labels, - # monitor_id: monitor_id, - # monitor_instance_id: monitor_instance_id, - # new_state: new_state, - # old_state: old_state, - # monitor_details: details, - # collection_time: collection_time, - # time_observed: time_observed, - # monitor_config: monitor_config - # } - # HealthRecordTemplate = '{ - # "Labels": %{labels}, - # "MonitorId": "%{monitor_id}", - # "MonitorInstanceId": "%{monitor_instance_id}", - # "NewState": "%{new_state}", - # "OldState": "%{old_state}", - # "Details": %{monitor_details}, - # "MonitorConfig": %{monitor_config}, - # "CollectionTime": "%{collection_time}", - # "TimeObserved": "%{time_observed}" - # }' + #log.debug "time_first_observed #{time_first_observed} time_observed #{time_observed} new_state #{new_state} old_state #{old_state}" + health_monitor_record = {} + health_monitor_record["ClusterId"] = KubernetesApiClient.getClusterId health_monitor_record["MonitorLabels"] = labels.to_json health_monitor_record["MonitorId"] = monitor_id health_monitor_record["MonitorInstanceId"] = monitor_instance_id @@ -159,12 +145,11 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s 
health_monitor_record["OldState"] = old_state health_monitor_record["Details"] = details health_monitor_record["MonitorConfig"] = monitor_config.to_json - health_monitor_record["CollectionTime"] = collection_time - health_monitor_record["TimeObserved"] = time_observed + health_monitor_record["AgentCollectionTime"] = Time.now.utc.iso8601 + health_monitor_record["TimeFirstObserved"] = time_first_observed #log.debug "HealthMonitor Record #{health_monitor_record}" - #return_val = JSON.parse(health_monitor_record) #log.debug "Parsed Health Monitor Record for #{monitor_id}" if !@@firstMonitorRecordSent.key?(monitor_id) @@ -180,7 +165,7 @@ def isStateChangeConsistent(log, health_monitor_records) end i = 0 while i < health_monitor_records.size - 1 - #log.info "Prev: #{health_monitor_records[i].state} Current: #{health_monitor_records[i + 1].state}" + #log.debug "Prev: #{health_monitor_records[i].state} Current: #{health_monitor_records[i + 1].state}" if health_monitor_records[i]["state"] != health_monitor_records[i + 1]["state"] return false end diff --git a/source/code/plugin/HealthMonitorState.rb b/source/code/plugin/HealthMonitorState.rb index e74d8ed08..8e01f35a4 100644 --- a/source/code/plugin/HealthMonitorState.rb +++ b/source/code/plugin/HealthMonitorState.rb @@ -1,31 +1,33 @@ #!/usr/local/bin/ruby # frozen_string_literal: true -require_relative 'HealthEventsConstants' +require_relative 'HealthMonitorConstants' HealthMonitorRecord = Struct.new(:timestamp, :state, :details) do end -HealthMonitorInstanceState = Struct.new(:prev_sent_record_time, :prev_sent_record_status, :prev_records) do +HealthMonitorInstanceState = Struct.new(:prev_sent_record_time, :old_state, :new_state, :state_change_time, :prev_records) do end class HealthMonitorState @@instanceStates = {} #hash of monitor_instance_id --> health monitor instance state @@firstMonitorRecordSent = {} - HEALTH_MONITOR_STATE = {"PASS" => "Pass", "FAIL" => "Fail", "WARNING" => "Warning"} + HEALTH_MONITOR_STATE = 
{"PASS" => "pass", "FAIL" => "fail", "WARNING" => "warn"} class << self def updateHealthMonitorState(log, monitor_instance_id, health_monitor_record, config) #log.debug "updateHealthMonitorState" samples_to_keep = 1 - if config.nil? || config['SamplesBeforeNotification'].nil? - samples_to_keep = HealthEventsConstants::DEFAULT_SAMPLES_BEFORE_NOTIFICATION - elsif !config.nil? && config['NotifyInstantly'] + if !config.nil? && config['NotifyInstantly'] == true samples_to_keep = 1 - else + elsif !config.nil? && !config['SamplesBeforeNotification'].nil? samples_to_keep = config['SamplesBeforeNotification'].to_i + else + samples_to_keep = HealthMonitorConstants::DEFAULT_SAMPLES_BEFORE_NOTIFICATION end + #log.debug "Monitor Instance Id #{monitor_instance_id} samples_to_keep #{samples_to_keep}" + if @@instanceStates.key?(monitor_instance_id) health_monitor_instance_state = @@instanceStates[monitor_instance_id] health_monitor_records = health_monitor_instance_state.prev_records #This should be an array @@ -37,7 +39,7 @@ def updateHealthMonitorState(log, monitor_instance_id, health_monitor_record, co health_monitor_instance_state.prev_records = health_monitor_records @@instanceStates[monitor_instance_id] = health_monitor_instance_state else - health_monitor_instance_state = HealthMonitorInstanceState.new(health_monitor_record["timestamp"], health_monitor_record["state"], [health_monitor_record]) + health_monitor_instance_state = HealthMonitorInstanceState.new(health_monitor_record["timestamp"], health_monitor_record["state"], health_monitor_record["state"], health_monitor_record["timestamp"], [health_monitor_record]) @@instanceStates[monitor_instance_id] = health_monitor_instance_state end #log.debug "Health Records Count: #{health_monitor_instance_state.prev_records.size}" @@ -59,9 +61,9 @@ def computeHealthMonitorState(log, monitor_id, value, config) #log.debug "computeHealthMonitorState" #log.info "id: #{monitor_id} value: #{value} config: #{config}" case monitor_id - when 
HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthEventsConstants::NODE_CPU_MONITOR_ID, HealthEventsConstants::NODE_MEMORY_MONITOR_ID + when HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID return getStateForRangeMonitor(log, value, config) - when HealthEventsConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID, HealthEventsConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID + when HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID getStateForInfraPodsReadyPercentage(log, value, config) end end @@ -69,8 +71,8 @@ def computeHealthMonitorState(log, monitor_id, value, config) def getStateForRangeMonitor(log, value, config) #log.debug "getStateForRangeMonitor" pass_percentage = 0.0 - (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthEventsConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f - (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthEventsConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f + (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthMonitorConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f + (config.nil? || config['FailPercentage'].nil?) ? 
fail_percentage = HealthMonitorConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f #log.info "Pass: #{pass_percentage} Fail: #{fail_percentage}" if value.to_f < pass_percentage.to_f return HEALTH_MONITOR_STATE['PASS'] @@ -84,8 +86,8 @@ def getStateForRangeMonitor(log, value, config) def getStateForInfraPodsReadyPercentage(log, value, config) # log.debug "getStateForInfraPodsReadyPercentage" # log.debug "getStateForInfraPodsReadyPercentage #{config}" - (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthEventsConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f - (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthEventsConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f + (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthMonitorConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f + (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthMonitorConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f # log.info " getStateForInfraPodsReadyPercentage Pass: #{pass_percentage} Fail: #{fail_percentage}" if value.to_f < pass_percentage.to_f return HEALTH_MONITOR_STATE['FAIL'] @@ -97,8 +99,8 @@ def getStateForInfraPodsReadyPercentage(log, value, config) def getStateForWorkloadPodsReadyPercentage(log, value, config) # log.debug "getStateForWorkloadPodsReadyPercentage" pass_percentage = 0.0 - (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthEventsConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f - (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthEventsConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f + (config.nil? || config['PassPercentage'].nil?) ? 
pass_percentage = HealthMonitorConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f + (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthMonitorConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f #log.info "getStateForWorkloadPodsReadyPercentage Pass: #{pass_percentage} Fail: #{fail_percentage}" if value.to_f > fail_percentage.to_f && value.to_f < pass_percentage.to_f return HEALTH_MONITOR_STATE['WARNING'] diff --git a/source/code/plugin/HealthEventUtils.rb b/source/code/plugin/HealthMonitorUtils.rb similarity index 93% rename from source/code/plugin/HealthEventUtils.rb rename to source/code/plugin/HealthMonitorUtils.rb index fe796d397..61d87f413 100644 --- a/source/code/plugin/HealthEventUtils.rb +++ b/source/code/plugin/HealthMonitorUtils.rb @@ -2,10 +2,10 @@ # frozen_string_literal: true require_relative 'KubernetesApiClient' -require_relative 'HealthEventsConstants' +require_relative 'HealthMonitorConstants' require 'time' -class HealthEventUtils +class HealthMonitorUtils @LogPath = "/var/opt/microsoft/docker-cimprov/log/health_monitors.log" @log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M @@ -16,6 +16,7 @@ class HealthEventUtils @@podInventory = {} @@lastRefreshTime = '2019-01-01T00:00:00Z' @@nodeInventory = [] + @@clusterId = KubernetesApiClient.getClusterId def initialize end @@ -103,20 +104,18 @@ def getMonitorInstanceId(log, monitor_id, args = {}) end def getClusterLabels - labels = {} - cluster_id = KubernetesApiClient.getClusterId + cluster_id = @@clusterId region = KubernetesApiClient.getClusterRegion - labels['monitor.azure.com/ClusterId'] = cluster_id - labels['monitor.azure.com/ClusterRegion'] = region + labels['monitor.azure.com/cluster-region'] = region if !cluster_id.nil? 
cluster_id_elements = cluster_id.split('/') azure_sub_id = cluster_id_elements[2] resource_group = cluster_id_elements[4] cluster_name = cluster_id_elements[8] - labels['monitor.azure.com/SubscriptionId'] = azure_sub_id - labels['monitor.azure.com/ResourceGroup'] = resource_group - labels['monitor.azure.com/ClusterName'] = cluster_name + labels['monitor.azure.com/cluster-subscription-id'] = azure_sub_id + labels['monitor.azure.com/cluster-resource-group'] = resource_group + labels['monitor.azure.com/cluster-name'] = cluster_name end return labels end @@ -125,7 +124,7 @@ def getMonitorLabels(log, monitor_id, key, controller_name, node_name) #log.debug "key : #{key} controller_name #{controller_name} monitor_id #{monitor_id} node_name #{node_name}" monitor_labels = {} case monitor_id - when HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthEventsConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID, HealthEventsConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID + when HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID #log.debug "Getting Monitor labels for Workload/ManagedInfra Monitors #{controller_name} #{@@controllerMapping}" if !key.nil? 
#container monitor_labels['monitor.azure.com/ControllerName'] = getContainerControllerName(key) @@ -135,7 +134,7 @@ def getMonitorLabels(log, monitor_id, key, controller_name, node_name) monitor_labels['monitor.azure.com/Namespace'] = getControllerNamespace(controller_name) end return monitor_labels - when HealthEventsConstants::NODE_CPU_MONITOR_ID, HealthEventsConstants::NODE_MEMORY_MONITOR_ID, HealthEventsConstants::NODE_KUBELET_HEALTH_MONITOR_ID, HealthEventsConstants::NODE_CONDITION_MONITOR_ID, HealthEventsConstants::NODE_CONTAINER_RUNTIME_MONITOR_ID + when HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_KUBELET_HEALTH_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID, HealthMonitorConstants::NODE_CONTAINER_RUNTIME_MONITOR_ID #log.debug "Getting Node Labels " @@nodeInventory["items"].each do |node| @@ -153,7 +152,7 @@ def getMonitorLabels(log, monitor_id, key, controller_name, node_name) def refreshKubernetesApiData(log, hostName) #log.debug "refreshKubernetesApiData" if ((Time.now.utc - Time.parse(@@lastRefreshTime)) / 60 ) < 5.0 - log.debug "Less than 5 minutes since last refresh" + log.debug "Less than 5 minutes since last refresh at #{@@lastRefreshTime}" return end @@ -302,7 +301,7 @@ def getResourceSubscription(pod_inventory, metric_name, metric_capacity) def getHealthMonitorConfig health_monitor_config = {} begin - file = File.open('/opt/microsoft/omsagent/plugin/healthconfig.json', "r") + file = File.open('/opt/microsoft/omsagent/plugin/healthmonitorconfig.json', "r") if !file.nil? 
fileContents = file.read health_monitor_config = JSON.parse(fileContents) @@ -351,18 +350,18 @@ def getNodeStateFromNodeConditions(node_conditions) status = condition['status'] if ((type == "NetworkUnavailable" || type == "OutOfDisk") && (status == 'True' || status == 'Unknown')) - return "Fail" + return "fail" elsif ((type == "DiskPressure" || type == "MemoryPressure" || type == "PIDPressure") && (status == 'True' || status == 'Unknown')) - return "Warn" + return "warn" elsif type == "Ready" && status == 'True' pass = true end end if pass - return "Pass" + return "pass" else - return "Fail" + return "fail" end end end diff --git a/source/code/plugin/filter_cadvisor_health.rb b/source/code/plugin/filter_cadvisor_health.rb index 79e502241..289c7384a 100644 --- a/source/code/plugin/filter_cadvisor_health.rb +++ b/source/code/plugin/filter_cadvisor_health.rb @@ -5,8 +5,9 @@ module Fluent require 'logger' require 'json' require_relative 'oms_common' - require_relative 'HealthEventUtils' + require_relative 'HealthMonitorUtils' require_relative 'HealthMonitorState' + require_relative "ApplicationInsightsUtility" class CAdvisor2HealthFilter < Filter @@ -40,25 +41,25 @@ def initialize def configure(conf) super - @log = HealthEventUtils.getLogHandle + @log = HealthMonitorUtils.getLogHandle @log.debug {'Starting filter_cadvisor2health plugin'} end def start super - @metrics_to_collect_hash = HealthEventUtils.build_metrics_hash(@metrics_to_collect) + @metrics_to_collect_hash = HealthMonitorUtils.build_metrics_hash(@metrics_to_collect) @log.debug "Calling ensure_cpu_memory_capacity_set cpu_capacity #{@cpu_capacity} memory_capacity #{@memory_capacity}" - node_capacity = HealthEventUtils.ensure_cpu_memory_capacity_set(@cpu_capacity, @memory_capacity, @@hostName) + node_capacity = HealthMonitorUtils.ensure_cpu_memory_capacity_set(@cpu_capacity, @memory_capacity, @@hostName) @cpu_capacity = node_capacity[0] @memory_capacity = node_capacity[1] @log.info "CPU Capacity 
#{@cpu_capacity} Memory Capacity #{@memory_capacity}" - HealthEventUtils.refreshKubernetesApiData(@log, @@hostName) - @@health_monitor_config = HealthEventUtils.getHealthMonitorConfig + HealthMonitorUtils.refreshKubernetesApiData(@log, @@hostName) + @@health_monitor_config = HealthMonitorUtils.getHealthMonitorConfig end def filter_stream(tag, es) new_es = MultiEventStream.new - HealthEventUtils.refreshKubernetesApiData(@log, @hostName) + HealthMonitorUtils.refreshKubernetesApiData(@log, @hostName) records_count = 0 es.each { |time, record| begin @@ -77,7 +78,7 @@ def filter_stream(tag, es) def filter(tag, time, record) begin - if record.key?("Labels") + if record.key?("MonitorLabels") return record end object_name = record['DataItems'][0]['ObjectName'] @@ -106,20 +107,22 @@ def filter(tag, time, record) end rescue => e @log.debug "Error in filter #{e}" + @log.debug "record #{record}" @log.debug "backtrace #{e.backtrace}" + ApplicationInsightsUtility.sendExceptionTelemetry(e) return nil end end def process_container_cpu_record(record, metric_value) - monitor_id = HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID + monitor_id = HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID @log.debug "processing container cpu record" if record.nil? return nil else instance_name = record['DataItems'][0]['InstanceName'] - key = HealthEventUtils.getContainerKeyFromInstanceName(instance_name) - container_metadata = HealthEventUtils.getContainerMetadata(key) + key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) + container_metadata = HealthMonitorUtils.getContainerMetadata(key) if !container_metadata.nil? 
if container_metadata['namespace'] == 'kube-system' return nil @@ -136,17 +139,17 @@ def process_container_cpu_record(record, metric_value) percent = (metric_value.to_f/cpu_limit*100).round(2) #@log.debug "Container #{key} | Percentage of CPU limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID]) + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID]) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}) #@log.info health_monitor_record - monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) #@log.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] @log.info "Processed Container CPU #{temp}" return record @@ -155,14 +158,14 @@ def process_container_cpu_record(record, metric_value) end def process_container_memory_record(record, metric_value) - monitor_id = HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID - @log.debug "processing container memory record" + monitor_id = HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID + #@log.debug "processing container memory record" if record.nil? return nil else instance_name = record['DataItems'][0]['InstanceName'] - key = HealthEventUtils.getContainerKeyFromInstanceName(instance_name) - container_metadata = HealthEventUtils.getContainerMetadata(key) + key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) + container_metadata = HealthMonitorUtils.getContainerMetadata(key) if !container_metadata.nil? if container_metadata['namespace'] == 'kube-system' return nil @@ -179,17 +182,17 @@ def process_container_memory_record(record, metric_value) percent = (metric_value.to_f/memory_limit*100).round(2) #@log.debug "Container #{key} | Percentage of Memory limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID]) + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID]) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}) #@log.info health_monitor_record - monitor_instance_id = 
HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) #@log.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) temp = record.nil? ? "Nil" : record["MonitorInstanceId"] @log.info "Processed Container Memory #{temp}" return record @@ -198,8 +201,8 @@ def process_container_memory_record(record, metric_value) end def process_node_cpu_record(record, metric_value) - monitor_id = HealthEventsConstants::NODE_CPU_MONITOR_ID - @log.debug "processing node cpu record" + monitor_id = HealthMonitorConstants::NODE_CPU_MONITOR_ID + #@log.debug "processing node cpu record" if record.nil? 
return nil else @@ -208,17 +211,14 @@ def process_node_cpu_record(record, metric_value) percent = (metric_value.to_f/@cpu_capacity*100).round(2) #@log.debug "Percentage of CPU limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::NODE_CPU_MONITOR_ID]) + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::NODE_CPU_MONITOR_ID]) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} - #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}) - #@log.info health_monitor_record - monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) - #@log.info "Monitor Instance Id: #{monitor_instance_id}" + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] @log.info "Processed Node CPU #{temp}" return record @@ -227,8 +227,8 @@ def process_node_cpu_record(record, metric_value) end def process_node_memory_record(record, metric_value) - monitor_id = HealthEventsConstants::NODE_MEMORY_MONITOR_ID - @log.debug "processing node memory record" + monitor_id = HealthMonitorConstants::NODE_MEMORY_MONITOR_ID + #@log.debug "processing node memory record" if record.nil? return nil else @@ -237,19 +237,19 @@ def process_node_memory_record(record, metric_value) percent = (metric_value.to_f/@memory_capacity*100).round(2) #@log.debug "Percentage of Memory limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthEventsConstants::NODE_MEMORY_MONITOR_ID]) + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::NODE_MEMORY_MONITOR_ID]) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value/1000000.to_f, "memoryUtilizationPercentage" => percent}) #@log.info health_monitor_record - monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) #@log.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) + record = 
HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) temp = record.nil? ? "Nil" : record["MonitorInstanceId"] - @log.info "Processed Node Memory #{temp}" + @log.info "Processed Node Memory #{record}" return record end return nil diff --git a/source/code/plugin/filter_health_cpu_memory.rb b/source/code/plugin/filter_health_cpu_memory.rb deleted file mode 100644 index cf55d9a3a..000000000 --- a/source/code/plugin/filter_health_cpu_memory.rb +++ /dev/null @@ -1,335 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. - -# frozen_string_literal: true - -module Fluent - require "logger" - require "json" - require_relative "omslog" - - class CPUMemoryHealthFilter < Filter - Fluent::Plugin.register_filter("filter_health_cpu_memory", self) - - config_param :enable_log, :integer, :default => 0 - config_param :log_path, :string, :default => "/var/opt/microsoft/omsagent/log/filter_health_cpu_memory.log" - config_param :metrics_to_collect, :string, :default => "cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes" - - @@HealthConfigFile = "/var/opt/microsoft/docker-cimprov/healthConfig/config" - @@PluginName = "filter_health_cpu_memory" - - # Setting the memory and cpu pass and fail percentages to default values - @@memoryPassPercentage = 80.0 - @@memoryFailPercentage = 90.0 - @@cpuPassPercentage = 80.0 - @@cpuFailPercentage = 90.0 - @@cpuMonitorTimeOut = 50 - @@memoryRssMonitorTimeOut = 50 - - @@previousCpuHealthDetails = {} - @@previousPreviousCpuHealthDetails = {} - @@previousCpuHealthStateSent = "" - @@nodeCpuHealthDataTimeTracker = DateTime.now.to_time.to_i - @@nodeMemoryRssDataTimeTracker = DateTime.now.to_time.to_i - - @@previousMemoryRssHealthDetails = {} - @@previousPreviousMemoryRssHealthDetails = {} - @@previousMemoryRssHealthStateSent = "" - @@clusterName = KubernetesApiClient.getClusterName - @@clusterId = KubernetesApiClient.getClusterId - @@clusterRegion = 
KubernetesApiClient.getClusterRegion - @@cpu_usage_nano_cores = "cpuusagenanocores" - @@memory_rss_bytes = "memoryrssbytes" - @@object_name_k8s_node = "K8SNode" - - @metrics_to_collect_hash = {} - - def initialize - super - end - - def configure(conf) - super - @log = nil - - if @enable_log - @log = Logger.new(@log_path, "weekly") - @log.debug { "Starting filter_health_cpu_memory plugin" } - end - end - - def start - super - @metrics_to_collect_hash = build_metrics_hash - @@clusterName = KubernetesApiClient.getClusterName - @@clusterId = KubernetesApiClient.getClusterId - @@clusterRegion = KubernetesApiClient.getClusterRegion - @@cpu_limit = 0.0 - @@memory_limit = 0.0 - begin - nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) - rescue Exception => e - @log.info "Error when getting nodeInventory from kube API. Exception: #{e.class} Message: #{e.message} " - ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) - end - if !nodeInventory.nil? - cpu_limit_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") - if !cpu_limit_json.nil? - @@cpu_limit = cpu_limit_json[0]["DataItems"][0]["Collections"][0]["Value"] - @log.info "CPU Limit #{@@cpu_limit}" - else - @log.info "Error getting cpu_limit" - end - memory_limit_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes") - if !memory_limit_json.nil? - @@memory_limit = memory_limit_json[0]["DataItems"][0]["Collections"][0]["Value"] - @log.info "Memory Limit #{@@memory_limit}" - else - @log.info "Error getting memory_limit" - end - end - # Read config information for cpu and memory limits. - begin - healthConfigObject = nil - file = File.open(@@HealthConfigFile, "r") - if !file.nil? - fileContents = file.read - healthConfigObject = JSON.parse(fileContents) - file.close - if !healthConfigObject.nil? 
- cpuPassPercent = healthConfigObject["NodeCpuMonitor"]["PassPercentage"] - cpuFailPercent = healthConfigObject["NodeCpuMonitor"]["FailPercentage"] - memPassPercent = healthConfigObject["NodeMemoryRssMonitor"]["PassPercentage"] - memFailPercent = healthConfigObject["NodeMemoryRssMonitor"]["FailPercentage"] - @@cpuMonitorTimeOut = healthConfigObject["NodeCpuMonitor"]["MonitorTimeOut"] - @@memoryRssMonitorTimeOut = healthConfigObject["NodeMemoryRssMonitor"]["MonitorTimeOut"] - - if !memPassPercent.nil? && memPassPercent.is_a?(Numeric) - @@memoryPassPercentage = memPassPercent - end - if !memFailPercent.nil? && memFailPercent.is_a?(Numeric) - @@memoryFailPercentage = memFailPercent - end - if !cpuPassPercent.nil? && cpuPassPercent.is_a?(Numeric) - @@cpuPassPercentage = cpuPassPercent - end - if !cpuFailPercent.nil? && cpuFailPercent.is_a?(Numeric) - @@cpuFailPercentage = cpuFailPercent - end - @log.info "Successfully read config values from file, using values for cpu and memory health." - end - else - @log.warn "Failed to open file at location #{@@HealthConfigFile} to read health config, using defaults" - end - rescue => errorStr - @log.debug "Exception occured while reading config file at location #{@@HealthConfigFile}, error: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - end - - def shutdown - super - end - - def build_metrics_hash - @log.debug "Building Hash of Metrics to Collect" - metrics_to_collect_arr = @metrics_to_collect.split(",").map(&:strip) - metrics_hash = metrics_to_collect_arr.map { |x| [x.downcase, true] }.to_h - @log.info "Metrics Collected : #{metrics_hash}" - return metrics_hash - end - - def processCpuMetrics(cpuMetricValue, cpuMetricPercentValue, host, timeStamp) - begin - @log.debug "cpuMetricValue: #{cpuMetricValue}" - @log.debug "cpuMetricPercentValue: #{cpuMetricPercentValue}" - # Get node CPU usage health - updateCpuHealthState = false - cpuHealthRecord = {} - currentCpuHealthDetails = {} - labels = {} - 
labels["ClusterName"] = @@clusterName - labels["ClusterId"] = @@clusterId - labels["ClusterRegion"] = @@clusterRegion - labels["NodeName"] = host - cpuHealthRecord["Labels"] = labels.to_json - cpuHealthRecord["MonitorId"] = "NodeCpuMonitor" - cpuHealthState = "" - if cpuMetricPercentValue.to_f < @@cpuPassPercentage - cpuHealthState = "Pass" - elsif cpuMetricPercentValue.to_f > @@cpuFailPercentage - cpuHealthState = "Fail" - else - cpuHealthState = "Warning" - end - currentCpuHealthDetails["State"] = cpuHealthState - currentCpuHealthDetails["Time"] = timeStamp - currentCpuHealthDetails["CPUUsagePercentage"] = cpuMetricPercentValue - currentCpuHealthDetails["CPUUsageMillicores"] = cpuMetricValue - - currentTime = DateTime.now.to_time.to_i - timeDifference = (currentTime - @@nodeCpuHealthDataTimeTracker).abs - timeDifferenceInMinutes = timeDifference / 60 - - @log.debug "processing cpu metrics" - if ((cpuHealthState != @@previousCpuHealthStateSent && - ((cpuHealthState == @@previousCpuHealthDetails["State"]) && (cpuHealthState == @@previousPreviousCpuHealthDetails["State"]))) || - timeDifferenceInMinutes > @@cpuMonitorTimeOut) - @log.debug "cpu conditions met." 
- cpuHealthRecord["NewState"] = cpuHealthState - cpuHealthRecord["OldState"] = @@previousCpuHealthStateSent - - details = {} - details["NodeCpuUsagePercentage"] = cpuMetricPercentValue - details["NodeCpuUsageMilliCores"] = cpuMetricValue - details["PrevNodeCpuUsageDetails"] = { "Percent": @@previousCpuHealthDetails["CPUUsagePercentage"], "TimeStamp": @@previousCpuHealthDetails["Time"], "Millicores": @@previousCpuHealthDetails["CPUUsageMillicores"] } - details["PrevPrevNodeCpuUsageDetails"] = { "Percent": @@previousPreviousCpuHealthDetails["CPUUsagePercentage"], "TimeStamp": @@previousPreviousCpuHealthDetails["Time"], "Millicores": @@previousPreviousCpuHealthDetails["CPUUsageMillicores"] } - cpuHealthRecord["Details"] = details.to_json - - #Sendind this data as collection time because this is overridden in custom log type. This will be mapped to TimeGenerated with fixed type. - cpuHealthRecord["CollectionTime"] = @@previousPreviousCpuHealthDetails["Time"] - updateCpuHealthState = true - @@previousCpuHealthStateSent = cpuHealthState - end - @@previousPreviousCpuHealthDetails = @@previousCpuHealthDetails.clone - @@previousCpuHealthDetails = currentCpuHealthDetails.clone - if updateCpuHealthState - @@nodeCpuHealthDataTimeTracker = currentTime - cpuHealthRecord["TimeObserved"] = Time.now.utc.iso8601 - telemetryProperties = {} - telemetryProperties["Computer"] = host - telemetryProperties["NodeCpuHealthState"] = cpuHealthState - ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) - @log.debug "cpu record sent" - return cpuHealthRecord - else - return nil - end - rescue => errorStr - @log.debug "In processCpuMetrics: exception: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - end - - def processMemoryRssHealthMetrics(memoryRssMetricValue, memoryRssMetricPercentValue, host, timeStamp) - begin - @log.debug "memoryRssMetricValue: #{memoryRssMetricValue}" - @log.debug "memoryRssMetricPercentValue: 
#{memoryRssMetricPercentValue}" - - # Get node memory RSS health - memRssHealthRecord = {} - currentMemoryRssHealthDetails = {} - - labels = {} - labels["ClusterName"] = @@clusterName - labels["ClusterId"] = @@clusterId - labels["ClusterRegion"] = @@clusterRegion - labels["NodeName"] = host - memRssHealthRecord["Labels"] = labels.to_json - memRssHealthRecord["MonitorId"] = "NodeMemoryRssMonitor" - - memoryRssHealthState = "" - if memoryRssMetricPercentValue.to_f < @@memoryPassPercentage - memoryRssHealthState = "Pass" - elsif memoryRssMetricPercentValue.to_f > @@memoryFailPercentage - memoryRssHealthState = "Fail" - else - memoryRssHealthState = "Warning" - end - currentMemoryRssHealthDetails["State"] = memoryRssHealthState - currentMemoryRssHealthDetails["Time"] = timeStamp - currentMemoryRssHealthDetails["memoryRssPercentage"] = memoryRssMetricPercentValue - currentMemoryRssHealthDetails["memoryRssBytes"] = memoryRssMetricValue - updateMemoryRssHealthState = false - - currentTime = DateTime.now.to_time.to_i - timeDifference = (currentTime - @@nodeMemoryRssDataTimeTracker).abs - timeDifferenceInMinutes = timeDifference / 60 - @log.debug "processing memory metrics" - - if ((memoryRssHealthState != @@previousMemoryRssHealthStateSent && - ((memoryRssHealthState == @@previousMemoryRssHealthDetails["State"]) && (memoryRssHealthState == @@previousPreviousMemoryRssHealthDetails["State"]))) || - timeDifferenceInMinutes > @@memoryRssMonitorTimeOut) - @log.debug "memory conditions met" - memRssHealthRecord["NewState"] = memoryRssHealthState - memRssHealthRecord["OldState"] = @@previousMemoryRssHealthStateSent - details = {} - details["NodeMemoryRssPercentage"] = memoryRssMetricPercentValue - details["NodeMemoryRssBytes"] = memoryRssMetricValue - details["PrevNodeMemoryRssDetails"] = { "Percent": @@previousMemoryRssHealthDetails["memoryRssPercentage"], "TimeStamp": @@previousMemoryRssHealthDetails["Time"], "Bytes": @@previousMemoryRssHealthDetails["memoryRssBytes"] } - 
details["PrevPrevNodeMemoryRssDetails"] = { "Percent": @@previousPreviousMemoryRssHealthDetails["memoryRssPercentage"], "TimeStamp": @@previousPreviousMemoryRssHealthDetails["Time"], "Bytes": @@previousPreviousMemoryRssHealthDetails["memoryRssBytes"] } - memRssHealthRecord["Details"] = details.to_json - #Sending this data as collection time because this is overridden in custom log type. This will be mapped to TimeGenerated with fixed type. - memRssHealthRecord["CollectionTime"] = @@previousPreviousMemoryRssHealthDetails["Time"] - updateMemoryRssHealthState = true - @@previousMemoryRssHealthStateSent = memoryRssHealthState - end - @@previousPreviousMemoryRssHealthDetails = @@previousMemoryRssHealthDetails.clone - @@previousMemoryRssHealthDetails = currentMemoryRssHealthDetails.clone - if updateMemoryRssHealthState - @@nodeMemoryRssDataTimeTracker = currentTime - memRssHealthRecord["TimeObserved"] = Time.now.utc.iso8601 - telemetryProperties = {} - telemetryProperties["Computer"] = host - telemetryProperties["NodeMemoryRssHealthState"] = memoryRssHealthState - ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) - @log.debug "memory record sent" - return memRssHealthRecord - else - return nil - end - rescue => errorStr - @log.debug "In processMemoryRssMetrics: exception: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - end - - def filter(tag, time, record) - object_name = record["DataItems"][0]["ObjectName"] - counter_name = record["DataItems"][0]["Collections"][0]["CounterName"] - host = record["DataItems"][0]["Host"] - timeStamp = record["DataItems"][0]["Timestamp"] - if object_name == @@object_name_k8s_node && @metrics_to_collect_hash.key?(counter_name.downcase) - percentage_metric_value = 0.0 - - # Compute and send % CPU and Memory - begin - metric_value = record["DataItems"][0]["Collections"][0]["Value"] - if counter_name.downcase == @@cpu_usage_nano_cores - metric_value = metric_value / 1000000 - if 
@@cpu_limit != 0.0 - percentage_metric_value = (metric_value * 1000000) * 100 / @@cpu_limit - end - return processCpuMetrics(metric_value, percentage_metric_value, host, timeStamp) - end - - if counter_name.downcase == @@memory_rss_bytes - if @@memory_limit != 0.0 - percentage_metric_value = metric_value * 100 / @@memory_limit - end - return processMemoryRssHealthMetrics(metric_value, percentage_metric_value, host, timeStamp) - end - rescue Exception => e - @log.info "Error parsing cadvisor record Exception: #{e.class} Message: #{e.message}" - ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) - return nil - end - else - return nil - end - end - - def filter_stream(tag, es) - new_es = MultiEventStream.new - es.each { |time, record| - begin - filtered_record = filter(tag, time, record) - new_es.add(time, filtered_record) if filtered_record - rescue => e - router.emit_error_event(tag, time, record, e) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - } - new_es - end - end -end diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index dd0944ae3..bf6ac48ac 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -19,7 +19,7 @@ def initialize config_param :run_interval, :time, :default => '1m' config_param :tag, :string, :default => "oms.api.cadvisorperf" config_param :mdmtag, :string, :default => "mdm.cadvisorperf" - config_param :healthtag, :string, :default => "oms.api.DiliprPerf" + config_param :healthtag, :string, :default => "oms.api.KubeHealth.AgentCollectionTime" def configure (conf) super diff --git a/source/code/plugin/in_health_docker.rb b/source/code/plugin/in_health_docker.rb deleted file mode 100644 index 6ea0ae9dc..000000000 --- a/source/code/plugin/in_health_docker.rb +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -module Fluent - class Kubelet_Health_Input < Input - 
Plugin.register_input("dockerhealth", self) - - def initialize - super - require "yaml" - require "json" - - require_relative "KubernetesApiClient" - require_relative "oms_common" - require_relative "omslog" - require_relative "ApplicationInsightsUtility" - require_relative "DockerApiClient" - end - - config_param :run_interval, :time, :default => "1m" - config_param :tag, :string, :default => "oms.containerinsights.DockerHealth" - - def configure(conf) - super - end - - def start - if @run_interval - @finished = false - @condition = ConditionVariable.new - @mutex = Mutex.new - @thread = Thread.new(&method(:run_periodic)) - @@previousDockerState = "" - # Tracks the last time docker health data sent for each node - @@dockerHealthDataTimeTracker = DateTime.now.to_time.to_i - @@clusterName = KubernetesApiClient.getClusterName - @@clusterId = KubernetesApiClient.getClusterId - @@clusterRegion = KubernetesApiClient.getClusterRegion - @@telemetryTimeTracker = DateTime.now.to_time.to_i - @@PluginName = "in_health_docker" - end - end - - def shutdown - if @run_interval - @mutex.synchronize { - @finished = true - @condition.signal - } - @thread.join - end - end - - def enumerate - begin - currentTime = Time.now - emitTime = currentTime.to_f - batchTime = currentTime.utc.iso8601 - record = {} - eventStream = MultiEventStream.new - $log.info("in_docker_health::Making a call to get docker info @ #{Time.now.utc.iso8601}") - isDockerStateFlush = false - dockerInfo = DockerApiClient.dockerInfo - if (!dockerInfo.nil? && !dockerInfo.empty?) 
- dockerState = "Healthy" - else - dockerState = "Unhealthy" - end - currentTime = DateTime.now.to_time.to_i - timeDifference = (currentTime - @@dockerHealthDataTimeTracker).abs - timeDifferenceInMinutes = timeDifference / 60 - $log.info("Time difference in minutes: #{timeDifferenceInMinutes}") - if (timeDifferenceInMinutes >= 3) || - !(dockerState.casecmp(@@previousDockerState) == 0) - @@previousDockerState = dockerState - isDockerStateFlush = true - @@dockerHealthDataTimeTracker = currentTime - record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated - record["DockerState"] = dockerState - hostName = (OMS::Common.get_hostname) - record["Computer"] = hostName - record["ClusterName"] = @@clusterName - record["ClusterId"] = @@clusterId - record["ClusterRegion"] = @@clusterRegion - eventStream.add(emitTime, record) if record - end - - if isDockerStateFlush - router.emit_stream(@tag, eventStream) if eventStream - timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference / 60 - if (timeDifferenceInMinutes >= 5) - @@telemetryTimeTracker = DateTime.now.to_time.to_i - telemetryProperties = {} - telemetryProperties["Computer"] = hostname - telemetryProperties["DockerState"] = dockerState - ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) - end - end - rescue => errorStr - $log.warn("error : #{errorStr.to_s}") - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - end - - def run_periodic - @mutex.lock - done = @finished - until done - @condition.wait(@mutex, @run_interval) - done = @finished - @mutex.unlock - if !done - begin - $log.info("in_health_docker::run_periodic @ #{Time.now.utc.iso8601}") - enumerate - rescue => errorStr - $log.warn "in_health_docker::run_periodic: enumerate Failed for docker health: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - end - @mutex.lock - end - @mutex.unlock - end - 
end # Health_Docker_Input -end # module diff --git a/source/code/plugin/in_health_kubeapidata.rb b/source/code/plugin/in_health_kubeapidata.rb index 7f6bda690..c17cfb753 100644 --- a/source/code/plugin/in_health_kubeapidata.rb +++ b/source/code/plugin/in_health_kubeapidata.rb @@ -2,7 +2,7 @@ # frozen_string_literal: true module Fluent - class KubeApiDataHealthInput < Input + class KubeHealthInput < Input Plugin.register_input("kubeapidatahealth", self) @@clusterCpuCapacity = 0.0 @@ -18,12 +18,12 @@ def initialize require_relative "omslog" require_relative "ApplicationInsightsUtility" require_relative "DockerApiClient" - require_relative 'HealthEventUtils' + require_relative 'HealthMonitorUtils' require_relative 'HealthMonitorState' end config_param :run_interval, :time, :default => "1m" - config_param :tag, :string, :default => "oms.api.DiliprPerf" + config_param :tag, :string, :default => "oms.api.KubeHealth.AgentCollectionTime" def configure(conf) super @@ -39,11 +39,11 @@ def start @@clusterName = KubernetesApiClient.getClusterName @@clusterId = KubernetesApiClient.getClusterId @@clusterRegion = KubernetesApiClient.getClusterRegion - cluster_capacity = HealthEventUtils.getClusterCpuMemoryCapacity + cluster_capacity = HealthMonitorUtils.getClusterCpuMemoryCapacity @@clusterCpuCapacity = cluster_capacity[0] @@clusterMemoryCapacity = cluster_capacity[1] - @@healthMonitorConfig = HealthEventUtils.getHealthMonitorConfig - @@hmlog = HealthEventUtils.getLogHandle + @@healthMonitorConfig = HealthMonitorUtils.getHealthMonitorConfig + @@hmlog = HealthMonitorUtils.getLogHandle @@hmlog.info "Cluster CPU Capacity: #{@@clusterCpuCapacity} Memory Capacity: #{@@clusterMemoryCapacity}" end end @@ -66,8 +66,8 @@ def enumerate health_monitor_records = [] eventStream = MultiEventStream.new - hmlog = HealthEventUtils.getLogHandle - HealthEventUtils.refreshKubernetesApiData(@@hmlog, nil) + hmlog = HealthMonitorUtils.getLogHandle + HealthMonitorUtils.refreshKubernetesApiData(@@hmlog, 
nil) # we do this so that if the call fails, we get a response code/header etc. node_inventory_response = KubernetesApiClient.getKubeResourceInfo("nodes") node_inventory = JSON.parse(node_inventory_response.body) @@ -75,10 +75,10 @@ def enumerate pod_inventory = JSON.parse(pod_inventory_response.body) if node_inventory_response.code.to_i != 200 - record = process_kube_api_up_monitor("Fail", node_inventory_response) + record = process_kube_api_up_monitor("fail", node_inventory_response) health_monitor_records.push(record) if record else - record = process_kube_api_up_monitor("Pass", node_inventory_response) + record = process_kube_api_up_monitor("pass", node_inventory_response) health_monitor_records.push(record) if record end @@ -87,17 +87,17 @@ def enumerate health_monitor_records.push(record) if record record = process_memory_oversubscribed_monitor(pod_inventory) health_monitor_records.push(record) if record - pods_ready_hash = HealthEventUtils.getPodsReadyHash(pod_inventory) + pods_ready_hash = HealthMonitorUtils.getPodsReadyHash(pod_inventory) system_pods = pods_ready_hash.select{|k,v| v['namespace'] == 'kube-system'} workload_pods = pods_ready_hash.select{|k,v| v['namespace'] != 'kube-system'} - system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, "system_#{HealthEventsConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID}") + system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, "system_#{HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID}") system_pods_ready_percentage_records.each do |record| health_monitor_records.push(record) if record end - workload_pods_ready_percentage_records = process_pods_ready_percentage(workload_pods, "workload_#{HealthEventsConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID}") + workload_pods_ready_percentage_records = process_pods_ready_percentage(workload_pods, "workload_#{HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID}") 
workload_pods_ready_percentage_records.each do |record| health_monitor_records.push(record) if record end @@ -110,81 +110,81 @@ def enumerate end end - @@hmlog.info "Health Monitor Records Size #{health_monitor_records.size}" + #@@hmlog.debug "Health Monitor Records Size #{health_monitor_records.size}" health_monitor_records.each do |record| eventStream.add(emitTime, record) end router.emit_stream(@tag, eventStream) if eventStream rescue => errorStr - @@hmlog.warn("error in_health_kubeapidata: #{errorStr.to_s}") - @log.debug "backtrace Input #{errorStr.backtrace}" + @@hmlog.warn("error in_kube_health: #{errorStr.to_s}") + @@hmlog.debug "backtrace Input #{errorStr.backtrace}" ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end def process_cpu_oversubscribed_monitor(pod_inventory) timestamp = Time.now.utc.iso8601 - subscription = HealthEventUtils.getResourceSubscription(pod_inventory,"cpu", @@clusterCpuCapacity) - state = subscription > @@clusterCpuCapacity ? "Fail" : "Pass" + subscription = HealthMonitorUtils.getResourceSubscription(pod_inventory,"cpu", @@clusterCpuCapacity) + state = subscription > @@clusterCpuCapacity ? 
"fail" : "pass" #@@hmlog.debug "CPU Oversubscribed Monitor State : #{state}" #CPU - monitor_id = HealthEventsConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID + monitor_id = HealthMonitorConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}} #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}) # @@hmlog.info health_monitor_record - monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) - record = HealthSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) @@hmlog.info "Successfully processed process_cpu_oversubscribed_monitor" return record.nil? ? nil : record end def process_memory_oversubscribed_monitor(pod_inventory) timestamp = Time.now.utc.iso8601 - subscription = HealthEventUtils.getResourceSubscription(pod_inventory,"memory", @@clusterMemoryCapacity) - state = subscription > @@clusterMemoryCapacity ? "Fail" : "Pass" + subscription = HealthMonitorUtils.getResourceSubscription(pod_inventory,"memory", @@clusterMemoryCapacity) + state = subscription > @@clusterMemoryCapacity ? 
"fail" : "pass" #@@hmlog.debug "Memory Oversubscribed Monitor State : #{state}" #CPU - monitor_id = HealthEventsConstants::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID + monitor_id = HealthMonitorConstants::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}} #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}) - hmlog = HealthEventUtils.getLogHandle + hmlog = HealthMonitorUtils.getLogHandle - monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) - record = HealthSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) - @@hmlog.info "Successfully processed process_memory_oversubscribed_monitor" + record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + @@hmlog.info "Successfully processed process_memory_oversubscribed_monitor #{record}" return record.nil? ? 
nil : record end def process_kube_api_up_monitor(state, response) timestamp = Time.now.utc.iso8601 - monitor_id = HealthEventsConstants::MANAGEDINFRA_KUBEAPI_AVAILABLE_MONITOR_ID + monitor_id = HealthMonitorConstants::MANAGEDINFRA_KUBEAPI_AVAILABLE_MONITOR_ID details = response.each_header.to_h details['ResponseCode'] = response.code health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} #health_monitor_record = HealthMonitorRecord.new(timestamp, state, details) - hmlog = HealthEventUtils.getLogHandle + hmlog = HealthMonitorUtils.getLogHandle #hmlog.info health_monitor_record - monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) - record = HealthSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) @@hmlog.info "Successfully processed process_kube_api_up_monitor" return record.nil? ? 
nil : record end def process_pods_ready_percentage(pods_hash, config_monitor_id) - monitor_id = HealthEventsConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID + monitor_id = HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID monitor_config = @@healthMonitorConfig[config_monitor_id] - hmlog = HealthEventUtils.getLogHandle + hmlog = HealthMonitorUtils.getLogHandle records = [] pods_hash.keys.each do |key| @@ -203,9 +203,9 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "controllerName" => controller_name}} #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"totalPods" => total_pods, "podsReady" => pods_ready, "controllerName" => controller_name}) #hmlog.info health_monitor_record - monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId, "controller_name" => controller_name, "namespace" => namespace}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId, "controller_name" => controller_name, "namespace" => namespace}) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) - record = HealthSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) + record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) records.push(record) end @@hmlog.info "Successfully processed pods_ready_percentage for #{config_monitor_id} #{records.size}" @@ -213,8 +213,8 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) end def process_node_condition_monitor(node_inventory) - hmlog = HealthEventUtils.getLogHandle - monitor_id = 
HealthEventsConstants::NODE_CONDITION_MONITOR_ID + hmlog = HealthMonitorUtils.getLogHandle + monitor_id = HealthMonitorConstants::NODE_CONDITION_MONITOR_ID timestamp = Time.now.utc.iso8601 monitor_config = @@healthMonitorConfig[monitor_id] node_condition_monitor_records = [] @@ -222,7 +222,7 @@ def process_node_condition_monitor(node_inventory) node_inventory['items'].each do |node| node_name = node['metadata']['name'] conditions = node['status']['conditions'] - state = HealthEventUtils.getNodeStateFromNodeConditions(conditions) + state = HealthMonitorUtils.getNodeStateFromNodeConditions(conditions) #hmlog.debug "Node Name = #{node_name} State = #{state}" details = {} conditions.each do |condition| @@ -230,9 +230,9 @@ def process_node_condition_monitor(node_inventory) end health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} #health_monitor_record = HealthMonitorRecord.new(timestamp, state, details) - monitor_instance_id = HealthEventUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId, "node_name" => node_name}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId, "node_name" => node_name}) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) - record = HealthSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, node_name: node_name) + record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, node_name: node_name) node_condition_monitor_records.push(record) end end @@ -249,10 +249,10 @@ def run_periodic @mutex.unlock if !done begin - @@hmlog.info("in_health_kubeapidata::run_periodic @ #{Time.now.utc.iso8601}") + @@hmlog.info("in_kube_health::run_periodic @ #{Time.now.utc.iso8601}") enumerate rescue => errorStr - @@hmlog.warn "in_health_kubeapidata::run_periodic: enumerate Failed for kubeapi sourced data 
health: #{errorStr}" + @@hmlog.warn "in_kube_health::run_periodic: enumerate Failed for kubeapi sourced data health: #{errorStr}" ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end diff --git a/source/code/plugin/in_health_kubelet.rb b/source/code/plugin/in_health_kubelet.rb deleted file mode 100644 index c9db5727f..000000000 --- a/source/code/plugin/in_health_kubelet.rb +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -module Fluent - class Kubelet_Health_Input < Input - Plugin.register_input("kubelethealth", self) - - def initialize - super - require "yaml" - require "json" - - require_relative "KubernetesApiClient" - require_relative "oms_common" - require_relative "omslog" - require_relative "ApplicationInsightsUtility" - end - - config_param :run_interval, :time, :default => "1m" - config_param :tag, :string, :default => "oms.containerinsights.KubeletHealth" - - def configure(conf) - super - end - - def start - if @run_interval - @finished = false - @condition = ConditionVariable.new - @mutex = Mutex.new - @thread = Thread.new(&method(:run_periodic)) - @@previousNodeStatus = {} - @@previousNodeState = {} - # Tracks the last time node health data sent for each node - @@nodeHealthDataTimeTracker = {} - @@clusterName = KubernetesApiClient.getClusterName - @@clusterId = KubernetesApiClient.getClusterId - @@clusterRegion = KubernetesApiClient.getClusterRegion - @@telemetryTimeTracker = DateTime.now.to_time.to_i - @@PluginName = "in_health_kubelet" - end - end - - def shutdown - if @run_interval - @mutex.synchronize { - @finished = true - @condition.signal - } - @thread.join - end - end - - def enumerate - currentTime = Time.now - emitTime = currentTime.to_f - batchTime = currentTime.utc.iso8601 - $log.info("in_health_health::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) - 
$log.info("in_health_health::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") - begin - if (!nodeInventory.empty?) - eventStream = MultiEventStream.new - #get node inventory - nodeInventory["items"].each do |item| - record = {} - record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated - computerName = item["metadata"]["name"] - labels = {} - labels["ClusterName"] = @@clusterName - labels["ClusterId"] = @@clusterId - labels["ClusterRegion"] = @@clusterRegion - labels["NodeName"] = computerName - # Tracking state change in order to send node health data only in case of state change or timeout - flushRecord = false - - currentTime = DateTime.now.to_time.to_i - timeDifferenceInMinutes = 0 - if !@@nodeHealthDataTimeTracker[computerName].nil? - timeDifference = (currentTime - @@nodeHealthDataTimeTracker[computerName]).abs - timeDifferenceInMinutes = timeDifference / 60 - end - if item["status"].key?("conditions") && !item["status"]["conditions"].empty? 
- # allNodeConditions = "" - allNodeConditions = {} - nodeState = "" - item["status"]["conditions"].each do |condition| - conditionType = condition["type"] - conditionStatus = condition["status"] - conditionReason = condition["reason"] - conditionMessage = condition["message"] - - if !(nodeState.casecmp("Fail") == 0) - if ((conditionType.casecmp("MemoryPressure") == 0) || - (conditionType.casecmp("DiskPressure") == 0) || - (conditionType.casecmp("PIDPressure") == 0)) - if ((conditionStatus.casecmp("Unknown") == 0) || - (conditionStatus.casecmp("True") == 0)) - nodeState = "Warning" - else - if !(nodeState.casecmp("Warning") == 0) - nodeState = "Pass" - end - end - elsif ((conditionType.casecmp("NetworkUnavailable") == 0) || - (conditionType.casecmp("OutOfDisk") == 0)) - if ((conditionStatus.casecmp("Unknown") == 0) || - (conditionStatus.casecmp("True") == 0)) - nodeState = "Fail" - else - nodeState = "Pass" - end - elsif (conditionType.casecmp("Ready") == 0) - if ((conditionStatus.casecmp("Unknown") == 0) || - (conditionStatus.casecmp("False") == 0)) - nodeState = "Fail" - else - nodeState = "Pass" - end - end - end - - if @@previousNodeStatus[computerName + conditionType].nil? 
|| - !(conditionStatus.casecmp(@@previousNodeStatus[computerName + conditionType]) == 0) || - timeDifferenceInMinutes >= 3 - # Comparing current status with previous status and setting state change as true - flushRecord = true - @@previousNodeStatus[computerName + conditionType] = conditionStatus - conditionInformation = {} - conditionInformation["Reason"] = conditionReason - conditionInformation["Message"] = conditionMessage - allNodeConditions[conditionType] = conditionInformation.to_json - record["NewState"] = nodeState - record["OldState"] = @@previousNodeState[computerName] - @@previousNodeState[computerName] = nodeState - record["Details"] = allNodeConditions.to_json - end - end - end - - if flushRecord - #Sending node health data the very first time without checking for state change and timeout - record["MonitorId"] = "KubeletHealth" - record["Labels"] = labels.to_json - eventStream.add(emitTime, record) if record - @@nodeHealthDataTimeTracker[computerName] = currentTime - timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference / 60 - if (timeDifferenceInMinutes >= 5) - @@telemetryTimeTracker = DateTime.now.to_time.to_i - telemetryProperties = {} - telemetryProperties["Computer"] = computerName - telemetryProperties["NodeStatusCondition"] = allNodeConditions.to_json - ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) - end - end - end - router.emit_stream(@tag, eventStream) if eventStream - end - rescue => errorStr - $log.warn("error : #{errorStr.to_s}") - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - end - - def run_periodic - @mutex.lock - done = @finished - until done - @condition.wait(@mutex, @run_interval) - done = @finished - @mutex.unlock - if !done - begin - $log.info("in_health_kubelet::run_periodic @ #{Time.now.utc.iso8601}") - enumerate - rescue => errorStr - $log.warn "in_health_kubelet::run_periodic: enumerate Failed for kubelet health: 
#{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - end - @mutex.lock - end - @mutex.unlock - end - end # Health_Kubelet_Input -end # module From b60ee713d7711942beb529c3d120d9d59322a733 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 12 Mar 2019 21:10:18 +0000 Subject: [PATCH 21/90] Fix Base Container.data, include kube-system containers, fix input plugin name --- installer/datafiles/base_container.data | 4 ++-- source/code/plugin/filter_cadvisor_health.rb | 6 ------ .../plugin/{in_health_kubeapidata.rb => in_kube_health.rb} | 2 +- 3 files changed, 3 insertions(+), 9 deletions(-) rename source/code/plugin/{in_health_kubeapidata.rb => in_kube_health.rb} (99%) diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 075948ab8..9f68a8f71 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -50,12 +50,12 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/filter_cadvisor2mdm.rb; source/code/plugin/filter_cadvisor2mdm.rb; 644; root; root /opt/microsoft/omsagent/plugin/filter_cadvisor_health.rb; source/code/plugin/filter_cadvisor_health.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_health_kubeapidata.rb; source/code/plugin/in_health_kubeapidata.rb; 644; root; root +/opt/microsoft/omsagent/plugin/in_kube_health.rb; source/code/plugin/in_kube_health.rb; 644; root; root /opt/microsoft/omsagent/plugin/HealthMonitorConstants.rb; source/code/plugin/HealthMonitorConstants.rb; 644; root; root /opt/microsoft/omsagent/plugin/HealthMonitorSignalReducer.rb; source/code/plugin/HealthMonitorSignalReducer.rb; 644; root; root /opt/microsoft/omsagent/plugin/HealthMonitorState.rb; source/code/plugin/HealthMonitorState.rb; 644; root; root /opt/microsoft/omsagent/plugin/HealthMonitorUtils.rb; source/code/plugin/HealthMonitorUtils.rb; 644; root; root -/opt/microsoft/omsagent/plugin/healthmonitorconfig.json 
installer/conf/healthmonitorconfig.json +/opt/microsoft/omsagent/plugin/healthmonitorconfig.json; installer/conf/healthmonitorconfig.json; 644; root; root /opt/microsoft/omsagent/plugin/lib/application_insights/version.rb; source/code/plugin/lib/application_insights/version.rb; 644; root; root /opt/microsoft/omsagent/plugin/lib/application_insights/rack/track_request.rb; source/code/plugin/lib/application_insights/rack/track_request.rb; 644; root; root diff --git a/source/code/plugin/filter_cadvisor_health.rb b/source/code/plugin/filter_cadvisor_health.rb index 289c7384a..dc447e930 100644 --- a/source/code/plugin/filter_cadvisor_health.rb +++ b/source/code/plugin/filter_cadvisor_health.rb @@ -124,9 +124,6 @@ def process_container_cpu_record(record, metric_value) key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) container_metadata = HealthMonitorUtils.getContainerMetadata(key) if !container_metadata.nil? - if container_metadata['namespace'] == 'kube-system' - return nil - end cpu_limit = container_metadata['cpuLimit'] end @@ -167,9 +164,6 @@ def process_container_memory_record(record, metric_value) key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) container_metadata = HealthMonitorUtils.getContainerMetadata(key) if !container_metadata.nil? 
- if container_metadata['namespace'] == 'kube-system' - return nil - end memory_limit = container_metadata['memoryLimit'] end diff --git a/source/code/plugin/in_health_kubeapidata.rb b/source/code/plugin/in_kube_health.rb similarity index 99% rename from source/code/plugin/in_health_kubeapidata.rb rename to source/code/plugin/in_kube_health.rb index c17cfb753..c5358baec 100644 --- a/source/code/plugin/in_health_kubeapidata.rb +++ b/source/code/plugin/in_kube_health.rb @@ -3,7 +3,7 @@ module Fluent class KubeHealthInput < Input - Plugin.register_input("kubeapidatahealth", self) + Plugin.register_input("kubehealth", self) @@clusterCpuCapacity = 0.0 @@clusterMemoryCapacity = 0.0 From 99b7fe13dc000a923847cb4b27d9f9dc45aebddc Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 12 Mar 2019 14:38:58 -0700 Subject: [PATCH 22/90] More fixes to config, process kube-system --- installer/conf/container.conf | 1 - installer/conf/healthmonitorconfig.json | 24 ++++++++++---------- installer/datafiles/base_container.data | 4 ++-- source/code/plugin/HealthMonitorUtils.rb | 8 +++---- source/code/plugin/filter_cadvisor_health.rb | 11 +++------ 5 files changed, 21 insertions(+), 27 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 88f7929d7..88c7228ca 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -89,7 +89,6 @@ retry_limit 10 retry_wait 30s max_retry_wait 9m - retry_mdm_post_wait_minutes 60 diff --git a/installer/conf/healthmonitorconfig.json b/installer/conf/healthmonitorconfig.json index 2b28f0317..ddceef91a 100644 --- a/installer/conf/healthmonitorconfig.json +++ b/installer/conf/healthmonitorconfig.json @@ -4,33 +4,33 @@ "FailPercentage": 90.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, - "MonitorTimeOut": 5 + "MonitorTimeOut": 240 }, "node_memory_utilization_percentage": { "PassPercentage": 80.0, "FailPercentage": 90.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, - 
"MonitorTimeOut": 5 + "MonitorTimeOut": 240 }, "container_manager_runtime_running": { - "MonitorTimeOut": 5, + "MonitorTimeOut": 240, "NotifyInstantly" : true }, "kubelet_running": { - "MonitorTimeOut": 5, + "MonitorTimeOut": 240, "NotifyInstantly" : true }, "node_condition": { - "MonitorTimeOut": 5, + "MonitorTimeOut": 240, "NotifyInstantly" : true }, "is_oversubscribed_cpu": { - "MonitorTimeOut": 5, + "MonitorTimeOut": 240, "NotifyInstantly" : true }, "is_oversubscribed_memory": { - "MonitorTimeOut": 5, + "MonitorTimeOut": 240, "NotifyInstantly" : true }, "container_cpu_utilization_percentage": { @@ -38,31 +38,31 @@ "FailPercentage": 90.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, - "MonitorTimeOut": 5 + "MonitorTimeOut": 240 }, "container_memory_utilization_percentage": { "PassPercentage": 80.0, "FailPercentage": 90.0, "SamplesBeforeNotification": 3, "NotifyInstantly" : false, - "MonitorTimeOut": 5 + "MonitorTimeOut": 240 }, "workload_pods_ready_percentage" : { "PassPercentage": 100.0, "FailPercentage": 90.0, "SamplesBeforeNotification": 2, "NotifyInstantly" : false, - "MonitorTimeOut": 5 + "MonitorTimeOut": 240 }, "system_pods_ready_percentage" : { "PassPercentage": 100.0, "FailPercentage": 90.0, "SamplesBeforeNotification": 2, "NotifyInstantly" : false, - "MonitorTimeOut": 5 + "MonitorTimeOut": 240 }, "kube_api_up": { - "MonitorTimeOut": 5, + "MonitorTimeOut": 240, "NotifyInstantly" : true } } \ No newline at end of file diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 075948ab8..21dc9ad95 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -50,12 +50,12 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/filter_cadvisor2mdm.rb; source/code/plugin/filter_cadvisor2mdm.rb; 644; root; root /opt/microsoft/omsagent/plugin/filter_cadvisor_health.rb; source/code/plugin/filter_cadvisor_health.rb; 644; root; root 
-/opt/microsoft/omsagent/plugin/in_health_kubeapidata.rb; source/code/plugin/in_health_kubeapidata.rb; 644; root; root +/opt/microsoft/omsagent/plugin/in_kube_health.rb; source/code/plugin/in_kube_health.rb; 644; root; root /opt/microsoft/omsagent/plugin/HealthMonitorConstants.rb; source/code/plugin/HealthMonitorConstants.rb; 644; root; root /opt/microsoft/omsagent/plugin/HealthMonitorSignalReducer.rb; source/code/plugin/HealthMonitorSignalReducer.rb; 644; root; root /opt/microsoft/omsagent/plugin/HealthMonitorState.rb; source/code/plugin/HealthMonitorState.rb; 644; root; root /opt/microsoft/omsagent/plugin/HealthMonitorUtils.rb; source/code/plugin/HealthMonitorUtils.rb; 644; root; root -/opt/microsoft/omsagent/plugin/healthmonitorconfig.json installer/conf/healthmonitorconfig.json +/opt/microsoft/omsagent/plugin/healthmonitorconfig.json; installer/conf/healthmonitorconfig.json; 644; root; root /opt/microsoft/omsagent/plugin/lib/application_insights/version.rb; source/code/plugin/lib/application_insights/version.rb; 644; root; root /opt/microsoft/omsagent/plugin/lib/application_insights/rack/track_request.rb; source/code/plugin/lib/application_insights/rack/track_request.rb; 644; root; root diff --git a/source/code/plugin/HealthMonitorUtils.rb b/source/code/plugin/HealthMonitorUtils.rb index 61d87f413..f7bd1861e 100644 --- a/source/code/plugin/HealthMonitorUtils.rb +++ b/source/code/plugin/HealthMonitorUtils.rb @@ -127,11 +127,11 @@ def getMonitorLabels(log, monitor_id, key, controller_name, node_name) when HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID #log.debug "Getting Monitor labels for Workload/ManagedInfra Monitors #{controller_name} #{@@controllerMapping}" if !key.nil? 
#container - monitor_labels['monitor.azure.com/ControllerName'] = getContainerControllerName(key) - monitor_labels['monitor.azure.com/Namespace'] = getContainerNamespace(key) + monitor_labels['monitor.azure.com/controller-name'] = getContainerControllerName(key) + monitor_labels['monitor.azure.com/namespace'] = getContainerNamespace(key) elsif !controller_name.nil? - monitor_labels['monitor.azure.com/ControllerName'] = controller_name - monitor_labels['monitor.azure.com/Namespace'] = getControllerNamespace(controller_name) + monitor_labels['monitor.azure.com/controller-name'] = controller_name + monitor_labels['monitor.azure.com/namespace'] = getControllerNamespace(controller_name) end return monitor_labels when HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_KUBELET_HEALTH_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID, HealthMonitorConstants::NODE_CONTAINER_RUNTIME_MONITOR_ID diff --git a/source/code/plugin/filter_cadvisor_health.rb b/source/code/plugin/filter_cadvisor_health.rb index 289c7384a..30013bee6 100644 --- a/source/code/plugin/filter_cadvisor_health.rb +++ b/source/code/plugin/filter_cadvisor_health.rb @@ -124,8 +124,6 @@ def process_container_cpu_record(record, metric_value) key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) container_metadata = HealthMonitorUtils.getContainerMetadata(key) if !container_metadata.nil? 
- if container_metadata['namespace'] == 'kube-system' - return nil end cpu_limit = container_metadata['cpuLimit'] end @@ -149,7 +147,7 @@ def process_container_cpu_record(record, metric_value) monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) #@log.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) temp = record.nil? ? "Nil" : record["MonitorInstanceId"] @log.info "Processed Container CPU #{temp}" return record @@ -167,9 +165,6 @@ def process_container_memory_record(record, metric_value) key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) container_metadata = HealthMonitorUtils.getContainerMetadata(key) if !container_metadata.nil? 
- if container_metadata['namespace'] == 'kube-system' - return nil - end memory_limit = container_metadata['memoryLimit'] end @@ -192,7 +187,7 @@ def process_container_memory_record(record, metric_value) monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) #@log.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) temp = record.nil? ? "Nil" : record["MonitorInstanceId"] @log.info "Processed Container Memory #{temp}" return record @@ -249,7 +244,7 @@ def process_node_memory_record(record, metric_value) HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] - @log.info "Processed Node Memory #{record}" + @log.info "Processed Node Memory #{temp}" return record end return nil From fd5bbf6fa18477f565c1b9fe43a7a47ef99823ec Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 12 Mar 2019 14:39:54 -0700 Subject: [PATCH 23/90] Adding in_kube_health --- source/code/plugin/in_kube_health.rb | 264 +++++++++++++++++++++++++++ 1 file changed, 264 insertions(+) create mode 100644 source/code/plugin/in_kube_health.rb diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb new file mode 100644 index 000000000..c5358baec --- /dev/null +++ b/source/code/plugin/in_kube_health.rb @@ -0,0 +1,264 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +module Fluent + class KubeHealthInput < Input + Plugin.register_input("kubehealth", self) + + @@clusterCpuCapacity = 0.0 + @@clusterMemoryCapacity = 0.0 + + def initialize + super + require "yaml" + require "json" + + require_relative "KubernetesApiClient" + require_relative "oms_common" + require_relative "omslog" + require_relative "ApplicationInsightsUtility" + require_relative "DockerApiClient" + require_relative 'HealthMonitorUtils' + require_relative 'HealthMonitorState' + end + + config_param :run_interval, :time, :default => "1m" + config_param :tag, :string, :default => "oms.api.KubeHealth.AgentCollectionTime" + + def configure(conf) + super + end + + def start + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) + + @@clusterName = KubernetesApiClient.getClusterName + @@clusterId = KubernetesApiClient.getClusterId + @@clusterRegion = KubernetesApiClient.getClusterRegion + cluster_capacity = HealthMonitorUtils.getClusterCpuMemoryCapacity + @@clusterCpuCapacity = cluster_capacity[0] + @@clusterMemoryCapacity = cluster_capacity[1] + @@healthMonitorConfig = HealthMonitorUtils.getHealthMonitorConfig + @@hmlog = 
HealthMonitorUtils.getLogHandle + @@hmlog.info "Cluster CPU Capacity: #{@@clusterCpuCapacity} Memory Capacity: #{@@clusterMemoryCapacity}" + end + end + + def shutdown + if @run_interval + @mutex.synchronize { + @finished = true + @condition.signal + } + @thread.join + end + end + + def enumerate + begin + currentTime = Time.now + emitTime = currentTime.to_f + batchTime = currentTime.utc.iso8601 + health_monitor_records = [] + eventStream = MultiEventStream.new + + hmlog = HealthMonitorUtils.getLogHandle + HealthMonitorUtils.refreshKubernetesApiData(@@hmlog, nil) + # we do this so that if the call fails, we get a response code/header etc. + node_inventory_response = KubernetesApiClient.getKubeResourceInfo("nodes") + node_inventory = JSON.parse(node_inventory_response.body) + pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods") + pod_inventory = JSON.parse(pod_inventory_response.body) + + if node_inventory_response.code.to_i != 200 + record = process_kube_api_up_monitor("fail", node_inventory_response) + health_monitor_records.push(record) if record + else + record = process_kube_api_up_monitor("pass", node_inventory_response) + health_monitor_records.push(record) if record + end + + if !pod_inventory.nil? 
+ record = process_cpu_oversubscribed_monitor(pod_inventory) + health_monitor_records.push(record) if record + record = process_memory_oversubscribed_monitor(pod_inventory) + health_monitor_records.push(record) if record + pods_ready_hash = HealthMonitorUtils.getPodsReadyHash(pod_inventory) + + system_pods = pods_ready_hash.select{|k,v| v['namespace'] == 'kube-system'} + workload_pods = pods_ready_hash.select{|k,v| v['namespace'] != 'kube-system'} + + system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, "system_#{HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID}") + system_pods_ready_percentage_records.each do |record| + health_monitor_records.push(record) if record + end + + workload_pods_ready_percentage_records = process_pods_ready_percentage(workload_pods, "workload_#{HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID}") + workload_pods_ready_percentage_records.each do |record| + health_monitor_records.push(record) if record + end + end + + if !node_inventory.nil? + node_condition_records = process_node_condition_monitor(node_inventory) + node_condition_records.each do |record| + health_monitor_records.push(record) if record + end + end + + #@@hmlog.debug "Health Monitor Records Size #{health_monitor_records.size}" + + health_monitor_records.each do |record| + eventStream.add(emitTime, record) + end + router.emit_stream(@tag, eventStream) if eventStream + rescue => errorStr + @@hmlog.warn("error in_kube_health: #{errorStr.to_s}") + @@hmlog.debug "backtrace Input #{errorStr.backtrace}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def process_cpu_oversubscribed_monitor(pod_inventory) + timestamp = Time.now.utc.iso8601 + subscription = HealthMonitorUtils.getResourceSubscription(pod_inventory,"cpu", @@clusterCpuCapacity) + state = subscription > @@clusterCpuCapacity ? 
"fail" : "pass" + #@@hmlog.debug "CPU Oversubscribed Monitor State : #{state}" + + #CPU + monitor_id = HealthMonitorConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}} + #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}) + # @@hmlog.info health_monitor_record + + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) + #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + @@hmlog.info "Successfully processed process_cpu_oversubscribed_monitor" + return record.nil? ? nil : record + end + + def process_memory_oversubscribed_monitor(pod_inventory) + timestamp = Time.now.utc.iso8601 + subscription = HealthMonitorUtils.getResourceSubscription(pod_inventory,"memory", @@clusterMemoryCapacity) + state = subscription > @@clusterMemoryCapacity ? 
"fail" : "pass" + #@@hmlog.debug "Memory Oversubscribed Monitor State : #{state}" + + #CPU + monitor_id = HealthMonitorConstants::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}} + #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}) + hmlog = HealthMonitorUtils.getLogHandle + + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) + HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + @@hmlog.info "Successfully processed process_memory_oversubscribed_monitor #{record}" + return record.nil? ? 
nil : record + end + + def process_kube_api_up_monitor(state, response) + timestamp = Time.now.utc.iso8601 + + monitor_id = HealthMonitorConstants::MANAGEDINFRA_KUBEAPI_AVAILABLE_MONITOR_ID + details = response.each_header.to_h + details['ResponseCode'] = response.code + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} + #health_monitor_record = HealthMonitorRecord.new(timestamp, state, details) + hmlog = HealthMonitorUtils.getLogHandle + #hmlog.info health_monitor_record + + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) + #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + @@hmlog.info "Successfully processed process_kube_api_up_monitor" + return record.nil? ? 
nil : record + end + + def process_pods_ready_percentage(pods_hash, config_monitor_id) + monitor_id = HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID + monitor_config = @@healthMonitorConfig[config_monitor_id] + hmlog = HealthMonitorUtils.getLogHandle + + records = [] + pods_hash.keys.each do |key| + controller_name = key + total_pods = pods_hash[controller_name]['totalPods'] + pods_ready = pods_hash[controller_name]['podsReady'] + namespace = pods_hash[controller_name]['namespace'] + percent = pods_ready / total_pods * 100 + timestamp = Time.now.utc.iso8601 + + if config_monitor_id.downcase.start_with?("system") + state = HealthMonitorState.getStateForInfraPodsReadyPercentage(@@hmlog, percent, monitor_config) + elsif config_monitor_id.downcase.start_with?("workload") + state = HealthMonitorState.getStateForWorkloadPodsReadyPercentage(@@hmlog, percent, monitor_config) + end + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "controllerName" => controller_name}} + #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"totalPods" => total_pods, "podsReady" => pods_ready, "controllerName" => controller_name}) + #hmlog.info health_monitor_record + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId, "controller_name" => controller_name, "namespace" => namespace}) + HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) + record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) + records.push(record) + end + @@hmlog.info "Successfully processed pods_ready_percentage for #{config_monitor_id} #{records.size}" + return records + end + + def process_node_condition_monitor(node_inventory) + hmlog = HealthMonitorUtils.getLogHandle + monitor_id = 
HealthMonitorConstants::NODE_CONDITION_MONITOR_ID + timestamp = Time.now.utc.iso8601 + monitor_config = @@healthMonitorConfig[monitor_id] + node_condition_monitor_records = [] + if !node_inventory.nil? + node_inventory['items'].each do |node| + node_name = node['metadata']['name'] + conditions = node['status']['conditions'] + state = HealthMonitorUtils.getNodeStateFromNodeConditions(conditions) + #hmlog.debug "Node Name = #{node_name} State = #{state}" + details = {} + conditions.each do |condition| + details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message']} + end + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} + #health_monitor_record = HealthMonitorRecord.new(timestamp, state, details) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId, "node_name" => node_name}) + HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) + record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, node_name: node_name) + node_condition_monitor_records.push(record) + end + end + @@hmlog.info "Successfully processed process_node_condition_monitor #{node_condition_monitor_records.size}" + return node_condition_monitor_records + end + + def run_periodic + @mutex.lock + done = @finished + until done + @condition.wait(@mutex, @run_interval) + done = @finished + @mutex.unlock + if !done + begin + @@hmlog.info("in_kube_health::run_periodic @ #{Time.now.utc.iso8601}") + enumerate + rescue => errorStr + @@hmlog.warn "in_kube_health::run_periodic: enumerate Failed for kubeapi sourced data health: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + @mutex.lock + end + @mutex.unlock + end + end # Health_Docker_Input +end # module From e667406ee574e7915da94a672c3ed126e5c76c5b Mon Sep 17 00:00:00 2001 From: 
r-dilip Date: Tue, 12 Mar 2019 15:45:15 -0700 Subject: [PATCH 24/90] Send Node_name parameter to reduceSignal for node level monitors --- source/code/plugin/HealthMonitorSignalReducer.rb | 2 +- source/code/plugin/filter_cadvisor_health.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/code/plugin/HealthMonitorSignalReducer.rb b/source/code/plugin/HealthMonitorSignalReducer.rb index 6707611b2..b00edc99a 100644 --- a/source/code/plugin/HealthMonitorSignalReducer.rb +++ b/source/code/plugin/HealthMonitorSignalReducer.rb @@ -74,7 +74,7 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, health_monitor_instance_state.prev_sent_record_time = latest_record_time #log.debug "After Updating Monitor State #{health_monitor_instance_state}" HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) - return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, controller_name: controller_name) + return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, controller_name: controller_name, node_name: node_name) else #log.debug "Monitor timeout not reached #{time_elapsed}" #log.debug "Timeout not reached for #{monitor_id}" diff --git a/source/code/plugin/filter_cadvisor_health.rb b/source/code/plugin/filter_cadvisor_health.rb index 20f269bbe..effa07c69 100644 --- a/source/code/plugin/filter_cadvisor_health.rb +++ b/source/code/plugin/filter_cadvisor_health.rb @@ -212,7 +212,7 @@ def process_node_cpu_record(record, metric_value) monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, 
monitor_instance_id, @@health_monitor_config[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) temp = record.nil? ? "Nil" : record["MonitorInstanceId"] @log.info "Processed Node CPU #{temp}" return record @@ -241,7 +241,7 @@ def process_node_memory_record(record, metric_value) monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) #@log.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], , node_name: @@hostName) temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] @log.info "Processed Node Memory #{temp}" return record From 31a3931bec4d7b0a1821778a2291e4fa7ba6a017 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 14 Mar 2019 21:05:29 +0000 Subject: [PATCH 25/90] Fix Typo in method invocation --- source/code/plugin/filter_cadvisor_health.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/plugin/filter_cadvisor_health.rb b/source/code/plugin/filter_cadvisor_health.rb index effa07c69..5362402fd 100644 --- a/source/code/plugin/filter_cadvisor_health.rb +++ b/source/code/plugin/filter_cadvisor_health.rb @@ -241,7 +241,7 @@ def process_node_memory_record(record, metric_value) monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) #@log.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], , node_name: @@hostName) + record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) temp = record.nil? ? "Nil" : record["MonitorInstanceId"] @log.info "Processed Node Memory #{temp}" return record From c380a5e9ada9537f49e66d99bd9c9a64d6320ac8 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 19 Mar 2019 18:10:20 -0700 Subject: [PATCH 26/90] 1. Added pod_status monitor (unused), 2. Removed processing for container resource utilization monitors (for v1/private preview), 3. 
Fixed bug for pods_ready_percentage instance id generation --- installer/conf/healthmonitorconfig.json | 4 ++ source/code/plugin/HealthMonitorConstants.rb | 1 + .../code/plugin/HealthMonitorSignalReducer.rb | 8 ++-- source/code/plugin/HealthMonitorUtils.rb | 12 +++--- source/code/plugin/filter_cadvisor_health.rb | 10 ++--- source/code/plugin/in_kube_health.rb | 42 ++++++++++++++++++- 6 files changed, 62 insertions(+), 15 deletions(-) diff --git a/installer/conf/healthmonitorconfig.json b/installer/conf/healthmonitorconfig.json index ddceef91a..c22ef8b99 100644 --- a/installer/conf/healthmonitorconfig.json +++ b/installer/conf/healthmonitorconfig.json @@ -25,6 +25,10 @@ "MonitorTimeOut": 240, "NotifyInstantly" : true }, + "pod_status": { + "MonitorTimeOut": 5, + "NotifyInstantly" : true + }, "is_oversubscribed_cpu": { "MonitorTimeOut": 240, "NotifyInstantly" : true diff --git a/source/code/plugin/HealthMonitorConstants.rb b/source/code/plugin/HealthMonitorConstants.rb index 1eff53f72..bf85f6532 100644 --- a/source/code/plugin/HealthMonitorConstants.rb +++ b/source/code/plugin/HealthMonitorConstants.rb @@ -14,6 +14,7 @@ class HealthMonitorConstants WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID = "container_memory_utilization_percentage" MANAGEDINFRA_KUBEAPI_AVAILABLE_MONITOR_ID = "kube_api_up" MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID = "pods_ready_percentage" + POD_STATUS = "pod_status" DEFAULT_PASS_PERCENTAGE = 80.0 DEFAULT_FAIL_PERCENTAGE = 90.0 DEFAULT_MONITOR_TIMEOUT = 240 #4 hours diff --git a/source/code/plugin/HealthMonitorSignalReducer.rb b/source/code/plugin/HealthMonitorSignalReducer.rb index b00edc99a..bb06f0363 100644 --- a/source/code/plugin/HealthMonitorSignalReducer.rb +++ b/source/code/plugin/HealthMonitorSignalReducer.rb @@ -27,7 +27,7 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, latest_record_state = latest_record["state"] latest_record_time = latest_record["timestamp"] #string representation of time 
#log.debug "Latest Record #{latest_record}" - if latest_record_state.downcase == new_state.downcase && @@firstMonitorRecordSent.key?(monitor_id) #no state change + if latest_record_state.downcase == new_state.downcase && @@firstMonitorRecordSent.key?(monitor_instance_id) #no state change #log.debug "latest_record_state.to_s.downcase == prev_sent_status.to_s.state" time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60 #log.debug "time elapsed #{time_elapsed}" @@ -38,7 +38,7 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, health_monitor_instance_state.prev_sent_record_time = latest_record_time #log.debug "After Updating Monitor State #{health_monitor_instance_state}" HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) - return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, node_name: node_name) + return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, node_name: node_name, controller_name: controller_name) else #log.debug "Monitor timeout not reached #{time_elapsed}" #log.debug "Timeout not reached for #{monitor_id}" @@ -152,8 +152,8 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s #log.debug "HealthMonitor Record #{health_monitor_record}" #log.debug "Parsed Health Monitor Record for #{monitor_id}" - if !@@firstMonitorRecordSent.key?(monitor_id) - @@firstMonitorRecordSent[monitor_id] = true + if !@@firstMonitorRecordSent.key?(monitor_instance_id) + @@firstMonitorRecordSent[monitor_instance_id] = true end return health_monitor_record diff --git a/source/code/plugin/HealthMonitorUtils.rb b/source/code/plugin/HealthMonitorUtils.rb index f7bd1861e..f2cf28b59 100644 --- a/source/code/plugin/HealthMonitorUtils.rb +++ b/source/code/plugin/HealthMonitorUtils.rb @@ -90,13 +90,15 @@ def getMonitorInstanceId(log, monitor_id, args = {}) 
#log.debug "getMonitorInstanceId" string_to_hash = '' # Container Level Monitor - if args.key?("cluster_id") && args.key?("node_name") && args.key?("container_key") - string_to_hash = [args['cluster_id'], args['node_name'], args['container_key']].join("/") + if args.key?("cluster_id") && args.key?("node_name") && args.key?("key") + string_to_hash = [args['cluster_id'], args['node_name'], args['key']].join("/") elsif args.key?("cluster_id") && args.key?("node_name") string_to_hash = [args['cluster_id'], args['node_name']].join("/") - elsif args.key?("cluster_id") && args.key?("namespace") && args.key?("controller_name") + elsif args.key?("cluster_id") && args.key?("namespace") && args.key?("controller_name") && args.key?("key") + string_to_hash = [args['cluster_id'], args['namespace'], args['controller_name'], args['key']].join("/") + elsif args.key?("cluster_id") && args.key?("namespace") && args.key?("controller_name") && !args.key?("key") string_to_hash = [args['cluster_id'], args['namespace'], args['controller_name']].join("/") - elsif args.key?("cluster_id") && !args.key?("namespace") && !args.key?("controller_name") && !args.key?("container_key") + elsif args.key?("cluster_id") && !args.key?("namespace") && !args.key?("controller_name") && !args.key?("key") string_to_hash = [args['cluster_id']].join("/") end #@log.info "String to Hash : #{string_to_hash}" @@ -124,7 +126,7 @@ def getMonitorLabels(log, monitor_id, key, controller_name, node_name) #log.debug "key : #{key} controller_name #{controller_name} monitor_id #{monitor_id} node_name #{node_name}" monitor_labels = {} case monitor_id - when HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID + when HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, 
HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::POD_STATUS #log.debug "Getting Monitor labels for Workload/ManagedInfra Monitors #{controller_name} #{@@controllerMapping}" if !key.nil? #container monitor_labels['monitor.azure.com/controller-name'] = getContainerControllerName(key) diff --git a/source/code/plugin/filter_cadvisor_health.rb b/source/code/plugin/filter_cadvisor_health.rb index effa07c69..bccaadb59 100644 --- a/source/code/plugin/filter_cadvisor_health.rb +++ b/source/code/plugin/filter_cadvisor_health.rb @@ -92,9 +92,9 @@ def filter(tag, time, record) # @log.debug "Object Name #{object_name}" # @log.debug "Counter Name #{counter_name}" # @log.debug "Metric Value #{metric_value}" - return process_container_cpu_record(record, metric_value) + #return process_container_cpu_record(record, metric_value) when @@counter_name_memory_rss - return process_container_memory_record(record, metric_value) + #return process_container_memory_record(record, metric_value) end when @@object_name_k8s_node case counter_name.downcase @@ -143,7 +143,7 @@ def process_container_cpu_record(record, metric_value) #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}) #@log.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "key" => key}) #@log.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) 
record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) @@ -183,7 +183,7 @@ def process_container_memory_record(record, metric_value) #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}) #@log.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "container_key" => key}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "key" => key}) #@log.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) @@ -241,7 +241,7 @@ def process_node_memory_record(record, metric_value) monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) #@log.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], , node_name: @@hostName) + record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] @log.info "Processed Node Memory #{temp}" return record diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index c5358baec..74e6ad59b 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -101,6 +101,10 @@ def enumerate workload_pods_ready_percentage_records.each do |record| health_monitor_records.push(record) if record end + # pod_statuses = process_pod_statuses(hmlog, pod_inventory) + # pod_statuses.each do |pod_status| + # health_monitor_records.push(pod_status) if pod_status + # end end if !node_inventory.nil? @@ -158,7 +162,7 @@ def process_memory_oversubscribed_monitor(pod_inventory) monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) - @@hmlog.info "Successfully processed process_memory_oversubscribed_monitor #{record}" + @@hmlog.info "Successfully processed process_memory_oversubscribed_monitor" return record.nil? ? 
nil : record end @@ -240,6 +244,42 @@ def process_node_condition_monitor(node_inventory) return node_condition_monitor_records end + def process_pod_statuses(log, pod_inventory) + monitor_id = HealthMonitorConstants::POD_STATUS + pods_ready_percentage_hash = {} + records = [] + monitor_config = @@healthMonitorConfig[monitor_id] + pod_inventory['items'].each do |pod| + controller_name = pod['metadata']['ownerReferences'][0]['name'] + namespace = pod['metadata']['namespace'] + status = pod['status']['phase'] + timestamp = Time.now.utc.iso8601 + state = '' + podUid = pod['metadata']['uid'] + conditions = pod['status']['conditions'] + details = {} + if status == 'Running' + state = 'pass' + else + state = 'fail' + end + details['status'] = status + conditions.each do |condition| + details[condition['type']] = {"Status" => condition['status'], "LastTransitionTime" => condition['lastTransitionTime']} + end + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} + + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId, "controller_name" => controller_name, "namespace" => namespace, "key" => podUid}) + HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) + record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) + if !record.nil? 
+ records.push(record) + end + end + log.debug "Pod Status Records #{records.size}" + return records + end + def run_periodic @mutex.lock done = @finished From b68572fe9020abe74a5cd6538b2228a93bcd1543 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Fri, 29 Mar 2019 15:13:11 -0700 Subject: [PATCH 27/90] Fix issue when pods are created since last kube api refresh -- this cause empty metadata --- source/code/plugin/HealthMonitorUtils.rb | 56 +++++++++++++++++++----- source/code/plugin/in_kube_health.rb | 2 - 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/source/code/plugin/HealthMonitorUtils.rb b/source/code/plugin/HealthMonitorUtils.rb index f2cf28b59..96efa71c2 100644 --- a/source/code/plugin/HealthMonitorUtils.rb +++ b/source/code/plugin/HealthMonitorUtils.rb @@ -151,17 +151,19 @@ def getMonitorLabels(log, monitor_id, key, controller_name, node_name) end end - def refreshKubernetesApiData(log, hostName) + def refreshKubernetesApiData(log, hostName, force: false) #log.debug "refreshKubernetesApiData" - if ((Time.now.utc - Time.parse(@@lastRefreshTime)) / 60 ) < 5.0 + if ( ((Time.now.utc - Time.parse(@@lastRefreshTime)) / 60 ) < 5.0 && !force) log.debug "Less than 5 minutes since last refresh at #{@@lastRefreshTime}" return end + if force + @log.debug "Force Refresh" + end begin @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) - if !hostName.nil? 
podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods?fieldSelector=spec.nodeName%3D#{hostName}").body) else @@ -208,7 +210,14 @@ def getContainerMetadata(key) if @@containerMetadata.has_key?(key) return @@containerMetadata[key] else - return nil + # This is to handle new containers/controllers that might have come up since the last refresh + @log.info "Adhoc refresh getContainerMetadata" + HealthMonitorUtils.refreshKubernetesApiData(@log,nil, force: true) + if @@containerMetadata.has_key?(key) + return @@containerMetadata[key] + else + return nil + end end end @@ -216,7 +225,14 @@ def getContainerMemoryLimit(key) if @@containerMetadata.has_key?(key) return @@containerMetadata[key]['memoryLimit'] else - return '' + @log.info "Adhoc refresh getContainerMemoryLimit" + # This is to handle new containers/controllers that might have come up since the last refresh + HealthMonitorUtils.refreshKubernetesApiData(@log,nil, force: true) + if @@containerMetadata.has_key?(key) + return @@containerMetadata[key]['memoryLimit'] + else + return '' + end end end @@ -224,7 +240,14 @@ def getContainerControllerName(key) if @@containerMetadata.has_key?(key) return @@containerMetadata[key]['controllerName'] else - return '' + @log.info "Adhoc refresh getContainerControllerName" + # This is to handle new containers/controllers that might have come up since the last refresh + HealthMonitorUtils.refreshKubernetesApiData(@log,nil, force: true) + if @@containerMetadata.has_key?(key) + return @@containerMetadata[key]['controllerName'] + else + return '' + end end end @@ -232,7 +255,14 @@ def getContainerNamespace(key) if @@containerMetadata.has_key?(key) return @@containerMetadata[key]['namespace'] else - return '' + @log.info "Adhoc refresh getContainerNamespace" + # This is to handle new containers/controllers that might have come up since the last refresh + HealthMonitorUtils.refreshKubernetesApiData(@log,nil, force: true) + if @@containerMetadata.has_key?(key) + return 
@@containerMetadata[key]['namespace'] + else + return '' + end end end @@ -240,7 +270,14 @@ def getControllerNamespace(controller_name) if @@controllerMapping.has_key?(controller_name) return @@controllerMapping[controller_name] else - return '' + @log.info "Adhoc refresh getControllerNamespace" + # This is to handle new containers/controllers that might have come up since the last refresh + HealthMonitorUtils.refreshKubernetesApiData(@log,nil, force: true) + if @@controllerMapping.has_key?(controller_name) + return @@controllerMapping[controller_name] + else + return '' + end end end @@ -284,7 +321,6 @@ def getClusterCpuMemoryCapacity end end - def getResourceSubscription(pod_inventory, metric_name, metric_capacity) subscription = 0.0 if !pod_inventory.empty? @@ -340,8 +376,6 @@ def getPodsReadyHash(pod_inventory) end pods_ready_percentage_hash[controller_name] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace} end - - #@log.debug "pods_ready_percentage_hash #{pods_ready_percentage_hash}" return pods_ready_percentage_hash end diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 74e6ad59b..1e0cae4eb 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -205,8 +205,6 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) state = HealthMonitorState.getStateForWorkloadPodsReadyPercentage(@@hmlog, percent, monitor_config) end health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "controllerName" => controller_name}} - #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"totalPods" => total_pods, "podsReady" => pods_ready, "controllerName" => controller_name}) - #hmlog.info health_monitor_record monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId, "controller_name" => controller_name, 
"namespace" => namespace}) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) From b80366e0b6231cc76fa1254b5e983362948f2aed Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 10 Apr 2019 19:18:18 -0700 Subject: [PATCH 28/90] Remove duplicate plugin entry from container.conf --- installer/conf/container.conf | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 88c7228ca..52e806e4c 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -28,18 +28,6 @@ log_level debug - - type out_oms_api - log_level debug - buffer_chunk_limit 10m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_KubeHealth*.buffer - buffer_queue_limit 10 - flush_interval 20s - retry_limit 10 - retry_wait 30s - - #custom_metrics_mdm filter plugin type filter_cadvisor2mdm @@ -83,7 +71,7 @@ log_level debug buffer_chunk_limit 10m buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_kube_health*.buffer + buffer_path %STATE_DIR_WS%/out_oms_api_kubehealth*.buffer buffer_queue_limit 10 flush_interval 20s retry_limit 10 From c6d0fee90ed889261ec8cd42f52e15f48f094db4 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 11 Apr 2019 12:56:54 -0700 Subject: [PATCH 29/90] Updating Agent Version in fluent-bit config --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 78a7b2dde..bba668662 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -29,4 +29,4 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion ciprod03122019 \ No newline at end of file + AgentVersion healthpreview04112019 \ 
No newline at end of file From 42957df3dcf84d707cc9cfdb8a57deb05060ff38 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Fri, 12 Apr 2019 10:45:42 -0700 Subject: [PATCH 30/90] Updating Agent Version --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index bba668662..b81c15c66 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -29,4 +29,4 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion healthpreview04112019 \ No newline at end of file + AgentVersion healthpreview04122019 \ No newline at end of file From 8ab3ee867fec12b788104836fb91d402960c6e40 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 17 Apr 2019 11:12:54 -0700 Subject: [PATCH 31/90] Fix Error when Pods dont have a controller --- source/code/plugin/HealthMonitorUtils.rb | 34 +++++++++++++----------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/source/code/plugin/HealthMonitorUtils.rb b/source/code/plugin/HealthMonitorUtils.rb index 96efa71c2..e45eae22c 100644 --- a/source/code/plugin/HealthMonitorUtils.rb +++ b/source/code/plugin/HealthMonitorUtils.rb @@ -358,23 +358,27 @@ def getLogHandle def getPodsReadyHash(pod_inventory) pods_ready_percentage_hash = {} pod_inventory['items'].each do |pod| - controller_name = pod['metadata']['ownerReferences'][0]['name'] - namespace = pod['metadata']['namespace'] - status = pod['status']['phase'] - - if pods_ready_percentage_hash.key?(controller_name) - total_pods = pods_ready_percentage_hash[controller_name]['totalPods'] - pods_ready = pods_ready_percentage_hash[controller_name]['podsReady'] - else - total_pods = 0 - pods_ready = 0 - end + begin + controller_name = pod['metadata']['ownerReferences'][0]['name'] + namespace = pod['metadata']['namespace'] + status = pod['status']['phase'] + + if pods_ready_percentage_hash.key?(controller_name) + total_pods 
= pods_ready_percentage_hash[controller_name]['totalPods'] + pods_ready = pods_ready_percentage_hash[controller_name]['podsReady'] + else + total_pods = 0 + pods_ready = 0 + end - total_pods += 1 - if status == 'Running' - pods_ready += 1 + total_pods += 1 + if status == 'Running' + pods_ready += 1 + end + pods_ready_percentage_hash[controller_name] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace} + rescue => e + @log.info "Error when processing pod #{pod['metadata']['name']} #{e.message}" end - pods_ready_percentage_hash[controller_name] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace} end return pods_ready_percentage_hash end From a8837f82cb1588b43c8d339c659e7412407b0834 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Fri, 26 Apr 2019 13:46:13 -0700 Subject: [PATCH 32/90] Add Telemetry for plugin start --- source/code/plugin/filter_cadvisor_health.rb | 1 + source/code/plugin/in_kube_health.rb | 1 + 2 files changed, 2 insertions(+) diff --git a/source/code/plugin/filter_cadvisor_health.rb b/source/code/plugin/filter_cadvisor_health.rb index bccaadb59..4ce2d31f4 100644 --- a/source/code/plugin/filter_cadvisor_health.rb +++ b/source/code/plugin/filter_cadvisor_health.rb @@ -55,6 +55,7 @@ def start @log.info "CPU Capacity #{@cpu_capacity} Memory Capacity #{@memory_capacity}" HealthMonitorUtils.refreshKubernetesApiData(@log, @@hostName) @@health_monitor_config = HealthMonitorUtils.getHealthMonitorConfig + ApplicationInsightsUtility.sendCustomEvent("filter_cadvisor_health Plugin Start", {}) end def filter_stream(tag, es) diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 1e0cae4eb..f89550eb5 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -45,6 +45,7 @@ def start @@healthMonitorConfig = HealthMonitorUtils.getHealthMonitorConfig @@hmlog = HealthMonitorUtils.getLogHandle @@hmlog.info "Cluster CPU Capacity: 
#{@@clusterCpuCapacity} Memory Capacity: #{@@clusterMemoryCapacity}" + ApplicationInsightsUtility.sendCustomEvent("in_kube_health Plugin Start", {}) end end From 147d688c2e8f23c9c023f770aa9640d0c4fc93c6 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 30 Apr 2019 15:34:23 -0700 Subject: [PATCH 33/90] Change getMonitorInstanceId method signature to be an array instead of a hash --- source/code/plugin/HealthMonitorUtils.rb | 30 ++++++++++---------- source/code/plugin/filter_cadvisor_health.rb | 8 +++--- source/code/plugin/in_kube_health.rb | 12 ++++---- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/source/code/plugin/HealthMonitorUtils.rb b/source/code/plugin/HealthMonitorUtils.rb index e45eae22c..a7fdaeec0 100644 --- a/source/code/plugin/HealthMonitorUtils.rb +++ b/source/code/plugin/HealthMonitorUtils.rb @@ -86,22 +86,22 @@ def getContainerKeyFromInstanceName(instance_name) return key end - def getMonitorInstanceId(log, monitor_id, args = {}) + def getMonitorInstanceId(log, monitor_id, args = []) #log.debug "getMonitorInstanceId" - string_to_hash = '' - # Container Level Monitor - if args.key?("cluster_id") && args.key?("node_name") && args.key?("key") - string_to_hash = [args['cluster_id'], args['node_name'], args['key']].join("/") - elsif args.key?("cluster_id") && args.key?("node_name") - string_to_hash = [args['cluster_id'], args['node_name']].join("/") - elsif args.key?("cluster_id") && args.key?("namespace") && args.key?("controller_name") && args.key?("key") - string_to_hash = [args['cluster_id'], args['namespace'], args['controller_name'], args['key']].join("/") - elsif args.key?("cluster_id") && args.key?("namespace") && args.key?("controller_name") && !args.key?("key") - string_to_hash = [args['cluster_id'], args['namespace'], args['controller_name']].join("/") - elsif args.key?("cluster_id") && !args.key?("namespace") && !args.key?("controller_name") && !args.key?("key") - string_to_hash = [args['cluster_id']].join("/") - end - 
#@log.info "String to Hash : #{string_to_hash}" + string_to_hash = args.join("/") + # # Container Level Monitor + # if args.key?("cluster_id") && args.key?("node_name") && args.key?("key") + # string_to_hash = [args['cluster_id'], args['node_name'], args['key']].join("/") + # elsif args.key?("cluster_id") && args.key?("node_name") + # string_to_hash = [args['cluster_id'], args['node_name']].join("/") + # elsif args.key?("cluster_id") && args.key?("namespace") && args.key?("controller_name") && args.key?("key") + # string_to_hash = [args['cluster_id'], args['namespace'], args['controller_name'], args['key']].join("/") + # elsif args.key?("cluster_id") && args.key?("namespace") && args.key?("controller_name") && !args.key?("key") + # string_to_hash = [args['cluster_id'], args['namespace'], args['controller_name']].join("/") + # elsif args.key?("cluster_id") && !args.key?("namespace") && !args.key?("controller_name") && !args.key?("key") + # string_to_hash = [args['cluster_id']].join("/") + # end + @log.info "String to Hash : #{string_to_hash}" return "#{monitor_id}-#{Digest::MD5.hexdigest(string_to_hash)}" end diff --git a/source/code/plugin/filter_cadvisor_health.rb b/source/code/plugin/filter_cadvisor_health.rb index 4ce2d31f4..6ec837ca8 100644 --- a/source/code/plugin/filter_cadvisor_health.rb +++ b/source/code/plugin/filter_cadvisor_health.rb @@ -144,7 +144,7 @@ def process_container_cpu_record(record, metric_value) #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}) #@log.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "key" => key}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) #@log.info "Monitor Instance Id: #{monitor_instance_id}" 
HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) @@ -184,7 +184,7 @@ def process_container_memory_record(record, metric_value) #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}) #@log.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName, "key" => key}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) #@log.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) @@ -211,7 +211,7 @@ def process_node_cpu_record(record, metric_value) timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] @@ -239,7 +239,7 @@ def process_node_memory_record(record, metric_value) #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value/1000000.to_f, "memoryUtilizationPercentage" => percent}) #@log.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, {"cluster_id" => @@clusterId, "node_name" => @@hostName}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) #@log.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index f89550eb5..7cf4e7113 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -140,7 +140,7 @@ def process_cpu_oversubscribed_monitor(pod_inventory) #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}) # @@hmlog.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId]) #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) @@ -160,7 +160,7 @@ def process_memory_oversubscribed_monitor(pod_inventory) 
#health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}) hmlog = HealthMonitorUtils.getLogHandle - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId]) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) @@hmlog.info "Successfully processed process_memory_oversubscribed_monitor" @@ -178,7 +178,7 @@ def process_kube_api_up_monitor(state, response) hmlog = HealthMonitorUtils.getLogHandle #hmlog.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId]) #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) @@ -206,7 +206,7 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) state = HealthMonitorState.getStateForWorkloadPodsReadyPercentage(@@hmlog, percent, monitor_config) end health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "controllerName" => controller_name}} - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId, "controller_name" => controller_name, "namespace" => namespace}) + 
monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId, namespace, controller_name]) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) records.push(record) @@ -233,7 +233,7 @@ def process_node_condition_monitor(node_inventory) end health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} #health_monitor_record = HealthMonitorRecord.new(timestamp, state, details) - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId, "node_name" => node_name}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId, node_name]) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, node_name: node_name) node_condition_monitor_records.push(record) @@ -268,7 +268,7 @@ def process_pod_statuses(log, pod_inventory) end health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, {"cluster_id" => @@clusterId, "controller_name" => controller_name, "namespace" => namespace, "key" => podUid}) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId, namespace, controller_name, podUid]) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) if !record.nil? 
From 80a5d3631c947801d4395c460894979f1187a9ff Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 1 May 2019 15:25:31 -0700 Subject: [PATCH 34/90] Remove references to HealthMonitorRecord struct in code --- installer/conf/container.conf | 2 +- source/code/plugin/HealthMonitorState.rb | 3 --- source/code/plugin/filter_cadvisor_health.rb | 3 --- source/code/plugin/in_kube_health.rb | 4 ---- 4 files changed, 1 insertion(+), 11 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 52e806e4c..cacc807c7 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -31,7 +31,7 @@ #custom_metrics_mdm filter plugin type filter_cadvisor2mdm - custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes log_level info diff --git a/source/code/plugin/HealthMonitorState.rb b/source/code/plugin/HealthMonitorState.rb index 8e01f35a4..58b772eec 100644 --- a/source/code/plugin/HealthMonitorState.rb +++ b/source/code/plugin/HealthMonitorState.rb @@ -3,9 +3,6 @@ require_relative 'HealthMonitorConstants' -HealthMonitorRecord = Struct.new(:timestamp, :state, :details) do -end - HealthMonitorInstanceState = Struct.new(:prev_sent_record_time, :old_state, :new_state, :state_change_time, :prev_records) do end diff --git a/source/code/plugin/filter_cadvisor_health.rb b/source/code/plugin/filter_cadvisor_health.rb index 6ec837ca8..c05a5d759 100644 --- a/source/code/plugin/filter_cadvisor_health.rb +++ b/source/code/plugin/filter_cadvisor_health.rb @@ -141,7 +141,6 @@ def process_container_cpu_record(record, metric_value) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => 
{"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} - #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}) #@log.info health_monitor_record monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) @@ -181,7 +180,6 @@ def process_container_memory_record(record, metric_value) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} - #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}) #@log.info health_monitor_record monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) @@ -236,7 +234,6 @@ def process_node_memory_record(record, metric_value) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} - #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"memoryRssBytes" => metric_value/1000000.to_f, "memoryUtilizationPercentage" => percent}) #@log.info health_monitor_record monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 7cf4e7113..054935937 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -137,7 +137,6 @@ def process_cpu_oversubscribed_monitor(pod_inventory) #CPU monitor_id = 
HealthMonitorConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}} - #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}) # @@hmlog.info health_monitor_record monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId]) @@ -157,7 +156,6 @@ def process_memory_oversubscribed_monitor(pod_inventory) #CPU monitor_id = HealthMonitorConstants::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}} - #health_monitor_record = HealthMonitorRecord.new(timestamp, state, {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}) hmlog = HealthMonitorUtils.getLogHandle monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId]) @@ -174,7 +172,6 @@ def process_kube_api_up_monitor(state, response) details = response.each_header.to_h details['ResponseCode'] = response.code health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} - #health_monitor_record = HealthMonitorRecord.new(timestamp, state, details) hmlog = HealthMonitorUtils.getLogHandle #hmlog.info health_monitor_record @@ -232,7 +229,6 @@ def process_node_condition_monitor(node_inventory) details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message']} end health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} - #health_monitor_record = HealthMonitorRecord.new(timestamp, state, details) monitor_instance_id = 
HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId, node_name]) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, node_name: node_name) From d7a71d5ea050ed038d56fd879305c7627b001fde Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 7 May 2019 09:13:07 -0700 Subject: [PATCH 35/90] Rake --- Rakefile | 5 +++++ source/code/plugin/aggregate_monitor.rb | 12 ++++++++++++ test/code/plugin/aggregate_monitor_test.rb | 10 ++++++++++ 3 files changed, 27 insertions(+) create mode 100644 Rakefile create mode 100644 source/code/plugin/aggregate_monitor.rb create mode 100644 test/code/plugin/aggregate_monitor_test.rb diff --git a/Rakefile b/Rakefile new file mode 100644 index 000000000..d7001d2bd --- /dev/null +++ b/Rakefile @@ -0,0 +1,5 @@ +require 'rake/testtask' +task default: "test" +Rake::TestTask.new do |task| + task.pattern = 'test/source/plugin/*_test.rb' +end \ No newline at end of file diff --git a/source/code/plugin/aggregate_monitor.rb b/source/code/plugin/aggregate_monitor.rb new file mode 100644 index 000000000..45d338be3 --- /dev/null +++ b/source/code/plugin/aggregate_monitor.rb @@ -0,0 +1,12 @@ +class AggregateMonitor + attr_accessor :name, :id + def initialize(name, id) + @name = name + @id = id + end + + def getName + @name + end + +end \ No newline at end of file diff --git a/test/code/plugin/aggregate_monitor_test.rb b/test/code/plugin/aggregate_monitor_test.rb new file mode 100644 index 000000000..b83f9c611 --- /dev/null +++ b/test/code/plugin/aggregate_monitor_test.rb @@ -0,0 +1,10 @@ +require 'test/unit' +require_relative '../../../source/code/plugin/aggregate_monitor' + +class AggregateMonitor_Test < Test::Unit::TestCase + def test_get_name + puts 'starting test_get_name' + agg = AggregateMonitor.new('Cluster', 'Cluster') + agg. 
+ end +end \ No newline at end of file From a18eb83aa0fca5edfff531ffbce859499826a257 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 7 May 2019 11:45:09 -0700 Subject: [PATCH 36/90] Running Ruby tests --- Rakefile | 6 ++++-- test/code/plugin/aggregate_monitor_test.rb | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Rakefile b/Rakefile index d7001d2bd..d48941cc1 100644 --- a/Rakefile +++ b/Rakefile @@ -1,5 +1,7 @@ require 'rake/testtask' -task default: "test" + Rake::TestTask.new do |task| - task.pattern = 'test/source/plugin/*_test.rb' + task.libs << "test" + task.pattern = './test/code/plugin/*_test.rb' + task.verbose = true end \ No newline at end of file diff --git a/test/code/plugin/aggregate_monitor_test.rb b/test/code/plugin/aggregate_monitor_test.rb index b83f9c611..631cda175 100644 --- a/test/code/plugin/aggregate_monitor_test.rb +++ b/test/code/plugin/aggregate_monitor_test.rb @@ -5,6 +5,6 @@ class AggregateMonitor_Test < Test::Unit::TestCase def test_get_name puts 'starting test_get_name' agg = AggregateMonitor.new('Cluster', 'Cluster') - agg. 
+ assert_equal(agg.name, 'Cluster') end -end \ No newline at end of file +end From 5895951ec6cd7400b952d695c9f9039176b77f4e Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 21 May 2019 19:42:43 -0700 Subject: [PATCH 37/90] Working Version for Health Model Builder on the agent --- installer/conf/container.conf | 9 +- installer/conf/health_model_definition.json | 138 +++++ source/code/plugin/HealthMonitorConstants.rb | 27 +- .../code/plugin/HealthMonitorSignalReducer.rb | 16 +- source/code/plugin/HealthMonitorState.rb | 13 +- source/code/plugin/HealthMonitorUtils.rb | 6 +- source/code/plugin/aggregate_monitor.rb | 12 - source/code/plugin/filter_cadvisor_health.rb | 37 +- .../filter_cadvisor_health_container.rb | 266 ++++++++ .../plugin/filter_cadvisor_health_node.rb | 266 ++++++++ .../plugin/filter_health_model_builder.rb | 131 ++++ .../code/plugin/health/aggregate_monitor.rb | 113 ++++ .../aggregate_monitor_instance_id_labels.rb | 25 + .../aggregate_monitor_state_finalizer.rb | 32 + .../code/plugin/health/health_model_buffer.rb | 38 ++ .../plugin/health/health_model_builder.rb | 82 +++ .../plugin/health/health_model_constants.rb | 58 ++ .../plugin/health/health_model_definition.rb | 75 +++ .../health/health_model_definition_parser.rb | 49 ++ source/code/plugin/health/monitor_factory.rb | 22 + source/code/plugin/health/monitor_set.rb | 56 ++ .../plugin/health/monitor_state_transition.rb | 11 + source/code/plugin/health/newfiles.txt | 15 + .../health/node_monitor_hierarchy_reducer.rb | 32 + .../health/state_transition_processor.rb | 78 +++ source/code/plugin/health/unit_monitor.rb | 26 + source/code/plugin/in_cadvisor_perf.rb | 14 +- source/code/plugin/in_kube_health.rb | 87 ++- source/code/plugin/mock_data.json | 568 ++++++++++++++++++ .../filter_health_model_builder_test.rb | 51 ++ 30 files changed, 2288 insertions(+), 65 deletions(-) create mode 100644 installer/conf/health_model_definition.json delete mode 100644 source/code/plugin/aggregate_monitor.rb create 
mode 100644 source/code/plugin/filter_cadvisor_health_container.rb create mode 100644 source/code/plugin/filter_cadvisor_health_node.rb create mode 100644 source/code/plugin/filter_health_model_builder.rb create mode 100644 source/code/plugin/health/aggregate_monitor.rb create mode 100644 source/code/plugin/health/aggregate_monitor_instance_id_labels.rb create mode 100644 source/code/plugin/health/aggregate_monitor_state_finalizer.rb create mode 100644 source/code/plugin/health/health_model_buffer.rb create mode 100644 source/code/plugin/health/health_model_builder.rb create mode 100644 source/code/plugin/health/health_model_constants.rb create mode 100644 source/code/plugin/health/health_model_definition.rb create mode 100644 source/code/plugin/health/health_model_definition_parser.rb create mode 100644 source/code/plugin/health/monitor_factory.rb create mode 100644 source/code/plugin/health/monitor_set.rb create mode 100644 source/code/plugin/health/monitor_state_transition.rb create mode 100644 source/code/plugin/health/newfiles.txt create mode 100644 source/code/plugin/health/node_monitor_hierarchy_reducer.rb create mode 100644 source/code/plugin/health/state_transition_processor.rb create mode 100644 source/code/plugin/health/unit_monitor.rb create mode 100644 source/code/plugin/mock_data.json create mode 100644 test/code/plugin/filter_health_model_builder_test.rb diff --git a/installer/conf/container.conf b/installer/conf/container.conf index cacc807c7..4ffb3a7a6 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -23,8 +23,13 @@ log_level debug - - type filter_cadvisor2health + + type filter_cadvisor_health_node + log_level debug + + + + type filter_cadvisor_health_container log_level debug diff --git a/installer/conf/health_model_definition.json b/installer/conf/health_model_definition.json new file mode 100644 index 000000000..a90f89f52 --- /dev/null +++ b/installer/conf/health_model_definition.json @@ -0,0 +1,138 @@ +[ + { + 
"monitor_id": "workload_pods_ready_percentage", + "parent_monitor_id": "controller", + "labels": [ + "monitor.azure.com/namespace", + "monitor.azure.com/controller-name" + ] + }, + { + "monitor_id": "controller", + "parent_monitor_id": "namespace", + "labels": [ + "monitor.azure.com/namespace" + ] + }, + { + "monitor_id": "system_pods_ready_percentage", + "parent_monitor_id": "system_controllers", + "labels": [ + "monitor.azure.com/namespace", + "monitor.azure.com/controller-name" + ] + }, + { + "monitor_id": "system_controllers", + "parent_monitor_id": "k8s_infrastructure" + }, + { + "monitor_id": "kube_api_up", + "parent_monitor_id": "k8s_infrastructure" + }, + { + "monitor_id": "namespace", + "labels": [ + "monitor.azure.com/namespace" + ], + "parent_monitor_id": "namespaces" + }, + { + "monitor_id": "k8s_infrastructure", + "parent_monitor_id": "cluster" + }, + { + "monitor_id": "namespaces", + "parent_monitor_id": "workload" + }, + { + "monitor_id": "workload", + "parent_monitor_id": "cluster" + }, + { + "monitor_id": "node_cpu_utilization_percentage", + "parent_monitor_id": "node", + "labels": [ + "kubernetes.io/hostname", + "agentpool", + "kubernetes.io/role" + ] + }, + { + "monitor_id": "node_memory_utilization_percentage", + "parent_monitor_id": "node", + "labels": [ + "kubernetes.io/hostname", + "agentpool", + "kubernetes.io/role" + ] + }, + { + "monitor_id": "node_condition", + "parent_monitor_id": "node", + "labels": [ + "kubernetes.io/hostname", + "agentpool", + "kubernetes.io/role" + ] + }, + { + "monitor_id": "node", + "aggregation_algorithm": "worstOf", + "labels": [ + "kubernetes.io/hostname", + "agentpool", + "kubernetes.io/role" + ], + "parent_monitor_id": [ + { + "label": "kubernetes.io/role", + "operator": "==", + "value": "master", + "id": "master_node_pool" + }, + { + "label": "kubernetes.io/role", + "operator": "==", + "value": "agent", + "id": "agent_node_pool" + } + ] + }, + { + "monitor_id": "master_node_pool", + "aggregation_algorithm": 
"percentage", + "parent_monitor_id": "all_nodes" + }, + { + "monitor_id": "agent_node_pool", + "aggregation_algorithm": "worstOf", + "labels": [ + "agentpool" + ], + "parent_monitor_id": "all_agent_node_pools" + }, + { + "monitor_id": "all_agent_node_pools", + "aggregation_algorithm": "worstOf", + "parent_monitor_id": "all_nodes" + }, + { + "monitor_id": "all_nodes", + "aggregation_algorithm": "worstOf", + "parent_monitor_id": "cluster" + }, + { + "monitor_id": "cluster", + "aggregation_algorithm": "worstOf", + "parent_monitor_id": null + }, + { + "monitor_id": "is_oversubscribed_cpu", + "parent_monitor_id": "workload" + }, + { + "monitor_id": "is_oversubscribed_memory", + "parent_monitor_id": "workload" + } +] \ No newline at end of file diff --git a/source/code/plugin/HealthMonitorConstants.rb b/source/code/plugin/HealthMonitorConstants.rb index bf85f6532..eaa561ae3 100644 --- a/source/code/plugin/HealthMonitorConstants.rb +++ b/source/code/plugin/HealthMonitorConstants.rb @@ -9,14 +9,37 @@ class HealthMonitorConstants NODE_CONTAINER_RUNTIME_MONITOR_ID = "container_manager_runtime_running" WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID = "is_oversubscribed_cpu" WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID = "is_oversubscribed_memory" - WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID = "pods_ready_percentage" + WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID = "workload_pods_ready_percentage" WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID = "container_cpu_utilization_percentage" WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID = "container_memory_utilization_percentage" MANAGEDINFRA_KUBEAPI_AVAILABLE_MONITOR_ID = "kube_api_up" - MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID = "pods_ready_percentage" + MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID = "system_pods_ready_percentage" POD_STATUS = "pod_status" DEFAULT_PASS_PERCENTAGE = 80.0 DEFAULT_FAIL_PERCENTAGE = 90.0 DEFAULT_MONITOR_TIMEOUT = 240 #4 hours DEFAULT_SAMPLES_BEFORE_NOTIFICATION = 3 +end + +class HealthMonitorRecordFields + 
CLUSTER_ID = "ClusterId" + MONITOR_ID = "MonitorId" + MONITOR_INSTANCE_ID = "MonitorInstanceId" + MONITOR_LABELS = "MonitorLabels" + DETAILS = "Details" + MONITOR_CONFIG = "MonitorConfig" + OLD_STATE = "OldState" + NEW_STATE = "NewState" + AGENT_COLLECTION_TIME = "AgentCollectionTime" + TIME_FIRST_OBSERVED = "TimeFirstObserved" + NODE_NAME = "NodeName" + CONTROLLER_NAME = "ControllerName" + HEALTH_ASPECT = "HealthAspect" + CONTAINER_ID = "ContainerID" +end + +class HealthAspect + NODES = "Nodes" + KUBERNETES_INFRASTRUCTURE = "Kubernetes infrastructure" + WORKLOAD = "Workload" end \ No newline at end of file diff --git a/source/code/plugin/HealthMonitorSignalReducer.rb b/source/code/plugin/HealthMonitorSignalReducer.rb index bb06f0363..c9aefb5cd 100644 --- a/source/code/plugin/HealthMonitorSignalReducer.rb +++ b/source/code/plugin/HealthMonitorSignalReducer.rb @@ -10,11 +10,10 @@ class HealthMonitorSignalReducer @@firstMonitorRecordSent = {} class << self def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, controller_name: nil, node_name: nil) - #log.debug "reduceSignal Key : #{key} controller_name: #{controller_name} node_name #{node_name}" - #log.debug "monitorConfig #{monitor_config}" + #log.debug "reduceSignal MonitorId: #{monitor_id} Key : #{key} controller_name: #{controller_name} node_name #{node_name}" + #log.debug "monitorConfig #{monitor_config} monitor_id #{monitor_id}" health_monitor_instance_state = HealthMonitorState.getHealthMonitorState(monitor_instance_id) - #log.debug "Health Monitor Instance state #{health_monitor_instance_state}" health_monitor_records = health_monitor_instance_state.prev_records new_state = health_monitor_instance_state.new_state prev_sent_time = health_monitor_instance_state.prev_sent_record_time @@ -22,6 +21,7 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, monitor_config['MonitorTimeOut'].nil? ? 
monitor_timeout = HealthMonitorConstants::DEFAULT_MONITOR_TIMEOUT : monitor_timeout = monitor_config['MonitorTimeOut'] #minutes #log.debug monitor_timeout + # Notify Instantly sends a signal immediately on a state change if (!monitor_config['NotifyInstantly'].nil? && monitor_config['NotifyInstantly'] == true) latest_record = health_monitor_records[health_monitor_records.size-1] #since we push new records to the end, and remove oldest records from the beginning latest_record_state = latest_record["state"] @@ -38,7 +38,7 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, health_monitor_instance_state.prev_sent_record_time = latest_record_time #log.debug "After Updating Monitor State #{health_monitor_instance_state}" HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) - return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, node_name: node_name, controller_name: controller_name) + return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, node_name: node_name, controller_name: controller_name) else #log.debug "Monitor timeout not reached #{time_elapsed}" #log.debug "Timeout not reached for #{monitor_id}" @@ -54,9 +54,13 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, end end + #FIXME: if record count = 1, then send it, if it is greater than 1 and less than SamplesBeforeNotification, NO-OP. 
If equal to SamplesBeforeNotification, then check for consistency in state change if health_monitor_instance_state.prev_records.size == 1 #log.debug "Only One Record" return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, controller_name: controller_name, node_name: node_name) + elsif health_monitor_instance_state.prev_records.size < monitor_config["SamplesBeforeNotification"].to_i + log.debug "Prev records size < SamplesBeforeNotification for #{monitor_instance_id}" + return nil else first_record = health_monitor_records[0] latest_record = health_monitor_records[health_monitor_records.size-1] #since we push new records to the end, and remove oldest records from the beginning @@ -107,7 +111,7 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s labels = HealthMonitorUtils.getClusterLabels #log.debug "Labels : #{labels}" - monitor_labels = HealthMonitorUtils.getMonitorLabels(log, monitor_id, key, controller_name, node_name) + monitor_labels = HealthMonitorUtils.getMonitorLabels(log, monitor_id, key: key, controller_name: controller_name, node_name: node_name) #log.debug "Monitor Labels : #{monitor_labels}" if !monitor_labels.nil? @@ -148,7 +152,6 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s health_monitor_record["AgentCollectionTime"] = Time.now.utc.iso8601 health_monitor_record["TimeFirstObserved"] = time_first_observed - #log.debug "HealthMonitor Record #{health_monitor_record}" #log.debug "Parsed Health Monitor Record for #{monitor_id}" @@ -159,6 +162,7 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s return health_monitor_record end + #FIXME: check for consistency for "SamplesBeforeNotification" records def isStateChangeConsistent(log, health_monitor_records) if health_monitor_records.nil? 
|| health_monitor_records.size == 0 return false diff --git a/source/code/plugin/HealthMonitorState.rb b/source/code/plugin/HealthMonitorState.rb index 58b772eec..75991c553 100644 --- a/source/code/plugin/HealthMonitorState.rb +++ b/source/code/plugin/HealthMonitorState.rb @@ -9,9 +9,12 @@ class HealthMonitorState @@instanceStates = {} #hash of monitor_instance_id --> health monitor instance state @@firstMonitorRecordSent = {} - HEALTH_MONITOR_STATE = {"PASS" => "pass", "FAIL" => "fail", "WARNING" => "warn"} + #FIXME: use lookup for health_monitor_constants.rb from health folder + HEALTH_MONITOR_STATE = {"PASS" => "pass", "FAIL" => "fail", "WARNING" => "warn", "NONE" => "none"} class << self + #set new_state to be the latest ONLY if the state change is consistent for monitors that are not configured to be notified instantly, i.e. For NotifyInstantly Monitors, set new state to be the latest + # record state. For others, set it to be none, if there is no state information present in the lookup table def updateHealthMonitorState(log, monitor_instance_id, health_monitor_record, config) #log.debug "updateHealthMonitorState" samples_to_keep = 1 @@ -36,7 +39,13 @@ def updateHealthMonitorState(log, monitor_instance_id, health_monitor_record, co health_monitor_instance_state.prev_records = health_monitor_records @@instanceStates[monitor_instance_id] = health_monitor_instance_state else - health_monitor_instance_state = HealthMonitorInstanceState.new(health_monitor_record["timestamp"], health_monitor_record["state"], health_monitor_record["state"], health_monitor_record["timestamp"], [health_monitor_record]) + # if samples_to_keep == 1, then set new state to be the health_monitor_record state, else set it as none + old_state = HEALTH_MONITOR_STATE["NONE"] + new_state = HEALTH_MONITOR_STATE["NONE"] + if samples_to_keep == 1 + new_state = health_monitor_record["state"] + end + health_monitor_instance_state = HealthMonitorInstanceState.new(health_monitor_record["timestamp"], 
old_state, new_state, health_monitor_record["timestamp"], [health_monitor_record]) @@instanceStates[monitor_instance_id] = health_monitor_instance_state end #log.debug "Health Records Count: #{health_monitor_instance_state.prev_records.size}" diff --git a/source/code/plugin/HealthMonitorUtils.rb b/source/code/plugin/HealthMonitorUtils.rb index a7fdaeec0..2b276f971 100644 --- a/source/code/plugin/HealthMonitorUtils.rb +++ b/source/code/plugin/HealthMonitorUtils.rb @@ -122,8 +122,8 @@ def getClusterLabels return labels end - def getMonitorLabels(log, monitor_id, key, controller_name, node_name) - #log.debug "key : #{key} controller_name #{controller_name} monitor_id #{monitor_id} node_name #{node_name}" + def getMonitorLabels(log, monitor_id, key: nil, controller_name: nil, node_name: nil) + #log.debug "get MonitorLabels key : #{key} controller_name #{controller_name} monitor_id #{monitor_id} node_name #{node_name}" monitor_labels = {} case monitor_id when HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::POD_STATUS @@ -181,7 +181,7 @@ def refreshKubernetesApiData(log, hostName, force: false) cpu_limit_value = KubernetesApiClient.getMetricNumericValue('cpu', container['resources']['limits']['cpu']) else @log.info "CPU limit not set for container : #{container['name']}. 
Using Node Capacity" - #TODO: Send warning health event + #TODO: Send warning health event #bestpractices cpu_limit_value = @cpu_capacity end diff --git a/source/code/plugin/aggregate_monitor.rb b/source/code/plugin/aggregate_monitor.rb deleted file mode 100644 index 45d338be3..000000000 --- a/source/code/plugin/aggregate_monitor.rb +++ /dev/null @@ -1,12 +0,0 @@ -class AggregateMonitor - attr_accessor :name, :id - def initialize(name, id) - @name = name - @id = id - end - - def getName - @name - end - -end \ No newline at end of file diff --git a/source/code/plugin/filter_cadvisor_health.rb b/source/code/plugin/filter_cadvisor_health.rb index c05a5d759..68752deaf 100644 --- a/source/code/plugin/filter_cadvisor_health.rb +++ b/source/code/plugin/filter_cadvisor_health.rb @@ -210,11 +210,20 @@ def process_node_cpu_record(record, metric_value) health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) - HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) - temp = record.nil? ? "Nil" : record["MonitorInstanceId"] - @log.info "Processed Node CPU #{temp}" - return record + # HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) + # temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName + health_record[HealthMonitorRecordFields::HEALTH_ASPECT] = HealthAspect.NODES + @log.info "Processed Node CPU" + return health_record end return nil end @@ -238,11 +247,19 @@ def process_node_memory_record(record, metric_value) monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) #@log.info "Monitor Instance Id: #{monitor_instance_id}" - HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) - temp = record.nil? ? "Nil" : record["MonitorInstanceId"] - @log.info "Processed Node Memory #{temp}" - return record + # HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) + # temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName + @log.info "Processed Node Memory" + return health_record end return nil end diff --git a/source/code/plugin/filter_cadvisor_health_container.rb b/source/code/plugin/filter_cadvisor_health_container.rb new file mode 100644 index 000000000..eb9c3dcdc --- /dev/null +++ b/source/code/plugin/filter_cadvisor_health_container.rb @@ -0,0 +1,266 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +module Fluent + require 'logger' + require 'json' + require_relative 'oms_common' + require_relative 'HealthMonitorUtils' + require_relative 'HealthMonitorState' + require_relative "ApplicationInsightsUtility" + + + class CAdvisor2ContainerHealthFilter < Filter + Fluent::Plugin.register_filter('filter_cadvisor_health_container', self) + + config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/health_monitors.log' + config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryRssBytes' + config_param :container_resource_refresh_interval_minutes, :integer, :default => 5 + + @@object_name_k8s_node = 'K8SNode' + @@object_name_k8s_container = 'K8SContainer' + + @@counter_name_cpu = 'cpuusagenanocores' + @@counter_name_memory_rss = 'memoryrssbytes' + + @@health_monitor_config = {} + + @@hostName = (OMS::Common.get_hostname) + @@clusterName = KubernetesApiClient.getClusterName + @@clusterId = KubernetesApiClient.getClusterId + @@clusterRegion = KubernetesApiClient.getClusterRegion + + + def initialize 
+ super + @cpu_capacity = 0.0 + @memory_capacity = 0.0 + @last_resource_refresh = DateTime.now.to_time.to_i + @metrics_to_collect_hash = {} + end + + def configure(conf) + super + @log = HealthMonitorUtils.getLogHandle + @log.debug {'Starting filter_cadvisor2health plugin'} + end + + def start + super + @metrics_to_collect_hash = HealthMonitorUtils.build_metrics_hash(@metrics_to_collect) + @log.debug "Calling ensure_cpu_memory_capacity_set cpu_capacity #{@cpu_capacity} memory_capacity #{@memory_capacity}" + node_capacity = HealthMonitorUtils.ensure_cpu_memory_capacity_set(@cpu_capacity, @memory_capacity, @@hostName) + @cpu_capacity = node_capacity[0] + @memory_capacity = node_capacity[1] + @log.info "CPU Capacity #{@cpu_capacity} Memory Capacity #{@memory_capacity}" + HealthMonitorUtils.refreshKubernetesApiData(@log, @@hostName) + @@health_monitor_config = HealthMonitorUtils.getHealthMonitorConfig + ApplicationInsightsUtility.sendCustomEvent("filter_cadvisor_health Plugin Start", {}) + end + + def filter_stream(tag, es) + new_es = MultiEventStream.new + HealthMonitorUtils.refreshKubernetesApiData(@log, @hostName) + records_count = 0 + es.each { |time, record| + begin + filtered_record = filter(tag, time, record) + if !filtered_record.nil? 
+ new_es.add(time, filtered_record) + records_count += 1 + end + rescue => e + router.emit_error_event(tag, time, record, e) + end + } + @log.debug "Filter Records Count #{records_count}" + new_es + end + + def filter(tag, time, record) + begin + if record.key?("MonitorLabels") + return record + end + object_name = record['DataItems'][0]['ObjectName'] + counter_name = record['DataItems'][0]['Collections'][0]['CounterName'].downcase + if @metrics_to_collect_hash.key?(counter_name.downcase) + metric_value = record['DataItems'][0]['Collections'][0]['Value'] + case object_name + when @@object_name_k8s_container + case counter_name.downcase + when @@counter_name_cpu + # @log.debug "Object Name #{object_name}" + # @log.debug "Counter Name #{counter_name}" + # @log.debug "Metric Value #{metric_value}" + #return process_container_cpu_record(record, metric_value) + when @@counter_name_memory_rss + #return process_container_memory_record(record, metric_value) + end + when @@object_name_k8s_node + case counter_name.downcase + when @@counter_name_cpu + #process_node_cpu_record(record, metric_value) + when @@counter_name_memory_rss + #process_node_memory_record(record, metric_value) + end + end + end + rescue => e + @log.debug "Error in filter #{e}" + @log.debug "record #{record}" + @log.debug "backtrace #{e.backtrace}" + ApplicationInsightsUtility.sendExceptionTelemetry(e) + return nil + end + end + + def process_container_cpu_record(record, metric_value) + monitor_id = HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID + @log.debug "processing container cpu record" + if record.nil? + return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) + container_metadata = HealthMonitorUtils.getContainerMetadata(key) + if !container_metadata.nil? + cpu_limit = container_metadata['cpuLimit'] + end + + if cpu_limit.to_s.empty? 
+ #@log.info "CPU Limit is nil" + cpu_limit = @cpu_capacity + end + + #@log.info "cpu limit #{cpu_limit}" + + percent = (metric_value.to_f/cpu_limit*100).round(2) + #@log.debug "Container #{key} | Percentage of CPU limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID]) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} + #@log.info health_monitor_record + + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) + #@log.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) + temp = record.nil? ? "Nil" : record["MonitorInstanceId"] + @log.info "Processed Container CPU #{temp}" + return record + end + return nil + end + + def process_container_memory_record(record, metric_value) + monitor_id = HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID + #@log.debug "processing container memory record" + if record.nil? + return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) + container_metadata = HealthMonitorUtils.getContainerMetadata(key) + if !container_metadata.nil? + memory_limit = container_metadata['memoryLimit'] + end + + if memory_limit.to_s.empty? 
+ #@log.info "Memory Limit is nil" + memory_limit = @memory_capacity + end + + #@log.info "memory limit #{memory_limit}" + + percent = (metric_value.to_f/memory_limit*100).round(2) + #@log.debug "Container #{key} | Percentage of Memory limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID]) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} + #@log.info health_monitor_record + + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) + #@log.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) + temp = record.nil? ? "Nil" : record["MonitorInstanceId"] + @log.info "Processed Container Memory #{temp}" + return record + end + return nil + end + + def process_node_cpu_record(record, metric_value) + monitor_id = HealthMonitorConstants::NODE_CPU_MONITOR_ID + #@log.debug "processing node cpu record" + if record.nil? 
+ return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + #@log.info "CPU capacity #{@cpu_capacity}" + + percent = (metric_value.to_f/@cpu_capacity*100).round(2) + #@log.debug "Percentage of CPU limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::NODE_CPU_MONITOR_ID]) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} + + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) + HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) + # temp = record.nil? ? "Nil" : record["MonitorInstanceId"] + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName + @log.info "Processed Node CPU" + return health_record + end + return nil + end + + def process_node_memory_record(record, metric_value) + monitor_id = HealthMonitorConstants::NODE_MEMORY_MONITOR_ID + #@log.debug "processing node memory record" + if record.nil? 
+ return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + #@log.info "Memory capacity #{@memory_capacity}" + + percent = (metric_value.to_f/@memory_capacity*100).round(2) + #@log.debug "Percentage of Memory limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::NODE_MEMORY_MONITOR_ID]) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} + #@log.info health_monitor_record + + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) + #@log.info "Monitor Instance Id: #{monitor_instance_id}" + # HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) + # temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName + @log.info "Processed Node Memory" + return health_record + end + return nil + end + end +end diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb new file mode 100644 index 000000000..4de24e6c9 --- /dev/null +++ b/source/code/plugin/filter_cadvisor_health_node.rb @@ -0,0 +1,266 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +module Fluent + require 'logger' + require 'json' + require_relative 'oms_common' + require_relative 'HealthMonitorUtils' + require_relative 'HealthMonitorState' + require_relative "ApplicationInsightsUtility" + + + class CAdvisor2NodeHealthFilter < Filter + Fluent::Plugin.register_filter('filter_cadvisor_health_node', self) + + config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/health_monitors.log' + config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryRssBytes' + config_param :container_resource_refresh_interval_minutes, :integer, :default => 5 + + @@object_name_k8s_node = 'K8SNode' + @@object_name_k8s_container = 'K8SContainer' + + @@counter_name_cpu = 'cpuusagenanocores' + @@counter_name_memory_rss = 'memoryrssbytes' + + @@health_monitor_config = {} + + @@hostName = (OMS::Common.get_hostname) + @@clusterName = KubernetesApiClient.getClusterName + @@clusterId = KubernetesApiClient.getClusterId + @@clusterRegion = KubernetesApiClient.getClusterRegion + + + def initialize + super 
+ @cpu_capacity = 0.0 + @memory_capacity = 0.0 + @last_resource_refresh = DateTime.now.to_time.to_i + @metrics_to_collect_hash = {} + end + + def configure(conf) + super + @log = HealthMonitorUtils.getLogHandle + @log.debug {'Starting filter_cadvisor2health plugin'} + end + + def start + super + @metrics_to_collect_hash = HealthMonitorUtils.build_metrics_hash(@metrics_to_collect) + @log.debug "Calling ensure_cpu_memory_capacity_set cpu_capacity #{@cpu_capacity} memory_capacity #{@memory_capacity}" + node_capacity = HealthMonitorUtils.ensure_cpu_memory_capacity_set(@cpu_capacity, @memory_capacity, @@hostName) + @cpu_capacity = node_capacity[0] + @memory_capacity = node_capacity[1] + @log.info "CPU Capacity #{@cpu_capacity} Memory Capacity #{@memory_capacity}" + HealthMonitorUtils.refreshKubernetesApiData(@log, @@hostName) + @@health_monitor_config = HealthMonitorUtils.getHealthMonitorConfig + ApplicationInsightsUtility.sendCustomEvent("filter_cadvisor_health Plugin Start", {}) + end + + def filter_stream(tag, es) + new_es = MultiEventStream.new + HealthMonitorUtils.refreshKubernetesApiData(@log, @hostName) + records_count = 0 + es.each { |time, record| + begin + filtered_record = filter(tag, time, record) + if !filtered_record.nil? 
+ new_es.add(time, filtered_record) + records_count += 1 + end + rescue => e + router.emit_error_event(tag, time, record, e) + end + } + @log.debug "Filter Records Count #{records_count}" + new_es + end + + def filter(tag, time, record) + begin + if record.key?("MonitorLabels") + return record + end + object_name = record['DataItems'][0]['ObjectName'] + counter_name = record['DataItems'][0]['Collections'][0]['CounterName'].downcase + if @metrics_to_collect_hash.key?(counter_name.downcase) + metric_value = record['DataItems'][0]['Collections'][0]['Value'] + case object_name + when @@object_name_k8s_container + case counter_name.downcase + when @@counter_name_cpu + # @log.debug "Object Name #{object_name}" + # @log.debug "Counter Name #{counter_name}" + # @log.debug "Metric Value #{metric_value}" + #return process_container_cpu_record(record, metric_value) + when @@counter_name_memory_rss + #return process_container_memory_record(record, metric_value) + end + when @@object_name_k8s_node + case counter_name.downcase + when @@counter_name_cpu + process_node_cpu_record(record, metric_value) + when @@counter_name_memory_rss + process_node_memory_record(record, metric_value) + end + end + end + rescue => e + @log.debug "Error in filter #{e}" + @log.debug "record #{record}" + @log.debug "backtrace #{e.backtrace}" + ApplicationInsightsUtility.sendExceptionTelemetry(e) + return nil + end + end + + def process_container_cpu_record(record, metric_value) + monitor_id = HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID + @log.debug "processing container cpu record" + if record.nil? + return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) + container_metadata = HealthMonitorUtils.getContainerMetadata(key) + if !container_metadata.nil? + cpu_limit = container_metadata['cpuLimit'] + end + + if cpu_limit.to_s.empty? 
+ #@log.info "CPU Limit is nil" + cpu_limit = @cpu_capacity + end + + #@log.info "cpu limit #{cpu_limit}" + + percent = (metric_value.to_f/cpu_limit*100).round(2) + #@log.debug "Container #{key} | Percentage of CPU limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID]) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} + #@log.info health_monitor_record + + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) + #@log.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) + temp = record.nil? ? "Nil" : record["MonitorInstanceId"] + @log.info "Processed Container CPU #{temp}" + return record + end + return nil + end + + def process_container_memory_record(record, metric_value) + monitor_id = HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID + #@log.debug "processing container memory record" + if record.nil? + return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) + container_metadata = HealthMonitorUtils.getContainerMetadata(key) + if !container_metadata.nil? + memory_limit = container_metadata['memoryLimit'] + end + + if memory_limit.to_s.empty? 
+ #@log.info "Memory Limit is nil" + memory_limit = @memory_capacity + end + + #@log.info "memory limit #{memory_limit}" + + percent = (metric_value.to_f/memory_limit*100).round(2) + #@log.debug "Container #{key} | Percentage of Memory limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID]) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} + #@log.info health_monitor_record + + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) + #@log.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) + temp = record.nil? ? "Nil" : record["MonitorInstanceId"] + @log.info "Processed Container Memory #{temp}" + return record + end + return nil + end + + def process_node_cpu_record(record, metric_value) + monitor_id = HealthMonitorConstants::NODE_CPU_MONITOR_ID + #@log.debug "processing node cpu record" + if record.nil? 
+ return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + #@log.info "CPU capacity #{@cpu_capacity}" + + percent = (metric_value.to_f/@cpu_capacity*100).round(2) + #@log.debug "Percentage of CPU limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::NODE_CPU_MONITOR_ID]) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} + + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) + HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) + # temp = record.nil? ? "Nil" : record["MonitorInstanceId"] + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName + @log.info "Processed Node CPU" + return health_record + end + return nil + end + + def process_node_memory_record(record, metric_value) + monitor_id = HealthMonitorConstants::NODE_MEMORY_MONITOR_ID + #@log.debug "processing node memory record" + if record.nil? 
+ return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + #@log.info "Memory capacity #{@memory_capacity}" + + percent = (metric_value.to_f/@memory_capacity*100).round(2) + #@log.debug "Percentage of Memory limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::NODE_MEMORY_MONITOR_ID]) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} + #@log.info health_monitor_record + + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) + #@log.info "Monitor Instance Id: #{monitor_instance_id}" + HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) + # temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName + @log.info "Processed Node Memory" + return health_record + end + return nil + end + end +end diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb new file mode 100644 index 000000000..07e1b26a1 --- /dev/null +++ b/source/code/plugin/filter_health_model_builder.rb @@ -0,0 +1,131 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. + +# frozen_string_literal: true + +module Fluent + require 'logger' + require 'json' + Dir[File.join(__dir__, './health', '*.rb')].each { |file| require file } + + class FilterHealthModelBuilder < Filter + Fluent::Plugin.register_filter('filter_health_model_builder', self) + + config_param :enable_log, :integer, :default => 0 + config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log' + config_param :model_definition_path, :default => '/etc/opt/microsoft/docker-cimprov/health_model_definition.json' + attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_transition_processor, :state_finalizers, :monitor_set, :model_builder + + @@healthMonitorConfig = HealthMonitorUtils.getHealthMonitorConfig + + def initialize + super + @buffer = HealthModel::HealthModelBuffer.new + @health_model_definition = HealthModel::HealthModelDefinition.new(HealthModel::HealthModelDefinitionParser.new(@model_definition_path).parse_file) + @monitor_factory = 
HealthModel::MonitorFactory.new + @state_transition_processor = HealthModel::StateTransitionProcessor.new(@health_model_definition, @monitor_factory) + @state_finalizers = [HealthModel::NodeMonitorHierarchyReducer.new, HealthModel::AggregateMonitorStateFinalizer.new] + @monitor_set = HealthModel::MonitorSet.new + @model_builder = HealthModel::HealthModelBuilder.new(@state_transition_processor, @state_finalizers, @monitor_set) + end + + def configure(conf) + super + @log = nil + + if @enable_log + @log = Logger.new(@log_path, 'weekly') + @log.info 'Starting filter_health_model_builder plugin' + end + end + + def start + super + end + + def shutdown + super + end + + def filter_stream(tag, es) + begin + if tag.start_with?("oms.api.KubeHealth.DaemonSet") + records = [] + if !es.nil? + es.each{|time, record| + HealthMonitorState.updateHealthMonitorState(@log, + record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + record[HealthMonitorRecordFields::DETAILS], + @@healthMonitorConfig[record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID]]) + records.push(record) + } + @buffer.add_to_buffer(records) + end + return [] + elsif tag.start_with?("oms.api.KubeHealth.ReplicaSet") + records = [] + es.each{|time, record| + records.push(record) + } + @buffer.add_to_buffer(records) + records_to_process = @buffer.get_buffer + @buffer.reset_buffer + filtered_records = [] + raw_records = [] + records_to_process.each{|record| + monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] + filtered_record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, + record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + @@healthMonitorConfig[monitor_id], + key: record[HealthMonitorRecordFields::CONTAINER_ID], + controller_name: record[HealthMonitorRecordFields::CONTROLLER_NAME], + node_name: record[HealthMonitorRecordFields::NODE_NAME] + ) + filtered_records.push(MonitorStateTransition.new( + filtered_record[HealthMonitorRecordFields::MONITOR_ID], + 
filtered_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + filtered_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], + filtered_record[HealthMonitorRecordFields::OLD_STATE], + filtered_record[HealthMonitorRecordFields::NEW_STATE], + filtered_record[HealthMonitorRecordFields::MONITOR_LABELS], + filtered_record[HealthMonitorRecordFields::MONITOR_CONFIG], + filtered_record[HealthMonitorRecordFields::DETAILS] + )) if filtered_record + + raw_records.push(filtered_record) if filtered_record + } + + @log.info "Filtered Records size = #{filtered_records.size}" + + + # if raw_records.size > 0 + + # raw_records.each{|record| + # @log.debug "#{record}" + # } + + # File.open("/tmp/mock_data-#{Time.now.to_i}.json", "w") do |f| + # f.write(JSON.pretty_generate(raw_records)) + # end + # end + + + @model_builder.process_state_transitions(filtered_records) + monitors_map = @model_builder.finalize_model + @log.debug "monitors map size = #{monitors_map.size}" + # monitors_map.each{|key, value| + # @log.debug "#{key} ==> #{value.state}" + # } + + + return [] + else + raise "Invalid tag #{tag} received" + end + rescue => e + @log.warn "Message: #{e.message} Backtrace: #{e.backtrace}" + return nil + end + es + end + end +end diff --git a/source/code/plugin/health/aggregate_monitor.rb b/source/code/plugin/health/aggregate_monitor.rb new file mode 100644 index 000000000..c5bcf9ff9 --- /dev/null +++ b/source/code/plugin/health/aggregate_monitor.rb @@ -0,0 +1,113 @@ +# frozen_string_literal: true + +require_relative 'health_model_constants' +require 'json' + +module HealthModel + class AggregateMonitor + attr_accessor :monitor_id, :monitor_instance_id, :state, :transition_time, :aggregation_algorithm, :aggregation_algorithm_params, :labels, :is_aggregate_monitor + attr_reader :member_monitors + + # constructor + def initialize( + monitor_id, + monitor_instance_id, + state, + transition_time, + aggregation_algorithm, + aggregation_algorithm_params, + labels + ) + @monitor_id = 
monitor_id + @monitor_instance_id = monitor_instance_id + @state = state + @transition_time = transition_time + @aggregation_algorithm = aggregation_algorithm || AggregationAlgorithm::WORSTOF + @aggregation_algorithm_params = aggregation_algorithm_params + @labels = labels + @member_monitors = {} + @is_aggregate_monitor = true + end + + def add_member_monitor(member_monitor_instance_id) + unless @member_monitors.key?(member_monitor_instance_id) + @member_monitors[member_monitor_instance_id] = true + end + end + + def remove_member_monitor(member_monitor_instance_id) + if @member_monitors.key?(member_monitor_instance_id) + @member_monitors.delete(member_monitor_instance_id) + end + end + + # return the member monitors as an array + def get_member_monitors + @member_monitors.map(&:first) + end + + def calculate_state(monitor_set) + case @aggregation_algorithm + when AggregationAlgorithm::WORSTOF + @state = calculate_worst_of_state(monitor_set) + when AggregationAlgorithm::PERCENTAGE + @state = calculate_percentage_state(monitor_set) + end + end + + # calculates the worst of state, given the member monitors + def calculate_worst_of_state(monitor_set) + + member_state_counts = map_member_monitor_states(monitor_set) + + if member_state_counts.length === 0 + return MonitorState::NONE + end + + if member_state_counts.key?(MonitorState::CRITICAL) && member_state_counts[MonitorState::CRITICAL] > 0 + return MonitorState::CRITICAL + end + if member_state_counts.key?(MonitorState::ERROR) && member_state_counts[MonitorState::ERROR] > 0 + return MonitorState::ERROR + end + if member_state_counts.key?(MonitorState::WARNING) && member_state_counts[MonitorState::WARNING] > 0 + return MonitorState::WARNING + end + + if member_state_counts.key?(MonitorState::NONE) && member_state_counts[MonitorState::NONE] > 0 + return MonitorState::NONE + end + + return MonitorState::HEALTHY + end + + def calculate_percentage_state + + end + + def map_member_monitor_states(monitor_set) + 
member_monitor_instance_ids = get_member_monitors + if member_monitor_instance_ids.nil? || member_monitor_instance_ids.size == 0 + return {} + end + + state_counts = {} + + member_monitor_instance_ids.each {|monitor_instance_id| + + member_monitor = monitor_set.get_monitor(monitor_instance_id) + monitor_state = member_monitor.state; + + if !state_counts.key?(monitor_state) + state_counts[monitor_state] = 1 + else + count = state_counts[monitor_state] + state_counts[monitor_state] = count+1 + end + } + + return state_counts; + end + + end +end diff --git a/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb b/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb new file mode 100644 index 000000000..be3f3a704 --- /dev/null +++ b/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb @@ -0,0 +1,25 @@ +module HealthModel + class AggregateMonitorInstanceIdLabels + @@id_labels_mapping = { + MonitorId::CONTROLLER => ["monitor.azure.com/namespace", "monitor.azure.com/controller-name"], + MonitorId::NODE => ["agentpool", "kubernetes.io/role", "kubernetes.io/hostname"], + MonitorId::NAMESPACE => ["monitor.azure.com/namespace"], + MonitorId::AGENT_NODE_POOL => ["agentpool"] + # MonitorId::ALL_AGENT_NODE_POOLS => [], + # MonitorId::ALL_NODE_POOLS => [], + # MonitorId::ALL_NODES => [], + # MonitorId::K8S_INFRASTRUCTURE => [], + # MonitorId::CLUSTER => [], + # MonitorId::WORKLOAD => [] + } + + def self.get_labels_for(monitor_id) + if @@id_labels_mapping.key?(monitor_id) + return @@id_labels_mapping[monitor_id] + else + return [] + end + + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/aggregate_monitor_state_finalizer.rb b/source/code/plugin/health/aggregate_monitor_state_finalizer.rb new file mode 100644 index 000000000..0ab7e61c9 --- /dev/null +++ b/source/code/plugin/health/aggregate_monitor_state_finalizer.rb @@ -0,0 +1,32 @@ +module HealthModel + class AggregateMonitorStateFinalizer + + def 
finalize(monitor_set) + top_level_monitor = monitor_set.get_monitor(MonitorId::CLUSTER) + if !top_level_monitor.nil? + calculate_subtree_state(top_level_monitor, monitor_set) + end + end + + private + + def calculate_subtree_state(monitor, monitor_set) + if monitor.nil? || !monitor.is_aggregate_monitor + raise 'AggregateMonitorStateFinalizer:calculateSubtreeState Parameter monitor must be non-null AggregateMonitor' + end + + member_monitor_instance_ids = monitor.get_member_monitors # monitor_instance_ids + + member_monitor_instance_ids.each{|member_monitor_instance_id| + member_monitor = monitor_set.get_monitor(member_monitor_instance_id) + + if !member_monitor.nil? && member_monitor.is_aggregate_monitor + calculate_subtree_state(member_monitor, monitor_set) + end + } + + monitor.calculate_state(monitor_set) + end + + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_model_buffer.rb b/source/code/plugin/health/health_model_buffer.rb new file mode 100644 index 000000000..402f699cc --- /dev/null +++ b/source/code/plugin/health/health_model_buffer.rb @@ -0,0 +1,38 @@ +module HealthModel + +=begin + Class that is used to create a buffer for collecting the health records +=end + class HealthModelBuffer + + attr_reader :records_buffer, :log + + def initialize + @records_buffer = [] + end + + # Returns the current buffer + def get_buffer + return @records_buffer + end + + # adds records to the buffer + def add_to_buffer(records) + @records_buffer.push(*records) + end + + # clears/resets the buffer + def reset_buffer + @records_buffer = [] + end + + # prints the buffer + def print_buffer + end + + # gets the number of records in the buffer + def get_buffer_length + @records_buffer.length + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_model_builder.rb b/source/code/plugin/health/health_model_builder.rb new file mode 100644 index 000000000..04c4cce9c --- /dev/null +++ 
b/source/code/plugin/health/health_model_builder.rb @@ -0,0 +1,82 @@ +require_relative 'health_model_constants' +require 'time' + +module HealthModel + class HealthModelBuilder + attr_accessor :state_transition_processor, :state_finalizers, :monitor_set + attr_reader :last_sent_monitors + + def initialize(state_transition_processor, state_finalizers, monitor_set) + @state_transition_processor = state_transition_processor + @state_finalizers = state_finalizers + @monitor_set = monitor_set + @last_sent_monitors = {} + end + + def process_state_transitions(state_transitions) + state_transitions.each{|transition| + @state_transition_processor.process_state_transition(transition, @monitor_set) + } + end + + def finalize_model + if !@state_finalizers.is_a?(Array) + raise 'state finalizers should be an array' + end + + if @state_finalizers.length == 0 + raise '@state_finalizers length should not be zero or empty' + end + + @state_finalizers.each{|finalizer| + finalizer.finalize(@monitor_set) + } + + # return only those monitors whose state has changed, ALWAYS including the cluster level monitor + monitors_map = get_changed_monitors + + # monitors_map.each{|key, value| + # puts "#{key} ==> #{value.state}" + # } + # puts "*****************************************************" + + update_last_sent_monitors + clear_monitors + return monitors_map + end + + private + def get_changed_monitors + changed_monitors = {} + # always send cluster monitor as a 'heartbeat' + top_level_monitor = @monitor_set.get_monitor(MonitorId::CLUSTER) + if top_level_monitor.nil? 
+ top_level_monitor = AggregateMonitor.new(MonitorId::CLUSTER, MonitorId::CLUSTER, @last_sent_monitors[MonitorId::CLUSTER], Time.now.utc.iso8601, AggregationAlgorithm::WORSTOF, nil, {}) + end + changed_monitors[MonitorId::CLUSTER] = top_level_monitor + + @monitor_set.get_map.each{|monitor_instance_id, monitor| + if @last_sent_monitors.key?(monitor_instance_id) + last_sent_monitor_state = @last_sent_monitors[monitor_instance_id] + if last_sent_monitor_state.downcase != monitor.state.downcase + changed_monitors[monitor_instance_id] = monitor + end + else + changed_monitors[monitor_instance_id] = monitor + end + } + return changed_monitors + end + + def update_last_sent_monitors + @monitor_set.get_map.map{|instance_id, monitor| + @last_sent_monitors[instance_id] = monitor.state + } + end + + def clear_monitors + @monitor_set = MonitorSet.new + end + + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_model_constants.rb b/source/code/plugin/health/health_model_constants.rb new file mode 100644 index 000000000..0c7f541df --- /dev/null +++ b/source/code/plugin/health/health_model_constants.rb @@ -0,0 +1,58 @@ +module HealthModel + class MonitorState + CRITICAL = "fail" + ERROR = "fail" + WARNING = "warn" + NONE = "none" + HEALTHY = "pass" + end + + class AggregationAlgorithm + WORSTOF = "worstOf" + PERCENTAGE = "percentage" + end + + class MonitorId + CLUSTER = 'cluster'; + ALL_NODES = 'all_nodes'; + K8S_INFRASTRUCTURE = 'k8s_infrastructure' + + + NODE = 'node'; + AGENT_NODE_POOL = 'agent_node_pool' + MASTER_NODE_POOL = 'master_node_pool' + ALL_AGENT_NODE_POOLS = 'all_agent_node_pools' + ALL_NODE_POOLS = 'all_node_pools'; + + WORKLOAD = 'workload'; + MANAGED_INFRA = 'managed_infra' + CAPACITY = 'capacity'; + + CONTROLLER = 'controller'; + NAMESPACE = 'namespace'; + NAMESPACES = 'namespaces'; + end + + class HealthMonitorRecordFields + CLUSTER_ID = "ClusterId" + MONITOR_ID = "MonitorId" + MONITOR_INSTANCE_ID = "MonitorInstanceId" + 
MONITOR_LABELS = "MonitorLabels" + DETAILS = "Details" + MONITOR_CONFIG = "MonitorConfig" + OLD_STATE = "OldState" + NEW_STATE = "NewState" + AGENT_COLLECTION_TIME = "AgentCollectionTime" + TIME_FIRST_OBSERVED = "TimeFirstObserved" + NODE_NAME = "NodeName" + CONTROLLER_NAME = "ControllerName" + HEALTH_ASPECT = "HealthAspect" + CONTAINER_ID = "ContainerID" + end + + class HealthAspect + NODES = "Nodes" + KUBERNETES_INFRASTRUCTURE = "Kubernetes infrastructure" + WORKLOAD = "Workload" + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_model_definition.rb b/source/code/plugin/health/health_model_definition.rb new file mode 100644 index 000000000..2f7492db8 --- /dev/null +++ b/source/code/plugin/health/health_model_definition.rb @@ -0,0 +1,75 @@ +module HealthModel + class HealthModelDefinition + + attr_reader :health_model_definition + + def initialize(definition) + @health_model_definition = definition + end + + # gets the parent monitor id given the state transition. It requires the monitor id and labels to determine the parent id + def get_parent_monitor_id(monitor) + monitor_id = monitor.monitor_id + + if @health_model_definition.key?(monitor_id) + parent_monitor_id = @health_model_definition[monitor_id]['parent_monitor_id'] + # check parent_monitor_id is an array, then evaluate the conditions, else return the parent_monitor_id + if parent_monitor_id.is_a?(String) + return parent_monitor_id + end + if parent_monitor_id.nil? + conditions = @health_model_definition[monitor_id]['conditions'] + if !conditions.nil? 
&& conditions.is_a?(Array) + puts "Conditions" + labels = monitor.labels + conditions.each{|condition| + left = "#{labels[condition['key']]}" + op = "#{condition['operator']}" + right = "#{condition['value']}" + cond = left.send(op.to_sym, right) + + if cond + return condition['parent_id'] + end + + } + # record = {"namespace" => "kube-system", "controller-name" => "kube-dns-jhdf21374d"} + # value = 'kube-system' + # left, op, right = "#{record['namespace']} == #{value}".split + # cond = left.send(op.to_sym, right) + end + end + else + raise "Invalid Monitor Id #{monitor_id} in get_parent_monitor_id" + end + end + + def get_parent_monitor_labels(monitor_id, monitor_labels, parent_monitor_id) + labels_to_copy = @health_model_definition[monitor_id]['labels'] + if labels_to_copy.nil? + return {} + end + parent_monitor_labels = {} + labels_to_copy.each{|label| + parent_monitor_labels[label] = monitor_labels[label] + } + return parent_monitor_labels + end + + def get_parent_monitor_config(parent_monitor_id) + return @health_model_definition[parent_monitor_id] + end + + def get_parent_monitor_instance_id(parent_monitor_id, parent_monitor_labels) + labels = AggregateMonitorInstanceIdLabels.get_labels_for(parent_monitor_id) + if !labels.is_a?(Array) + raise "Expected #{labels} to be an Array for #{parent_monitor_id}" + end + values = labels.map{|label| parent_monitor_labels[label]} + if values.nil? || values.empty? || values.size == 0 + return parent_monitor_id + end + return "#{parent_monitor_id}-#{values.join('-')}" + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_model_definition_parser.rb b/source/code/plugin/health/health_model_definition_parser.rb new file mode 100644 index 000000000..ee424c265 --- /dev/null +++ b/source/code/plugin/health/health_model_definition_parser.rb @@ -0,0 +1,49 @@ +=begin + Class to parse the health model definition. 
The definition expresses the relationship between monitors, how to roll up to an aggregate monitor, + and what labels to "pass on" to the parent monitor +=end +require 'json' + +module HealthModel + class HealthModelDefinitionParser + attr_accessor :health_model_definition_path, :health_model_definition + + # Constructor + def initialize(path) + @health_model_definition = {} + @health_model_definition_path = path + end + + # Parse the health model definition file and build the model roll-up hierarchy + def parse_file + if (!File.exist?(@health_model_definition_path)) + raise "File does not exist in the specified path" + end + + file = File.read(@health_model_definition_path) + temp_model = JSON.parse(file) + temp_model.each { |entry| + monitor_id = entry['monitor_id'] + parent_monitor_id = entry['parent_monitor_id'] + labels = entry['labels'] if entry['labels'] + aggregation_algorithm = entry['aggregation_algorithm'] if entry['aggregation_algorithm'] + if parent_monitor_id.is_a?(Array) + conditions = [] + parent_monitor_id.each{|condition| + key = condition['label'] + operator = condition['operator'] + value = condition['value'] + parent_id = condition['id'] + conditions.push({"key" => key, "operator" => operator, "value" => value, "parent_id" => parent_id}) + } + @health_model_definition[monitor_id] = {"conditions" => conditions, "labels" => labels, "aggregation_algorithm" => aggregation_algorithm} + elsif parent_monitor_id.is_a?(String) + @health_model_definition[monitor_id] = {"parent_monitor_id" => parent_monitor_id, "labels" => labels, "aggregation_algorithm" => aggregation_algorithm} + elsif parent_monitor_id.nil? 
+ @health_model_definition[monitor_id] = {"parent_monitor_id" => nil, "labels" => labels, "aggregation_algorithm" => aggregation_algorithm} + end + } + @health_model_definition + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/monitor_factory.rb b/source/code/plugin/health/monitor_factory.rb new file mode 100644 index 000000000..68d3cfe70 --- /dev/null +++ b/source/code/plugin/health/monitor_factory.rb @@ -0,0 +1,22 @@ +module HealthModel + class MonitorFactory + + def initialize + + end + + def create_unit_monitor(monitor_state_transition) + return UnitMonitor.new(monitor_state_transition.monitor_id, + monitor_state_transition.monitor_instance_id, + monitor_state_transition.new_state, + monitor_state_transition.transition_date_time, + monitor_state_transition.labels, + monitor_state_transition.config, + monitor_state_transition.details) + end + + def create_aggregate_monitor(monitor_id, monitor_instance_id, labels, aggregation_algorithm, aggregation_algorithm_params, child_monitor) + return AggregateMonitor.new(monitor_id, monitor_instance_id, child_monitor.state, child_monitor.transition_time, aggregation_algorithm, aggregation_algorithm_params, labels) + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/monitor_set.rb b/source/code/plugin/health/monitor_set.rb new file mode 100644 index 000000000..79310a7fa --- /dev/null +++ b/source/code/plugin/health/monitor_set.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +module HealthModel + class MonitorSet + attr_accessor :monitors + # attr_reader :changed_monitors + + def initialize + @monitors = {} + end + + def contains?(monitor_instance_id) + @monitors.key?(monitor_instance_id) + end + + def add_or_update(monitor) + # if @monitors.key?(monitor.monitor_instance_id) + # current_monitor = @monitors[monitor.monitor_instance_id] + # if current_monitor.state.downcase != monitor.state.downcase + # @monitors[monitor.monitor_instance_id] = monitor + # 
@changed_monitors[monitor.monitor_instance_id] = monitor + # end + # else + # @monitors[monitor.monitor_instance_id] = monitor + # @changed_monitors[monitor.monitor_instance_id] = monitor + # end + @monitors[monitor.monitor_instance_id] = monitor + end + + def get_monitor(monitor_instance_id) + @monitors[monitor_instance_id] if @monitors.key?(monitor_instance_id) + end + + def delete(monitor_instance_id) + if @monitors.key?(monitor_instance_id) + @monitors.delete(monitor_instance_id) + end + + # if @changed_monitors.key(monitor_instance_id) + # @changed_monitors.delete(monitor_instance_id) + # end + end + + def get_size + @monitors.length + end + + def get_map + @monitors + end + + # def clear_changed_monitors + # @changed_monitors = {} + # end + end +end diff --git a/source/code/plugin/health/monitor_state_transition.rb b/source/code/plugin/health/monitor_state_transition.rb new file mode 100644 index 000000000..3a5756b73 --- /dev/null +++ b/source/code/plugin/health/monitor_state_transition.rb @@ -0,0 +1,11 @@ +MonitorStateTransition = Struct.new( + :monitor_id, + :monitor_instance_id, + :transition_date_time, + :old_state, + :new_state, + :labels, + :config, + :details + ) do +end \ No newline at end of file diff --git a/source/code/plugin/health/newfiles.txt b/source/code/plugin/health/newfiles.txt new file mode 100644 index 000000000..cf52faf59 --- /dev/null +++ b/source/code/plugin/health/newfiles.txt @@ -0,0 +1,15 @@ +aggregate_monitor.rb +aggregate_monitor_instance_id_labels.rb +aggregate_monitor_state_finalizer.rb +health_model_buffer.rb +health_model_builder.rb +health_model_constants.rb +health_model_definition.rb +health_model_definition_parser.rb +hierarchy_finalizer.rb +monitor_factory.rb +monitor_set.rb +monitor_state_transition.rb +node_monitor_hierarchy_reducer.rb +state_transition_processor.rb +unit_monitor.rb \ No newline at end of file diff --git a/source/code/plugin/health/node_monitor_hierarchy_reducer.rb 
b/source/code/plugin/health/node_monitor_hierarchy_reducer.rb new file mode 100644 index 000000000..0d6999217 --- /dev/null +++ b/source/code/plugin/health/node_monitor_hierarchy_reducer.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +module HealthModel + class NodeMonitorHierarchyReducer + def initialize + end + + # Finalizes the Node Hierarchy. This removes node pools and node pool set from the hierarchy if they are not present. + def finalize(monitor_set) + monitors_to_reduce = [MonitorId::ALL_AGENT_NODE_POOLS, MonitorId::ALL_NODES] + # for the above monitors, which are constant per cluster, the monitor_id and monitor_instance_id are the same + monitors_to_reduce.each do |monitor_to_reduce| + monitor = monitor_set.get_monitor(monitor_to_reduce) + if !monitor.nil? + puts "Before Deleting #{monitor_set.get_size}" + if monitor.is_aggregate_monitor && monitor.get_member_monitors.size == 1 + #copy the children of member monitor as children of parent + member_monitor_instance_id = monitor.get_member_monitors[0] #gets the only member monitor instance id + member_monitor = monitor_set.get_monitor(member_monitor_instance_id) + member_monitor.get_member_monitors.each{|grandchild_monitor| + monitor.add_member_monitor(grandchild_monitor) + } + monitor.remove_member_monitor(member_monitor_instance_id) + # delete the member monitor from the monitor_set + monitor_set.delete(member_monitor_instance_id) + puts "After Deleting #{monitor_set.get_size}" + end + end + end + end + end +end diff --git a/source/code/plugin/health/state_transition_processor.rb b/source/code/plugin/health/state_transition_processor.rb new file mode 100644 index 000000000..6025a49cc --- /dev/null +++ b/source/code/plugin/health/state_transition_processor.rb @@ -0,0 +1,78 @@ +require 'json' +module HealthModel + class StateTransitionProcessor + + attr_accessor :health_model_definition, :monitor_factory + + def initialize(health_model_definition, monitor_factory) + + if 
!health_model_definition.is_a?(HealthModelDefinition)
+ raise "Invalid Type Expected: HealthModelDefinition Actual: #{health_model_definition.class.name}"
+ end
+ @health_model_definition = health_model_definition
+
+ if !monitor_factory.is_a?(MonitorFactory)
+ raise "Invalid Type Expected: MonitorFactory Actual: #{monitor_factory.class.name}"
+ end
+ @monitor_factory = monitor_factory
+ end
+
+ def process_state_transition(monitor_state_transition, monitor_set)
+ if !monitor_state_transition.is_a?(MonitorStateTransition)
+ raise "Unexpected Type #{monitor_state_transition.class}"
+ end
+
+ puts "process_state_transition for #{monitor_state_transition.monitor_id}"
+
+ # monitor state transition will always be on a unit monitor
+ child_monitor = @monitor_factory.create_unit_monitor(monitor_state_transition)
+ monitor_set.add_or_update(child_monitor)
+ parent_monitor_id = @health_model_definition.get_parent_monitor_id(child_monitor)
+ monitor_labels = child_monitor.labels
+ monitor_id = child_monitor.monitor_id
+
+ # to construct the parent monitor,
+ # 1. Child's labels
+ # 2. Parent monitor's config to determine what labels to copy
+ # 3. Parent Monitor Id
+ # 4. Monitor Id --> Labels to hash Mapping to generate the monitor instance id for aggregate monitors
+
+ while !parent_monitor_id.nil?
+ #puts "Parent Monitor Id #{parent_monitor_id}"
+ # get the set of labels to copy to parent monitor
+ parent_monitor_labels = @health_model_definition.get_parent_monitor_labels(monitor_id, monitor_labels, parent_monitor_id)
+ # get the parent monitor configuration
+ parent_monitor_configuration = @health_model_definition.get_parent_monitor_config(parent_monitor_id)
+ #get monitor instance id for parent monitor. Does this belong in HealthModelDefinition? 
+ parent_monitor_instance_id = @health_model_definition.get_parent_monitor_instance_id(parent_monitor_id, parent_monitor_labels) + # check if monitor set has the parent monitor id + # if not present, add + # if present, update the state based on the aggregation algorithm + parent_monitor = nil + if !monitor_set.contains?(parent_monitor_instance_id) + parent_monitor = @monitor_factory.create_aggregate_monitor(parent_monitor_id, parent_monitor_instance_id, parent_monitor_labels, parent_monitor_configuration['aggregation_algorithm'], nil, child_monitor) + parent_monitor.add_member_monitor(child_monitor.monitor_instance_id) + else + parent_monitor = monitor_set.get_monitor(parent_monitor_instance_id) + # required to calculate the rollup state + parent_monitor.add_member_monitor(child_monitor.monitor_instance_id) + # update to the earliest of the transition times of child monitors + if child_monitor.transition_time < parent_monitor.transition_time + parent_monitor.transition_time = child_monitor.transition_time + end + end + + if parent_monitor.nil? 
+ raise 'Parent_monitor should not be nil for #{monitor_id}' + end + + monitor_set.add_or_update(parent_monitor) + + child_monitor = parent_monitor + parent_monitor_id = @health_model_definition.get_parent_monitor_id(child_monitor) + monitor_labels = child_monitor.labels + monitor_id = child_monitor.monitor_id + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/unit_monitor.rb b/source/code/plugin/health/unit_monitor.rb new file mode 100644 index 000000000..b025bf40f --- /dev/null +++ b/source/code/plugin/health/unit_monitor.rb @@ -0,0 +1,26 @@ +require_relative 'health_model_constants' +require 'json' + +module HealthModel + class UnitMonitor + + attr_accessor :monitor_id, :monitor_instance_id, :state, :transition_time, :labels, :config, :details, :is_aggregate_monitor + + # constructor + def initialize(monitor_id, monitor_instance_id, state, transition_time, labels, config, details) + @monitor_id = monitor_id + @monitor_instance_id = monitor_instance_id + @state = state + @transition_time = transition_time + @labels = JSON.parse(labels) + @config = config + @details = details + @is_aggregate_monitor = false + end + + def get_member_monitors + return nil + end + + end +end \ No newline at end of file diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index fd4d8a7fc..7fcdd3349 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -2,7 +2,7 @@ # frozen_string_literal: true module Fluent - + class CAdvisor_Perf_Input < Input Plugin.register_input("cadvisorperf", self) @@ -16,10 +16,11 @@ def initialize require_relative "omslog" end - config_param :run_interval, :time, :default => "1m" config_param :tag, :string, :default => "oms.api.cadvisorperf" config_param :mdmtag, :string, :default => "mdm.cadvisorperf" - config_param :healthtag, :string, :default => "oms.api.KubeHealth.AgentCollectionTime" + config_param :nodehealthtag, :string, :default 
=> "oms.api.KubeHealth.DaemonSet.Node" + config_param :containerhealthtag, :string, :default => "oms.api.KubeHealth.DaemonSet.Container" + def configure(conf) super @@ -53,12 +54,13 @@ def enumerate() record["DataType"] = "LINUX_PERF_BLOB" record["IPName"] = "LogManagement" eventStream.add(time, record) if record - #router.emit(@tag, time, record) if record - end + #router.emit(@tag, time, record) if record + end router.emit_stream(@tag, eventStream) if eventStream router.emit_stream(@mdmtag, eventStream) if eventStream - router.emit_stream(@healthtag, eventStream) if eventStream + router.emit_stream(@containerhealthtag, eventStream) if eventStream + router.emit_stream(@nodehealthtag, eventStream) if eventStream @@istestvar = ENV["ISTEST"] if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0) diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 054935937..879ead06c 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -20,10 +20,11 @@ def initialize require_relative "DockerApiClient" require_relative 'HealthMonitorUtils' require_relative 'HealthMonitorState' + require_relative 'HealthMonitorConstants' end config_param :run_interval, :time, :default => "1m" - config_param :tag, :string, :default => "oms.api.KubeHealth.AgentCollectionTime" + config_param :tag, :string, :default => "oms.api.KubeHealth.ReplicaSet" def configure(conf) super @@ -93,19 +94,15 @@ def enumerate system_pods = pods_ready_hash.select{|k,v| v['namespace'] == 'kube-system'} workload_pods = pods_ready_hash.select{|k,v| v['namespace'] != 'kube-system'} - system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, "system_#{HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID}") + system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, 
HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID) system_pods_ready_percentage_records.each do |record| health_monitor_records.push(record) if record end - workload_pods_ready_percentage_records = process_pods_ready_percentage(workload_pods, "workload_#{HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID}") + workload_pods_ready_percentage_records = process_pods_ready_percentage(workload_pods, HealthMonitorConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID) workload_pods_ready_percentage_records.each do |record| health_monitor_records.push(record) if record end - # pod_statuses = process_pod_statuses(hmlog, pod_inventory) - # pod_statuses.each do |pod_status| - # health_monitor_records.push(pod_status) if pod_status - # end end if !node_inventory.nil? @@ -142,9 +139,18 @@ def process_cpu_oversubscribed_monitor(pod_inventory) monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId]) #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + #record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::HEALTH_ASPECT] = HealthAspect::WORKLOAD + health_record[HealthMonitorRecordFields::CLUSTER_ID] = 
KubernetesApiClient.getClusterId @@hmlog.info "Successfully processed process_cpu_oversubscribed_monitor" - return record.nil? ? nil : record + return health_record end def process_memory_oversubscribed_monitor(pod_inventory) @@ -160,9 +166,18 @@ def process_memory_oversubscribed_monitor(pod_inventory) monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId]) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + #record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::HEALTH_ASPECT] = HealthAspect::WORKLOAD + health_record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId @@hmlog.info "Successfully processed process_memory_oversubscribed_monitor" - return record.nil? ? 
nil : record + return health_record end def process_kube_api_up_monitor(state, response) @@ -178,13 +193,21 @@ def process_kube_api_up_monitor(state, response) monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId]) #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + #record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::HEALTH_ASPECT] = HealthAspect::WORKLOAD + health_record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId @@hmlog.info "Successfully processed process_kube_api_up_monitor" - return record.nil? ? 
nil : record + return health_record end def process_pods_ready_percentage(pods_hash, config_monitor_id) - monitor_id = HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID monitor_config = @@healthMonitorConfig[config_monitor_id] hmlog = HealthMonitorUtils.getLogHandle @@ -203,10 +226,20 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) state = HealthMonitorState.getStateForWorkloadPodsReadyPercentage(@@hmlog, percent, monitor_config) end health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "controllerName" => controller_name}} - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId, namespace, controller_name]) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, config_monitor_id, [@@clusterId, namespace, controller_name]) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) - record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) - records.push(record) + #record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = config_monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::HEALTH_ASPECT] = HealthAspect::WORKLOAD + health_record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId + 
health_record[HealthMonitorRecordFields::CONTROLLER_NAME] = controller_name + records.push(health_record) end @@hmlog.info "Successfully processed pods_ready_percentage for #{config_monitor_id} #{records.size}" return records @@ -231,8 +264,18 @@ def process_node_condition_monitor(node_inventory) health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId, node_name]) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) - record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, node_name: node_name) - node_condition_monitor_records.push(record) + #record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, node_name: node_name) + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::HEALTH_ASPECT] = HealthAspect::WORKLOAD + health_record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId + health_record[HealthMonitorRecordFields::NODE_NAME] = node_name + node_condition_monitor_records.push(health_record) end end @@hmlog.info "Successfully processed process_node_condition_monitor #{node_condition_monitor_records.size}" @@ -266,7 +309,7 @@ def process_pod_statuses(log, pod_inventory) monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId, namespace, controller_name, podUid]) 
HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) - record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) + #record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) if !record.nil? records.push(record) end @@ -295,5 +338,5 @@ def run_periodic end @mutex.unlock end - end # Health_Docker_Input -end # module + end +end diff --git a/source/code/plugin/mock_data.json b/source/code/plugin/mock_data.json new file mode 100644 index 000000000..ca8906dc2 --- /dev/null +++ b/source/code/plugin/mock_data.json @@ -0,0 +1,568 @@ +[ + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"0\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-0\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", + "MonitorId": "node_memory_utilization_percentage", + "MonitorInstanceId": "node_memory_utilization_percentage-2b129a9a5633c0cf8f621601c6f8bb32", + "NewState": "none", + "OldState": "none", + "Details": [ + { + "timestamp": 
"2019-05-15T17:42:30Z", + "state": "pass", + "details": { + "memoryRssBytes": 726867968.0, + "memoryUtilizationPercentage": 20.14 + } + } + ], + "MonitorConfig": "{\"PassPercentage\":80.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":3,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", + "AgentCollectionTime": "2019-05-15T17:43:10Z", + "TimeFirstObserved": "2019-05-15T17:42:30Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"0\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-0\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", + "MonitorId": "node_cpu_utilization_percentage", + "MonitorInstanceId": "node_cpu_utilization_percentage-2b129a9a5633c0cf8f621601c6f8bb32", + "NewState": "none", + "OldState": "none", + "Details": [ + { + "timestamp": "2019-05-15T17:42:30Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 158.99904371666668, + "cpuUtilizationPercentage": 15.9 + } + } + ], + "MonitorConfig": "{\"PassPercentage\":80.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":3,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", + "AgentCollectionTime": "2019-05-15T17:43:10Z", + "TimeFirstObserved": 
"2019-05-15T17:42:30Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\"}", + "MonitorId": "kube_api_up", + "MonitorInstanceId": "kube_api_up-bef5af9d919a51c49ba49d07f5784471", + "NewState": "pass", + "OldState": "pass", + "Details": [ + { + "timestamp": "2019-05-15T17:43:10Z", + "state": "pass", + "details": { + "content-type": "application/json", + "date": "Wed, 15 May 2019 17:43:10 GMT", + "connection": "close", + "transfer-encoding": "chunked", + "ResponseCode": "200" + } + } + ], + "MonitorConfig": "{\"MonitorTimeOut\":240,\"NotifyInstantly\":true}", + "AgentCollectionTime": "2019-05-15T17:43:10Z", + "TimeFirstObserved": "2019-05-15T17:43:10Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\"}", + "MonitorId": "is_oversubscribed_cpu", + "MonitorInstanceId": "is_oversubscribed_cpu-bef5af9d919a51c49ba49d07f5784471", + "NewState": "pass", + "OldState": "pass", + "Details": [ + { + "timestamp": "2019-05-15T17:43:10Z", + "state": "pass", + "details": { + "clusterCpuCapacity": 4000.0, + "clusterCpuRequests": 870.0 + } + } + ], + "MonitorConfig": "{\"MonitorTimeOut\":240,\"NotifyInstantly\":true}", + "AgentCollectionTime": "2019-05-15T17:43:10Z", + 
"TimeFirstObserved": "2019-05-15T17:43:10Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\"}", + "MonitorId": "is_oversubscribed_memory", + "MonitorInstanceId": "is_oversubscribed_memory-bef5af9d919a51c49ba49d07f5784471", + "NewState": "pass", + "OldState": "pass", + "Details": [ + { + "timestamp": "2019-05-15T17:43:10Z", + "state": "pass", + "details": { + "clusterMemoryCapacity": 14436810752.0, + "clusterMemoryRequests": 1312817152.0 + } + } + ], + "MonitorConfig": "{\"MonitorTimeOut\":240,\"NotifyInstantly\":true}", + "AgentCollectionTime": "2019-05-15T17:43:10Z", + "TimeFirstObserved": "2019-05-15T17:43:10Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": 
"{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"1\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-1\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-af2f3c986ea63b47fc7d59b71abb37b8", + "NewState": "pass", + "OldState": "pass", + "Details": [ + { + "timestamp": "2019-05-17T18:06:03Z", + "state": "pass", + "details": { + "NetworkUnavailable": { + "Reason": "RouteCreated", + "Message": "RouteController created a route" + }, + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + } + ], + "MonitorConfig": "{\"MonitorTimeOut\":240,\"NotifyInstantly\":true}", + "AgentCollectionTime": "2019-05-17T18:06:03Z", + "TimeFirstObserved": "2019-05-17T18:06:03Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"1\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-0\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-af2f3c986ea63b47fc7d59a71abb37b8", + "NewState": "pass", + "OldState": "pass", + "Details": [ + { + "timestamp": "2019-05-15T17: 43: 10Z", + "state": "pass", + "details": { + "NetworkUnavailable": { + "Reason": "RouteCreated", + "Message": "RouteController created a route" + }, + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet 
has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. AppArmor enabled" + } + } + } + ], + "MonitorConfig": "{\"MonitorTimeOut\":240,\"NotifyInstantly\":true}", + "AgentCollectionTime": "2019-05-15T17:43:10Z", + "TimeFirstObserved": "2019-05-15T17:43:10Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"heapster-fc665dc4b\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorId": "system_pods_ready_percentage", + "MonitorInstanceId": "system_pods_ready_percentage-57b4bbef6e083416b8d347d4da8de7a6", + "NewState": "pass", + "OldState": "none", + "Details": [ + { + "timestamp": "2019-05-17T18:06:03Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "controllerName": "heapster-fc665dc4b" + } + }, + { + "timestamp": "2019-05-17T18:07:03Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "controllerName": "heapster-fc665dc4b" + } + } + ], + "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", + "AgentCollectionTime": "2019-05-17T18:07:03Z", + "TimeFirstObserved": "2019-05-17T18:06:03Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": 
"{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"kube-dns-autoscaler-746998ccf6\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorId": "system_pods_ready_percentage", + "MonitorInstanceId": "system_pods_ready_percentage-21832207f4e82c39f198f11abc39d104", + "NewState": "pass", + "OldState": "none", + "Details": [ + { + "timestamp": "2019-05-17T18:06:03Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "controllerName": "kube-dns-autoscaler-746998ccf6" + } + }, + { + "timestamp": "2019-05-17T18:07:03Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "controllerName": "kube-dns-autoscaler-746998ccf6" + } + } + ], + "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", + "AgentCollectionTime": "2019-05-17T18:07:03Z", + "TimeFirstObserved": "2019-05-17T18:06:03Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"kube-dns-v20-659876bf8d\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorId": "system_pods_ready_percentage", + "MonitorInstanceId": "system_pods_ready_percentage-8433c768b2f76a7978eb7317e9bb2f4e", + "NewState": "pass", + "OldState": "none", + "Details": [ + { + "timestamp": 
"2019-05-17T18:06:03Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "controllerName": "kube-dns-v20-659876bf8d" + } + }, + { + "timestamp": "2019-05-17T18:07:03Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "controllerName": "kube-dns-v20-659876bf8d" + } + } + ], + "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", + "AgentCollectionTime": "2019-05-17T18:07:03Z", + "TimeFirstObserved": "2019-05-17T18:06:03Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"kube-proxy\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorId": "system_pods_ready_percentage", + "MonitorInstanceId": "system_pods_ready_percentage-29464dc11987eb670b82529ef4f5e094", + "NewState": "pass", + "OldState": "none", + "Details": [ + { + "timestamp": "2019-05-17T18:06:03Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "controllerName": "kube-proxy" + } + }, + { + "timestamp": "2019-05-17T18:07:03Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "controllerName": "kube-proxy" + } + } + ], + "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", + "AgentCollectionTime": "2019-05-17T18:07:03Z", + "TimeFirstObserved": "2019-05-17T18:06:03Z" + }, + { + "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"kube-svc-redirect\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorId": "system_pods_ready_percentage", + "MonitorInstanceId": "system_pods_ready_percentage-62a5128950e4ad0d13a4163f85fbf7b3", + "NewState": "pass", + "OldState": "none", + "Details": [ + { + "timestamp": "2019-05-17T18:06:03Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "controllerName": "kube-svc-redirect" + } + }, + { + "timestamp": "2019-05-17T18:07:03Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "controllerName": "kube-svc-redirect" + } + } + ], + "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", + "AgentCollectionTime": "2019-05-17T18:07:03Z", + "TimeFirstObserved": "2019-05-17T18:06:03Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"kubernetes-dashboard-6d48dd4779\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorId": "system_pods_ready_percentage", + "MonitorInstanceId": 
"system_pods_ready_percentage-d34ec2ade900ae62a713b2a8a6d1ce74", + "NewState": "pass", + "OldState": "none", + "Details": [ + { + "timestamp": "2019-05-17T18:06:03Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "controllerName": "kubernetes-dashboard-6d48dd4779" + } + }, + { + "timestamp": "2019-05-17T18:07:03Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "controllerName": "kubernetes-dashboard-6d48dd4779" + } + } + ], + "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", + "AgentCollectionTime": "2019-05-17T18:07:03Z", + "TimeFirstObserved": "2019-05-17T18:06:03Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"metrics-server-5cbc77f79f\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorId": "system_pods_ready_percentage", + "MonitorInstanceId": "system_pods_ready_percentage-2f6c4157408ce6f9dc13da2fd684e716", + "NewState": "pass", + "OldState": "none", + "Details": [ + { + "timestamp": "2019-05-17T18:06:03Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "controllerName": "metrics-server-5cbc77f79f" + } + }, + { + "timestamp": "2019-05-17T18:07:03Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "controllerName": "metrics-server-5cbc77f79f" + } + } + ], + "MonitorConfig": 
"{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", + "AgentCollectionTime": "2019-05-17T18:07:03Z", + "TimeFirstObserved": "2019-05-17T18:06:03Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"omsagent\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorId": "system_pods_ready_percentage", + "MonitorInstanceId": "system_pods_ready_percentage-930e07ceb1ea2e952e5578b3f1557fbe", + "NewState": "pass", + "OldState": "none", + "Details": [ + { + "timestamp": "2019-05-17T18:06:03Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "controllerName": "omsagent" + } + }, + { + "timestamp": "2019-05-17T18:07:03Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "controllerName": "omsagent" + } + } + ], + "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", + "AgentCollectionTime": "2019-05-17T18:07:03Z", + "TimeFirstObserved": "2019-05-17T18:06:03Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": 
"{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"omsagent-rs-6dc57b8544\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorId": "system_pods_ready_percentage", + "MonitorInstanceId": "system_pods_ready_percentage-7d42f06622dee87b682d3b03a1e348fa", + "NewState": "pass", + "OldState": "none", + "Details": [ + { + "timestamp": "2019-05-17T18:06:03Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "controllerName": "omsagent-rs-6dc57b8544" + } + }, + { + "timestamp": "2019-05-17T18:07:03Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "controllerName": "omsagent-rs-6dc57b8544" + } + } + ], + "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", + "AgentCollectionTime": "2019-05-17T18:07:03Z", + "TimeFirstObserved": "2019-05-17T18:06:03Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"tunnelfront-66dd5cfc6b\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorId": "system_pods_ready_percentage", + "MonitorInstanceId": "system_pods_ready_percentage-544c7f310f41ab8fc1196ae9f210fc83", + "NewState": "pass", + "OldState": "none", + "Details": [ + { + "timestamp": "2019-05-17T18:06:03Z", + "state": "pass", 
+ "details": { + "totalPods": 1, + "podsReady": 1, + "controllerName": "tunnelfront-66dd5cfc6b" + } + }, + { + "timestamp": "2019-05-17T18:07:03Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "controllerName": "tunnelfront-66dd5cfc6b" + } + } + ], + "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", + "AgentCollectionTime": "2019-05-17T18:07:03Z", + "TimeFirstObserved": "2019-05-17T18:06:03Z" + }, + { + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"diliprdeploymentnodeapps-c4fdfb446\",\"monitor.azure.com/namespace\":\"default\"}", + "MonitorId": "workload_pods_ready_percentage", + "MonitorInstanceId": "workload_pods_ready_percentage-5d1f09e9b9c40718528d87cdb9ec0285", + "NewState": "pass", + "OldState": "none", + "Details": [ + { + "timestamp": "2019-05-17T18:06:03Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "controllerName": "diliprdeploymentnodeapps-c4fdfb446" + } + }, + { + "timestamp": "2019-05-17T18:07:03Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "controllerName": "diliprdeploymentnodeapps-c4fdfb446" + } + } + ], + "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", + "AgentCollectionTime": "2019-05-17T18:07:03Z", + "TimeFirstObserved": "2019-05-17T18:06:03Z" + }, + { + "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"vishwadeploymentnodeapps-8686cf54db\",\"monitor.azure.com/namespace\":\"default\"}", + "MonitorId": "workload_pods_ready_percentage", + "MonitorInstanceId": "workload_pods_ready_percentage-56b719128d6ea1353fb2489afd8d8ed5", + "NewState": "pass", + "OldState": "none", + "Details": [ + { + "timestamp": "2019-05-17T18:06:03Z", + "state": "pass", + "details": { + "totalPods": 10, + "podsReady": 10, + "controllerName": "vishwadeploymentnodeapps-8686cf54db" + } + }, + { + "timestamp": "2019-05-17T18:07:03Z", + "state": "pass", + "details": { + "totalPods": 10, + "podsReady": 10, + "controllerName": "vishwadeploymentnodeapps-8686cf54db" + } + } + ], + "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", + "AgentCollectionTime": "2019-05-17T18:07:03Z", + "TimeFirstObserved": "2019-05-17T18:06:03Z" + } +] \ No newline at end of file diff --git a/test/code/plugin/filter_health_model_builder_test.rb b/test/code/plugin/filter_health_model_builder_test.rb new file mode 100644 index 000000000..c918f90ef --- /dev/null +++ b/test/code/plugin/filter_health_model_builder_test.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +require 'test/unit' +require 'json' +# require_relative '../../../source/code/plugin/health' + +Dir[File.join(__dir__, '../../../source/code/plugin/health', '*.rb')].each { |file| require file } + +class FilterHealthModelBuilderTest < Test::Unit::TestCase + include HealthModel + + def 
test_event_stream + health_definition_path = 'C:\AzureMonitor\ContainerInsights\Docker-Provider\installer\conf\health_model_definition.json' + health_model_definition = HealthModelDefinition.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) + monitor_factory = MonitorFactory.new + state_transition_processor = StateTransitionProcessor.new(health_model_definition, monitor_factory) + state_finalizers = [NodeMonitorHierarchyReducer.new, AggregateMonitorStateFinalizer.new] + monitor_set = MonitorSet.new + model_builder = HealthModelBuilder.new(state_transition_processor, state_finalizers, monitor_set) + + i = 1 + loop do + mock_data_path = "C:/AzureMonitor/ContainerInsights/Docker-Provider/source/code/plugin/mock_data-#{i}.json" + file = File.read(mock_data_path) + data = JSON.parse(file) + + state_transitions = [] + data.each do |record| + state_transition = MonitorStateTransition.new( + record[HealthMonitorRecordFields::MONITOR_ID], + record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], + record[HealthMonitorRecordFields::OLD_STATE], + record[HealthMonitorRecordFields::NEW_STATE], + record[HealthMonitorRecordFields::MONITOR_LABELS], + record[HealthMonitorRecordFields::MONITOR_CONFIG], + record[HealthMonitorRecordFields::DETAILS] + ) + state_transitions.push(state_transition) + end + + model_builder.process_state_transitions(state_transitions) + changed_monitors = model_builder.finalize_model + i = i + 1 + if i == 5 + break + end + end + puts "Done" + end +end From 8892458b121c258ee61c3c16c97ed427992185ca Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 21 May 2019 22:30:48 -0700 Subject: [PATCH 38/90] Calculate old and new states for Aggregate and Unit Monitors --- .../code/plugin/health/aggregate_monitor.rb | 20 ++++++++++--------- .../plugin/health/health_model_builder.rb | 8 ++++---- source/code/plugin/health/monitor_factory.rb | 10 +++++++++- 
source/code/plugin/health/unit_monitor.rb | 7 ++++--- 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/source/code/plugin/health/aggregate_monitor.rb b/source/code/plugin/health/aggregate_monitor.rb index c5bcf9ff9..89c93187e 100644 --- a/source/code/plugin/health/aggregate_monitor.rb +++ b/source/code/plugin/health/aggregate_monitor.rb @@ -5,14 +5,15 @@ module HealthModel class AggregateMonitor - attr_accessor :monitor_id, :monitor_instance_id, :state, :transition_time, :aggregation_algorithm, :aggregation_algorithm_params, :labels, :is_aggregate_monitor + attr_accessor :monitor_id, :monitor_instance_id, :old_state, :new_state, :transition_time, :aggregation_algorithm, :aggregation_algorithm_params, :labels, :is_aggregate_monitor attr_reader :member_monitors # constructor def initialize( monitor_id, monitor_instance_id, - state, + old_state, + new_state, transition_time, aggregation_algorithm, aggregation_algorithm_params, @@ -20,7 +21,8 @@ def initialize( ) @monitor_id = monitor_id @monitor_instance_id = monitor_instance_id - @state = state + @old_state = old_state + @new_state = new_state @transition_time = transition_time @aggregation_algorithm = aggregation_algorithm || AggregationAlgorithm::WORSTOF @aggregation_algorithm_params = aggregation_algorithm_params @@ -49,16 +51,17 @@ def get_member_monitors def calculate_state(monitor_set) case @aggregation_algorithm when AggregationAlgorithm::WORSTOF - @state = calculate_worst_of_state(monitor_set) + @old_state = calculate_worst_of_state(monitor_set, 'old_state') + @new_state = calculate_worst_of_state(monitor_set, 'new_state') when AggregationAlgorithm::PERCENTAGE @state = calculate_percentage_state(monitor_set) end end # calculates the worst of state, given the member monitors - def calculate_worst_of_state(monitor_set) + def calculate_worst_of_state(monitor_set, state_type) - member_state_counts = map_member_monitor_states(monitor_set) + member_state_counts = 
map_member_monitor_states(monitor_set, state_type) if member_state_counts.length === 0 return MonitorState::NONE @@ -85,7 +88,7 @@ def calculate_percentage_state end - def map_member_monitor_states(monitor_set) + def map_member_monitor_states(monitor_set, state_type) member_monitor_instance_ids = get_member_monitors if member_monitor_instance_ids.nil? || member_monitor_instance_ids.size == 0 return {} @@ -96,7 +99,7 @@ def map_member_monitor_states(monitor_set) member_monitor_instance_ids.each {|monitor_instance_id| member_monitor = monitor_set.get_monitor(monitor_instance_id) - monitor_state = member_monitor.state; + monitor_state = member_monitor.send(state_type); if !state_counts.key?(monitor_state) state_counts[monitor_state] = 1 @@ -108,6 +111,5 @@ def map_member_monitor_states(monitor_set) return state_counts; end - end end diff --git a/source/code/plugin/health/health_model_builder.rb b/source/code/plugin/health/health_model_builder.rb index 04c4cce9c..04e2c3eee 100644 --- a/source/code/plugin/health/health_model_builder.rb +++ b/source/code/plugin/health/health_model_builder.rb @@ -51,14 +51,14 @@ def get_changed_monitors # always send cluster monitor as a 'heartbeat' top_level_monitor = @monitor_set.get_monitor(MonitorId::CLUSTER) if top_level_monitor.nil? 
- top_level_monitor = AggregateMonitor.new(MonitorId::CLUSTER, MonitorId::CLUSTER, @last_sent_monitors[MonitorId::CLUSTER], Time.now.utc.iso8601, AggregationAlgorithm::WORSTOF, nil, {}) + top_level_monitor = AggregateMonitor.new(MonitorId::CLUSTER, MonitorId::CLUSTER, @last_sent_monitors[MonitorId::CLUSTER].old_state, @last_sent_monitors[MonitorId::CLUSTER].new_state, @last_sent_monitors[MonitorId::CLUSTER].transition_time, AggregationAlgorithm::WORSTOF, nil, {}) end changed_monitors[MonitorId::CLUSTER] = top_level_monitor @monitor_set.get_map.each{|monitor_instance_id, monitor| if @last_sent_monitors.key?(monitor_instance_id) - last_sent_monitor_state = @last_sent_monitors[monitor_instance_id] - if last_sent_monitor_state.downcase != monitor.state.downcase + last_sent_monitor_state = @last_sent_monitors[monitor_instance_id].new_state + if last_sent_monitor_state.downcase != monitor.new_state.downcase changed_monitors[monitor_instance_id] = monitor end else @@ -70,7 +70,7 @@ def get_changed_monitors def update_last_sent_monitors @monitor_set.get_map.map{|instance_id, monitor| - @last_sent_monitors[instance_id] = monitor.state + @last_sent_monitors[instance_id] = monitor } end diff --git a/source/code/plugin/health/monitor_factory.rb b/source/code/plugin/health/monitor_factory.rb index 68d3cfe70..f09b9f1a6 100644 --- a/source/code/plugin/health/monitor_factory.rb +++ b/source/code/plugin/health/monitor_factory.rb @@ -8,6 +8,7 @@ def initialize def create_unit_monitor(monitor_state_transition) return UnitMonitor.new(monitor_state_transition.monitor_id, monitor_state_transition.monitor_instance_id, + monitor_state_transition.old_state, monitor_state_transition.new_state, monitor_state_transition.transition_date_time, monitor_state_transition.labels, @@ -16,7 +17,14 @@ def create_unit_monitor(monitor_state_transition) end def create_aggregate_monitor(monitor_id, monitor_instance_id, labels, aggregation_algorithm, aggregation_algorithm_params, child_monitor) - return 
AggregateMonitor.new(monitor_id, monitor_instance_id, child_monitor.state, child_monitor.transition_time, aggregation_algorithm, aggregation_algorithm_params, labels) + return AggregateMonitor.new(monitor_id, + monitor_instance_id, + child_monitor.old_state, + child_monitor.new_state, + child_monitor.transition_time, + aggregation_algorithm, + aggregation_algorithm_params, + labels) end end end \ No newline at end of file diff --git a/source/code/plugin/health/unit_monitor.rb b/source/code/plugin/health/unit_monitor.rb index b025bf40f..7fd8d201b 100644 --- a/source/code/plugin/health/unit_monitor.rb +++ b/source/code/plugin/health/unit_monitor.rb @@ -4,13 +4,14 @@ module HealthModel class UnitMonitor - attr_accessor :monitor_id, :monitor_instance_id, :state, :transition_time, :labels, :config, :details, :is_aggregate_monitor + attr_accessor :monitor_id, :monitor_instance_id, :old_state, :new_state, :transition_time, :labels, :config, :details, :is_aggregate_monitor # constructor - def initialize(monitor_id, monitor_instance_id, state, transition_time, labels, config, details) + def initialize(monitor_id, monitor_instance_id, old_state, new_state, transition_time, labels, config, details) @monitor_id = monitor_id @monitor_instance_id = monitor_instance_id - @state = state + @old_state = old_state + @new_state = new_state @transition_time = transition_time @labels = JSON.parse(labels) @config = config From 81df39f52d1212e36deed1eae8b2455b32969979 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 23 May 2019 00:39:06 -0700 Subject: [PATCH 39/90] Remove Controller Name from labels and details, use Deployment/DaemonSet Name, exclude Jobs --- installer/conf/health_model_definition.json | 14 +- source/code/plugin/HealthMonitorConstants.rb | 23 --- .../code/plugin/HealthMonitorSignalReducer.rb | 26 +-- source/code/plugin/HealthMonitorUtils.rb | 153 ++++++++++++------ source/code/plugin/KubernetesApiClient.rb | 14 +- source/code/plugin/filter_cadvisor_health.rb | 1 - 
.../plugin/filter_health_model_builder.rb | 55 ++++--- .../code/plugin/health/aggregate_monitor.rb | 5 + .../aggregate_monitor_instance_id_labels.rb | 5 +- .../plugin/health/health_model_constants.rb | 8 +- .../plugin/health/health_model_definition.rb | 4 - source/code/plugin/health/monitor_set.rb | 26 +-- source/code/plugin/in_kube_health.rb | 68 +++----- source/code/plugin/mock_data.json | 24 +-- 14 files changed, 219 insertions(+), 207 deletions(-) diff --git a/installer/conf/health_model_definition.json b/installer/conf/health_model_definition.json index a90f89f52..66c104a2c 100644 --- a/installer/conf/health_model_definition.json +++ b/installer/conf/health_model_definition.json @@ -1,14 +1,15 @@ [ { "monitor_id": "workload_pods_ready_percentage", - "parent_monitor_id": "controller", + "parent_monitor_id": "pod_aggregator", "labels": [ "monitor.azure.com/namespace", - "monitor.azure.com/controller-name" + "monitor.azure.com/pod-aggregator", + "monitor.azure.com/pod-aggregator-kind" ] }, { - "monitor_id": "controller", + "monitor_id": "pod_aggregator", "parent_monitor_id": "namespace", "labels": [ "monitor.azure.com/namespace" @@ -16,14 +17,15 @@ }, { "monitor_id": "system_pods_ready_percentage", - "parent_monitor_id": "system_controllers", + "parent_monitor_id": "system_pod_aggregator", "labels": [ "monitor.azure.com/namespace", - "monitor.azure.com/controller-name" + "monitor.azure.com/pod-aggregator", + "monitor.azure.com/pod-aggregator-kind" ] }, { - "monitor_id": "system_controllers", + "monitor_id": "system_pod_aggregator", "parent_monitor_id": "k8s_infrastructure" }, { diff --git a/source/code/plugin/HealthMonitorConstants.rb b/source/code/plugin/HealthMonitorConstants.rb index eaa561ae3..b208d700a 100644 --- a/source/code/plugin/HealthMonitorConstants.rb +++ b/source/code/plugin/HealthMonitorConstants.rb @@ -19,27 +19,4 @@ class HealthMonitorConstants DEFAULT_FAIL_PERCENTAGE = 90.0 DEFAULT_MONITOR_TIMEOUT = 240 #4 hours 
DEFAULT_SAMPLES_BEFORE_NOTIFICATION = 3 -end - -class HealthMonitorRecordFields - CLUSTER_ID = "ClusterId" - MONITOR_ID = "MonitorId" - MONITOR_INSTANCE_ID = "MonitorInstanceId" - MONITOR_LABELS = "MonitorLabels" - DETAILS = "Details" - MONITOR_CONFIG = "MonitorConfig" - OLD_STATE = "OldState" - NEW_STATE = "NewState" - AGENT_COLLECTION_TIME = "AgentCollectionTime" - TIME_FIRST_OBSERVED = "TimeFirstObserved" - NODE_NAME = "NodeName" - CONTROLLER_NAME = "ControllerName" - HEALTH_ASPECT = "HealthAspect" - CONTAINER_ID = "ContainerID" -end - -class HealthAspect - NODES = "Nodes" - KUBERNETES_INFRASTRUCTURE = "Kubernetes infrastructure" - WORKLOAD = "Workload" end \ No newline at end of file diff --git a/source/code/plugin/HealthMonitorSignalReducer.rb b/source/code/plugin/HealthMonitorSignalReducer.rb index c9aefb5cd..92375787a 100644 --- a/source/code/plugin/HealthMonitorSignalReducer.rb +++ b/source/code/plugin/HealthMonitorSignalReducer.rb @@ -4,14 +4,13 @@ require 'time' require 'json' require_relative 'KubernetesApiClient' +require_relative 'health/health_model_constants' class HealthMonitorSignalReducer @@firstMonitorRecordSent = {} class << self - def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, controller_name: nil, node_name: nil) - #log.debug "reduceSignal MonitorId: #{monitor_id} Key : #{key} controller_name: #{controller_name} node_name #{node_name}" - #log.debug "monitorConfig #{monitor_config} monitor_id #{monitor_id}" + def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, node_name: nil) health_monitor_instance_state = HealthMonitorState.getHealthMonitorState(monitor_instance_id) health_monitor_records = health_monitor_instance_state.prev_records @@ -38,7 +37,7 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, health_monitor_instance_state.prev_sent_record_time = latest_record_time #log.debug "After Updating Monitor State #{health_monitor_instance_state}" 
HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) - return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, node_name: node_name, controller_name: controller_name) + return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, node_name: node_name) else #log.debug "Monitor timeout not reached #{time_elapsed}" #log.debug "Timeout not reached for #{monitor_id}" @@ -57,7 +56,7 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, #FIXME: if record count = 1, then send it, if it is greater than 1 and less than SamplesBeforeNotification, NO-OP. If equal to SamplesBeforeNotification, then check for consistency in state change if health_monitor_instance_state.prev_records.size == 1 #log.debug "Only One Record" - return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, controller_name: controller_name, node_name: node_name) + return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, node_name: node_name) elsif health_monitor_instance_state.prev_records.size < monitor_config["SamplesBeforeNotification"].to_i log.debug "Prev records size < SamplesBeforeNotification for #{monitor_instance_id}" return nil @@ -78,7 +77,7 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, health_monitor_instance_state.prev_sent_record_time = latest_record_time #log.debug "After Updating Monitor State #{health_monitor_instance_state}" HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) - return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, controller_name: controller_name, node_name: node_name) + return formatRecord(log, monitor_id, monitor_instance_id, 
health_monitor_instance_state, monitor_config, key: key, node_name: node_name) else #log.debug "Monitor timeout not reached #{time_elapsed}" #log.debug "Timeout not reached for #{monitor_id}" @@ -92,7 +91,7 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, health_monitor_instance_state.prev_sent_record_time = latest_record_time health_monitor_instance_state.state_change_time = first_record["timestamp"] HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) - return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, controller_name: controller_name, node_name: node_name) + return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, node_name: node_name) else log.debug "No consistent state change for monitor #{monitor_id}" return nil @@ -103,15 +102,17 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, return nil end - def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: nil, controller_name: nil, node_name: nil) - #log.debug "formatRecord key:#{key} controller_name: #{controller_name} node_name #{node_name}" - - #log.debug "Health Monitor Instance State #{health_monitor_instance_state}" + def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: nil, node_name: nil) + log.debug "Health Monitor Instance State #{health_monitor_instance_state}" labels = HealthMonitorUtils.getClusterLabels #log.debug "Labels : #{labels}" - monitor_labels = HealthMonitorUtils.getMonitorLabels(log, monitor_id, key: key, controller_name: controller_name, node_name: node_name) + namespace = health_monitor_instance_state.prev_records[0]['details']['namespace'] + pod_aggregator = health_monitor_instance_state.prev_records[0]['details']['podAggregator'] + pod_aggregator_kind = 
health_monitor_instance_state.prev_records[0]['details']['podAggregatorKind'] + + monitor_labels = HealthMonitorUtils.getMonitorLabels(log, monitor_id, key: key, pod_aggregator: pod_aggregator, node_name: node_name, namespace: namespace, pod_aggregator_kind: pod_aggregator_kind) #log.debug "Monitor Labels : #{monitor_labels}" if !monitor_labels.nil? @@ -142,6 +143,7 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s health_monitor_record = {} health_monitor_record["ClusterId"] = KubernetesApiClient.getClusterId + health_monitor_record["ClusterName"] = KubernetesApiClient.getClusterName health_monitor_record["MonitorLabels"] = labels.to_json health_monitor_record["MonitorId"] = monitor_id health_monitor_record["MonitorInstanceId"] = monitor_instance_id diff --git a/source/code/plugin/HealthMonitorUtils.rb b/source/code/plugin/HealthMonitorUtils.rb index 2b276f971..2eaf2c8e0 100644 --- a/source/code/plugin/HealthMonitorUtils.rb +++ b/source/code/plugin/HealthMonitorUtils.rb @@ -4,6 +4,7 @@ require_relative 'KubernetesApiClient' require_relative 'HealthMonitorConstants' require 'time' +require 'json' class HealthMonitorUtils @@ -89,19 +90,6 @@ def getContainerKeyFromInstanceName(instance_name) def getMonitorInstanceId(log, monitor_id, args = []) #log.debug "getMonitorInstanceId" string_to_hash = args.join("/") - # # Container Level Monitor - # if args.key?("cluster_id") && args.key?("node_name") && args.key?("key") - # string_to_hash = [args['cluster_id'], args['node_name'], args['key']].join("/") - # elsif args.key?("cluster_id") && args.key?("node_name") - # string_to_hash = [args['cluster_id'], args['node_name']].join("/") - # elsif args.key?("cluster_id") && args.key?("namespace") && args.key?("controller_name") && args.key?("key") - # string_to_hash = [args['cluster_id'], args['namespace'], args['controller_name'], args['key']].join("/") - # elsif args.key?("cluster_id") && args.key?("namespace") && args.key?("controller_name") && 
!args.key?("key") - # string_to_hash = [args['cluster_id'], args['namespace'], args['controller_name']].join("/") - # elsif args.key?("cluster_id") && !args.key?("namespace") && !args.key?("controller_name") && !args.key?("key") - # string_to_hash = [args['cluster_id']].join("/") - # end - @log.info "String to Hash : #{string_to_hash}" return "#{monitor_id}-#{Digest::MD5.hexdigest(string_to_hash)}" end @@ -122,22 +110,21 @@ def getClusterLabels return labels end - def getMonitorLabels(log, monitor_id, key: nil, controller_name: nil, node_name: nil) - #log.debug "get MonitorLabels key : #{key} controller_name #{controller_name} monitor_id #{monitor_id} node_name #{node_name}" + def getMonitorLabels(log, monitor_id, key: nil, pod_aggregator: nil, node_name: nil, namespace: nil, pod_aggregator_kind: nil) + log.debug "monitor_id #{monitor_id} pod_aggregator #{pod_aggregator} pod_aggregator_kind #{pod_aggregator_kind} namespace #{namespace}" monitor_labels = {} case monitor_id when HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::POD_STATUS - #log.debug "Getting Monitor labels for Workload/ManagedInfra Monitors #{controller_name} #{@@controllerMapping}" if !key.nil? #container - monitor_labels['monitor.azure.com/controller-name'] = getContainerControllerName(key) + monitor_labels['monitor.azure.com/pod-aggregator'] = getContainerControllerName(key) monitor_labels['monitor.azure.com/namespace'] = getContainerNamespace(key) - elsif !controller_name.nil? 
- monitor_labels['monitor.azure.com/controller-name'] = controller_name - monitor_labels['monitor.azure.com/namespace'] = getControllerNamespace(controller_name) + else + monitor_labels['monitor.azure.com/pod-aggregator'] = pod_aggregator.split('~~')[1] + monitor_labels['monitor.azure.com/pod-aggregator-kind'] = pod_aggregator_kind + monitor_labels['monitor.azure.com/namespace'] = namespace end return monitor_labels when HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_KUBELET_HEALTH_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID, HealthMonitorConstants::NODE_CONTAINER_RUNTIME_MONITOR_ID - #log.debug "Getting Node Labels " @@nodeInventory["items"].each do |node| if !node_name.nil? && !node['metadata']['name'].nil? && node_name == node['metadata']['name'] @@ -170,14 +157,19 @@ def refreshKubernetesApiData(log, hostName, force: false) podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods").body) end podInventory['items'].each do |pod| - controller_name = pod['metadata']['ownerReferences'][0]['name'] + has_owner = !pod['metadata']['ownerReferences'].nil? + if !has_owner + pod_aggregator = pod['metadata']['name'] + else + pod_aggregator = pod['metadata']['ownerReferences'][0]['name'] + end namespace = pod['metadata']['namespace'] - @@controllerMapping[controller_name] = namespace - #log.debug "controller_name #{controller_name} namespace #{namespace}" + @@controllerMapping[pod_aggregator] = namespace + #log.debug "pod_aggregator #{pod_aggregator} namespace #{namespace}" pod['spec']['containers'].each do |container| key = [pod['metadata']['uid'], container['name']].join('/') - if !container['resources']['limits'].nil? && !container['resources']['limits']['cpu'].nil? + if !container['resources'].empty? && !container['resources']['limits'].nil? && !container['resources']['limits']['cpu'].nil? 
cpu_limit_value = KubernetesApiClient.getMetricNumericValue('cpu', container['resources']['limits']['cpu']) else @log.info "CPU limit not set for container : #{container['name']}. Using Node Capacity" @@ -185,7 +177,7 @@ def refreshKubernetesApiData(log, hostName, force: false) cpu_limit_value = @cpu_capacity end - if !container['resources']['limits'].nil? && !container['resources']['limits']['memory'].nil? + if !container['resources'].empty? && !container['resources']['limits'].nil? && !container['resources']['limits']['memory'].nil? #@log.info "Raw Memory Value #{container['resources']['limits']['memory']}" memory_limit_value = KubernetesApiClient.getMetricNumericValue('memory', container['resources']['limits']['memory']) else @@ -193,11 +185,11 @@ def refreshKubernetesApiData(log, hostName, force: false) memory_limit_value = @memory_capacity end - @@containerMetadata[key] = {"cpuLimit" => cpu_limit_value, "memoryLimit" => memory_limit_value, "controllerName" => controller_name, "namespace" => namespace} + @@containerMetadata[key] = {"cpuLimit" => cpu_limit_value, "memoryLimit" => memory_limit_value, "controllerName" => pod_aggregator, "namespace" => namespace} end end rescue => e - @log.info "Error Refreshing Container Resource Limits #{e}" + @log.info "Error Refreshing Container Resource Limits #{e.backtrace}" end # log.info "Controller Mapping #{@@controllerMapping}" # log.info "Node Inventory #{@@nodeInventory}" @@ -212,7 +204,7 @@ def getContainerMetadata(key) else # This is to handle new containers/controllers that might have come up since the last refresh @log.info "Adhoc refresh getContainerMetadata" - HealthMonitorUtils.refreshKubernetesApiData(@log,nil, force: true) + HealthMonitorUtils.refreshKubernetesApiData(@log, nil, force: true) if @@containerMetadata.has_key?(key) return @@containerMetadata[key] else @@ -266,20 +258,20 @@ def getContainerNamespace(key) end end - def getControllerNamespace(controller_name) - if 
@@controllerMapping.has_key?(controller_name) - return @@controllerMapping[controller_name] - else - @log.info "Adhoc refresh getControllerNamespace" - # This is to handle new containers/controllers that might have come up since the last refresh - HealthMonitorUtils.refreshKubernetesApiData(@log,nil, force: true) - if @@controllerMapping.has_key?(controller_name) - return @@controllerMapping[controller_name] - else - return '' - end - end - end + # def getControllerNamespace(controller_name) + # if @@controllerMapping.has_key?(controller_name) + # return @@controllerMapping[controller_name] + # else + # @log.info "Adhoc refresh getControllerNamespace" + # # This is to handle new containers/controllers that might have come up since the last refresh + # HealthMonitorUtils.refreshKubernetesApiData(@log,nil, force: true) + # if @@controllerMapping.has_key?(controller_name) + # return @@controllerMapping[controller_name] + # else + # return '' + # end + # end + # end def getClusterCpuMemoryCapacity begin @@ -355,17 +347,79 @@ def getLogHandle return @log end - def getPodsReadyHash(pod_inventory) + def getPodsReadyHash(pod_inventory, deployment_inventory) pods_ready_percentage_hash = {} + deployment_lookup = {} + deployment_inventory['items'].each do |deployment| + match_labels = deployment['spec']['selector']['matchLabels'].to_h + namespace = deployment['metadata']['namespace'] + match_labels.each{|k,v| + deployment_lookup["#{namespace}-#{k}=#{v}"] = "#{deployment['metadata']['namespace']}~~#{deployment['metadata']['name']}" + } + end pod_inventory['items'].each do |pod| begin - controller_name = pod['metadata']['ownerReferences'][0]['name'] + # controller_name = pod['metadata']['ownerReferences'][0]['name'] + # namespace = pod['metadata']['namespace'] + # status = pod['status']['phase'] + + # if pods_ready_percentage_hash.key?(controller_name) + # total_pods = pods_ready_percentage_hash[controller_name]['totalPods'] + # pods_ready = 
pods_ready_percentage_hash[controller_name]['podsReady'] + # else + # total_pods = 0 + # pods_ready = 0 + # end + + # total_pods += 1 + # if status == 'Running' + # pods_ready += 1 + # end + # pods_ready_percentage_hash[controller_name] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace} + has_owner = !pod['metadata']['ownerReferences'].nil? + owner_kind = '' + if has_owner + owner_kind = pod['metadata']['ownerReferences'][0]['kind'] + controller_name = pod['metadata']['ownerReferences'][0]['name'] + else + owner_kind = pod['kind'] + controller_name = pod['metadata']['name'] + #@log.info "#{JSON.pretty_generate(pod)}" + end + namespace = pod['metadata']['namespace'] status = pod['status']['phase'] - if pods_ready_percentage_hash.key?(controller_name) - total_pods = pods_ready_percentage_hash[controller_name]['totalPods'] - pods_ready = pods_ready_percentage_hash[controller_name]['podsReady'] + pod_aggregator = '' + if owner_kind.nil? + owner_kind = 'Pod' + end + case owner_kind.downcase + when 'job' + # we are excluding jobs + next + when 'replicaset' + # get the labels, and see if there is a match. If there is, it is the deployment. If not, use replica set name/controller name + labels = pod['metadata']['labels'].to_h + labels.each {|k,v| + lookup_key = "#{namespace}-#{k}=#{v}" + if deployment_lookup.key?(lookup_key) + pod_aggregator = deployment_lookup[lookup_key] + break + end + } + if pod_aggregator.empty? 
+ pod_aggregator = "#{namespace}~~#{controller_name}" + end + when 'daemonset' + pod_aggregator = "#{namespace}~~#{controller_name}" + else + pod_aggregator = "#{namespace}~~#{pod['metadata']['name']}" + end + + if pods_ready_percentage_hash.key?(pod_aggregator) + total_pods = pods_ready_percentage_hash[pod_aggregator]['totalPods'] + pods_ready = pods_ready_percentage_hash[pod_aggregator]['podsReady'] else total_pods = 0 pods_ready = 0 @@ -375,7 +429,8 @@ def getPodsReadyHash(pod_inventory) if status == 'Running' pods_ready += 1 end - pods_ready_percentage_hash[controller_name] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace} + + pods_ready_percentage_hash[pod_aggregator] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace, 'pod_aggregator' => pod_aggregator, 'kind' => owner_kind} rescue => e @log.info "Error when processing pod #{pod['metadata']['name']} #{e.message}" end diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index 73f819fb7..a858e877c 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -30,13 +30,13 @@ def initialize end class << self - def getKubeResourceInfo(resource) + def getKubeResourceInfo(resource, api_version: nil) headers = {} response = nil - @Log.info "Getting Kube resource" + @Log.info "Getting Kube resource api_version #{api_version}" @Log.info resource begin - resourceUri = getResourceUri(resource) + resourceUri = getResourceUri(resource, api_version: api_version) if !resourceUri.nil? 
uri = URI.parse(resourceUri) http = Net::HTTP.new(uri.host, uri.port) @@ -85,10 +85,14 @@ def getClusterRegion end end - def getResourceUri(resource) + def getResourceUri(resource, api_version: nil) begin if ENV["KUBERNETES_SERVICE_HOST"] && ENV["KUBERNETES_PORT_443_TCP_PORT"] - return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}/api/" + @@ApiVersion + "/" + resource + if !api_version.nil? + return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}/apis/" + api_version + "/" + resource + end + api_version = @@ApiVersion + return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}/api/" + api_version + "/" + resource else @Log.warn ("Kubernetes environment variable not set KUBERNETES_SERVICE_HOST: #{ENV["KUBERNETES_SERVICE_HOST"]} KUBERNETES_PORT_443_TCP_PORT: #{ENV["KUBERNETES_PORT_443_TCP_PORT"]}. Unable to form resourceUri") return nil diff --git a/source/code/plugin/filter_cadvisor_health.rb b/source/code/plugin/filter_cadvisor_health.rb index 68752deaf..0425bfcbf 100644 --- a/source/code/plugin/filter_cadvisor_health.rb +++ b/source/code/plugin/filter_cadvisor_health.rb @@ -221,7 +221,6 @@ def process_node_cpu_record(record, metric_value) health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName - health_record[HealthMonitorRecordFields::HEALTH_ASPECT] = HealthAspect.NODES @log.info "Processed Node CPU" return health_record end diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 07e1b26a1..b5f8013bd 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -7,6 +7,7 @@ module Fluent require 'json' Dir[File.join(__dir__, './health', '*.rb')].each { |file| require file } + class 
FilterHealthModelBuilder < Filter Fluent::Plugin.register_filter('filter_health_model_builder', self) @@ -16,6 +17,7 @@ class FilterHealthModelBuilder < Filter attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_transition_processor, :state_finalizers, :monitor_set, :model_builder @@healthMonitorConfig = HealthMonitorUtils.getHealthMonitorConfig + @@rewrite_tag = 'oms.api.KubeHealth.AgentCollectionTime' def initialize super @@ -47,6 +49,8 @@ def shutdown end def filter_stream(tag, es) + new_es = MultiEventStream.new + time = Time.now begin if tag.start_with?("oms.api.KubeHealth.DaemonSet") records = [] @@ -77,7 +81,6 @@ def filter_stream(tag, es) record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], @@healthMonitorConfig[monitor_id], key: record[HealthMonitorRecordFields::CONTAINER_ID], - controller_name: record[HealthMonitorRecordFields::CONTROLLER_NAME], node_name: record[HealthMonitorRecordFields::NODE_NAME] ) filtered_records.push(MonitorStateTransition.new( @@ -96,36 +99,46 @@ def filter_stream(tag, es) @log.info "Filtered Records size = #{filtered_records.size}" - - # if raw_records.size > 0 - - # raw_records.each{|record| - # @log.debug "#{record}" - # } - - # File.open("/tmp/mock_data-#{Time.now.to_i}.json", "w") do |f| - # f.write(JSON.pretty_generate(raw_records)) - # end - # end - + File.open("/tmp/mock_data-#{Time.now.to_i}.json", "w") do |f| + f.write(JSON.pretty_generate(raw_records)) + end @model_builder.process_state_transitions(filtered_records) - monitors_map = @model_builder.finalize_model - @log.debug "monitors map size = #{monitors_map.size}" - # monitors_map.each{|key, value| - # @log.debug "#{key} ==> #{value.state}" - # } - + monitors = @model_builder.finalize_model + @log.debug "monitors map size = #{monitors.size}" + + monitors.map {|monitor_instance_id, monitor| + record = {} + + record[HealthMonitorRecordFields::MONITOR_ID] = monitor.monitor_id + record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] 
= monitor.monitor_instance_id + record[HealthMonitorRecordFields::MONITOR_LABELS] = monitor.labels + record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId + record[HealthMonitorRecordFields::CLUSTER_NAME] = KubernetesApiClient.getClusterName + record[HealthMonitorRecordFields::OLD_STATE] = monitor.old_state + record[HealthMonitorRecordFields::NEW_STATE] = monitor.new_state + record[HealthMonitorRecordFields::DETAILS] = monitor.details if monitor.methods.include? :details + record[HealthMonitorRecordFields::MONITOR_CONFIG] = monitor.config if monitor.methods.include? :config + record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = Time.now.utc.iso8601 + record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = monitor.transition_time + + new_es.add(time, record) + } + router.emit_stream(@@rewrite_tag, new_es) + # return an empty event stream, else the match will throw a NoMethodError return [] + elsif tag.start_with?("oms.api.KubeHealth.AgentCollectionTime") + # this filter also acts as a pass through as we are rewriting the tag and emitting to the fluent stream + es else - raise "Invalid tag #{tag} received" + raise 'Invalid tag #{tag} received' end + rescue => e @log.warn "Message: #{e.message} Backtrace: #{e.backtrace}" return nil end - es end end end diff --git a/source/code/plugin/health/aggregate_monitor.rb b/source/code/plugin/health/aggregate_monitor.rb index 89c93187e..76eb29310 100644 --- a/source/code/plugin/health/aggregate_monitor.rb +++ b/source/code/plugin/health/aggregate_monitor.rb @@ -31,12 +31,14 @@ def initialize( @is_aggregate_monitor = true end + # adds a member monitor as a child def add_member_monitor(member_monitor_instance_id) unless @member_monitors.key?(member_monitor_instance_id) @member_monitors[member_monitor_instance_id] = true end end + #removes a member monitor def remove_member_monitor(member_monitor_instance_id) if @member_monitors.key?(member_monitor_instance_id) 
@member_monitors.delete(member_monitor_instance_id) @@ -48,6 +50,7 @@ def get_member_monitors @member_monitors.map(&:first) end + # calculates the state of the aggregate monitor based on aggregation algorithm and child monitor states def calculate_state(monitor_set) case @aggregation_algorithm when AggregationAlgorithm::WORSTOF @@ -84,10 +87,12 @@ def calculate_worst_of_state(monitor_set, state_type) return MonitorState::HEALTHY end + # calculates a percentage state, given the aggregation algorithm parameters def calculate_percentage_state end + # maps states of member monitors to counts def map_member_monitor_states(monitor_set, state_type) member_monitor_instance_ids = get_member_monitors if member_monitor_instance_ids.nil? || member_monitor_instance_ids.size == 0 diff --git a/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb b/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb index be3f3a704..c42b403b3 100644 --- a/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb +++ b/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb @@ -1,10 +1,11 @@ module HealthModel class AggregateMonitorInstanceIdLabels @@id_labels_mapping = { - MonitorId::CONTROLLER => ["monitor.azure.com/namespace", "monitor.azure.com/controller-name"], + MonitorId::SYSTEM_POD_AGGREGATOR => ["monitor.azure.com/namespace", "monitor.azure.com/pod-aggregator"], + MonitorId::POD_AGGREGATOR => ["monitor.azure.com/namespace", "monitor.azure.com/pod-aggregator"], MonitorId::NODE => ["agentpool", "kubernetes.io/role", "kubernetes.io/hostname"], MonitorId::NAMESPACE => ["monitor.azure.com/namespace"], - MonitorId::AGENT_NODE_POOL => ["agentpool"] + MonitorId::AGENT_NODE_POOL => ["agentpool"], # MonitorId::ALL_AGENT_NODE_POOLS => [], # MonitorId::ALL_NODE_POOLS => [], # MonitorId::ALL_NODES => [], diff --git a/source/code/plugin/health/health_model_constants.rb b/source/code/plugin/health/health_model_constants.rb index 0c7f541df..671239f2c 100644 
--- a/source/code/plugin/health/health_model_constants.rb +++ b/source/code/plugin/health/health_model_constants.rb @@ -28,13 +28,15 @@ class MonitorId MANAGED_INFRA = 'managed_infra' CAPACITY = 'capacity'; - CONTROLLER = 'controller'; + POD_AGGREGATOR = 'pod_aggregator'; + SYSTEM_POD_AGGREGATOR = 'system_pod_aggregator' NAMESPACE = 'namespace'; NAMESPACES = 'namespaces'; end class HealthMonitorRecordFields CLUSTER_ID = "ClusterId" + CLUSTER_NAME = "ClusterName" MONITOR_ID = "MonitorId" MONITOR_INSTANCE_ID = "MonitorInstanceId" MONITOR_LABELS = "MonitorLabels" @@ -45,8 +47,8 @@ class HealthMonitorRecordFields AGENT_COLLECTION_TIME = "AgentCollectionTime" TIME_FIRST_OBSERVED = "TimeFirstObserved" NODE_NAME = "NodeName" - CONTROLLER_NAME = "ControllerName" - HEALTH_ASPECT = "HealthAspect" + POD_AGGREGATOR = "PodAggregator" + NAMESPACE = "Namespace" CONTAINER_ID = "ContainerID" end diff --git a/source/code/plugin/health/health_model_definition.rb b/source/code/plugin/health/health_model_definition.rb index 2f7492db8..6540c9e92 100644 --- a/source/code/plugin/health/health_model_definition.rb +++ b/source/code/plugin/health/health_model_definition.rb @@ -33,10 +33,6 @@ def get_parent_monitor_id(monitor) end } - # record = {"namespace" => "kube-system", "controller-name" => "kube-dns-jhdf21374d"} - # value = 'kube-system' - # left, op, right = "#{record['namespace']} == #{value}".split - # cond = left.send(op.to_sym, right) end end else diff --git a/source/code/plugin/health/monitor_set.rb b/source/code/plugin/health/monitor_set.rb index 79310a7fa..8d5994419 100644 --- a/source/code/plugin/health/monitor_set.rb +++ b/source/code/plugin/health/monitor_set.rb @@ -3,54 +3,42 @@ module HealthModel class MonitorSet attr_accessor :monitors - # attr_reader :changed_monitors + #constructor def initialize @monitors = {} end + # checks if the monitor is present in the set def contains?(monitor_instance_id) @monitors.key?(monitor_instance_id) end + # adds or updates the monitor 
def add_or_update(monitor) - # if @monitors.key?(monitor.monitor_instance_id) - # current_monitor = @monitors[monitor.monitor_instance_id] - # if current_monitor.state.downcase != monitor.state.downcase - # @monitors[monitor.monitor_instance_id] = monitor - # @changed_monitors[monitor.monitor_instance_id] = monitor - # end - # else - # @monitors[monitor.monitor_instance_id] = monitor - # @changed_monitors[monitor.monitor_instance_id] = monitor - # end @monitors[monitor.monitor_instance_id] = monitor end + # gets the monitor given the monitor instance id def get_monitor(monitor_instance_id) @monitors[monitor_instance_id] if @monitors.key?(monitor_instance_id) end + # deletes a monitor from the set def delete(monitor_instance_id) if @monitors.key?(monitor_instance_id) @monitors.delete(monitor_instance_id) end - - # if @changed_monitors.key(monitor_instance_id) - # @changed_monitors.delete(monitor_instance_id) - # end end + # gets the size of the monitor set def get_size @monitors.length end + # gets the map of monitor instance id to monitors def get_map @monitors end - - # def clear_changed_monitors - # @changed_monitors = {} - # end end end diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 879ead06c..4b8bb9e84 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -1,6 +1,9 @@ #!/usr/local/bin/ruby # frozen_string_literal: true +require_relative 'health/health_model_constants' +include HealthModel + module Fluent class KubeHealthInput < Input Plugin.register_input("kubehealth", self) @@ -76,6 +79,8 @@ def enumerate pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods") pod_inventory = JSON.parse(pod_inventory_response.body) + deployment_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("deployments", api_version: "extensions/v1beta1").body) + if node_inventory_response.code.to_i != 200 record = process_kube_api_up_monitor("fail", 
node_inventory_response) health_monitor_records.push(record) if record @@ -89,7 +94,7 @@ def enumerate health_monitor_records.push(record) if record record = process_memory_oversubscribed_monitor(pod_inventory) health_monitor_records.push(record) if record - pods_ready_hash = HealthMonitorUtils.getPodsReadyHash(pod_inventory) + pods_ready_hash = HealthMonitorUtils.getPodsReadyHash(pod_inventory, deployment_inventory) system_pods = pods_ready_hash.select{|k,v| v['namespace'] == 'kube-system'} workload_pods = pods_ready_hash.select{|k,v| v['namespace'] != 'kube-system'} @@ -103,6 +108,8 @@ def enumerate workload_pods_ready_percentage_records.each do |record| health_monitor_records.push(record) if record end + else + hmlog.info "POD INVENTORY IS NIL" end if !node_inventory.nil? @@ -110,6 +117,8 @@ def enumerate node_condition_records.each do |record| health_monitor_records.push(record) if record end + else + hmlog.info "NODE INVENTORY IS NIL" end #@@hmlog.debug "Health Monitor Records Size #{health_monitor_records.size}" @@ -147,7 +156,6 @@ def process_cpu_oversubscribed_monitor(pod_inventory) health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now - health_record[HealthMonitorRecordFields::HEALTH_ASPECT] = HealthAspect::WORKLOAD health_record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId @@hmlog.info "Successfully processed process_cpu_oversubscribed_monitor" return health_record @@ -174,7 +182,6 @@ def process_memory_oversubscribed_monitor(pod_inventory) health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now - health_record[HealthMonitorRecordFields::HEALTH_ASPECT] = HealthAspect::WORKLOAD 
health_record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId @@hmlog.info "Successfully processed process_memory_oversubscribed_monitor" return health_record @@ -201,7 +208,6 @@ def process_kube_api_up_monitor(state, response) health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now - health_record[HealthMonitorRecordFields::HEALTH_ASPECT] = HealthAspect::WORKLOAD health_record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId @@hmlog.info "Successfully processed process_kube_api_up_monitor" return health_record @@ -213,10 +219,11 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) records = [] pods_hash.keys.each do |key| - controller_name = key - total_pods = pods_hash[controller_name]['totalPods'] - pods_ready = pods_hash[controller_name]['podsReady'] - namespace = pods_hash[controller_name]['namespace'] + pod_aggregator = key + total_pods = pods_hash[pod_aggregator]['totalPods'] + pods_ready = pods_hash[pod_aggregator]['podsReady'] + namespace = pods_hash[pod_aggregator]['namespace'] + pod_aggregator_kind = pods_hash[pod_aggregator]['kind'] percent = pods_ready / total_pods * 100 timestamp = Time.now.utc.iso8601 @@ -225,10 +232,10 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) elsif config_monitor_id.downcase.start_with?("workload") state = HealthMonitorState.getStateForWorkloadPodsReadyPercentage(@@hmlog, percent, monitor_config) end - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "controllerName" => controller_name}} - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, config_monitor_id, [@@clusterId, namespace, controller_name]) + health_monitor_record = {"timestamp" => timestamp, "state" => state, 
"details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "podAggregator" => pod_aggregator, "namespace" => namespace, "podAggregatorKind" => pod_aggregator_kind}} + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, config_monitor_id, [@@clusterId, namespace, pod_aggregator]) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) - #record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) + #record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, pod_aggregator: pod_aggregator) health_record = {} time_now = Time.now.utc.iso8601 health_record[HealthMonitorRecordFields::MONITOR_ID] = config_monitor_id @@ -236,9 +243,7 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now - health_record[HealthMonitorRecordFields::HEALTH_ASPECT] = HealthAspect::WORKLOAD health_record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId - health_record[HealthMonitorRecordFields::CONTROLLER_NAME] = controller_name records.push(health_record) end @@hmlog.info "Successfully processed pods_ready_percentage for #{config_monitor_id} #{records.size}" @@ -272,7 +277,6 @@ def process_node_condition_monitor(node_inventory) health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now - health_record[HealthMonitorRecordFields::HEALTH_ASPECT] = HealthAspect::WORKLOAD health_record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId 
health_record[HealthMonitorRecordFields::NODE_NAME] = node_name node_condition_monitor_records.push(health_record) @@ -282,42 +286,6 @@ def process_node_condition_monitor(node_inventory) return node_condition_monitor_records end - def process_pod_statuses(log, pod_inventory) - monitor_id = HealthMonitorConstants::POD_STATUS - pods_ready_percentage_hash = {} - records = [] - monitor_config = @@healthMonitorConfig[monitor_id] - pod_inventory['items'].each do |pod| - controller_name = pod['metadata']['ownerReferences'][0]['name'] - namespace = pod['metadata']['namespace'] - status = pod['status']['phase'] - timestamp = Time.now.utc.iso8601 - state = '' - podUid = pod['metadata']['uid'] - conditions = pod['status']['conditions'] - details = {} - if status == 'Running' - state = 'pass' - else - state = 'fail' - end - details['status'] = status - conditions.each do |condition| - details[condition['type']] = {"Status" => condition['status'], "LastTransitionTime" => condition['lastTransitionTime']} - end - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} - - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId, namespace, controller_name, podUid]) - HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) - #record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, controller_name: controller_name) - if !record.nil? 
- records.push(record) - end - end - log.debug "Pod Status Records #{records.size}" - return records - end - def run_periodic @mutex.lock done = @finished diff --git a/source/code/plugin/mock_data.json b/source/code/plugin/mock_data.json index ca8906dc2..401844534 100644 --- a/source/code/plugin/mock_data.json +++ b/source/code/plugin/mock_data.json @@ -195,7 +195,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"heapster-fc665dc4b\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"heapster-fc665dc4b\",\"monitor.azure.com/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-57b4bbef6e083416b8d347d4da8de7a6", "NewState": "pass", @@ -226,7 +226,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": 
"{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"kube-dns-autoscaler-746998ccf6\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"kube-dns-autoscaler-746998ccf6\",\"monitor.azure.com/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-21832207f4e82c39f198f11abc39d104", "NewState": "pass", @@ -257,7 +257,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"kube-dns-v20-659876bf8d\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"kube-dns-v20-659876bf8d\",\"monitor.azure.com/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", 
"MonitorInstanceId": "system_pods_ready_percentage-8433c768b2f76a7978eb7317e9bb2f4e", "NewState": "pass", @@ -288,7 +288,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"kube-proxy\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"kube-proxy\",\"monitor.azure.com/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-29464dc11987eb670b82529ef4f5e094", "NewState": "pass", @@ -319,7 +319,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"kube-svc-redirect\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": 
"{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"kube-svc-redirect\",\"monitor.azure.com/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-62a5128950e4ad0d13a4163f85fbf7b3", "NewState": "pass", @@ -350,7 +350,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"kubernetes-dashboard-6d48dd4779\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"kubernetes-dashboard-6d48dd4779\",\"monitor.azure.com/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-d34ec2ade900ae62a713b2a8a6d1ce74", "NewState": "pass", @@ -381,7 +381,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": 
"{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"metrics-server-5cbc77f79f\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"metrics-server-5cbc77f79f\",\"monitor.azure.com/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-2f6c4157408ce6f9dc13da2fd684e716", "NewState": "pass", @@ -412,7 +412,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"omsagent\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"omsagent\",\"monitor.azure.com/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": 
"system_pods_ready_percentage-930e07ceb1ea2e952e5578b3f1557fbe", "NewState": "pass", @@ -443,7 +443,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"omsagent-rs-6dc57b8544\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"omsagent-rs-6dc57b8544\",\"monitor.azure.com/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-7d42f06622dee87b682d3b03a1e348fa", "NewState": "pass", @@ -474,7 +474,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"tunnelfront-66dd5cfc6b\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": 
"{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"tunnelfront-66dd5cfc6b\",\"monitor.azure.com/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-544c7f310f41ab8fc1196ae9f210fc83", "NewState": "pass", @@ -505,7 +505,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"diliprdeploymentnodeapps-c4fdfb446\",\"monitor.azure.com/namespace\":\"default\"}", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"diliprdeploymentnodeapps-c4fdfb446\",\"monitor.azure.com/namespace\":\"default\"}", "MonitorId": "workload_pods_ready_percentage", "MonitorInstanceId": "workload_pods_ready_percentage-5d1f09e9b9c40718528d87cdb9ec0285", "NewState": "pass", @@ -536,7 +536,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": 
"{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/controller-name\":\"vishwadeploymentnodeapps-8686cf54db\",\"monitor.azure.com/namespace\":\"default\"}", + "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"vishwadeploymentnodeapps-8686cf54db\",\"monitor.azure.com/namespace\":\"default\"}", "MonitorId": "workload_pods_ready_percentage", "MonitorInstanceId": "workload_pods_ready_percentage-56b719128d6ea1353fb2489afd8d8ed5", "NewState": "pass", From deda155fc397812be2a14b88d002e34e421d562b Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 28 May 2019 18:09:20 -0700 Subject: [PATCH 40/90] Change label namespaces, remove ClusterName from records sent, send details as a JSON if not an array --- installer/conf/health_model_definition.json | 16 +++--- installer/conf/healthmonitorconfig.json | 57 ++----------------- .../code/plugin/HealthMonitorSignalReducer.rb | 10 +++- source/code/plugin/HealthMonitorUtils.rb | 20 +++---- .../plugin/filter_health_model_builder.rb | 8 +-- .../aggregate_monitor_instance_id_labels.rb | 6 +- .../plugin/health/health_model_constants.rb | 1 - source/code/plugin/in_kube_events.rb | 9 ++- source/code/plugin/mock_data.json | 38 ++++++------- 9 files changed, 61 insertions(+), 104 deletions(-) diff --git a/installer/conf/health_model_definition.json b/installer/conf/health_model_definition.json index 66c104a2c..aa1e45c10 100644 --- a/installer/conf/health_model_definition.json +++ b/installer/conf/health_model_definition.json @@ -3,25 +3,25 @@ 
"monitor_id": "workload_pods_ready_percentage", "parent_monitor_id": "pod_aggregator", "labels": [ - "monitor.azure.com/namespace", - "monitor.azure.com/pod-aggregator", - "monitor.azure.com/pod-aggregator-kind" + "container.azm.ms/namespace", + "container.azm.ms/pod-aggregator", + "container.azm.ms/pod-aggregator-kind" ] }, { "monitor_id": "pod_aggregator", "parent_monitor_id": "namespace", "labels": [ - "monitor.azure.com/namespace" + "container.azm.ms/namespace" ] }, { "monitor_id": "system_pods_ready_percentage", "parent_monitor_id": "system_pod_aggregator", "labels": [ - "monitor.azure.com/namespace", - "monitor.azure.com/pod-aggregator", - "monitor.azure.com/pod-aggregator-kind" + "container.azm.ms/namespace", + "container.azm.ms/pod-aggregator", + "container.azm.ms/pod-aggregator-kind" ] }, { @@ -35,7 +35,7 @@ { "monitor_id": "namespace", "labels": [ - "monitor.azure.com/namespace" + "container.azm.ms/namespace" ], "parent_monitor_id": "namespaces" }, diff --git a/installer/conf/healthmonitorconfig.json b/installer/conf/healthmonitorconfig.json index c22ef8b99..a78dbd22b 100644 --- a/installer/conf/healthmonitorconfig.json +++ b/installer/conf/healthmonitorconfig.json @@ -2,71 +2,26 @@ "node_cpu_utilization_percentage": { "PassPercentage": 80.0, "FailPercentage": 90.0, - "SamplesBeforeNotification": 3, - "NotifyInstantly" : false, - "MonitorTimeOut": 240 + "SamplesBeforeNotification": 3 }, "node_memory_utilization_percentage": { "PassPercentage": 80.0, "FailPercentage": 90.0, - "SamplesBeforeNotification": 3, - "NotifyInstantly" : false, - "MonitorTimeOut": 240 - }, - "container_manager_runtime_running": { - "MonitorTimeOut": 240, - "NotifyInstantly" : true - }, - "kubelet_running": { - "MonitorTimeOut": 240, - "NotifyInstantly" : true - }, - "node_condition": { - "MonitorTimeOut": 240, - "NotifyInstantly" : true - }, - "pod_status": { - "MonitorTimeOut": 5, - "NotifyInstantly" : true - }, - "is_oversubscribed_cpu": { - "MonitorTimeOut": 240, - 
"NotifyInstantly" : true - }, - "is_oversubscribed_memory": { - "MonitorTimeOut": 240, - "NotifyInstantly" : true + "SamplesBeforeNotification": 3 }, "container_cpu_utilization_percentage": { "PassPercentage": 80.0, "FailPercentage": 90.0, - "SamplesBeforeNotification": 3, - "NotifyInstantly" : false, - "MonitorTimeOut": 240 + "SamplesBeforeNotification": 3 }, "container_memory_utilization_percentage": { "PassPercentage": 80.0, "FailPercentage": 90.0, - "SamplesBeforeNotification": 3, - "NotifyInstantly" : false, - "MonitorTimeOut": 240 + "SamplesBeforeNotification": 3 }, - "workload_pods_ready_percentage" : { + "workload_pods_ready_percentage": { "PassPercentage": 100.0, "FailPercentage": 90.0, - "SamplesBeforeNotification": 2, - "NotifyInstantly" : false, - "MonitorTimeOut": 240 - }, - "system_pods_ready_percentage" : { - "PassPercentage": 100.0, - "FailPercentage": 90.0, - "SamplesBeforeNotification": 2, - "NotifyInstantly" : false, - "MonitorTimeOut": 240 - }, - "kube_api_up": { - "MonitorTimeOut": 240, - "NotifyInstantly" : true + "SamplesBeforeNotification": 2 } } \ No newline at end of file diff --git a/source/code/plugin/HealthMonitorSignalReducer.rb b/source/code/plugin/HealthMonitorSignalReducer.rb index 92375787a..d634adf06 100644 --- a/source/code/plugin/HealthMonitorSignalReducer.rb +++ b/source/code/plugin/HealthMonitorSignalReducer.rb @@ -103,7 +103,7 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, end def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: nil, node_name: nil) - log.debug "Health Monitor Instance State #{health_monitor_instance_state}" + #log.debug "Health Monitor Instance State #{health_monitor_instance_state}" labels = HealthMonitorUtils.getClusterLabels #log.debug "Labels : #{labels}" @@ -135,7 +135,12 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s #log.debug "monitor_config #{monitor_config}" records = [] - 
details = prev_records #.each do |record| + + if prev_records.size == 1 + details = prev_records[0] + else + details = prev_records #.each do |record| + end time_observed = Time.now.utc.iso8601 #log.debug "Details: #{details}" @@ -143,7 +148,6 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s health_monitor_record = {} health_monitor_record["ClusterId"] = KubernetesApiClient.getClusterId - health_monitor_record["ClusterName"] = KubernetesApiClient.getClusterName health_monitor_record["MonitorLabels"] = labels.to_json health_monitor_record["MonitorId"] = monitor_id health_monitor_record["MonitorInstanceId"] = monitor_instance_id diff --git a/source/code/plugin/HealthMonitorUtils.rb b/source/code/plugin/HealthMonitorUtils.rb index 2eaf2c8e0..e4d5cb083 100644 --- a/source/code/plugin/HealthMonitorUtils.rb +++ b/source/code/plugin/HealthMonitorUtils.rb @@ -97,31 +97,31 @@ def getClusterLabels labels = {} cluster_id = @@clusterId region = KubernetesApiClient.getClusterRegion - labels['monitor.azure.com/cluster-region'] = region + labels['container.azm.ms/cluster-region'] = region if !cluster_id.nil? 
cluster_id_elements = cluster_id.split('/') azure_sub_id = cluster_id_elements[2] resource_group = cluster_id_elements[4] cluster_name = cluster_id_elements[8] - labels['monitor.azure.com/cluster-subscription-id'] = azure_sub_id - labels['monitor.azure.com/cluster-resource-group'] = resource_group - labels['monitor.azure.com/cluster-name'] = cluster_name + labels['container.azm.ms/cluster-subscription-id'] = azure_sub_id + labels['container.azm.ms/cluster-resource-group'] = resource_group + labels['container.azm.ms/cluster-name'] = cluster_name end return labels end def getMonitorLabels(log, monitor_id, key: nil, pod_aggregator: nil, node_name: nil, namespace: nil, pod_aggregator_kind: nil) - log.debug "monitor_id #{monitor_id} pod_aggregator #{pod_aggregator} pod_aggregator_kind #{pod_aggregator_kind} namespace #{namespace}" + #log.debug "monitor_id #{monitor_id} pod_aggregator #{pod_aggregator} pod_aggregator_kind #{pod_aggregator_kind} namespace #{namespace}" monitor_labels = {} case monitor_id when HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::POD_STATUS if !key.nil? 
#container - monitor_labels['monitor.azure.com/pod-aggregator'] = getContainerControllerName(key) - monitor_labels['monitor.azure.com/namespace'] = getContainerNamespace(key) + monitor_labels['container.azm.ms/pod-aggregator'] = getContainerControllerName(key) + monitor_labels['container.azm.ms/namespace'] = getContainerNamespace(key) else - monitor_labels['monitor.azure.com/pod-aggregator'] = pod_aggregator.split('~~')[1] - monitor_labels['monitor.azure.com/pod-aggregator-kind'] = pod_aggregator_kind - monitor_labels['monitor.azure.com/namespace'] = namespace + monitor_labels['container.azm.ms/pod-aggregator'] = pod_aggregator.split('~~')[1] + monitor_labels['container.azm.ms/pod-aggregator-kind'] = pod_aggregator_kind + monitor_labels['container.azm.ms/namespace'] = namespace end return monitor_labels when HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_KUBELET_HEALTH_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID, HealthMonitorConstants::NODE_CONTAINER_RUNTIME_MONITOR_ID diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index b5f8013bd..674a1af40 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -95,13 +95,14 @@ def filter_stream(tag, es) )) if filtered_record raw_records.push(filtered_record) if filtered_record + @log.info "#{filtered_record["MonitorInstanceId"]}" if filtered_record } @log.info "Filtered Records size = #{filtered_records.size}" - File.open("/tmp/mock_data-#{Time.now.to_i}.json", "w") do |f| - f.write(JSON.pretty_generate(raw_records)) - end + # File.open("/tmp/mock_data-#{Time.now.to_i}.json", "w") do |f| + # f.write(JSON.pretty_generate(raw_records)) + # end @model_builder.process_state_transitions(filtered_records) monitors = @model_builder.finalize_model @@ -114,7 +115,6 @@ def filter_stream(tag, es) 
record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor.monitor_instance_id record[HealthMonitorRecordFields::MONITOR_LABELS] = monitor.labels record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId - record[HealthMonitorRecordFields::CLUSTER_NAME] = KubernetesApiClient.getClusterName record[HealthMonitorRecordFields::OLD_STATE] = monitor.old_state record[HealthMonitorRecordFields::NEW_STATE] = monitor.new_state record[HealthMonitorRecordFields::DETAILS] = monitor.details if monitor.methods.include? :details diff --git a/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb b/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb index c42b403b3..a435d778d 100644 --- a/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb +++ b/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb @@ -1,10 +1,10 @@ module HealthModel class AggregateMonitorInstanceIdLabels @@id_labels_mapping = { - MonitorId::SYSTEM_POD_AGGREGATOR => ["monitor.azure.com/namespace", "monitor.azure.com/pod-aggregator"], - MonitorId::POD_AGGREGATOR => ["monitor.azure.com/namespace", "monitor.azure.com/pod-aggregator"], + MonitorId::SYSTEM_POD_AGGREGATOR => ["container.azm.ms/namespace", "container.azm.ms/pod-aggregator"], + MonitorId::POD_AGGREGATOR => ["container.azm.ms/namespace", "container.azm.ms/pod-aggregator"], MonitorId::NODE => ["agentpool", "kubernetes.io/role", "kubernetes.io/hostname"], - MonitorId::NAMESPACE => ["monitor.azure.com/namespace"], + MonitorId::NAMESPACE => ["container.azm.ms/namespace"], MonitorId::AGENT_NODE_POOL => ["agentpool"], # MonitorId::ALL_AGENT_NODE_POOLS => [], # MonitorId::ALL_NODE_POOLS => [], diff --git a/source/code/plugin/health/health_model_constants.rb b/source/code/plugin/health/health_model_constants.rb index 671239f2c..1a84c3062 100644 --- a/source/code/plugin/health/health_model_constants.rb +++ b/source/code/plugin/health/health_model_constants.rb @@ -36,7 +36,6 @@ class 
MonitorId class HealthMonitorRecordFields CLUSTER_ID = "ClusterId" - CLUSTER_NAME = "ClusterName" MONITOR_ID = "MonitorId" MONITOR_INSTANCE_ID = "MonitorInstanceId" MONITOR_LABELS = "MonitorLabels" diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index 309dd8034..5fb6ac79c 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -65,11 +65,11 @@ def enumerate(eventList = nil) record = {} # - Not sure if ingestion has the below mapping for this custom type. Fix it as part of fixed type conversion record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated - eventId = items['metadata']['uid'] + "/" + items['count'].to_s + eventId = items['metadata']['uid'] + "/" + items['count'].to_s newEventQueryState.push(eventId) if !eventQueryState.empty? && eventQueryState.include?(eventId) next - end + end record['ObjectKind']= items['involvedObject']['kind'] record['Namespace'] = items['involvedObject']['namespace'] record['Name'] = items['involvedObject']['name'] @@ -86,7 +86,6 @@ def enumerate(eventList = nil) else record['Computer'] = (OMS::Common.get_hostname) end - record['ClusterName'] = KubernetesApiClient.getClusterName record['ClusterId'] = KubernetesApiClient.getClusterId wrapper = { "DataType"=>"KUBE_EVENTS_BLOB", @@ -96,13 +95,13 @@ def enumerate(eventList = nil) eventStream.add(emitTime, wrapper) if wrapper end router.emit_stream(@tag, eventStream) if eventStream - end + end writeEventQueryState(newEventQueryState) rescue => errorStr $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end + end end def run_periodic diff --git a/source/code/plugin/mock_data.json b/source/code/plugin/mock_data.json index 401844534..2ce1b9082 100644 --- a/source/code/plugin/mock_data.json +++ b/source/code/plugin/mock_data.json @@ -1,7 +1,7 @@ [ { "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"0\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-0\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", + "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"0\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-0\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", "MonitorId": "node_memory_utilization_percentage", "MonitorInstanceId": "node_memory_utilization_percentage-2b129a9a5633c0cf8f621601c6f8bb32", "NewState": "none", @@ -22,7 +22,7 @@ }, { 
"ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"0\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-0\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", + "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"0\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-0\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", "MonitorId": "node_cpu_utilization_percentage", "MonitorInstanceId": "node_cpu_utilization_percentage-2b129a9a5633c0cf8f621601c6f8bb32", "NewState": "none", @@ -43,7 +43,7 @@ }, { 
"ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\"}", + "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\"}", "MonitorId": "kube_api_up", "MonitorInstanceId": "kube_api_up-bef5af9d919a51c49ba49d07f5784471", "NewState": "pass", @@ -67,7 +67,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\"}", + "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\"}", "MonitorId": "is_oversubscribed_cpu", "MonitorInstanceId": "is_oversubscribed_cpu-bef5af9d919a51c49ba49d07f5784471", "NewState": "pass", @@ -88,7 +88,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": 
"{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\"}", + "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\"}", "MonitorId": "is_oversubscribed_memory", "MonitorInstanceId": "is_oversubscribed_memory-bef5af9d919a51c49ba49d07f5784471", "NewState": "pass", @@ -109,7 +109,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"1\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-1\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", + "MonitorLabels": 
"{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"1\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-1\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", "MonitorId": "node_condition", "MonitorInstanceId": "node_condition-af2f3c986ea63b47fc7d59b71abb37b8", "NewState": "pass", @@ -152,7 +152,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"1\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-0\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", + "MonitorLabels": 
"{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"1\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-0\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", "MonitorId": "node_condition", "MonitorInstanceId": "node_condition-af2f3c986ea63b47fc7d59a71abb37b8", "NewState": "pass", @@ -195,7 +195,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"heapster-fc665dc4b\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"heapster-fc665dc4b\",\"container.azm.ms/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": 
"system_pods_ready_percentage-57b4bbef6e083416b8d347d4da8de7a6", "NewState": "pass", @@ -226,7 +226,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"kube-dns-autoscaler-746998ccf6\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"kube-dns-autoscaler-746998ccf6\",\"container.azm.ms/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-21832207f4e82c39f198f11abc39d104", "NewState": "pass", @@ -257,7 +257,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"kube-dns-v20-659876bf8d\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": 
"{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"kube-dns-v20-659876bf8d\",\"container.azm.ms/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-8433c768b2f76a7978eb7317e9bb2f4e", "NewState": "pass", @@ -288,7 +288,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"kube-proxy\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"kube-proxy\",\"container.azm.ms/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-29464dc11987eb670b82529ef4f5e094", "NewState": "pass", @@ -319,7 +319,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": 
"{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"kube-svc-redirect\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"kube-svc-redirect\",\"container.azm.ms/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-62a5128950e4ad0d13a4163f85fbf7b3", "NewState": "pass", @@ -350,7 +350,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"kubernetes-dashboard-6d48dd4779\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"kubernetes-dashboard-6d48dd4779\",\"container.azm.ms/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": 
"system_pods_ready_percentage-d34ec2ade900ae62a713b2a8a6d1ce74", "NewState": "pass", @@ -381,7 +381,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"metrics-server-5cbc77f79f\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"metrics-server-5cbc77f79f\",\"container.azm.ms/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-2f6c4157408ce6f9dc13da2fd684e716", "NewState": "pass", @@ -412,7 +412,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"omsagent\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": 
"{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"omsagent\",\"container.azm.ms/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-930e07ceb1ea2e952e5578b3f1557fbe", "NewState": "pass", @@ -443,7 +443,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"omsagent-rs-6dc57b8544\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"omsagent-rs-6dc57b8544\",\"container.azm.ms/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-7d42f06622dee87b682d3b03a1e348fa", "NewState": "pass", @@ -474,7 +474,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": 
"{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"tunnelfront-66dd5cfc6b\",\"monitor.azure.com/namespace\":\"kube-system\"}", + "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"tunnelfront-66dd5cfc6b\",\"container.azm.ms/namespace\":\"kube-system\"}", "MonitorId": "system_pods_ready_percentage", "MonitorInstanceId": "system_pods_ready_percentage-544c7f310f41ab8fc1196ae9f210fc83", "NewState": "pass", @@ -505,7 +505,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"diliprdeploymentnodeapps-c4fdfb446\",\"monitor.azure.com/namespace\":\"default\"}", + "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"diliprdeploymentnodeapps-c4fdfb446\",\"container.azm.ms/namespace\":\"default\"}", "MonitorId": "workload_pods_ready_percentage", "MonitorInstanceId": 
"workload_pods_ready_percentage-5d1f09e9b9c40718528d87cdb9ec0285", "NewState": "pass", @@ -536,7 +536,7 @@ }, { "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"monitor.azure.com/cluster-region\":\"eastus\",\"monitor.azure.com/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"monitor.azure.com/cluster-resource-group\":\"dilipr-health-test\",\"monitor.azure.com/cluster-name\":\"dilipr-health-test\",\"monitor.azure.com/pod-aggregator\":\"vishwadeploymentnodeapps-8686cf54db\",\"monitor.azure.com/namespace\":\"default\"}", + "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"vishwadeploymentnodeapps-8686cf54db\",\"container.azm.ms/namespace\":\"default\"}", "MonitorId": "workload_pods_ready_percentage", "MonitorInstanceId": "workload_pods_ready_percentage-56b719128d6ea1353fb2489afd8d8ed5", "NewState": "pass", From a57b0b45044a3013470b6c04305c3ec788c30cbe Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 28 May 2019 23:26:35 -0700 Subject: [PATCH 41/90] Configuration Split for Monitors --- installer/conf/healthmonitorconfig.json | 46 ++++++--- .../code/plugin/HealthMonitorSignalReducer.rb | 35 ++++--- source/code/plugin/HealthMonitorState.rb | 99 ++++++++++--------- source/code/plugin/HealthMonitorUtils.rb | 6 +- .../plugin/filter_health_model_builder.rb | 7 +- source/code/plugin/in_kube_health.rb | 7 +- 6 files changed, 109 insertions(+), 91 deletions(-) diff --git a/installer/conf/healthmonitorconfig.json b/installer/conf/healthmonitorconfig.json index a78dbd22b..69c89586c 100644 --- a/installer/conf/healthmonitorconfig.json +++ 
b/installer/conf/healthmonitorconfig.json @@ -1,27 +1,43 @@ { + "node_condition": { + "ConsecutiveSamplesForStateTransition": 1 + }, + "is_oversubscribed_cpu": { + "ConsecutiveSamplesForStateTransition": 1 + }, + "is_oversubscribed_memory": { + "ConsecutiveSamplesForStateTransition": 1 + }, + "kube_api_up": { + "ConsecutiveSamplesForStateTransition": 1 + }, "node_cpu_utilization_percentage": { - "PassPercentage": 80.0, - "FailPercentage": 90.0, - "SamplesBeforeNotification": 3 + "WarnThresholdPercentage": 80.0, + "FailThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 }, "node_memory_utilization_percentage": { - "PassPercentage": 80.0, - "FailPercentage": 90.0, - "SamplesBeforeNotification": 3 + "WarnThresholdPercentage": 80.0, + "FailThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 }, "container_cpu_utilization_percentage": { - "PassPercentage": 80.0, - "FailPercentage": 90.0, - "SamplesBeforeNotification": 3 + "WarnThresholdPercentage": 80.0, + "FailThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 }, "container_memory_utilization_percentage": { - "PassPercentage": 80.0, - "FailPercentage": 90.0, - "SamplesBeforeNotification": 3 + "WarnThresholdPercentage": 80.0, + "FailThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 }, "workload_pods_ready_percentage": { - "PassPercentage": 100.0, - "FailPercentage": 90.0, - "SamplesBeforeNotification": 2 + "WarnThresholdPercentage": 0.0, + "FailThresholdPercentage": 10.0, + "ConsecutiveSamplesForStateTransition": 2 + }, + "system_pods_ready_percentage": { + "FailThresholdPercentage": 0.0, + "ConsecutiveSamplesForStateTransition": 2 } } \ No newline at end of file diff --git a/source/code/plugin/HealthMonitorSignalReducer.rb b/source/code/plugin/HealthMonitorSignalReducer.rb index d634adf06..b57f2f7a0 100644 --- a/source/code/plugin/HealthMonitorSignalReducer.rb +++ b/source/code/plugin/HealthMonitorSignalReducer.rb @@ -10,27 +10,26 @@ 
class HealthMonitorSignalReducer @@firstMonitorRecordSent = {} class << self - def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, node_name: nil) + def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, health_signal_timeout, key: nil, node_name: nil) health_monitor_instance_state = HealthMonitorState.getHealthMonitorState(monitor_instance_id) health_monitor_records = health_monitor_instance_state.prev_records new_state = health_monitor_instance_state.new_state prev_sent_time = health_monitor_instance_state.prev_sent_record_time time_first_observed = health_monitor_instance_state.state_change_time - monitor_config['MonitorTimeOut'].nil? ? monitor_timeout = HealthMonitorConstants::DEFAULT_MONITOR_TIMEOUT : monitor_timeout = monitor_config['MonitorTimeOut'] #minutes - #log.debug monitor_timeout + samples_to_check = monitor_config['ConsecutiveSamplesForStateTransition'].to_i - # Notify Instantly sends a signal immediately on a state change - if (!monitor_config['NotifyInstantly'].nil? 
&& monitor_config['NotifyInstantly'] == true) + if samples_to_check == 1 + #log.debug "Samples to Check #{samples_to_check}" latest_record = health_monitor_records[health_monitor_records.size-1] #since we push new records to the end, and remove oldest records from the beginning latest_record_state = latest_record["state"] latest_record_time = latest_record["timestamp"] #string representation of time - #log.debug "Latest Record #{latest_record}" + #log.debug "Latest Record #{latest_record} #{latest_record_state} #{latest_record_time}" if latest_record_state.downcase == new_state.downcase && @@firstMonitorRecordSent.key?(monitor_instance_id) #no state change #log.debug "latest_record_state.to_s.downcase == prev_sent_status.to_s.state" time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60 #log.debug "time elapsed #{time_elapsed}" - if time_elapsed > monitor_timeout # minutes + if time_elapsed > health_signal_timeout # minutes # update record for last sent record time health_monitor_instance_state.old_state = health_monitor_instance_state.new_state health_monitor_instance_state.new_state = latest_record_state @@ -53,12 +52,12 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, end end - #FIXME: if record count = 1, then send it, if it is greater than 1 and less than SamplesBeforeNotification, NO-OP. If equal to SamplesBeforeNotification, then check for consistency in state change - if health_monitor_instance_state.prev_records.size == 1 + #FIXME: if record count = 1, then send it, if it is greater than 1 and less than ConsecutiveSamplesForStateTransition, NO-OP. 
If equal to ConsecutiveSamplesForStateTransition, then check for consistency in state change + if health_monitor_instance_state.prev_records.size == 1 && samples_to_check > 1 #log.debug "Only One Record" return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, node_name: node_name) - elsif health_monitor_instance_state.prev_records.size < monitor_config["SamplesBeforeNotification"].to_i - log.debug "Prev records size < SamplesBeforeNotification for #{monitor_instance_id}" + elsif health_monitor_instance_state.prev_records.size < samples_to_check + log.debug "Prev records size < ConsecutiveSamplesForStateTransition for #{monitor_instance_id}" return nil else first_record = health_monitor_records[0] @@ -70,7 +69,7 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, #log.debug "latest_record_state.to_s.downcase == prev_sent_status.to_s.state" time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60 #check if more than monitor timeout for signal #log.debug "time elapsed #{time_elapsed}" - if time_elapsed > monitor_timeout # minutes + if time_elapsed > health_signal_timeout # minutes # update record health_monitor_instance_state.old_state = health_monitor_instance_state.new_state health_monitor_instance_state.new_state = latest_record_state @@ -103,25 +102,25 @@ def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, key: nil, end def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: nil, node_name: nil) - #log.debug "Health Monitor Instance State #{health_monitor_instance_state}" + # log.debug "Health Monitor Instance State #{health_monitor_instance_state}" labels = HealthMonitorUtils.getClusterLabels - #log.debug "Labels : #{labels}" + log.debug "Cluster Labels : #{labels}" namespace = health_monitor_instance_state.prev_records[0]['details']['namespace'] pod_aggregator = 
health_monitor_instance_state.prev_records[0]['details']['podAggregator'] pod_aggregator_kind = health_monitor_instance_state.prev_records[0]['details']['podAggregatorKind'] monitor_labels = HealthMonitorUtils.getMonitorLabels(log, monitor_id, key: key, pod_aggregator: pod_aggregator, node_name: node_name, namespace: namespace, pod_aggregator_kind: pod_aggregator_kind) - #log.debug "Monitor Labels : #{monitor_labels}" + # log.debug "Monitor Labels : #{monitor_labels}" - if !monitor_labels.nil? + if !monitor_labels.empty? monitor_labels.keys.each do |key| labels[key] = monitor_labels[key] end end - #log.debug "Labels after adding Monitor Labels #{labels}" + # log.debug "Labels after adding Monitor Labels #{labels}" prev_records = health_monitor_instance_state.prev_records time_first_observed = health_monitor_instance_state.state_change_time # the oldest collection time new_state = health_monitor_instance_state.new_state # this is updated before formatRecord is called @@ -168,7 +167,7 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s return health_monitor_record end - #FIXME: check for consistency for "SamplesBeforeNotification" records + #FIXME: check for consistency for "ConsecutiveSamplesForStateTransition" records def isStateChangeConsistent(log, health_monitor_records) if health_monitor_records.nil? || health_monitor_records.size == 0 return false diff --git a/source/code/plugin/HealthMonitorState.rb b/source/code/plugin/HealthMonitorState.rb index 75991c553..36a5ee639 100644 --- a/source/code/plugin/HealthMonitorState.rb +++ b/source/code/plugin/HealthMonitorState.rb @@ -13,15 +13,13 @@ class HealthMonitorState HEALTH_MONITOR_STATE = {"PASS" => "pass", "FAIL" => "fail", "WARNING" => "warn", "NONE" => "none"} class << self - #set new_state to be the latest ONLY if the state change is consistent for monitors that are not configured to be notified instantly, i.e. 
For NotifyInstantly Monitors, set new state to be the latest + #set new_state to be the latest ONLY if the state change is consistent for monitors that are not configured to be notified instantly, i.e. For monitors which should have a state transition if the prev and current state are different, set new state to be the latest # record state. For others, set it to be none, if there is no state information present in the lookup table def updateHealthMonitorState(log, monitor_instance_id, health_monitor_record, config) #log.debug "updateHealthMonitorState" samples_to_keep = 1 - if !config.nil? && config['NotifyInstantly'] == true - samples_to_keep = 1 - elsif !config.nil? && !config['SamplesBeforeNotification'].nil? - samples_to_keep = config['SamplesBeforeNotification'].to_i + if !config.nil? && !config['ConsecutiveSamplesForStateTransition'].nil? + samples_to_keep = config['ConsecutiveSamplesForStateTransition'].to_i else samples_to_keep = HealthMonitorConstants::DEFAULT_SAMPLES_BEFORE_NOTIFICATION end @@ -67,52 +65,63 @@ def computeHealthMonitorState(log, monitor_id, value, config) #log.debug "computeHealthMonitorState" #log.info "id: #{monitor_id} value: #{value} config: #{config}" case monitor_id - when HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID - return getStateForRangeMonitor(log, value, config) - when HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID - getStateForInfraPodsReadyPercentage(log, value, config) + when HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, 
HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID + return getState(log, value, config) end end - def getStateForRangeMonitor(log, value, config) - #log.debug "getStateForRangeMonitor" - pass_percentage = 0.0 - (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthMonitorConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f - (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthMonitorConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f - #log.info "Pass: #{pass_percentage} Fail: #{fail_percentage}" - if value.to_f < pass_percentage.to_f - return HEALTH_MONITOR_STATE['PASS'] - elsif value.to_f > fail_percentage.to_f - return HEALTH_MONITOR_STATE['FAIL'] - else - return HEALTH_MONITOR_STATE['WARNING'] - end - end + # def getStateForRangeMonitor(log, value, config) + # #log.debug "getStateForRangeMonitor" + # pass_percentage = 0.0 + # (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthMonitorConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f + # (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthMonitorConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f + # #log.info "Pass: #{pass_percentage} Fail: #{fail_percentage}" + # if value.to_f < pass_percentage.to_f + # return HEALTH_MONITOR_STATE['PASS'] + # elsif value.to_f > fail_percentage.to_f + # return HEALTH_MONITOR_STATE['FAIL'] + # else + # return HEALTH_MONITOR_STATE['WARNING'] + # end + # end - def getStateForInfraPodsReadyPercentage(log, value, config) - # log.debug "getStateForInfraPodsReadyPercentage" - # log.debug "getStateForInfraPodsReadyPercentage #{config}" - (config.nil? || config['PassPercentage'].nil?) ? 
pass_percentage = HealthMonitorConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f - (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthMonitorConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f - # log.info " getStateForInfraPodsReadyPercentage Pass: #{pass_percentage} Fail: #{fail_percentage}" - if value.to_f < pass_percentage.to_f - return HEALTH_MONITOR_STATE['FAIL'] - else - return HEALTH_MONITOR_STATE['PASS'] - end - end + # def getStateForInfraPodsReadyPercentage(log, value, config) + # # log.debug "getStateForInfraPodsReadyPercentage" + # # log.debug "getStateForInfraPodsReadyPercentage #{config}" + # (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthMonitorConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f + # (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthMonitorConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f + # # log.info " getStateForInfraPodsReadyPercentage Pass: #{pass_percentage} Fail: #{fail_percentage}" + # if value.to_f < pass_percentage.to_f + # return HEALTH_MONITOR_STATE['FAIL'] + # else + # return HEALTH_MONITOR_STATE['PASS'] + # end + # end - def getStateForWorkloadPodsReadyPercentage(log, value, config) - # log.debug "getStateForWorkloadPodsReadyPercentage" - pass_percentage = 0.0 - (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthMonitorConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f - (config.nil? || config['FailPercentage'].nil?) ? 
fail_percentage = HealthMonitorConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f - #log.info "getStateForWorkloadPodsReadyPercentage Pass: #{pass_percentage} Fail: #{fail_percentage}" - if value.to_f > fail_percentage.to_f && value.to_f < pass_percentage.to_f - return HEALTH_MONITOR_STATE['WARNING'] - elsif value.to_f < fail_percentage.to_f + # def getStateForWorkloadPodsReadyPercentage(log, value, config) + # # log.debug "getStateForWorkloadPodsReadyPercentage" + # pass_percentage = 0.0 + # (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthMonitorConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f + # (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthMonitorConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f + # #log.info "getStateForWorkloadPodsReadyPercentage Pass: #{pass_percentage} Fail: #{fail_percentage}" + # if value.to_f > fail_percentage.to_f && value.to_f < pass_percentage.to_f + # return HEALTH_MONITOR_STATE['WARNING'] + # elsif value.to_f < fail_percentage.to_f + # return HEALTH_MONITOR_STATE['FAIL'] + # elsif value.to_f == pass_percentage.to_f + # return HEALTH_MONITOR_STATE['PASS'] + # end + # end + + def getState(log, value, config) + (config.nil? || config['WarnThresholdPercentage'].nil?) ? warn_percentage = nil : config['WarnThresholdPercentage'].to_f + fail_percentage = config['FailThresholdPercentage'].to_f + + if value > fail_percentage return HEALTH_MONITOR_STATE['FAIL'] - elsif value.to_f == pass_percentage.to_f + elsif !warn_percentage.nil? 
&& value > warn_percentage + return HEALTH_MONITOR_STATE['WARNING'] + else return HEALTH_MONITOR_STATE['PASS'] end end diff --git a/source/code/plugin/HealthMonitorUtils.rb b/source/code/plugin/HealthMonitorUtils.rb index e4d5cb083..0bec45dd5 100644 --- a/source/code/plugin/HealthMonitorUtils.rb +++ b/source/code/plugin/HealthMonitorUtils.rb @@ -111,7 +111,6 @@ def getClusterLabels end def getMonitorLabels(log, monitor_id, key: nil, pod_aggregator: nil, node_name: nil, namespace: nil, pod_aggregator_kind: nil) - #log.debug "monitor_id #{monitor_id} pod_aggregator #{pod_aggregator} pod_aggregator_kind #{pod_aggregator_kind} namespace #{namespace}" monitor_labels = {} case monitor_id when HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::POD_STATUS @@ -123,9 +122,7 @@ def getMonitorLabels(log, monitor_id, key: nil, pod_aggregator: nil, node_name: monitor_labels['container.azm.ms/pod-aggregator-kind'] = pod_aggregator_kind monitor_labels['container.azm.ms/namespace'] = namespace end - return monitor_labels when HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_KUBELET_HEALTH_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID, HealthMonitorConstants::NODE_CONTAINER_RUNTIME_MONITOR_ID - @@nodeInventory["items"].each do |node| if !node_name.nil? && !node['metadata']['name'].nil? 
&& node_name == node['metadata']['name'] #log.debug "Matched node name " @@ -134,8 +131,9 @@ def getMonitorLabels(log, monitor_id, key: nil, pod_aggregator: nil, node_name: end end end - return monitor_labels end + #log.debug "Labels #{monitor_labels}" + return monitor_labels end def refreshKubernetesApiData(log, hostName, force: false) diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 674a1af40..2b0182baf 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -14,6 +14,7 @@ class FilterHealthModelBuilder < Filter config_param :enable_log, :integer, :default => 0 config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log' config_param :model_definition_path, :default => '/etc/opt/microsoft/docker-cimprov/health_model_definition.json' + config_param :health_signal_timeout, :default => 240 attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_transition_processor, :state_finalizers, :monitor_set, :model_builder @@healthMonitorConfig = HealthMonitorUtils.getHealthMonitorConfig @@ -80,6 +81,7 @@ def filter_stream(tag, es) filtered_record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], @@healthMonitorConfig[monitor_id], + @health_signal_timeout, key: record[HealthMonitorRecordFields::CONTAINER_ID], node_name: record[HealthMonitorRecordFields::NODE_NAME] ) @@ -95,7 +97,6 @@ def filter_stream(tag, es) )) if filtered_record raw_records.push(filtered_record) if filtered_record - @log.info "#{filtered_record["MonitorInstanceId"]}" if filtered_record } @log.info "Filtered Records size = #{filtered_records.size}" @@ -113,11 +114,11 @@ def filter_stream(tag, es) record[HealthMonitorRecordFields::MONITOR_ID] = monitor.monitor_id record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = 
monitor.monitor_instance_id - record[HealthMonitorRecordFields::MONITOR_LABELS] = monitor.labels + record[HealthMonitorRecordFields::MONITOR_LABELS] = monitor.labels.to_json record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId record[HealthMonitorRecordFields::OLD_STATE] = monitor.old_state record[HealthMonitorRecordFields::NEW_STATE] = monitor.new_state - record[HealthMonitorRecordFields::DETAILS] = monitor.details if monitor.methods.include? :details + record[HealthMonitorRecordFields::DETAILS] = monitor.details.to_json if monitor.methods.include? :details record[HealthMonitorRecordFields::MONITOR_CONFIG] = monitor.config if monitor.methods.include? :config record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = Time.now.utc.iso8601 record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = monitor.transition_time diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 4b8bb9e84..8dbc1938c 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -227,15 +227,10 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) percent = pods_ready / total_pods * 100 timestamp = Time.now.utc.iso8601 - if config_monitor_id.downcase.start_with?("system") - state = HealthMonitorState.getStateForInfraPodsReadyPercentage(@@hmlog, percent, monitor_config) - elsif config_monitor_id.downcase.start_with?("workload") - state = HealthMonitorState.getStateForWorkloadPodsReadyPercentage(@@hmlog, percent, monitor_config) - end + state = HealthMonitorState.getState(@@hmlog, (100-percent), monitor_config) health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "podAggregator" => pod_aggregator, "namespace" => namespace, "podAggregatorKind" => pod_aggregator_kind}} monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, config_monitor_id, [@@clusterId, namespace, pod_aggregator]) 
HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) - #record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, pod_aggregator: pod_aggregator) health_record = {} time_now = Time.now.utc.iso8601 health_record[HealthMonitorRecordFields::MONITOR_ID] = config_monitor_id From 9dbc7a8c29984ddb4d4a7d5de7bccc833d575e1e Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 29 May 2019 18:50:09 -0700 Subject: [PATCH 42/90] working version for 2 pods before naming changes --- .../code/plugin/HealthMonitorSignalReducer.rb | 8 +- .../plugin/filter_health_model_builder.rb | 2 +- .../plugin/health/health_model_builder.rb | 13 +- source/code/plugin/mock_data.json | 568 ------------------ .../filter_health_model_builder_test.rb | 5 +- 5 files changed, 14 insertions(+), 582 deletions(-) delete mode 100644 source/code/plugin/mock_data.json diff --git a/source/code/plugin/HealthMonitorSignalReducer.rb b/source/code/plugin/HealthMonitorSignalReducer.rb index b57f2f7a0..d433000d0 100644 --- a/source/code/plugin/HealthMonitorSignalReducer.rb +++ b/source/code/plugin/HealthMonitorSignalReducer.rb @@ -105,14 +105,14 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s # log.debug "Health Monitor Instance State #{health_monitor_instance_state}" labels = HealthMonitorUtils.getClusterLabels - log.debug "Cluster Labels : #{labels}" + #log.debug "Cluster Labels : #{labels}" namespace = health_monitor_instance_state.prev_records[0]['details']['namespace'] pod_aggregator = health_monitor_instance_state.prev_records[0]['details']['podAggregator'] pod_aggregator_kind = health_monitor_instance_state.prev_records[0]['details']['podAggregatorKind'] monitor_labels = HealthMonitorUtils.getMonitorLabels(log, monitor_id, key: key, pod_aggregator: pod_aggregator, node_name: node_name, namespace: namespace, pod_aggregator_kind: pod_aggregator_kind) - # log.debug "Monitor 
Labels : #{monitor_labels}" + #log.debug "Monitor Labels : #{monitor_labels}" if !monitor_labels.empty? monitor_labels.keys.each do |key| @@ -120,7 +120,7 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s end end - # log.debug "Labels after adding Monitor Labels #{labels}" + #log.debug "Labels after adding Monitor Labels #{labels}" prev_records = health_monitor_instance_state.prev_records time_first_observed = health_monitor_instance_state.state_change_time # the oldest collection time new_state = health_monitor_instance_state.new_state # this is updated before formatRecord is called @@ -138,7 +138,7 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s if prev_records.size == 1 details = prev_records[0] else - details = prev_records #.each do |record| + details = prev_records end time_observed = Time.now.utc.iso8601 diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 2b0182baf..984a64a9d 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -14,7 +14,7 @@ class FilterHealthModelBuilder < Filter config_param :enable_log, :integer, :default => 0 config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log' config_param :model_definition_path, :default => '/etc/opt/microsoft/docker-cimprov/health_model_definition.json' - config_param :health_signal_timeout, :default => 240 + config_param :health_signal_timeout, :default => 5 attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_transition_processor, :state_finalizers, :monitor_set, :model_builder @@healthMonitorConfig = HealthMonitorUtils.getHealthMonitorConfig diff --git a/source/code/plugin/health/health_model_builder.rb b/source/code/plugin/health/health_model_builder.rb index 04e2c3eee..2e93ab44e 100644 --- 
a/source/code/plugin/health/health_model_builder.rb +++ b/source/code/plugin/health/health_model_builder.rb @@ -34,14 +34,8 @@ def finalize_model # return only those monitors whose state has changed, ALWAYS including the cluster level monitor monitors_map = get_changed_monitors - - # monitors_map.each{|key, value| - # puts "#{key} ==> #{value.state}" - # } - # puts "*****************************************************" - update_last_sent_monitors - clear_monitors + clear_monitors(monitors_map.size) return monitors_map end @@ -74,8 +68,11 @@ def update_last_sent_monitors } end - def clear_monitors + def clear_monitors(size) @monitor_set = MonitorSet.new + if size == 1 + @last_sent_monitors = @last_sent_monitors.select {|k,v| k.downcase == MonitorId::CLUSTER} + end end end diff --git a/source/code/plugin/mock_data.json b/source/code/plugin/mock_data.json deleted file mode 100644 index 2ce1b9082..000000000 --- a/source/code/plugin/mock_data.json +++ /dev/null @@ -1,568 +0,0 @@ -[ - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"0\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-0\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", - 
"MonitorId": "node_memory_utilization_percentage", - "MonitorInstanceId": "node_memory_utilization_percentage-2b129a9a5633c0cf8f621601c6f8bb32", - "NewState": "none", - "OldState": "none", - "Details": [ - { - "timestamp": "2019-05-15T17:42:30Z", - "state": "pass", - "details": { - "memoryRssBytes": 726867968.0, - "memoryUtilizationPercentage": 20.14 - } - } - ], - "MonitorConfig": "{\"PassPercentage\":80.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":3,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", - "AgentCollectionTime": "2019-05-15T17:43:10Z", - "TimeFirstObserved": "2019-05-15T17:42:30Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"0\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-0\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", - "MonitorId": "node_cpu_utilization_percentage", - "MonitorInstanceId": "node_cpu_utilization_percentage-2b129a9a5633c0cf8f621601c6f8bb32", - "NewState": "none", - "OldState": "none", - "Details": [ - { - "timestamp": "2019-05-15T17:42:30Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 158.99904371666668, - "cpuUtilizationPercentage": 15.9 - } - } - ], - 
"MonitorConfig": "{\"PassPercentage\":80.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":3,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", - "AgentCollectionTime": "2019-05-15T17:43:10Z", - "TimeFirstObserved": "2019-05-15T17:42:30Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\"}", - "MonitorId": "kube_api_up", - "MonitorInstanceId": "kube_api_up-bef5af9d919a51c49ba49d07f5784471", - "NewState": "pass", - "OldState": "pass", - "Details": [ - { - "timestamp": "2019-05-15T17:43:10Z", - "state": "pass", - "details": { - "content-type": "application/json", - "date": "Wed, 15 May 2019 17:43:10 GMT", - "connection": "close", - "transfer-encoding": "chunked", - "ResponseCode": "200" - } - } - ], - "MonitorConfig": "{\"MonitorTimeOut\":240,\"NotifyInstantly\":true}", - "AgentCollectionTime": "2019-05-15T17:43:10Z", - "TimeFirstObserved": "2019-05-15T17:43:10Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\"}", - "MonitorId": "is_oversubscribed_cpu", - "MonitorInstanceId": "is_oversubscribed_cpu-bef5af9d919a51c49ba49d07f5784471", - "NewState": "pass", - "OldState": "pass", - "Details": [ - { - "timestamp": "2019-05-15T17:43:10Z", - "state": 
"pass", - "details": { - "clusterCpuCapacity": 4000.0, - "clusterCpuRequests": 870.0 - } - } - ], - "MonitorConfig": "{\"MonitorTimeOut\":240,\"NotifyInstantly\":true}", - "AgentCollectionTime": "2019-05-15T17:43:10Z", - "TimeFirstObserved": "2019-05-15T17:43:10Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\"}", - "MonitorId": "is_oversubscribed_memory", - "MonitorInstanceId": "is_oversubscribed_memory-bef5af9d919a51c49ba49d07f5784471", - "NewState": "pass", - "OldState": "pass", - "Details": [ - { - "timestamp": "2019-05-15T17:43:10Z", - "state": "pass", - "details": { - "clusterMemoryCapacity": 14436810752.0, - "clusterMemoryRequests": 1312817152.0 - } - } - ], - "MonitorConfig": "{\"MonitorTimeOut\":240,\"NotifyInstantly\":true}", - "AgentCollectionTime": "2019-05-15T17:43:10Z", - "TimeFirstObserved": "2019-05-15T17:43:10Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": 
"{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"1\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-1\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-af2f3c986ea63b47fc7d59b71abb37b8", - "NewState": "pass", - "OldState": "pass", - "Details": [ - { - "timestamp": "2019-05-17T18:06:03Z", - "state": "pass", - "details": { - "NetworkUnavailable": { - "Reason": "RouteCreated", - "Message": "RouteController created a route" - }, - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - } - ], - "MonitorConfig": "{\"MonitorTimeOut\":240,\"NotifyInstantly\":true}", - "AgentCollectionTime": "2019-05-17T18:06:03Z", - "TimeFirstObserved": "2019-05-17T18:06:03Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"agentpool\":\"nodepool1\",\"beta.kubernetes.io/arch\":\"amd64\",\"beta.kubernetes.io/instance-type\":\"Standard_DS1_v2\",\"beta.kubernetes.io/os\":\"linux\",\"failure-domain.beta.kubernetes.io/region\":\"eastus\",\"failure-domain.beta.kubernetes.io/zone\":\"1\",\"kubernetes.azure.com/cluster\":\"MC_dilipr-health-test_dilipr-health-test_eastus\",\"kubernetes.io/hostname\":\"aks-nodepool1-19574989-0\",\"kubernetes.io/role\":\"agent\",\"node-role.kubernetes.io/agent\":\"\",\"storageprofile\":\"managed\",\"storagetier\":\"Premium_LRS\"}", - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-af2f3c986ea63b47fc7d59a71abb37b8", - "NewState": "pass", - "OldState": "pass", - "Details": [ - { - "timestamp": "2019-05-15T17: 43: 10Z", - "state": "pass", - "details": { - "NetworkUnavailable": { - "Reason": "RouteCreated", - "Message": "RouteController created a route" - }, - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has 
sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. AppArmor enabled" - } - } - } - ], - "MonitorConfig": "{\"MonitorTimeOut\":240,\"NotifyInstantly\":true}", - "AgentCollectionTime": "2019-05-15T17:43:10Z", - "TimeFirstObserved": "2019-05-15T17:43:10Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"heapster-fc665dc4b\",\"container.azm.ms/namespace\":\"kube-system\"}", - "MonitorId": "system_pods_ready_percentage", - "MonitorInstanceId": "system_pods_ready_percentage-57b4bbef6e083416b8d347d4da8de7a6", - "NewState": "pass", - "OldState": "none", - "Details": [ - { - "timestamp": "2019-05-17T18:06:03Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "controllerName": "heapster-fc665dc4b" - } - }, - { - "timestamp": "2019-05-17T18:07:03Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "controllerName": "heapster-fc665dc4b" - } - } - ], - "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", - "AgentCollectionTime": "2019-05-17T18:07:03Z", - "TimeFirstObserved": "2019-05-17T18:06:03Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": 
"{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"kube-dns-autoscaler-746998ccf6\",\"container.azm.ms/namespace\":\"kube-system\"}", - "MonitorId": "system_pods_ready_percentage", - "MonitorInstanceId": "system_pods_ready_percentage-21832207f4e82c39f198f11abc39d104", - "NewState": "pass", - "OldState": "none", - "Details": [ - { - "timestamp": "2019-05-17T18:06:03Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "controllerName": "kube-dns-autoscaler-746998ccf6" - } - }, - { - "timestamp": "2019-05-17T18:07:03Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "controllerName": "kube-dns-autoscaler-746998ccf6" - } - } - ], - "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", - "AgentCollectionTime": "2019-05-17T18:07:03Z", - "TimeFirstObserved": "2019-05-17T18:06:03Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"kube-dns-v20-659876bf8d\",\"container.azm.ms/namespace\":\"kube-system\"}", - "MonitorId": "system_pods_ready_percentage", - "MonitorInstanceId": "system_pods_ready_percentage-8433c768b2f76a7978eb7317e9bb2f4e", - "NewState": "pass", - "OldState": "none", - "Details": [ - { - "timestamp": "2019-05-17T18:06:03Z", - 
"state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "controllerName": "kube-dns-v20-659876bf8d" - } - }, - { - "timestamp": "2019-05-17T18:07:03Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "controllerName": "kube-dns-v20-659876bf8d" - } - } - ], - "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", - "AgentCollectionTime": "2019-05-17T18:07:03Z", - "TimeFirstObserved": "2019-05-17T18:06:03Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"kube-proxy\",\"container.azm.ms/namespace\":\"kube-system\"}", - "MonitorId": "system_pods_ready_percentage", - "MonitorInstanceId": "system_pods_ready_percentage-29464dc11987eb670b82529ef4f5e094", - "NewState": "pass", - "OldState": "none", - "Details": [ - { - "timestamp": "2019-05-17T18:06:03Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "controllerName": "kube-proxy" - } - }, - { - "timestamp": "2019-05-17T18:07:03Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "controllerName": "kube-proxy" - } - } - ], - "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", - "AgentCollectionTime": "2019-05-17T18:07:03Z", - "TimeFirstObserved": "2019-05-17T18:06:03Z" - }, - { - "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"kube-svc-redirect\",\"container.azm.ms/namespace\":\"kube-system\"}", - "MonitorId": "system_pods_ready_percentage", - "MonitorInstanceId": "system_pods_ready_percentage-62a5128950e4ad0d13a4163f85fbf7b3", - "NewState": "pass", - "OldState": "none", - "Details": [ - { - "timestamp": "2019-05-17T18:06:03Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "controllerName": "kube-svc-redirect" - } - }, - { - "timestamp": "2019-05-17T18:07:03Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "controllerName": "kube-svc-redirect" - } - } - ], - "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", - "AgentCollectionTime": "2019-05-17T18:07:03Z", - "TimeFirstObserved": "2019-05-17T18:06:03Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"kubernetes-dashboard-6d48dd4779\",\"container.azm.ms/namespace\":\"kube-system\"}", - "MonitorId": "system_pods_ready_percentage", - "MonitorInstanceId": 
"system_pods_ready_percentage-d34ec2ade900ae62a713b2a8a6d1ce74", - "NewState": "pass", - "OldState": "none", - "Details": [ - { - "timestamp": "2019-05-17T18:06:03Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "controllerName": "kubernetes-dashboard-6d48dd4779" - } - }, - { - "timestamp": "2019-05-17T18:07:03Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "controllerName": "kubernetes-dashboard-6d48dd4779" - } - } - ], - "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", - "AgentCollectionTime": "2019-05-17T18:07:03Z", - "TimeFirstObserved": "2019-05-17T18:06:03Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"metrics-server-5cbc77f79f\",\"container.azm.ms/namespace\":\"kube-system\"}", - "MonitorId": "system_pods_ready_percentage", - "MonitorInstanceId": "system_pods_ready_percentage-2f6c4157408ce6f9dc13da2fd684e716", - "NewState": "pass", - "OldState": "none", - "Details": [ - { - "timestamp": "2019-05-17T18:06:03Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "controllerName": "metrics-server-5cbc77f79f" - } - }, - { - "timestamp": "2019-05-17T18:07:03Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "controllerName": "metrics-server-5cbc77f79f" - } - } - ], - "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", 
- "AgentCollectionTime": "2019-05-17T18:07:03Z", - "TimeFirstObserved": "2019-05-17T18:06:03Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"omsagent\",\"container.azm.ms/namespace\":\"kube-system\"}", - "MonitorId": "system_pods_ready_percentage", - "MonitorInstanceId": "system_pods_ready_percentage-930e07ceb1ea2e952e5578b3f1557fbe", - "NewState": "pass", - "OldState": "none", - "Details": [ - { - "timestamp": "2019-05-17T18:06:03Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "controllerName": "omsagent" - } - }, - { - "timestamp": "2019-05-17T18:07:03Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "controllerName": "omsagent" - } - } - ], - "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", - "AgentCollectionTime": "2019-05-17T18:07:03Z", - "TimeFirstObserved": "2019-05-17T18:06:03Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"omsagent-rs-6dc57b8544\",\"container.azm.ms/namespace\":\"kube-system\"}", - 
"MonitorId": "system_pods_ready_percentage", - "MonitorInstanceId": "system_pods_ready_percentage-7d42f06622dee87b682d3b03a1e348fa", - "NewState": "pass", - "OldState": "none", - "Details": [ - { - "timestamp": "2019-05-17T18:06:03Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "controllerName": "omsagent-rs-6dc57b8544" - } - }, - { - "timestamp": "2019-05-17T18:07:03Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "controllerName": "omsagent-rs-6dc57b8544" - } - } - ], - "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", - "AgentCollectionTime": "2019-05-17T18:07:03Z", - "TimeFirstObserved": "2019-05-17T18:06:03Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"tunnelfront-66dd5cfc6b\",\"container.azm.ms/namespace\":\"kube-system\"}", - "MonitorId": "system_pods_ready_percentage", - "MonitorInstanceId": "system_pods_ready_percentage-544c7f310f41ab8fc1196ae9f210fc83", - "NewState": "pass", - "OldState": "none", - "Details": [ - { - "timestamp": "2019-05-17T18:06:03Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "controllerName": "tunnelfront-66dd5cfc6b" - } - }, - { - "timestamp": "2019-05-17T18:07:03Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "controllerName": "tunnelfront-66dd5cfc6b" - } - } - ], - "MonitorConfig": 
"{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", - "AgentCollectionTime": "2019-05-17T18:07:03Z", - "TimeFirstObserved": "2019-05-17T18:06:03Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": "{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"diliprdeploymentnodeapps-c4fdfb446\",\"container.azm.ms/namespace\":\"default\"}", - "MonitorId": "workload_pods_ready_percentage", - "MonitorInstanceId": "workload_pods_ready_percentage-5d1f09e9b9c40718528d87cdb9ec0285", - "NewState": "pass", - "OldState": "none", - "Details": [ - { - "timestamp": "2019-05-17T18:06:03Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "controllerName": "diliprdeploymentnodeapps-c4fdfb446" - } - }, - { - "timestamp": "2019-05-17T18:07:03Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "controllerName": "diliprdeploymentnodeapps-c4fdfb446" - } - } - ], - "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", - "AgentCollectionTime": "2019-05-17T18:07:03Z", - "TimeFirstObserved": "2019-05-17T18:06:03Z" - }, - { - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "MonitorLabels": 
"{\"container.azm.ms/cluster-region\":\"eastus\",\"container.azm.ms/cluster-subscription-id\":\"72c8e8ca-dc16-47dc-b65c-6b5875eb600a\",\"container.azm.ms/cluster-resource-group\":\"dilipr-health-test\",\"container.azm.ms/cluster-name\":\"dilipr-health-test\",\"container.azm.ms/pod-aggregator\":\"vishwadeploymentnodeapps-8686cf54db\",\"container.azm.ms/namespace\":\"default\"}", - "MonitorId": "workload_pods_ready_percentage", - "MonitorInstanceId": "workload_pods_ready_percentage-56b719128d6ea1353fb2489afd8d8ed5", - "NewState": "pass", - "OldState": "none", - "Details": [ - { - "timestamp": "2019-05-17T18:06:03Z", - "state": "pass", - "details": { - "totalPods": 10, - "podsReady": 10, - "controllerName": "vishwadeploymentnodeapps-8686cf54db" - } - }, - { - "timestamp": "2019-05-17T18:07:03Z", - "state": "pass", - "details": { - "totalPods": 10, - "podsReady": 10, - "controllerName": "vishwadeploymentnodeapps-8686cf54db" - } - } - ], - "MonitorConfig": "{\"PassPercentage\":100.0,\"FailPercentage\":90.0,\"SamplesBeforeNotification\":2,\"NotifyInstantly\":false,\"MonitorTimeOut\":240}", - "AgentCollectionTime": "2019-05-17T18:07:03Z", - "TimeFirstObserved": "2019-05-17T18:06:03Z" - } -] \ No newline at end of file diff --git a/test/code/plugin/filter_health_model_builder_test.rb b/test/code/plugin/filter_health_model_builder_test.rb index c918f90ef..e9f4ce931 100644 --- a/test/code/plugin/filter_health_model_builder_test.rb +++ b/test/code/plugin/filter_health_model_builder_test.rb @@ -41,8 +41,11 @@ def test_event_stream model_builder.process_state_transitions(state_transitions) changed_monitors = model_builder.finalize_model + changed_monitors.keys.each{|key| + puts key + } i = i + 1 - if i == 5 + if i == 6 break end end From 0f9f5d4ea1b99ccb8d275819859d2a7aecc1f94f Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 29 May 2019 19:58:05 -0700 Subject: [PATCH 43/90] Working Model Builder version after name changes, TODO: test on the agent --- 
installer/conf/health_model_definition.json | 38 +-- installer/conf/healthmonitorconfig.json | 18 +- source/code/plugin/HealthMonitorConstants.rb | 22 -- source/code/plugin/HealthMonitorState.rb | 49 +--- source/code/plugin/HealthMonitorUtils.rb | 6 +- source/code/plugin/filter_cadvisor_health.rb | 266 ------------------ .../plugin/filter_cadvisor_health_node.rb | 8 +- .../aggregate_monitor_instance_id_labels.rb | 4 +- .../plugin/health/health_model_constants.rb | 26 +- source/code/plugin/in_kube_health.rb | 10 +- 10 files changed, 59 insertions(+), 388 deletions(-) delete mode 100644 source/code/plugin/HealthMonitorConstants.rb delete mode 100644 source/code/plugin/filter_cadvisor_health.rb diff --git a/installer/conf/health_model_definition.json b/installer/conf/health_model_definition.json index aa1e45c10..162ee7a07 100644 --- a/installer/conf/health_model_definition.json +++ b/installer/conf/health_model_definition.json @@ -1,7 +1,7 @@ [ { - "monitor_id": "workload_pods_ready_percentage", - "parent_monitor_id": "pod_aggregator", + "monitor_id": "user_workload_pods_ready", + "parent_monitor_id": "user_workload", "labels": [ "container.azm.ms/namespace", "container.azm.ms/pod-aggregator", @@ -9,15 +9,15 @@ ] }, { - "monitor_id": "pod_aggregator", + "monitor_id": "user_workload", "parent_monitor_id": "namespace", "labels": [ "container.azm.ms/namespace" ] }, { - "monitor_id": "system_pods_ready_percentage", - "parent_monitor_id": "system_pod_aggregator", + "monitor_id": "system_workload_pods_ready", + "parent_monitor_id": "system_workload", "labels": [ "container.azm.ms/namespace", "container.azm.ms/pod-aggregator", @@ -25,11 +25,11 @@ ] }, { - "monitor_id": "system_pod_aggregator", + "monitor_id": "system_workload", "parent_monitor_id": "k8s_infrastructure" }, { - "monitor_id": "kube_api_up", + "monitor_id": "kube_api_status", "parent_monitor_id": "k8s_infrastructure" }, { @@ -37,22 +37,22 @@ "labels": [ "container.azm.ms/namespace" ], - "parent_monitor_id": 
"namespaces" + "parent_monitor_id": "all_namespaces" }, { "monitor_id": "k8s_infrastructure", "parent_monitor_id": "cluster" }, { - "monitor_id": "namespaces", - "parent_monitor_id": "workload" + "monitor_id": "all_namespaces", + "parent_monitor_id": "all_workloads" }, { - "monitor_id": "workload", + "monitor_id": "all_workloads", "parent_monitor_id": "cluster" }, { - "monitor_id": "node_cpu_utilization_percentage", + "monitor_id": "node_cpu_utilization", "parent_monitor_id": "node", "labels": [ "kubernetes.io/hostname", @@ -61,7 +61,7 @@ ] }, { - "monitor_id": "node_memory_utilization_percentage", + "monitor_id": "node_memory_utilization", "parent_monitor_id": "node", "labels": [ "kubernetes.io/hostname", @@ -130,11 +130,15 @@ "parent_monitor_id": null }, { - "monitor_id": "is_oversubscribed_cpu", - "parent_monitor_id": "workload" + "monitor_id": "subscribed_capacity_cpu", + "parent_monitor_id": "capacity" }, { - "monitor_id": "is_oversubscribed_memory", - "parent_monitor_id": "workload" + "monitor_id": "subscribed_capacity_memory", + "parent_monitor_id": "capacity" + }, + { + "monitor_id": "capacity", + "parent_monitor_id": "all_workloads" } ] \ No newline at end of file diff --git a/installer/conf/healthmonitorconfig.json b/installer/conf/healthmonitorconfig.json index 69c89586c..1ad28fcee 100644 --- a/installer/conf/healthmonitorconfig.json +++ b/installer/conf/healthmonitorconfig.json @@ -2,41 +2,41 @@ "node_condition": { "ConsecutiveSamplesForStateTransition": 1 }, - "is_oversubscribed_cpu": { + "subscribed_capacity_cpu": { "ConsecutiveSamplesForStateTransition": 1 }, - "is_oversubscribed_memory": { + "subscribed_capacity_memory": { "ConsecutiveSamplesForStateTransition": 1 }, - "kube_api_up": { + "kube_api_status": { "ConsecutiveSamplesForStateTransition": 1 }, - "node_cpu_utilization_percentage": { + "node_cpu_utilization": { "WarnThresholdPercentage": 80.0, "FailThresholdPercentage": 90.0, "ConsecutiveSamplesForStateTransition": 3 }, - 
"node_memory_utilization_percentage": { + "node_memory_utilization": { "WarnThresholdPercentage": 80.0, "FailThresholdPercentage": 90.0, "ConsecutiveSamplesForStateTransition": 3 }, - "container_cpu_utilization_percentage": { + "container_cpu_utilization": { "WarnThresholdPercentage": 80.0, "FailThresholdPercentage": 90.0, "ConsecutiveSamplesForStateTransition": 3 }, - "container_memory_utilization_percentage": { + "container_memory_utilization": { "WarnThresholdPercentage": 80.0, "FailThresholdPercentage": 90.0, "ConsecutiveSamplesForStateTransition": 3 }, - "workload_pods_ready_percentage": { + "user_workload_pods_ready": { "WarnThresholdPercentage": 0.0, "FailThresholdPercentage": 10.0, "ConsecutiveSamplesForStateTransition": 2 }, - "system_pods_ready_percentage": { + "system_workload_pods_ready": { "FailThresholdPercentage": 0.0, "ConsecutiveSamplesForStateTransition": 2 } diff --git a/source/code/plugin/HealthMonitorConstants.rb b/source/code/plugin/HealthMonitorConstants.rb deleted file mode 100644 index b208d700a..000000000 --- a/source/code/plugin/HealthMonitorConstants.rb +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -class HealthMonitorConstants - NODE_CPU_MONITOR_ID = "node_cpu_utilization_percentage" - NODE_MEMORY_MONITOR_ID = "node_memory_utilization_percentage" - NODE_KUBELET_HEALTH_MONITOR_ID = "kubelet_running" - NODE_CONDITION_MONITOR_ID = "node_condition" - NODE_CONTAINER_RUNTIME_MONITOR_ID = "container_manager_runtime_running" - WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID = "is_oversubscribed_cpu" - WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID = "is_oversubscribed_memory" - WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID = "workload_pods_ready_percentage" - WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID = "container_cpu_utilization_percentage" - WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID = "container_memory_utilization_percentage" - MANAGEDINFRA_KUBEAPI_AVAILABLE_MONITOR_ID = "kube_api_up" - 
MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID = "system_pods_ready_percentage" - POD_STATUS = "pod_status" - DEFAULT_PASS_PERCENTAGE = 80.0 - DEFAULT_FAIL_PERCENTAGE = 90.0 - DEFAULT_MONITOR_TIMEOUT = 240 #4 hours - DEFAULT_SAMPLES_BEFORE_NOTIFICATION = 3 -end \ No newline at end of file diff --git a/source/code/plugin/HealthMonitorState.rb b/source/code/plugin/HealthMonitorState.rb index 36a5ee639..8eb8b91e0 100644 --- a/source/code/plugin/HealthMonitorState.rb +++ b/source/code/plugin/HealthMonitorState.rb @@ -1,7 +1,7 @@ #!/usr/local/bin/ruby # frozen_string_literal: true -require_relative 'HealthMonitorConstants' +require_relative 'health/health_model_constants' HealthMonitorInstanceState = Struct.new(:prev_sent_record_time, :old_state, :new_state, :state_change_time, :prev_records) do end @@ -20,8 +20,6 @@ def updateHealthMonitorState(log, monitor_instance_id, health_monitor_record, co samples_to_keep = 1 if !config.nil? && !config['ConsecutiveSamplesForStateTransition'].nil? samples_to_keep = config['ConsecutiveSamplesForStateTransition'].to_i - else - samples_to_keep = HealthMonitorConstants::DEFAULT_SAMPLES_BEFORE_NOTIFICATION end #log.debug "Monitor Instance Id #{monitor_instance_id} samples_to_keep #{samples_to_keep}" @@ -65,54 +63,11 @@ def computeHealthMonitorState(log, monitor_id, value, config) #log.debug "computeHealthMonitorState" #log.info "id: #{monitor_id} value: #{value} config: #{config}" case monitor_id - when HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID + when HealthMonitorConstants::CONTAINER_CPU_MONITOR_ID, HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_CPU_MONITOR_ID, 
HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID, HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID return getState(log, value, config) end end - # def getStateForRangeMonitor(log, value, config) - # #log.debug "getStateForRangeMonitor" - # pass_percentage = 0.0 - # (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthMonitorConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f - # (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthMonitorConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f - # #log.info "Pass: #{pass_percentage} Fail: #{fail_percentage}" - # if value.to_f < pass_percentage.to_f - # return HEALTH_MONITOR_STATE['PASS'] - # elsif value.to_f > fail_percentage.to_f - # return HEALTH_MONITOR_STATE['FAIL'] - # else - # return HEALTH_MONITOR_STATE['WARNING'] - # end - # end - - # def getStateForInfraPodsReadyPercentage(log, value, config) - # # log.debug "getStateForInfraPodsReadyPercentage" - # # log.debug "getStateForInfraPodsReadyPercentage #{config}" - # (config.nil? || config['PassPercentage'].nil?) ? pass_percentage = HealthMonitorConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f - # (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthMonitorConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f - # # log.info " getStateForInfraPodsReadyPercentage Pass: #{pass_percentage} Fail: #{fail_percentage}" - # if value.to_f < pass_percentage.to_f - # return HEALTH_MONITOR_STATE['FAIL'] - # else - # return HEALTH_MONITOR_STATE['PASS'] - # end - # end - - # def getStateForWorkloadPodsReadyPercentage(log, value, config) - # # log.debug "getStateForWorkloadPodsReadyPercentage" - # pass_percentage = 0.0 - # (config.nil? || config['PassPercentage'].nil?) ? 
pass_percentage = HealthMonitorConstants::DEFAULT_PASS_PERCENTAGE : pass_percentage = config['PassPercentage'].to_f - # (config.nil? || config['FailPercentage'].nil?) ? fail_percentage = HealthMonitorConstants::DEFAULT_FAIL_PERCENTAGE : fail_percentage = config['FailPercentage'].to_f - # #log.info "getStateForWorkloadPodsReadyPercentage Pass: #{pass_percentage} Fail: #{fail_percentage}" - # if value.to_f > fail_percentage.to_f && value.to_f < pass_percentage.to_f - # return HEALTH_MONITOR_STATE['WARNING'] - # elsif value.to_f < fail_percentage.to_f - # return HEALTH_MONITOR_STATE['FAIL'] - # elsif value.to_f == pass_percentage.to_f - # return HEALTH_MONITOR_STATE['PASS'] - # end - # end - def getState(log, value, config) (config.nil? || config['WarnThresholdPercentage'].nil?) ? warn_percentage = nil : config['WarnThresholdPercentage'].to_f fail_percentage = config['FailThresholdPercentage'].to_f diff --git a/source/code/plugin/HealthMonitorUtils.rb b/source/code/plugin/HealthMonitorUtils.rb index 0bec45dd5..c65ff4922 100644 --- a/source/code/plugin/HealthMonitorUtils.rb +++ b/source/code/plugin/HealthMonitorUtils.rb @@ -2,7 +2,7 @@ # frozen_string_literal: true require_relative 'KubernetesApiClient' -require_relative 'HealthMonitorConstants' +require_relative 'health/health_model_constants' require 'time' require 'json' @@ -113,7 +113,7 @@ def getClusterLabels def getMonitorLabels(log, monitor_id, key: nil, pod_aggregator: nil, node_name: nil, namespace: nil, pod_aggregator_kind: nil) monitor_labels = {} case monitor_id - when HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID, HealthMonitorConstants::POD_STATUS + when HealthMonitorConstants::CONTAINER_CPU_MONITOR_ID, HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID, 
HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID if !key.nil? #container monitor_labels['container.azm.ms/pod-aggregator'] = getContainerControllerName(key) monitor_labels['container.azm.ms/namespace'] = getContainerNamespace(key) @@ -122,7 +122,7 @@ def getMonitorLabels(log, monitor_id, key: nil, pod_aggregator: nil, node_name: monitor_labels['container.azm.ms/pod-aggregator-kind'] = pod_aggregator_kind monitor_labels['container.azm.ms/namespace'] = namespace end - when HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_KUBELET_HEALTH_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID, HealthMonitorConstants::NODE_CONTAINER_RUNTIME_MONITOR_ID + when HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID @@nodeInventory["items"].each do |node| if !node_name.nil? && !node['metadata']['name'].nil? 
&& node_name == node['metadata']['name'] #log.debug "Matched node name " diff --git a/source/code/plugin/filter_cadvisor_health.rb b/source/code/plugin/filter_cadvisor_health.rb deleted file mode 100644 index 0425bfcbf..000000000 --- a/source/code/plugin/filter_cadvisor_health.rb +++ /dev/null @@ -1,266 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -module Fluent - require 'logger' - require 'json' - require_relative 'oms_common' - require_relative 'HealthMonitorUtils' - require_relative 'HealthMonitorState' - require_relative "ApplicationInsightsUtility" - - - class CAdvisor2HealthFilter < Filter - Fluent::Plugin.register_filter('filter_cadvisor2health', self) - - config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/health_monitors.log' - config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryRssBytes' - config_param :container_resource_refresh_interval_minutes, :integer, :default => 5 - - @@object_name_k8s_node = 'K8SNode' - @@object_name_k8s_container = 'K8SContainer' - - @@counter_name_cpu = 'cpuusagenanocores' - @@counter_name_memory_rss = 'memoryrssbytes' - - @@health_monitor_config = {} - - @@hostName = (OMS::Common.get_hostname) - @@clusterName = KubernetesApiClient.getClusterName - @@clusterId = KubernetesApiClient.getClusterId - @@clusterRegion = KubernetesApiClient.getClusterRegion - - - def initialize - super - @cpu_capacity = 0.0 - @memory_capacity = 0.0 - @last_resource_refresh = DateTime.now.to_time.to_i - @metrics_to_collect_hash = {} - end - - def configure(conf) - super - @log = HealthMonitorUtils.getLogHandle - @log.debug {'Starting filter_cadvisor2health plugin'} - end - - def start - super - @metrics_to_collect_hash = HealthMonitorUtils.build_metrics_hash(@metrics_to_collect) - @log.debug "Calling ensure_cpu_memory_capacity_set cpu_capacity #{@cpu_capacity} memory_capacity #{@memory_capacity}" - node_capacity = 
HealthMonitorUtils.ensure_cpu_memory_capacity_set(@cpu_capacity, @memory_capacity, @@hostName) - @cpu_capacity = node_capacity[0] - @memory_capacity = node_capacity[1] - @log.info "CPU Capacity #{@cpu_capacity} Memory Capacity #{@memory_capacity}" - HealthMonitorUtils.refreshKubernetesApiData(@log, @@hostName) - @@health_monitor_config = HealthMonitorUtils.getHealthMonitorConfig - ApplicationInsightsUtility.sendCustomEvent("filter_cadvisor_health Plugin Start", {}) - end - - def filter_stream(tag, es) - new_es = MultiEventStream.new - HealthMonitorUtils.refreshKubernetesApiData(@log, @hostName) - records_count = 0 - es.each { |time, record| - begin - filtered_record = filter(tag, time, record) - if !filtered_record.nil? - new_es.add(time, filtered_record) - records_count += 1 - end - rescue => e - router.emit_error_event(tag, time, record, e) - end - } - @log.debug "Filter Records Count #{records_count}" - new_es - end - - def filter(tag, time, record) - begin - if record.key?("MonitorLabels") - return record - end - object_name = record['DataItems'][0]['ObjectName'] - counter_name = record['DataItems'][0]['Collections'][0]['CounterName'].downcase - if @metrics_to_collect_hash.key?(counter_name.downcase) - metric_value = record['DataItems'][0]['Collections'][0]['Value'] - case object_name - when @@object_name_k8s_container - case counter_name.downcase - when @@counter_name_cpu - # @log.debug "Object Name #{object_name}" - # @log.debug "Counter Name #{counter_name}" - # @log.debug "Metric Value #{metric_value}" - #return process_container_cpu_record(record, metric_value) - when @@counter_name_memory_rss - #return process_container_memory_record(record, metric_value) - end - when @@object_name_k8s_node - case counter_name.downcase - when @@counter_name_cpu - process_node_cpu_record(record, metric_value) - when @@counter_name_memory_rss - process_node_memory_record(record, metric_value) - end - end - end - rescue => e - @log.debug "Error in filter #{e}" - @log.debug 
"record #{record}" - @log.debug "backtrace #{e.backtrace}" - ApplicationInsightsUtility.sendExceptionTelemetry(e) - return nil - end - end - - def process_container_cpu_record(record, metric_value) - monitor_id = HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID - @log.debug "processing container cpu record" - if record.nil? - return nil - else - instance_name = record['DataItems'][0]['InstanceName'] - key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) - container_metadata = HealthMonitorUtils.getContainerMetadata(key) - if !container_metadata.nil? - cpu_limit = container_metadata['cpuLimit'] - end - - if cpu_limit.to_s.empty? - #@log.info "CPU Limit is nil" - cpu_limit = @cpu_capacity - end - - #@log.info "cpu limit #{cpu_limit}" - - percent = (metric_value.to_f/cpu_limit*100).round(2) - #@log.debug "Container #{key} | Percentage of CPU limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID]) - #@log.debug "Computed State : #{state}" - timestamp = record['DataItems'][0]['Timestamp'] - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} - #@log.info health_monitor_record - - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) - #@log.info "Monitor Instance Id: #{monitor_instance_id}" - HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) - temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] - @log.info "Processed Container CPU #{temp}" - return record - end - return nil - end - - def process_container_memory_record(record, metric_value) - monitor_id = HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID - #@log.debug "processing container memory record" - if record.nil? - return nil - else - instance_name = record['DataItems'][0]['InstanceName'] - key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) - container_metadata = HealthMonitorUtils.getContainerMetadata(key) - if !container_metadata.nil? - memory_limit = container_metadata['memoryLimit'] - end - - if memory_limit.to_s.empty? - #@log.info "Memory Limit is nil" - memory_limit = @memory_capacity - end - - #@log.info "memory limit #{memory_limit}" - - percent = (metric_value.to_f/memory_limit*100).round(2) - #@log.debug "Container #{key} | Percentage of Memory limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID]) - #@log.debug "Computed State : #{state}" - timestamp = record['DataItems'][0]['Timestamp'] - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} - #@log.info health_monitor_record - - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) - #@log.info "Monitor Instance Id: #{monitor_instance_id}" - HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) - temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] - @log.info "Processed Container Memory #{temp}" - return record - end - return nil - end - - def process_node_cpu_record(record, metric_value) - monitor_id = HealthMonitorConstants::NODE_CPU_MONITOR_ID - #@log.debug "processing node cpu record" - if record.nil? - return nil - else - instance_name = record['DataItems'][0]['InstanceName'] - #@log.info "CPU capacity #{@cpu_capacity}" - - percent = (metric_value.to_f/@cpu_capacity*100).round(2) - #@log.debug "Percentage of CPU limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::NODE_CPU_MONITOR_ID]) - #@log.debug "Computed State : #{state}" - timestamp = record['DataItems'][0]['Timestamp'] - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} - - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) - # HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) - # temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] - health_record = {} - time_now = Time.now.utc.iso8601 - health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id - health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id - health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now - health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now - health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName - @log.info "Processed Node CPU" - return health_record - end - return nil - end - - def process_node_memory_record(record, metric_value) - monitor_id = HealthMonitorConstants::NODE_MEMORY_MONITOR_ID - #@log.debug "processing node memory record" - if record.nil? - return nil - else - instance_name = record['DataItems'][0]['InstanceName'] - #@log.info "Memory capacity #{@memory_capacity}" - - percent = (metric_value.to_f/@memory_capacity*100).round(2) - #@log.debug "Percentage of Memory limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::NODE_MEMORY_MONITOR_ID]) - #@log.debug "Computed State : #{state}" - timestamp = record['DataItems'][0]['Timestamp'] - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} - #@log.info health_monitor_record - - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) - #@log.info "Monitor Instance Id: #{monitor_instance_id}" - # HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) - # temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] - health_record = {} - time_now = Time.now.utc.iso8601 - health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id - health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id - health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now - health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now - health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName - @log.info "Processed Node Memory" - return health_record - end - return nil - end - end -end diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb index 4de24e6c9..dca1c2a4e 100644 --- a/source/code/plugin/filter_cadvisor_health_node.rb +++ b/source/code/plugin/filter_cadvisor_health_node.rb @@ -116,7 +116,7 @@ def filter(tag, time, record) end def process_container_cpu_record(record, metric_value) - monitor_id = HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID + monitor_id = HealthMonitorConstants::CONTAINER_CPU_MONITOR_ID @log.debug "processing container cpu record" if record.nil? 
return nil @@ -137,7 +137,7 @@ def process_container_cpu_record(record, metric_value) percent = (metric_value.to_f/cpu_limit*100).round(2) #@log.debug "Container #{key} | Percentage of CPU limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID]) + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::CONTAINER_CPU_MONITOR_ID]) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} @@ -155,7 +155,7 @@ def process_container_cpu_record(record, metric_value) end def process_container_memory_record(record, metric_value) - monitor_id = HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID + monitor_id = HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID #@log.debug "processing container memory record" if record.nil? 
return nil @@ -176,7 +176,7 @@ def process_container_memory_record(record, metric_value) percent = (metric_value.to_f/memory_limit*100).round(2) #@log.debug "Container #{key} | Percentage of Memory limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID]) + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID]) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} diff --git a/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb b/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb index a435d778d..2561c958c 100644 --- a/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb +++ b/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb @@ -1,8 +1,8 @@ module HealthModel class AggregateMonitorInstanceIdLabels @@id_labels_mapping = { - MonitorId::SYSTEM_POD_AGGREGATOR => ["container.azm.ms/namespace", "container.azm.ms/pod-aggregator"], - MonitorId::POD_AGGREGATOR => ["container.azm.ms/namespace", "container.azm.ms/pod-aggregator"], + MonitorId::SYSTEM_WORKLOAD => ["container.azm.ms/namespace", "container.azm.ms/pod-aggregator"], + MonitorId::USER_WORKLOAD => ["container.azm.ms/namespace", "container.azm.ms/pod-aggregator"], MonitorId::NODE => ["agentpool", "kubernetes.io/role", "kubernetes.io/hostname"], MonitorId::NAMESPACE => ["container.azm.ms/namespace"], MonitorId::AGENT_NODE_POOL => ["agentpool"], diff --git a/source/code/plugin/health/health_model_constants.rb b/source/code/plugin/health/health_model_constants.rb index 1a84c3062..d09bb5d35 100644 --- 
a/source/code/plugin/health/health_model_constants.rb +++ b/source/code/plugin/health/health_model_constants.rb @@ -17,21 +17,18 @@ class MonitorId ALL_NODES = 'all_nodes'; K8S_INFRASTRUCTURE = 'k8s_infrastructure' - NODE = 'node'; AGENT_NODE_POOL = 'agent_node_pool' MASTER_NODE_POOL = 'master_node_pool' ALL_AGENT_NODE_POOLS = 'all_agent_node_pools' ALL_NODE_POOLS = 'all_node_pools'; - WORKLOAD = 'workload'; - MANAGED_INFRA = 'managed_infra' + WORKLOAD = 'all_workloads'; CAPACITY = 'capacity'; - POD_AGGREGATOR = 'pod_aggregator'; - SYSTEM_POD_AGGREGATOR = 'system_pod_aggregator' + USER_WORKLOAD = 'user_workload'; + SYSTEM_WORKLOAD = 'system_workload' NAMESPACE = 'namespace'; - NAMESPACES = 'namespaces'; end class HealthMonitorRecordFields @@ -46,14 +43,19 @@ class HealthMonitorRecordFields AGENT_COLLECTION_TIME = "AgentCollectionTime" TIME_FIRST_OBSERVED = "TimeFirstObserved" NODE_NAME = "NodeName" - POD_AGGREGATOR = "PodAggregator" NAMESPACE = "Namespace" - CONTAINER_ID = "ContainerID" end - class HealthAspect - NODES = "Nodes" - KUBERNETES_INFRASTRUCTURE = "Kubernetes infrastructure" - WORKLOAD = "Workload" + class HealthMonitorConstants + NODE_CPU_MONITOR_ID = "node_cpu_utilization" + NODE_MEMORY_MONITOR_ID = "node_memory_utilization" + NODE_CONDITION_MONITOR_ID = "node_condition" + WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID = "subscribed_capacity_cpu" + WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID = "subscribed_capacity_memory" + WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID = "container_cpu_utilization" + WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID = "container_memory_utilization" + KUBE_API_STATUS = "kube_api_status" + USER_WORKLOAD_PODS_READY_MONITOR_ID = "user_workload_pods_ready" + SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID = "system_workload_pods_ready" end end \ No newline at end of file diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 8dbc1938c..d5aaee2e6 100644 --- a/source/code/plugin/in_kube_health.rb +++ 
b/source/code/plugin/in_kube_health.rb @@ -1,7 +1,6 @@ #!/usr/local/bin/ruby # frozen_string_literal: true -require_relative 'health/health_model_constants' include HealthModel module Fluent @@ -20,10 +19,9 @@ def initialize require_relative "oms_common" require_relative "omslog" require_relative "ApplicationInsightsUtility" - require_relative "DockerApiClient" require_relative 'HealthMonitorUtils' require_relative 'HealthMonitorState' - require_relative 'HealthMonitorConstants' + require_relative 'health/health_model_constants' end config_param :run_interval, :time, :default => "1m" @@ -99,12 +97,12 @@ def enumerate system_pods = pods_ready_hash.select{|k,v| v['namespace'] == 'kube-system'} workload_pods = pods_ready_hash.select{|k,v| v['namespace'] != 'kube-system'} - system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, HealthMonitorConstants::MANAGEDINFRA_PODS_READY_PERCENTAGE_MONITOR_ID) + system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID) system_pods_ready_percentage_records.each do |record| health_monitor_records.push(record) if record end - workload_pods_ready_percentage_records = process_pods_ready_percentage(workload_pods, HealthMonitorConstants::WORKLOAD_PODS_READY_PERCENTAGE_MONITOR_ID) + workload_pods_ready_percentage_records = process_pods_ready_percentage(workload_pods, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) workload_pods_ready_percentage_records.each do |record| health_monitor_records.push(record) if record end @@ -190,7 +188,7 @@ def process_memory_oversubscribed_monitor(pod_inventory) def process_kube_api_up_monitor(state, response) timestamp = Time.now.utc.iso8601 - monitor_id = HealthMonitorConstants::MANAGEDINFRA_KUBEAPI_AVAILABLE_MONITOR_ID + monitor_id = HealthMonitorConstants::KUBE_API_STATUS details = response.each_header.to_h details['ResponseCode'] = response.code health_monitor_record = 
{"timestamp" => timestamp, "state" => state, "details" => details} From 2f7be02153824a979a685e1b02e6883bfa749600 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 30 May 2019 13:41:46 -0700 Subject: [PATCH 44/90] E2E working version for health model aggregation TODO: Missing Signal Detection, refactoring, Unit tests --- source/code/plugin/HealthMonitorState.rb | 1 + source/code/plugin/HealthMonitorUtils.rb | 2 +- source/code/plugin/filter_cadvisor_health_node.rb | 7 +------ source/code/plugin/filter_health_model_builder.rb | 6 +++--- source/code/plugin/health/health_model_constants.rb | 2 ++ source/code/plugin/in_kube_health.rb | 2 +- 6 files changed, 9 insertions(+), 11 deletions(-) diff --git a/source/code/plugin/HealthMonitorState.rb b/source/code/plugin/HealthMonitorState.rb index 8eb8b91e0..a590535c8 100644 --- a/source/code/plugin/HealthMonitorState.rb +++ b/source/code/plugin/HealthMonitorState.rb @@ -13,6 +13,7 @@ class HealthMonitorState HEALTH_MONITOR_STATE = {"PASS" => "pass", "FAIL" => "fail", "WARNING" => "warn", "NONE" => "none"} class << self + include HealthModel #set new_state to be the latest ONLY if the state change is consistent for monitors that are not configured to be notified instantly, i.e. For monitors which should have a state transition if the prev and current state are different, set new state to be the latest # record state. 
For others, set it to be none, if there is no state information present in the lookup table def updateHealthMonitorState(log, monitor_instance_id, health_monitor_record, config) diff --git a/source/code/plugin/HealthMonitorUtils.rb b/source/code/plugin/HealthMonitorUtils.rb index c65ff4922..43ad11881 100644 --- a/source/code/plugin/HealthMonitorUtils.rb +++ b/source/code/plugin/HealthMonitorUtils.rb @@ -23,7 +23,7 @@ def initialize end class << self - + include HealthModel def build_metrics_hash(metrics_to_collect) @log.debug "Building Hash of Metrics to Collect #{metrics_to_collect}" metrics_to_collect_arr = metrics_to_collect.split(',').map(&:strip) diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb index dca1c2a4e..7d7a751f3 100644 --- a/source/code/plugin/filter_cadvisor_health_node.rb +++ b/source/code/plugin/filter_cadvisor_health_node.rb @@ -145,8 +145,6 @@ def process_container_cpu_record(record, metric_value) monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) #@log.info "Monitor Instance Id: #{monitor_instance_id}" - HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] @log.info "Processed Container CPU #{temp}" return record @@ -184,7 +182,6 @@ def process_container_memory_record(record, metric_value) monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) #@log.info "Monitor Instance Id: #{monitor_instance_id}" - HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) temp = record.nil? ? "Nil" : record["MonitorInstanceId"] @log.info "Processed Container Memory #{temp}" @@ -209,8 +206,7 @@ def process_node_cpu_record(record, metric_value) timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) - HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) # temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] health_record = {} @@ -246,7 +242,6 @@ def process_node_memory_record(record, metric_value) monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) #@log.info "Monitor Instance Id: #{monitor_instance_id}" - HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) # temp = record.nil? ? "Nil" : record["MonitorInstanceId"] health_record = {} diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 984a64a9d..8eaf097a4 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -14,8 +14,9 @@ class FilterHealthModelBuilder < Filter config_param :enable_log, :integer, :default => 0 config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log' config_param :model_definition_path, :default => '/etc/opt/microsoft/docker-cimprov/health_model_definition.json' - config_param :health_signal_timeout, :default => 5 + config_param :health_signal_timeout, :default => 240 attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_transition_processor, :state_finalizers, :monitor_set, :model_builder + include HealthModel @@healthMonitorConfig = HealthMonitorUtils.getHealthMonitorConfig @@rewrite_tag = 'oms.api.KubeHealth.AgentCollectionTime' @@ -60,7 +61,7 @@ def filter_stream(tag, es) HealthMonitorState.updateHealthMonitorState(@log, record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], record[HealthMonitorRecordFields::DETAILS], - @@healthMonitorConfig[record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID]]) + 
@@healthMonitorConfig[record[HealthMonitorRecordFields::MONITOR_ID]]) records.push(record) } @buffer.add_to_buffer(records) @@ -82,7 +83,6 @@ def filter_stream(tag, es) record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], @@healthMonitorConfig[monitor_id], @health_signal_timeout, - key: record[HealthMonitorRecordFields::CONTAINER_ID], node_name: record[HealthMonitorRecordFields::NODE_NAME] ) filtered_records.push(MonitorStateTransition.new( diff --git a/source/code/plugin/health/health_model_constants.rb b/source/code/plugin/health/health_model_constants.rb index d09bb5d35..b40d0344f 100644 --- a/source/code/plugin/health/health_model_constants.rb +++ b/source/code/plugin/health/health_model_constants.rb @@ -49,6 +49,8 @@ class HealthMonitorRecordFields class HealthMonitorConstants NODE_CPU_MONITOR_ID = "node_cpu_utilization" NODE_MEMORY_MONITOR_ID = "node_memory_utilization" + CONTAINER_CPU_MONITOR_ID = "container_cpu_utilization" + CONTAINER_MEMORY_MONITOR_ID = "container_memory_utilization" NODE_CONDITION_MONITOR_ID = "node_condition" WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID = "subscribed_capacity_cpu" WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID = "subscribed_capacity_memory" diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index d5aaee2e6..183eab0bd 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -23,7 +23,7 @@ def initialize require_relative 'HealthMonitorState' require_relative 'health/health_model_constants' end - + include HealthModel config_param :run_interval, :time, :default => "1m" config_param :tag, :string, :default => "oms.api.KubeHealth.ReplicaSet" From 0f210f51e4ce8d3ebc993c02b442b3d8a2b8cbb9 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 5 Jun 2019 16:54:35 -0700 Subject: [PATCH 45/90] Change pod-aggregator to workload-name, remove node monitor hierarchy reduction, added cluster wide labels for all monitors --- installer/conf/health_model_definition.json | 
151 ++++++++++++++---- .../code/plugin/HealthMonitorSignalReducer.rb | 6 +- source/code/plugin/HealthMonitorUtils.rb | 38 ++--- .../plugin/filter_health_model_builder.rb | 3 +- .../aggregate_monitor_instance_id_labels.rb | 4 +- .../health/node_monitor_hierarchy_reducer.rb | 15 +- source/code/plugin/in_kube_health.rb | 18 +-- .../filter_health_model_builder_test.rb | 3 +- 8 files changed, 170 insertions(+), 68 deletions(-) diff --git a/installer/conf/health_model_definition.json b/installer/conf/health_model_definition.json index 162ee7a07..17c82f5ef 100644 --- a/installer/conf/health_model_definition.json +++ b/installer/conf/health_model_definition.json @@ -4,15 +4,23 @@ "parent_monitor_id": "user_workload", "labels": [ "container.azm.ms/namespace", - "container.azm.ms/pod-aggregator", - "container.azm.ms/pod-aggregator-kind" + "container.azm.ms/workload-name", + "container.azm.ms/workload-kind", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" ] }, { "monitor_id": "user_workload", "parent_monitor_id": "namespace", "labels": [ - "container.azm.ms/namespace" + "container.azm.ms/namespace", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" ] }, { @@ -20,36 +28,74 @@ "parent_monitor_id": "system_workload", "labels": [ "container.azm.ms/namespace", - "container.azm.ms/pod-aggregator", - "container.azm.ms/pod-aggregator-kind" + "container.azm.ms/workload-name", + "container.azm.ms/workload-kind", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" ] }, { "monitor_id": "system_workload", - "parent_monitor_id": "k8s_infrastructure" + "parent_monitor_id": "k8s_infrastructure", + "labels": [ + "container.azm.ms/cluster-region", + 
"container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] }, { "monitor_id": "kube_api_status", - "parent_monitor_id": "k8s_infrastructure" + "parent_monitor_id": "k8s_infrastructure", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] }, { "monitor_id": "namespace", "labels": [ - "container.azm.ms/namespace" + "container.azm.ms/namespace", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" ], "parent_monitor_id": "all_namespaces" }, { "monitor_id": "k8s_infrastructure", - "parent_monitor_id": "cluster" + "parent_monitor_id": "cluster", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] }, { "monitor_id": "all_namespaces", - "parent_monitor_id": "all_workloads" + "parent_monitor_id": "all_workloads", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] }, { "monitor_id": "all_workloads", - "parent_monitor_id": "cluster" + "parent_monitor_id": "cluster", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] }, { "monitor_id": "node_cpu_utilization", @@ -57,7 +103,11 @@ "labels": [ "kubernetes.io/hostname", "agentpool", - "kubernetes.io/role" + "kubernetes.io/role", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" ] }, { @@ -66,7 +116,11 @@ "labels": [ "kubernetes.io/hostname", 
"agentpool", - "kubernetes.io/role" + "kubernetes.io/role", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" ] }, { @@ -75,7 +129,11 @@ "labels": [ "kubernetes.io/hostname", "agentpool", - "kubernetes.io/role" + "kubernetes.io/role", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" ] }, { @@ -84,7 +142,11 @@ "labels": [ "kubernetes.io/hostname", "agentpool", - "kubernetes.io/role" + "kubernetes.io/role", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" ], "parent_monitor_id": [ { @@ -104,41 +166,76 @@ { "monitor_id": "master_node_pool", "aggregation_algorithm": "percentage", - "parent_monitor_id": "all_nodes" + "parent_monitor_id": "all_nodes", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] }, { "monitor_id": "agent_node_pool", "aggregation_algorithm": "worstOf", "labels": [ - "agentpool" + "agentpool", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" ], - "parent_monitor_id": "all_agent_node_pools" - }, - { - "monitor_id": "all_agent_node_pools", - "aggregation_algorithm": "worstOf", "parent_monitor_id": "all_nodes" }, { "monitor_id": "all_nodes", "aggregation_algorithm": "worstOf", - "parent_monitor_id": "cluster" + "parent_monitor_id": "cluster", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] }, { "monitor_id": "cluster", "aggregation_algorithm": "worstOf", 
- "parent_monitor_id": null + "parent_monitor_id": null, + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] }, { "monitor_id": "subscribed_capacity_cpu", - "parent_monitor_id": "capacity" + "parent_monitor_id": "capacity", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] }, { "monitor_id": "subscribed_capacity_memory", - "parent_monitor_id": "capacity" + "parent_monitor_id": "capacity", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] }, { "monitor_id": "capacity", - "parent_monitor_id": "all_workloads" + "parent_monitor_id": "all_workloads", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] } ] \ No newline at end of file diff --git a/source/code/plugin/HealthMonitorSignalReducer.rb b/source/code/plugin/HealthMonitorSignalReducer.rb index d433000d0..f6a9e0fd3 100644 --- a/source/code/plugin/HealthMonitorSignalReducer.rb +++ b/source/code/plugin/HealthMonitorSignalReducer.rb @@ -108,10 +108,10 @@ def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_s #log.debug "Cluster Labels : #{labels}" namespace = health_monitor_instance_state.prev_records[0]['details']['namespace'] - pod_aggregator = health_monitor_instance_state.prev_records[0]['details']['podAggregator'] - pod_aggregator_kind = health_monitor_instance_state.prev_records[0]['details']['podAggregatorKind'] + workload_name = health_monitor_instance_state.prev_records[0]['details']['workloadName'] + workload_kind = 
health_monitor_instance_state.prev_records[0]['details']['workloadKind'] - monitor_labels = HealthMonitorUtils.getMonitorLabels(log, monitor_id, key: key, pod_aggregator: pod_aggregator, node_name: node_name, namespace: namespace, pod_aggregator_kind: pod_aggregator_kind) + monitor_labels = HealthMonitorUtils.getMonitorLabels(log, monitor_id, key: key, workload_name: workload_name, node_name: node_name, namespace: namespace, workload_kind: workload_kind) #log.debug "Monitor Labels : #{monitor_labels}" if !monitor_labels.empty? diff --git a/source/code/plugin/HealthMonitorUtils.rb b/source/code/plugin/HealthMonitorUtils.rb index 43ad11881..f47a9062f 100644 --- a/source/code/plugin/HealthMonitorUtils.rb +++ b/source/code/plugin/HealthMonitorUtils.rb @@ -110,16 +110,16 @@ def getClusterLabels return labels end - def getMonitorLabels(log, monitor_id, key: nil, pod_aggregator: nil, node_name: nil, namespace: nil, pod_aggregator_kind: nil) + def getMonitorLabels(log, monitor_id, key: nil, workload_name: nil, node_name: nil, namespace: nil, workload_kind: nil) monitor_labels = {} case monitor_id when HealthMonitorConstants::CONTAINER_CPU_MONITOR_ID, HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID, HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID if !key.nil? 
#container - monitor_labels['container.azm.ms/pod-aggregator'] = getContainerControllerName(key) + monitor_labels['container.azm.ms/workload-name'] = getContainerControllerName(key) monitor_labels['container.azm.ms/namespace'] = getContainerNamespace(key) else - monitor_labels['container.azm.ms/pod-aggregator'] = pod_aggregator.split('~~')[1] - monitor_labels['container.azm.ms/pod-aggregator-kind'] = pod_aggregator_kind + monitor_labels['container.azm.ms/workload-name'] = workload_name.split('~~')[1] + monitor_labels['container.azm.ms/workload-kind'] = workload_kind monitor_labels['container.azm.ms/namespace'] = namespace end when HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID @@ -157,13 +157,13 @@ def refreshKubernetesApiData(log, hostName, force: false) podInventory['items'].each do |pod| has_owner = !pod['metadata']['ownerReferences'].nil? if !has_owner - pod_aggregator = pod['metadata']['name'] + workload_name = pod['metadata']['name'] else - pod_aggregator = pod['metadata']['ownerReferences'][0]['name'] + workload_name = pod['metadata']['ownerReferences'][0]['name'] end namespace = pod['metadata']['namespace'] - @@controllerMapping[pod_aggregator] = namespace - #log.debug "pod_aggregator #{pod_aggregator} namespace #{namespace}" + @@controllerMapping[workload_name] = namespace + #log.debug "workload_name #{workload_name} namespace #{namespace}" pod['spec']['containers'].each do |container| key = [pod['metadata']['uid'], container['name']].join('/') @@ -183,7 +183,7 @@ def refreshKubernetesApiData(log, hostName, force: false) memory_limit_value = @memory_capacity end - @@containerMetadata[key] = {"cpuLimit" => cpu_limit_value, "memoryLimit" => memory_limit_value, "controllerName" => pod_aggregator, "namespace" => namespace} + @@containerMetadata[key] = {"cpuLimit" => cpu_limit_value, "memoryLimit" => memory_limit_value, "controllerName" => workload_name, "namespace" 
=> namespace} end end rescue => e @@ -388,7 +388,7 @@ def getPodsReadyHash(pod_inventory, deployment_inventory) namespace = pod['metadata']['namespace'] status = pod['status']['phase'] - pod_aggregator = '' + workload_name = '' if owner_kind.nil? owner_kind = 'Pod' end @@ -402,22 +402,22 @@ def getPodsReadyHash(pod_inventory, deployment_inventory) labels.each {|k,v| lookup_key = "#{namespace}-#{k}=#{v}" if deployment_lookup.key?(lookup_key) - pod_aggregator = deployment_lookup[lookup_key] + workload_name = deployment_lookup[lookup_key] break end } - if pod_aggregator.empty? - pod_aggregator = "#{namespace}~~#{controller_name}" + if workload_name.empty? + workload_name = "#{namespace}~~#{controller_name}" end when 'daemonset' - pod_aggregator = "#{namespace}~~#{controller_name}" + workload_name = "#{namespace}~~#{controller_name}" else - pod_aggregator = "#{namespace}~~#{pod['metadata']['name']}" + workload_name = "#{namespace}~~#{pod['metadata']['name']}" end - if pods_ready_percentage_hash.key?(pod_aggregator) - total_pods = pods_ready_percentage_hash[pod_aggregator]['totalPods'] - pods_ready = pods_ready_percentage_hash[pod_aggregator]['podsReady'] + if pods_ready_percentage_hash.key?(workload_name) + total_pods = pods_ready_percentage_hash[workload_name]['totalPods'] + pods_ready = pods_ready_percentage_hash[workload_name]['podsReady'] else total_pods = 0 pods_ready = 0 @@ -428,7 +428,7 @@ def getPodsReadyHash(pod_inventory, deployment_inventory) pods_ready += 1 end - pods_ready_percentage_hash[pod_aggregator] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace, 'pod_aggregator' => pod_aggregator, 'kind' => owner_kind} + pods_ready_percentage_hash[workload_name] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace, 'workload_name' => workload_name, 'kind' => owner_kind} rescue => e @log.info "Error when processing pod #{pod['metadata']['name']} #{e.message}" end diff --git 
a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 8eaf097a4..8ca9a7d56 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -27,7 +27,8 @@ def initialize @health_model_definition = HealthModel::HealthModelDefinition.new(HealthModel::HealthModelDefinitionParser.new(@model_definition_path).parse_file) @monitor_factory = HealthModel::MonitorFactory.new @state_transition_processor = HealthModel::StateTransitionProcessor.new(@health_model_definition, @monitor_factory) - @state_finalizers = [HealthModel::NodeMonitorHierarchyReducer.new, HealthModel::AggregateMonitorStateFinalizer.new] + # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side + @state_finalizers = [HealthModel::AggregateMonitorStateFinalizer.new] @monitor_set = HealthModel::MonitorSet.new @model_builder = HealthModel::HealthModelBuilder.new(@state_transition_processor, @state_finalizers, @monitor_set) end diff --git a/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb b/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb index 2561c958c..d1eb86daf 100644 --- a/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb +++ b/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb @@ -1,8 +1,8 @@ module HealthModel class AggregateMonitorInstanceIdLabels @@id_labels_mapping = { - MonitorId::SYSTEM_WORKLOAD => ["container.azm.ms/namespace", "container.azm.ms/pod-aggregator"], - MonitorId::USER_WORKLOAD => ["container.azm.ms/namespace", "container.azm.ms/pod-aggregator"], + MonitorId::SYSTEM_WORKLOAD => ["container.azm.ms/namespace", "container.azm.ms/workload-name"], + MonitorId::USER_WORKLOAD => ["container.azm.ms/namespace", "container.azm.ms/workload-name"], MonitorId::NODE => ["agentpool", 
"kubernetes.io/role", "kubernetes.io/hostname"], MonitorId::NAMESPACE => ["container.azm.ms/namespace"], MonitorId::AGENT_NODE_POOL => ["agentpool"], diff --git a/source/code/plugin/health/node_monitor_hierarchy_reducer.rb b/source/code/plugin/health/node_monitor_hierarchy_reducer.rb index 0d6999217..a063e0ae3 100644 --- a/source/code/plugin/health/node_monitor_hierarchy_reducer.rb +++ b/source/code/plugin/health/node_monitor_hierarchy_reducer.rb @@ -17,12 +17,15 @@ def finalize(monitor_set) #copy the children of member monitor as children of parent member_monitor_instance_id = monitor.get_member_monitors[0] #gets the only member monitor instance id member_monitor = monitor_set.get_monitor(member_monitor_instance_id) - member_monitor.get_member_monitors.each{|grandchild_monitor| - monitor.add_member_monitor(grandchild_monitor) - } - monitor.remove_member_monitor(member_monitor_instance_id) - # delete the member monitor from the monitor_set - monitor_set.delete(member_monitor_instance_id) + #reduce only if the aggregation algorithms are the same + if !member_monitor.aggregation_algorithm.nil? 
&& member_monitor.aggregation_algorithm == AggregationAlgorithm::WORSTOF && monitor.aggregation_algorithm == member_monitor.aggregation_algorithm + member_monitor.get_member_monitors.each{|grandchild_monitor| + monitor.add_member_monitor(grandchild_monitor) + } + monitor.remove_member_monitor(member_monitor_instance_id) + # delete the member monitor from the monitor_set + monitor_set.delete(member_monitor_instance_id) + end puts "After Deleting #{monitor_set.get_size}" end end diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 183eab0bd..931dd16e5 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -97,12 +97,12 @@ def enumerate system_pods = pods_ready_hash.select{|k,v| v['namespace'] == 'kube-system'} workload_pods = pods_ready_hash.select{|k,v| v['namespace'] != 'kube-system'} - system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID) + system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) system_pods_ready_percentage_records.each do |record| health_monitor_records.push(record) if record end - workload_pods_ready_percentage_records = process_pods_ready_percentage(workload_pods, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) + workload_pods_ready_percentage_records = process_pods_ready_percentage(workload_pods, HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID) workload_pods_ready_percentage_records.each do |record| health_monitor_records.push(record) if record end @@ -217,17 +217,17 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) records = [] pods_hash.keys.each do |key| - pod_aggregator = key - total_pods = pods_hash[pod_aggregator]['totalPods'] - pods_ready = pods_hash[pod_aggregator]['podsReady'] - namespace = pods_hash[pod_aggregator]['namespace'] - 
pod_aggregator_kind = pods_hash[pod_aggregator]['kind'] + workload_name = key + total_pods = pods_hash[workload_name]['totalPods'] + pods_ready = pods_hash[workload_name]['podsReady'] + namespace = pods_hash[workload_name]['namespace'] + workload_kind = pods_hash[workload_name]['kind'] percent = pods_ready / total_pods * 100 timestamp = Time.now.utc.iso8601 state = HealthMonitorState.getState(@@hmlog, (100-percent), monitor_config) - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "podAggregator" => pod_aggregator, "namespace" => namespace, "podAggregatorKind" => pod_aggregator_kind}} - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, config_monitor_id, [@@clusterId, namespace, pod_aggregator]) + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "workloadName" => workload_name, "namespace" => namespace, "workloadKind" => workload_kind}} + monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, config_monitor_id, [@@clusterId, namespace, workload_name]) HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) health_record = {} time_now = Time.now.utc.iso8601 diff --git a/test/code/plugin/filter_health_model_builder_test.rb b/test/code/plugin/filter_health_model_builder_test.rb index e9f4ce931..48e85b7fe 100644 --- a/test/code/plugin/filter_health_model_builder_test.rb +++ b/test/code/plugin/filter_health_model_builder_test.rb @@ -14,7 +14,8 @@ def test_event_stream health_model_definition = HealthModelDefinition.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) monitor_factory = MonitorFactory.new state_transition_processor = StateTransitionProcessor.new(health_model_definition, monitor_factory) - state_finalizers = [NodeMonitorHierarchyReducer.new, 
AggregateMonitorStateFinalizer.new] + # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side + state_finalizers = [AggregateMonitorStateFinalizer.new] monitor_set = MonitorSet.new model_builder = HealthModelBuilder.new(state_transition_processor, state_finalizers, monitor_set) From b89b1077fe63f2ce06ca2cc7f1cd1d686a0fb70d Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 12 Jun 2019 20:10:49 -0700 Subject: [PATCH 46/90] Refactor signal reduction logic --- .../code/plugin/HealthMonitorSignalReducer.rb | 186 ------- source/code/plugin/HealthMonitorState.rb | 86 ---- source/code/plugin/HealthMonitorUtils.rb | 461 ------------------ .../plugin/filter_health_model_builder.rb | 8 +- .../code/plugin/health/aggregate_monitor.rb | 45 +- .../aggregate_monitor_state_finalizer.rb | 5 + ...ocessor.rb => health_hierarchy_builder.rb} | 16 +- .../health/health_kubernetes_resources.rb | 19 + .../health/health_missing_monitor_handler.rb | 56 +++ .../plugin/health/health_model_builder.rb | 54 +- .../plugin/health/health_model_constants.rb | 9 + .../plugin/health/health_model_definition.rb | 1 - .../plugin/health/health_monitor_optimizer.rb | 52 ++ .../plugin/health/health_monitor_provider.rb | 110 +++++ ...transition.rb => health_monitor_record.rb} | 5 +- .../plugin/health/health_monitor_state.rb | 174 +++++++ .../plugin/health/health_monitor_utils.rb | 23 + source/code/plugin/health/monitor_factory.rb | 22 +- source/code/plugin/health/unit_monitor.rb | 11 +- .../filter_health_model_builder_test.rb | 11 +- 20 files changed, 520 insertions(+), 834 deletions(-) delete mode 100644 source/code/plugin/HealthMonitorSignalReducer.rb delete mode 100644 source/code/plugin/HealthMonitorState.rb delete mode 100644 source/code/plugin/HealthMonitorUtils.rb rename source/code/plugin/health/{state_transition_processor.rb => health_hierarchy_builder.rb} (86%) create 
mode 100644 source/code/plugin/health/health_kubernetes_resources.rb create mode 100644 source/code/plugin/health/health_missing_monitor_handler.rb create mode 100644 source/code/plugin/health/health_monitor_optimizer.rb create mode 100644 source/code/plugin/health/health_monitor_provider.rb rename source/code/plugin/health/{monitor_state_transition.rb => health_monitor_record.rb} (63%) create mode 100644 source/code/plugin/health/health_monitor_state.rb create mode 100644 source/code/plugin/health/health_monitor_utils.rb diff --git a/source/code/plugin/HealthMonitorSignalReducer.rb b/source/code/plugin/HealthMonitorSignalReducer.rb deleted file mode 100644 index f6a9e0fd3..000000000 --- a/source/code/plugin/HealthMonitorSignalReducer.rb +++ /dev/null @@ -1,186 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -require 'time' -require 'json' -require_relative 'KubernetesApiClient' -require_relative 'health/health_model_constants' - -class HealthMonitorSignalReducer - - @@firstMonitorRecordSent = {} - class << self - def reduceSignal(log, monitor_id, monitor_instance_id, monitor_config, health_signal_timeout, key: nil, node_name: nil) - - health_monitor_instance_state = HealthMonitorState.getHealthMonitorState(monitor_instance_id) - health_monitor_records = health_monitor_instance_state.prev_records - new_state = health_monitor_instance_state.new_state - prev_sent_time = health_monitor_instance_state.prev_sent_record_time - time_first_observed = health_monitor_instance_state.state_change_time - samples_to_check = monitor_config['ConsecutiveSamplesForStateTransition'].to_i - - if samples_to_check == 1 - #log.debug "Samples to Check #{samples_to_check}" - latest_record = health_monitor_records[health_monitor_records.size-1] #since we push new records to the end, and remove oldest records from the beginning - latest_record_state = latest_record["state"] - latest_record_time = latest_record["timestamp"] #string representation of time - #log.debug "Latest 
Record #{latest_record} #{latest_record_state} #{latest_record_time}" - if latest_record_state.downcase == new_state.downcase && @@firstMonitorRecordSent.key?(monitor_instance_id) #no state change - #log.debug "latest_record_state.to_s.downcase == prev_sent_status.to_s.state" - time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60 - #log.debug "time elapsed #{time_elapsed}" - if time_elapsed > health_signal_timeout # minutes - # update record for last sent record time - health_monitor_instance_state.old_state = health_monitor_instance_state.new_state - health_monitor_instance_state.new_state = latest_record_state - health_monitor_instance_state.prev_sent_record_time = latest_record_time - #log.debug "After Updating Monitor State #{health_monitor_instance_state}" - HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) - return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, node_name: node_name) - else - #log.debug "Monitor timeout not reached #{time_elapsed}" - #log.debug "Timeout not reached for #{monitor_id}" - return nil# dont send anything - end - else - health_monitor_instance_state.old_state = health_monitor_instance_state.new_state #initially old = new, so when state change occurs, assign old to be new, and set new to be the latest record state - health_monitor_instance_state.new_state = latest_record_state - health_monitor_instance_state.state_change_time = latest_record_time - health_monitor_instance_state.prev_sent_record_time = latest_record_time - HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) - return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, node_name: node_name) - end - end - - #FIXME: if record count = 1, then send it, if it is greater than 1 and less than ConsecutiveSamplesForStateTransition, NO-OP. 
If equal to ConsecutiveSamplesForStateTransition, then check for consistency in state change - if health_monitor_instance_state.prev_records.size == 1 && samples_to_check > 1 - #log.debug "Only One Record" - return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, node_name: node_name) - elsif health_monitor_instance_state.prev_records.size < samples_to_check - log.debug "Prev records size < ConsecutiveSamplesForStateTransition for #{monitor_instance_id}" - return nil - else - first_record = health_monitor_records[0] - latest_record = health_monitor_records[health_monitor_records.size-1] #since we push new records to the end, and remove oldest records from the beginning - latest_record_state = latest_record["state"] - latest_record_time = latest_record["timestamp"] #string representation of time - #log.debug "Latest Record #{latest_record}" - if latest_record_state.downcase == new_state.downcase # No state change - #log.debug "latest_record_state.to_s.downcase == prev_sent_status.to_s.state" - time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60 #check if more than monitor timeout for signal - #log.debug "time elapsed #{time_elapsed}" - if time_elapsed > health_signal_timeout # minutes - # update record - health_monitor_instance_state.old_state = health_monitor_instance_state.new_state - health_monitor_instance_state.new_state = latest_record_state - health_monitor_instance_state.prev_sent_record_time = latest_record_time - #log.debug "After Updating Monitor State #{health_monitor_instance_state}" - HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) - return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, node_name: node_name) - else - #log.debug "Monitor timeout not reached #{time_elapsed}" - #log.debug "Timeout not reached for #{monitor_id}" - return nil# dont send anything - 
end - else # state change from previous sent state to latest record state - #check state of last n records to see if they are all in the same state - if (isStateChangeConsistent(log, health_monitor_records)) - health_monitor_instance_state.old_state = health_monitor_instance_state.new_state - health_monitor_instance_state.new_state = latest_record_state - health_monitor_instance_state.prev_sent_record_time = latest_record_time - health_monitor_instance_state.state_change_time = first_record["timestamp"] - HealthMonitorState.setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) - return formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: key, node_name: node_name) - else - log.debug "No consistent state change for monitor #{monitor_id}" - return nil - end - end - end - log.debug "No new information for monitor #{monitor_id}" - return nil - end - - def formatRecord(log, monitor_id, monitor_instance_id, health_monitor_instance_state, monitor_config, key: nil, node_name: nil) - # log.debug "Health Monitor Instance State #{health_monitor_instance_state}" - - labels = HealthMonitorUtils.getClusterLabels - #log.debug "Cluster Labels : #{labels}" - - namespace = health_monitor_instance_state.prev_records[0]['details']['namespace'] - workload_name = health_monitor_instance_state.prev_records[0]['details']['workloadName'] - workload_kind = health_monitor_instance_state.prev_records[0]['details']['workloadKind'] - - monitor_labels = HealthMonitorUtils.getMonitorLabels(log, monitor_id, key: key, workload_name: workload_name, node_name: node_name, namespace: namespace, workload_kind: workload_kind) - #log.debug "Monitor Labels : #{monitor_labels}" - - if !monitor_labels.empty? 
- monitor_labels.keys.each do |key| - labels[key] = monitor_labels[key] - end - end - - #log.debug "Labels after adding Monitor Labels #{labels}" - prev_records = health_monitor_instance_state.prev_records - time_first_observed = health_monitor_instance_state.state_change_time # the oldest collection time - new_state = health_monitor_instance_state.new_state # this is updated before formatRecord is called - old_state = health_monitor_instance_state.old_state - - #log.debug "monitor_config #{monitor_config}" - if monitor_config.nil? - monitor_config = '' - end - monitor_config = monitor_config - #log.debug "monitor_config #{monitor_config}" - records = [] - - - if prev_records.size == 1 - details = prev_records[0] - else - details = prev_records - end - - time_observed = Time.now.utc.iso8601 - #log.debug "Details: #{details}" - #log.debug "time_first_observed #{time_first_observed} time_observed #{time_observed} new_state #{new_state} old_state #{old_state}" - - health_monitor_record = {} - health_monitor_record["ClusterId"] = KubernetesApiClient.getClusterId - health_monitor_record["MonitorLabels"] = labels.to_json - health_monitor_record["MonitorId"] = monitor_id - health_monitor_record["MonitorInstanceId"] = monitor_instance_id - health_monitor_record["NewState"] = new_state - health_monitor_record["OldState"] = old_state - health_monitor_record["Details"] = details - health_monitor_record["MonitorConfig"] = monitor_config.to_json - health_monitor_record["AgentCollectionTime"] = Time.now.utc.iso8601 - health_monitor_record["TimeFirstObserved"] = time_first_observed - - #log.debug "HealthMonitor Record #{health_monitor_record}" - #log.debug "Parsed Health Monitor Record for #{monitor_id}" - - if !@@firstMonitorRecordSent.key?(monitor_instance_id) - @@firstMonitorRecordSent[monitor_instance_id] = true - end - - return health_monitor_record - end - - #FIXME: check for consistency for "ConsecutiveSamplesForStateTransition" records - def isStateChangeConsistent(log, 
health_monitor_records) - if health_monitor_records.nil? || health_monitor_records.size == 0 - return false - end - i = 0 - while i < health_monitor_records.size - 1 - #log.debug "Prev: #{health_monitor_records[i].state} Current: #{health_monitor_records[i + 1].state}" - if health_monitor_records[i]["state"] != health_monitor_records[i + 1]["state"] - return false - end - i += 1 - end - return true - end - end -end \ No newline at end of file diff --git a/source/code/plugin/HealthMonitorState.rb b/source/code/plugin/HealthMonitorState.rb deleted file mode 100644 index a590535c8..000000000 --- a/source/code/plugin/HealthMonitorState.rb +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -require_relative 'health/health_model_constants' - -HealthMonitorInstanceState = Struct.new(:prev_sent_record_time, :old_state, :new_state, :state_change_time, :prev_records) do -end - -class HealthMonitorState - @@instanceStates = {} #hash of monitor_instance_id --> health monitor instance state - @@firstMonitorRecordSent = {} - #FIXME: use lookup for health_monitor_constants.rb from health folder - HEALTH_MONITOR_STATE = {"PASS" => "pass", "FAIL" => "fail", "WARNING" => "warn", "NONE" => "none"} - - class << self - include HealthModel - #set new_state to be the latest ONLY if the state change is consistent for monitors that are not configured to be notified instantly, i.e. For monitors which should have a state transition if the prev and current state are different, set new state to be the latest - # record state. For others, set it to be none, if there is no state information present in the lookup table - def updateHealthMonitorState(log, monitor_instance_id, health_monitor_record, config) - #log.debug "updateHealthMonitorState" - samples_to_keep = 1 - if !config.nil? && !config['ConsecutiveSamplesForStateTransition'].nil? 
- samples_to_keep = config['ConsecutiveSamplesForStateTransition'].to_i - end - - #log.debug "Monitor Instance Id #{monitor_instance_id} samples_to_keep #{samples_to_keep}" - - if @@instanceStates.key?(monitor_instance_id) - health_monitor_instance_state = @@instanceStates[monitor_instance_id] - health_monitor_records = health_monitor_instance_state.prev_records #This should be an array - - if health_monitor_records.size == samples_to_keep - health_monitor_records.delete_at(0) - end - health_monitor_records.push(health_monitor_record) - health_monitor_instance_state.prev_records = health_monitor_records - @@instanceStates[monitor_instance_id] = health_monitor_instance_state - else - # if samples_to_keep == 1, then set new state to be the health_monitor_record state, else set it as none - old_state = HEALTH_MONITOR_STATE["NONE"] - new_state = HEALTH_MONITOR_STATE["NONE"] - if samples_to_keep == 1 - new_state = health_monitor_record["state"] - end - health_monitor_instance_state = HealthMonitorInstanceState.new(health_monitor_record["timestamp"], old_state, new_state, health_monitor_record["timestamp"], [health_monitor_record]) - @@instanceStates[monitor_instance_id] = health_monitor_instance_state - end - #log.debug "Health Records Count: #{health_monitor_instance_state.prev_records.size}" - end - - def getHealthMonitorState(monitor_instance_id) - return @@instanceStates[monitor_instance_id] - end - - def setHealthMonitorState(monitor_instance_id, health_monitor_instance_state) - @@instanceStates[monitor_instance_id] = health_monitor_instance_state - end - - def getHealthMonitorStatesHash - return @@instanceStates - end - - def computeHealthMonitorState(log, monitor_id, value, config) - #log.debug "computeHealthMonitorState" - #log.info "id: #{monitor_id} value: #{value} config: #{config}" - case monitor_id - when HealthMonitorConstants::CONTAINER_CPU_MONITOR_ID, HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_CPU_MONITOR_ID, 
HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID, HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID - return getState(log, value, config) - end - end - - def getState(log, value, config) - (config.nil? || config['WarnThresholdPercentage'].nil?) ? warn_percentage = nil : config['WarnThresholdPercentage'].to_f - fail_percentage = config['FailThresholdPercentage'].to_f - - if value > fail_percentage - return HEALTH_MONITOR_STATE['FAIL'] - elsif !warn_percentage.nil? && value > warn_percentage - return HEALTH_MONITOR_STATE['WARNING'] - else - return HEALTH_MONITOR_STATE['PASS'] - end - end - end -end - diff --git a/source/code/plugin/HealthMonitorUtils.rb b/source/code/plugin/HealthMonitorUtils.rb deleted file mode 100644 index f47a9062f..000000000 --- a/source/code/plugin/HealthMonitorUtils.rb +++ /dev/null @@ -1,461 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -require_relative 'KubernetesApiClient' -require_relative 'health/health_model_constants' -require 'time' -require 'json' - -class HealthMonitorUtils - - @LogPath = "/var/opt/microsoft/docker-cimprov/log/health_monitors.log" - @log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M - @@hostName = (OMS::Common.get_hostname) - - @@containerMetadata = {} - @@controllerMapping = {} - @@podInventory = {} - @@lastRefreshTime = '2019-01-01T00:00:00Z' - @@nodeInventory = [] - @@clusterId = KubernetesApiClient.getClusterId - - def initialize - end - - class << self - include HealthModel - def build_metrics_hash(metrics_to_collect) - @log.debug "Building Hash of Metrics to Collect #{metrics_to_collect}" - metrics_to_collect_arr = metrics_to_collect.split(',').map(&:strip) - metrics_hash = metrics_to_collect_arr.map {|x| [x.downcase,true]}.to_h - @log.info "Metrics Collected : #{metrics_hash}" - return metrics_hash - end - - def ensure_cpu_memory_capacity_set(cpu_capacity, memory_capacity, hostname) - 
- @log.info "ensure_cpu_memory_capacity_set cpu_capacity #{cpu_capacity} memory_capacity #{memory_capacity}" - if cpu_capacity != 0.0 && memory_capacity != 0.0 - @log.info "CPU And Memory Capacity are already set" - return [cpu_capacity, memory_capacity] - end - - begin - @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) - rescue Exception => e - @log.info "Error when getting nodeInventory from kube API. Exception: #{e.class} Message: #{e.message} " - ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) - end - if !@@nodeInventory.nil? - cpu_capacity_json = KubernetesApiClient.parseNodeLimits(@@nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") - if !cpu_capacity_json.nil? - cpu_capacity_json.each do |cpu_info_node| - if !cpu_info_node['DataItems'][0]['Host'].nil? && cpu_info_node['DataItems'][0]['Host'] == @@hostName - if !cpu_info_node['DataItems'][0]['Collections'][0]['Value'].nil? - cpu_capacity = cpu_info_node['DataItems'][0]['Collections'][0]['Value'] - end - end - end - @log.info "CPU Limit #{cpu_capacity}" - else - @log.info "Error getting cpu_capacity" - end - memory_capacity_json = KubernetesApiClient.parseNodeLimits(@@nodeInventory, "capacity", "memory", "memoryCapacityBytes") - if !memory_capacity_json.nil? - memory_capacity_json.each do |memory_info_node| - if !memory_info_node['DataItems'][0]['Host'].nil? && memory_info_node['DataItems'][0]['Host'] == @@hostName - if !memory_info_node['DataItems'][0]['Collections'][0]['Value'].nil? - memory_capacity = memory_info_node['DataItems'][0]['Collections'][0]['Value'] - end - end - end - @log.info "memory Limit #{memory_capacity}" - else - @log.info "Error getting memory_capacity" - end - return [cpu_capacity, memory_capacity] - end - end - - def getContainerKeyFromInstanceName(instance_name) - if instance_name.nil? 
- return "" - end - size = instance_name.size - instance_name_elements = instance_name.split("/") - key = [instance_name_elements[9], instance_name_elements[10]].join("/") - return key - end - - def getMonitorInstanceId(log, monitor_id, args = []) - #log.debug "getMonitorInstanceId" - string_to_hash = args.join("/") - return "#{monitor_id}-#{Digest::MD5.hexdigest(string_to_hash)}" - end - - def getClusterLabels - labels = {} - cluster_id = @@clusterId - region = KubernetesApiClient.getClusterRegion - labels['container.azm.ms/cluster-region'] = region - if !cluster_id.nil? - cluster_id_elements = cluster_id.split('/') - azure_sub_id = cluster_id_elements[2] - resource_group = cluster_id_elements[4] - cluster_name = cluster_id_elements[8] - labels['container.azm.ms/cluster-subscription-id'] = azure_sub_id - labels['container.azm.ms/cluster-resource-group'] = resource_group - labels['container.azm.ms/cluster-name'] = cluster_name - end - return labels - end - - def getMonitorLabels(log, monitor_id, key: nil, workload_name: nil, node_name: nil, namespace: nil, workload_kind: nil) - monitor_labels = {} - case monitor_id - when HealthMonitorConstants::CONTAINER_CPU_MONITOR_ID, HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID, HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID - if !key.nil? #container - monitor_labels['container.azm.ms/workload-name'] = getContainerControllerName(key) - monitor_labels['container.azm.ms/namespace'] = getContainerNamespace(key) - else - monitor_labels['container.azm.ms/workload-name'] = workload_name.split('~~')[1] - monitor_labels['container.azm.ms/workload-kind'] = workload_kind - monitor_labels['container.azm.ms/namespace'] = namespace - end - when HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID - @@nodeInventory["items"].each do |node| - if !node_name.nil? 
&& !node['metadata']['name'].nil? && node_name == node['metadata']['name'] - #log.debug "Matched node name " - if !node["metadata"].nil? && !node["metadata"]["labels"].nil? - monitor_labels = node["metadata"]["labels"] - end - end - end - end - #log.debug "Labels #{monitor_labels}" - return monitor_labels - end - - def refreshKubernetesApiData(log, hostName, force: false) - #log.debug "refreshKubernetesApiData" - if ( ((Time.now.utc - Time.parse(@@lastRefreshTime)) / 60 ) < 5.0 && !force) - log.debug "Less than 5 minutes since last refresh at #{@@lastRefreshTime}" - return - end - if force - @log.debug "Force Refresh" - end - - begin - - @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) - if !hostName.nil? - podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods?fieldSelector=spec.nodeName%3D#{hostName}").body) - else - podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods").body) - end - podInventory['items'].each do |pod| - has_owner = !pod['metadata']['ownerReferences'].nil? - if !has_owner - workload_name = pod['metadata']['name'] - else - workload_name = pod['metadata']['ownerReferences'][0]['name'] - end - namespace = pod['metadata']['namespace'] - @@controllerMapping[workload_name] = namespace - #log.debug "workload_name #{workload_name} namespace #{namespace}" - pod['spec']['containers'].each do |container| - key = [pod['metadata']['uid'], container['name']].join('/') - - if !container['resources'].empty? && !container['resources']['limits'].nil? && !container['resources']['limits']['cpu'].nil? - cpu_limit_value = KubernetesApiClient.getMetricNumericValue('cpu', container['resources']['limits']['cpu']) - else - @log.info "CPU limit not set for container : #{container['name']}. Using Node Capacity" - #TODO: Send warning health event #bestpractices - cpu_limit_value = @cpu_capacity - end - - if !container['resources'].empty? && !container['resources']['limits'].nil? 
&& !container['resources']['limits']['memory'].nil? - #@log.info "Raw Memory Value #{container['resources']['limits']['memory']}" - memory_limit_value = KubernetesApiClient.getMetricNumericValue('memory', container['resources']['limits']['memory']) - else - @log.info "Memory limit not set for container : #{container['name']}. Using Node Capacity" - memory_limit_value = @memory_capacity - end - - @@containerMetadata[key] = {"cpuLimit" => cpu_limit_value, "memoryLimit" => memory_limit_value, "controllerName" => workload_name, "namespace" => namespace} - end - end - rescue => e - @log.info "Error Refreshing Container Resource Limits #{e.backtrace}" - end - # log.info "Controller Mapping #{@@controllerMapping}" - # log.info "Node Inventory #{@@nodeInventory}" - # log.info "Container Metadata #{@@containerMetadata}" - # log.info "------------------------------------" - @@lastRefreshTime = Time.now.utc.iso8601 - end - - def getContainerMetadata(key) - if @@containerMetadata.has_key?(key) - return @@containerMetadata[key] - else - # This is to handle new containers/controllers that might have come up since the last refresh - @log.info "Adhoc refresh getContainerMetadata" - HealthMonitorUtils.refreshKubernetesApiData(@log, nil, force: true) - if @@containerMetadata.has_key?(key) - return @@containerMetadata[key] - else - return nil - end - end - end - - def getContainerMemoryLimit(key) - if @@containerMetadata.has_key?(key) - return @@containerMetadata[key]['memoryLimit'] - else - @log.info "Adhoc refresh getContainerMemoryLimit" - # This is to handle new containers/controllers that might have come up since the last refresh - HealthMonitorUtils.refreshKubernetesApiData(@log,nil, force: true) - if @@containerMetadata.has_key?(key) - return @@containerMetadata[key]['memoryLimit'] - else - return '' - end - end - end - - def getContainerControllerName(key) - if @@containerMetadata.has_key?(key) - return @@containerMetadata[key]['controllerName'] - else - @log.info "Adhoc 
refresh getContainerControllerName" - # This is to handle new containers/controllers that might have come up since the last refresh - HealthMonitorUtils.refreshKubernetesApiData(@log,nil, force: true) - if @@containerMetadata.has_key?(key) - return @@containerMetadata[key]['controllerName'] - else - return '' - end - end - end - - def getContainerNamespace(key) - if @@containerMetadata.has_key?(key) - return @@containerMetadata[key]['namespace'] - else - @log.info "Adhoc refresh getContainerNamespace" - # This is to handle new containers/controllers that might have come up since the last refresh - HealthMonitorUtils.refreshKubernetesApiData(@log,nil, force: true) - if @@containerMetadata.has_key?(key) - return @@containerMetadata[key]['namespace'] - else - return '' - end - end - end - - # def getControllerNamespace(controller_name) - # if @@controllerMapping.has_key?(controller_name) - # return @@controllerMapping[controller_name] - # else - # @log.info "Adhoc refresh getControllerNamespace" - # # This is to handle new containers/controllers that might have come up since the last refresh - # HealthMonitorUtils.refreshKubernetesApiData(@log,nil, force: true) - # if @@controllerMapping.has_key?(controller_name) - # return @@controllerMapping[controller_name] - # else - # return '' - # end - # end - # end - - def getClusterCpuMemoryCapacity - begin - node_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) - cluster_cpu_capacity = 0.0 - cluster_memory_capacity = 0.0 - if !node_inventory.empty? - node_inventory['items'].each do |node| - cpu_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "cpu", "cpuCapacityNanoCores") - if !cpu_capacity_json.nil? - cpu_capacity_json.each do |cpu_capacity_node| - if !cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil? 
- cluster_cpu_capacity += cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'] - end - end - @log.info "Cluster CPU Limit #{cluster_cpu_capacity}" - else - @log.info "Error getting cpu_capacity" - end - memory_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "memory", "memoryCapacityBytes") - if !memory_capacity_json.nil? - memory_capacity_json.each do |memory_capacity_node| - if !memory_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil? - cluster_memory_capacity += memory_capacity_node['DataItems'][0]['Collections'][0]['Value'] - end - end - @log.info "Cluster Memory Limit #{cluster_memory_capacity}" - else - @log.info "Error getting memory_capacity" - end - end - else - @log.info "Unable to get cpu and memory capacity" - return [0.0, 0.0] - end - return [cluster_cpu_capacity, cluster_memory_capacity] - rescue => e - @log.info e - end - end - - def getResourceSubscription(pod_inventory, metric_name, metric_capacity) - subscription = 0.0 - if !pod_inventory.empty? - pod_inventory['items'].each do |pod| - pod['spec']['containers'].each do |container| - if !container['resources']['requests'].nil? && !container['resources']['requests'][metric_name].nil? - subscription += KubernetesApiClient.getMetricNumericValue(metric_name, container['resources']['requests'][metric_name]) - end - end - end - end - #@log.debug "#{metric_name} Subscription #{subscription}" - return subscription - end - - def getHealthMonitorConfig - health_monitor_config = {} - begin - file = File.open('/opt/microsoft/omsagent/plugin/healthmonitorconfig.json', "r") - if !file.nil? 
- fileContents = file.read - health_monitor_config = JSON.parse(fileContents) - file.close - end - rescue => e - @log.info "Error when opening health config file #{e}" - end - return health_monitor_config - end - - def getLogHandle - return @log - end - - def getPodsReadyHash(pod_inventory, deployment_inventory) - pods_ready_percentage_hash = {} - deployment_lookup = {} - deployment_inventory['items'].each do |deployment| - match_labels = deployment['spec']['selector']['matchLabels'].to_h - namespace = deployment['metadata']['namespace'] - match_labels.each{|k,v| - deployment_lookup["#{namespace}-#{k}=#{v}"] = "#{deployment['metadata']['namespace']}~~#{deployment['metadata']['name']}" - } - end - pod_inventory['items'].each do |pod| - begin - # controller_name = pod['metadata']['ownerReferences'][0]['name'] - # namespace = pod['metadata']['namespace'] - # status = pod['status']['phase'] - - # if pods_ready_percentage_hash.key?(controller_name) - # total_pods = pods_ready_percentage_hash[controller_name]['totalPods'] - # pods_ready = pods_ready_percentage_hash[controller_name]['podsReady'] - # else - # total_pods = 0 - # pods_ready = 0 - # end - - # total_pods += 1 - # if status == 'Running' - # pods_ready += 1 - # end - # pods_ready_percentage_hash[controller_name] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace} - has_owner = !pod['metadata']['ownerReferences'].nil? - owner_kind = '' - if has_owner - owner_kind = pod['metadata']['ownerReferences'][0]['kind'] - controller_name = pod['metadata']['ownerReferences'][0]['name'] - else - owner_kind = pod['kind'] - controller_name = pod['metadata']['name'] - #@log.info "#{JSON.pretty_generate(pod)}" - end - - namespace = pod['metadata']['namespace'] - status = pod['status']['phase'] - - workload_name = '' - if owner_kind.nil? 
- owner_kind = 'Pod' - end - case owner_kind.downcase - when 'job' - # we are excluding jobs - next - when 'replicaset' - # get the labels, and see if there is a match. If there is, it is the deployment. If not, use replica set name/controller name - labels = pod['metadata']['labels'].to_h - labels.each {|k,v| - lookup_key = "#{namespace}-#{k}=#{v}" - if deployment_lookup.key?(lookup_key) - workload_name = deployment_lookup[lookup_key] - break - end - } - if workload_name.empty? - workload_name = "#{namespace}~~#{controller_name}" - end - when 'daemonset' - workload_name = "#{namespace}~~#{controller_name}" - else - workload_name = "#{namespace}~~#{pod['metadata']['name']}" - end - - if pods_ready_percentage_hash.key?(workload_name) - total_pods = pods_ready_percentage_hash[workload_name]['totalPods'] - pods_ready = pods_ready_percentage_hash[workload_name]['podsReady'] - else - total_pods = 0 - pods_ready = 0 - end - - total_pods += 1 - if status == 'Running' - pods_ready += 1 - end - - pods_ready_percentage_hash[workload_name] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace, 'workload_name' => workload_name, 'kind' => owner_kind} - rescue => e - @log.info "Error when processing pod #{pod['metadata']['name']} #{e.message}" - end - end - return pods_ready_percentage_hash - end - - def getNodeStateFromNodeConditions(node_conditions) - pass = false - node_conditions.each do |condition| - type = condition['type'] - status = condition['status'] - - if ((type == "NetworkUnavailable" || type == "OutOfDisk") && (status == 'True' || status == 'Unknown')) - return "fail" - elsif ((type == "DiskPressure" || type == "MemoryPressure" || type == "PIDPressure") && (status == 'True' || status == 'Unknown')) - return "warn" - elsif type == "Ready" && status == 'True' - pass = true - end - end - - if pass - return "pass" - else - return "fail" - end - end - end -end \ No newline at end of file diff --git 
a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 8ca9a7d56..fd1efadd1 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -86,7 +86,7 @@ def filter_stream(tag, es) @health_signal_timeout, node_name: record[HealthMonitorRecordFields::NODE_NAME] ) - filtered_records.push(MonitorStateTransition.new( + filtered_records.push(HealthMonitorRecord.new( filtered_record[HealthMonitorRecordFields::MONITOR_ID], filtered_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], filtered_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], @@ -117,12 +117,12 @@ def filter_stream(tag, es) record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor.monitor_instance_id record[HealthMonitorRecordFields::MONITOR_LABELS] = monitor.labels.to_json record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId - record[HealthMonitorRecordFields::OLD_STATE] = monitor.old_state - record[HealthMonitorRecordFields::NEW_STATE] = monitor.new_state + #record[HealthMonitorRecordFields::OLD_STATE] = monitor.old_state + #record[HealthMonitorRecordFields::NEW_STATE] = monitor.new_state record[HealthMonitorRecordFields::DETAILS] = monitor.details.to_json if monitor.methods.include? :details record[HealthMonitorRecordFields::MONITOR_CONFIG] = monitor.config if monitor.methods.include? 
:config record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = Time.now.utc.iso8601 - record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = monitor.transition_time + record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = monitor.transition_date_time new_es.add(time, record) } diff --git a/source/code/plugin/health/aggregate_monitor.rb b/source/code/plugin/health/aggregate_monitor.rb index 76eb29310..1d505542e 100644 --- a/source/code/plugin/health/aggregate_monitor.rb +++ b/source/code/plugin/health/aggregate_monitor.rb @@ -5,25 +5,23 @@ module HealthModel class AggregateMonitor - attr_accessor :monitor_id, :monitor_instance_id, :old_state, :new_state, :transition_time, :aggregation_algorithm, :aggregation_algorithm_params, :labels, :is_aggregate_monitor + attr_accessor :monitor_id, :monitor_instance_id, :operational_state, :transition_date_time, :aggregation_algorithm, :aggregation_algorithm_params, :labels, :is_aggregate_monitor, :details attr_reader :member_monitors # constructor def initialize( monitor_id, monitor_instance_id, - old_state, - new_state, - transition_time, + operational_state, + transition_date_time, aggregation_algorithm, aggregation_algorithm_params, labels ) @monitor_id = monitor_id @monitor_instance_id = monitor_instance_id - @old_state = old_state - @new_state = new_state - @transition_time = transition_time + @operational_state = operational_state + @transition_date_time = transition_date_time @aggregation_algorithm = aggregation_algorithm || AggregationAlgorithm::WORSTOF @aggregation_algorithm_params = aggregation_algorithm_params @labels = labels @@ -54,17 +52,38 @@ def get_member_monitors def calculate_state(monitor_set) case @aggregation_algorithm when AggregationAlgorithm::WORSTOF - @old_state = calculate_worst_of_state(monitor_set, 'old_state') - @new_state = calculate_worst_of_state(monitor_set, 'new_state') + @operational_state = calculate_worst_of_state(monitor_set) when AggregationAlgorithm::PERCENTAGE @state = 
calculate_percentage_state(monitor_set) end end + def calculate_details(monitor_set) + @details = {} + @details['details'] = {} + @details['state'] = operational_state + @details['timestamp'] = transition_date_time + ids = [] + member_monitor_instance_ids = get_member_monitors + member_monitor_instance_ids.each{|member_monitor_id| + member_monitor = monitor_set.get_monitor(member_monitor_id) + member_state = member_monitor.operational_state + if @details.key?(member_state) + ids = details[member_state] + if !ids.include?(member_monitor.monitor_instance_id) + ids.push(member_monitor.monitor_instance_id) + end + @details['details'][member_state] = ids + else + @details['details'][member_state] = [member_monitor.monitor_instance_id] + end + } + end + # calculates the worst of state, given the member monitors - def calculate_worst_of_state(monitor_set, state_type) + def calculate_worst_of_state(monitor_set) - member_state_counts = map_member_monitor_states(monitor_set, state_type) + member_state_counts = map_member_monitor_states(monitor_set) if member_state_counts.length === 0 return MonitorState::NONE @@ -93,7 +112,7 @@ def calculate_percentage_state end # maps states of member monitors to counts - def map_member_monitor_states(monitor_set, state_type) + def map_member_monitor_states(monitor_set) member_monitor_instance_ids = get_member_monitors if member_monitor_instance_ids.nil? 
|| member_monitor_instance_ids.size == 0 return {} @@ -104,7 +123,7 @@ def map_member_monitor_states(monitor_set, state_type) member_monitor_instance_ids.each {|monitor_instance_id| member_monitor = monitor_set.get_monitor(monitor_instance_id) - monitor_state = member_monitor.send(state_type); + monitor_state = member_monitor.operational_state if !state_counts.key?(monitor_state) state_counts[monitor_state] = 1 diff --git a/source/code/plugin/health/aggregate_monitor_state_finalizer.rb b/source/code/plugin/health/aggregate_monitor_state_finalizer.rb index 0ab7e61c9..e7140375c 100644 --- a/source/code/plugin/health/aggregate_monitor_state_finalizer.rb +++ b/source/code/plugin/health/aggregate_monitor_state_finalizer.rb @@ -6,6 +6,11 @@ def finalize(monitor_set) if !top_level_monitor.nil? calculate_subtree_state(top_level_monitor, monitor_set) end + monitor_set.get_map.each{|k,v| + if v.is_aggregate_monitor + v.calculate_details(monitor_set) + end + } end private diff --git a/source/code/plugin/health/state_transition_processor.rb b/source/code/plugin/health/health_hierarchy_builder.rb similarity index 86% rename from source/code/plugin/health/state_transition_processor.rb rename to source/code/plugin/health/health_hierarchy_builder.rb index 6025a49cc..b01759d52 100644 --- a/source/code/plugin/health/state_transition_processor.rb +++ b/source/code/plugin/health/health_hierarchy_builder.rb @@ -1,6 +1,6 @@ require 'json' module HealthModel - class StateTransitionProcessor + class HealthHierarchyBuilder attr_accessor :health_model_definition, :monitor_factory @@ -17,15 +17,13 @@ def initialize(health_model_definition, monitor_factory) @monitor_factory = monitor_factory end - def process_state_transition(monitor_state_transition, monitor_set) - if !monitor_state_transition.is_a?(MonitorStateTransition) - raise "Unexpected Type #{monitor_state_transition.class}" + def process_record(health_monitor_record, monitor_set) + if !health_monitor_record.is_a?(HealthMonitorRecord) 
+ raise "Unexpected Type #{health_monitor_record.class}" end - puts "process_state_transition for #{monitor_state_transition.monitor_id}" - # monitor state transition will always be on a unit monitor - child_monitor = @monitor_factory.create_unit_monitor(monitor_state_transition) + child_monitor = @monitor_factory.create_unit_monitor(health_monitor_record) monitor_set.add_or_update(child_monitor) parent_monitor_id = @health_model_definition.get_parent_monitor_id(child_monitor) monitor_labels = child_monitor.labels @@ -57,8 +55,8 @@ def process_state_transition(monitor_state_transition, monitor_set) # required to calculate the rollup state parent_monitor.add_member_monitor(child_monitor.monitor_instance_id) # update to the earliest of the transition times of child monitors - if child_monitor.transition_time < parent_monitor.transition_time - parent_monitor.transition_time = child_monitor.transition_time + if child_monitor.transition_date_time < parent_monitor.transition_date_time + parent_monitor.transition_date_time = child_monitor.transition_date_time end end diff --git a/source/code/plugin/health/health_kubernetes_resources.rb b/source/code/plugin/health/health_kubernetes_resources.rb new file mode 100644 index 000000000..035367a44 --- /dev/null +++ b/source/code/plugin/health/health_kubernetes_resources.rb @@ -0,0 +1,19 @@ +module HealthModel + class HealthKubernetesResources + + attr_accessor :node_inventory, :pod_inventory + + def initialize(node_inventory, pod_inventory) + @node_inventory = node_inventory || [] + @pod_inventory = pod_inventory || [] + end + + def get_node_inventory + return @node_inventory + end + + def get_pod_inventory + return @pod_inventory + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_missing_monitor_handler.rb b/source/code/plugin/health/health_missing_monitor_handler.rb new file mode 100644 index 000000000..aad3c2980 --- /dev/null +++ 
b/source/code/plugin/health/health_missing_monitor_handler.rb @@ -0,0 +1,56 @@ +module HealthModel + class HealthMissingMonitorHandler + + attr_accessor :last_sent_monitors, :unknown_state_candidates + + def initialize(last_sent_monitors, unknown_state_candidates) + @last_sent_monitors = {} + @unknown_state_candidates = {} + @node_inventory = {} + @workload_inventory ={} + end + + def detect_missing_signals(received_records) + nodes = get_node_inventory(received_records) + workloads = get_workload_inventory(received_records) + + received_records.each{|record| + monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] + case monitor_id + when HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID + # node monitor processing + # check if present in last_sent_monitors + # if not present + when HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID + # pods ready percentage processing + when HealthMonitorConstants::KUBE_API_STATUS + # kube api status + end + } + end + + def get_node_inventory(received_records) + @node_inventory = [] + node_records = received_records.select {|record| record[HealthMonitorRecordFields::MONITOR_ID] == HealthMonitorConstants::NODE_CONDITION_MONITOR_ID} + node_records.each{|node_record| + node_name = JSON.parse(node_record[HealthMonitorRecordFields::MONITOR_LABELS])['kubernetes.io/hostname'] + @node_inventory.push(node_name) if node_name + } + end + + def get_workload_inventory(received_records) + @workload_inventory = [] + workload_records = received_records.select {|record| + (record[HealthMonitorRecordFields::MONITOR_ID] == HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID || + record[HealthMonitorRecordFields::MONITOR_ID] == HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID + ) + } + workload_records.each{|workload_record| + workload_name = 
JSON.parse(workload_record[HealthMonitorRecordFields::MONITOR_LABELS])['container.azm.ms/workload-name'] + @workload_inventory.push(workload_name) + } + end + + + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_model_builder.rb b/source/code/plugin/health/health_model_builder.rb index 2e93ab44e..ecc1ad8d4 100644 --- a/source/code/plugin/health/health_model_builder.rb +++ b/source/code/plugin/health/health_model_builder.rb @@ -3,19 +3,19 @@ module HealthModel class HealthModelBuilder - attr_accessor :state_transition_processor, :state_finalizers, :monitor_set + attr_accessor :hierarchy_builder, :state_finalizers, :monitor_set attr_reader :last_sent_monitors - def initialize(state_transition_processor, state_finalizers, monitor_set) - @state_transition_processor = state_transition_processor + def initialize(hierarchy_builder, state_finalizers, monitor_set) + @hierarchy_builder = hierarchy_builder @state_finalizers = state_finalizers @monitor_set = monitor_set @last_sent_monitors = {} end - def process_state_transitions(state_transitions) - state_transitions.each{|transition| - @state_transition_processor.process_state_transition(transition, @monitor_set) + def process_records(health_records) + health_records.each{|health_record| + @hierarchy_builder.process_record(health_record, @monitor_set) } end @@ -32,47 +32,7 @@ def finalize_model finalizer.finalize(@monitor_set) } - # return only those monitors whose state has changed, ALWAYS including the cluster level monitor - monitors_map = get_changed_monitors - update_last_sent_monitors - clear_monitors(monitors_map.size) - return monitors_map - end - - private - def get_changed_monitors - changed_monitors = {} - # always send cluster monitor as a 'heartbeat' - top_level_monitor = @monitor_set.get_monitor(MonitorId::CLUSTER) - if top_level_monitor.nil? 
- top_level_monitor = AggregateMonitor.new(MonitorId::CLUSTER, MonitorId::CLUSTER, @last_sent_monitors[MonitorId::CLUSTER].old_state, @last_sent_monitors[MonitorId::CLUSTER].new_state, @last_sent_monitors[MonitorId::CLUSTER].transition_time, AggregationAlgorithm::WORSTOF, nil, {}) - end - changed_monitors[MonitorId::CLUSTER] = top_level_monitor - - @monitor_set.get_map.each{|monitor_instance_id, monitor| - if @last_sent_monitors.key?(monitor_instance_id) - last_sent_monitor_state = @last_sent_monitors[monitor_instance_id].new_state - if last_sent_monitor_state.downcase != monitor.new_state.downcase - changed_monitors[monitor_instance_id] = monitor - end - else - changed_monitors[monitor_instance_id] = monitor - end - } - return changed_monitors - end - - def update_last_sent_monitors - @monitor_set.get_map.map{|instance_id, monitor| - @last_sent_monitors[instance_id] = monitor - } - end - - def clear_monitors(size) - @monitor_set = MonitorSet.new - if size == 1 - @last_sent_monitors = @last_sent_monitors.select {|k,v| k.downcase == MonitorId::CLUSTER} - end + return @monitor_set.get_map end end diff --git a/source/code/plugin/health/health_model_constants.rb b/source/code/plugin/health/health_model_constants.rb index b40d0344f..fe5757369 100644 --- a/source/code/plugin/health/health_model_constants.rb +++ b/source/code/plugin/health/health_model_constants.rb @@ -5,6 +5,7 @@ class MonitorState WARNING = "warn" NONE = "none" HEALTHY = "pass" + UNKNOWN = "unknown" end class AggregationAlgorithm @@ -60,4 +61,12 @@ class HealthMonitorConstants USER_WORKLOAD_PODS_READY_MONITOR_ID = "user_workload_pods_ready" SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID = "system_workload_pods_ready" end + + class HealthMonitorStates + PASS = "pass" + FAIL = "fail" + WARNING = "warn" + NONE = "none" + UNKNOWN = "unknown" + end end \ No newline at end of file diff --git a/source/code/plugin/health/health_model_definition.rb b/source/code/plugin/health/health_model_definition.rb index 
6540c9e92..f290d8b60 100644 --- a/source/code/plugin/health/health_model_definition.rb +++ b/source/code/plugin/health/health_model_definition.rb @@ -20,7 +20,6 @@ def get_parent_monitor_id(monitor) if parent_monitor_id.nil? conditions = @health_model_definition[monitor_id]['conditions'] if !conditions.nil? && conditions.is_a?(Array) - puts "Conditions" labels = monitor.labels conditions.each{|condition| left = "#{labels[condition['key']]}" diff --git a/source/code/plugin/health/health_monitor_optimizer.rb b/source/code/plugin/health/health_monitor_optimizer.rb new file mode 100644 index 000000000..b33c8a986 --- /dev/null +++ b/source/code/plugin/health/health_monitor_optimizer.rb @@ -0,0 +1,52 @@ +module HealthModel + class HealthMonitorOptimizer + #ctor + def initialize + @@health_signal_timeout = 240 + @@first_record_sent = {} + end + + def should_send(monitor_instance_id, health_monitor_state, health_monitor_config) + + health_monitor_instance_state = health_monitor_state.get_state(monitor_instance_id) + health_monitor_records = health_monitor_instance_state.prev_records + health_monitor_config['ConsecutiveSamplesForStateTransition'].nil? ? 
samples_to_check = 1 : samples_to_check = health_monitor_config['ConsecutiveSamplesForStateTransition'].to_i + + latest_record = health_monitor_records[health_monitor_records.size-1] #since we push new records to the end, and remove oldest records from the beginning + latest_record_state = latest_record["state"] + latest_record_time = latest_record["timestamp"] #string representation of time + + new_state = health_monitor_instance_state.new_state + prev_sent_time = health_monitor_instance_state.prev_sent_record_time + time_first_observed = health_monitor_instance_state.state_change_time + + if latest_record_state.downcase == new_state.downcase + time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60 + if time_elapsed > @@health_signal_timeout # minutes + return true + elsif !@@first_record_sent.key?(monitor_instance_id) + @@first_record_sent[monitor_instance_id] = true + return true + else + return false + end + else + if samples_to_check == 1 + return true + elsif health_monitor_instance_state.prev_records.size == 1 && samples_to_check > 1 + return true + elsif health_monitor_instance_state.prev_records.size < samples_to_check + return false + else + # state change from previous sent state to latest record state + #check state of last n records to see if they are all in the same state + if (health_monitor_instance_state.is_state_change_consistent) + return true + else + return false + end + end + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_monitor_provider.rb b/source/code/plugin/health/health_monitor_provider.rb new file mode 100644 index 000000000..dbcdd1daa --- /dev/null +++ b/source/code/plugin/health/health_monitor_provider.rb @@ -0,0 +1,110 @@ +module HealthModel + class HealthMonitorProvider + + attr_accessor :cluster_labels, :health_kubernetes_resources, :monitor_configuration_path + attr_reader :monitor_configuration + + def initialize(cluster_labels, 
health_kubernetes_resources, monitor_configuration_path) + @cluster_labels = Hash.new + cluster_labels.each{|k,v| @cluster_labels[k] = v} + @health_kubernetes_resources = health_kubernetes_resources + @monitor_configuration_path = monitor_configuration_path + begin + @monitor_configuration = {} + file = File.open(@monitor_configuration_path, "r") + if !file.nil? + fileContents = file.read + @monitor_configuration = JSON.parse(fileContents) + file.close + end + rescue => e + @log.info "Error when opening health config file #{e}" + end + end + + def get_record(health_monitor_record, health_monitor_state) + + labels = Hash.new + @cluster_labels.each{|k,v| labels[k] = v} + monitor_id = health_monitor_record[HealthMonitorRecordFields::MONITOR_ID] + monitor_instance_id = health_monitor_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] + health_monitor_instance_state = health_monitor_state.get_state(monitor_instance_id) + + monitor_labels = get_labels(health_monitor_record, health_monitor_instance_state) + #log.debug "Monitor Labels : #{monitor_labels}" + + if !monitor_labels.empty? 
+ monitor_labels.keys.each do |key| + labels[key] = monitor_labels[key] + end + end + + #log.debug "Labels after adding Monitor Labels #{labels}" + prev_records = health_monitor_instance_state.prev_records + time_first_observed = health_monitor_instance_state.state_change_time # the oldest collection time + new_state = health_monitor_instance_state.new_state # this is updated before formatRecord is called + old_state = health_monitor_instance_state.old_state + + config = get_config(monitor_id) + + if prev_records.size == 1 + details = prev_records[0] + else + details = prev_records + end + + time_observed = Time.now.utc.iso8601 + + monitor_record = {} + monitor_record["ClusterId"] = 'fake_cluster_id' #KubernetesApiClient.getClusterId + monitor_record["MonitorLabels"] = labels.to_json + monitor_record["MonitorId"] = monitor_id + monitor_record["MonitorInstanceId"] = monitor_instance_id + monitor_record["NewState"] = new_state + monitor_record["OldState"] = old_state + monitor_record["Details"] = details + monitor_record["MonitorConfig"] = config.to_json + monitor_record["AgentCollectionTime"] = Time.now.utc.iso8601 + monitor_record["TimeFirstObserved"] = time_first_observed + + #log.debug "HealthMonitor Record #{monitor_record}" + + return monitor_record + end + + def get_config(monitor_id) + if @monitor_configuration.key?(monitor_id) + return @monitor_configuration[monitor_id] + else + return {} + end + end + + def get_labels(health_monitor_record, health_monitor_instance_state) + monitor_labels = {} + monitor_id = health_monitor_record[HealthMonitorRecordFields::MONITOR_ID] + case monitor_id + when HealthMonitorConstants::CONTAINER_CPU_MONITOR_ID, HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID, HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID + + namespace = health_monitor_record[HealthMonitorRecordFields::DETAILS]['details']['namespace'] + workload_name = 
health_monitor_record[HealthMonitorRecordFields::DETAILS]['details']['workloadName'] + workload_kind = health_monitor_record[HealthMonitorRecordFields::DETAILS]['details']['workloadKind'] + + monitor_labels['container.azm.ms/workload-name'] = workload_name.split('~~')[1] + monitor_labels['container.azm.ms/workload-kind'] = workload_kind + monitor_labels['container.azm.ms/namespace'] = namespace + + when HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID + node_name = health_monitor_record[HealthMonitorRecordFields::NODE_NAME] + @health_kubernetes_resources.get_node_inventory['items'].each do |node| + if !node_name.nil? && !node['metadata']['name'].nil? && node_name == node['metadata']['name'] + if !node["metadata"].nil? && !node["metadata"]["labels"].nil? + monitor_labels = node["metadata"]["labels"] + end + end + end + end + return monitor_labels + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/monitor_state_transition.rb b/source/code/plugin/health/health_monitor_record.rb similarity index 63% rename from source/code/plugin/health/monitor_state_transition.rb rename to source/code/plugin/health/health_monitor_record.rb index 3a5756b73..81e6d51db 100644 --- a/source/code/plugin/health/monitor_state_transition.rb +++ b/source/code/plugin/health/health_monitor_record.rb @@ -1,9 +1,8 @@ -MonitorStateTransition = Struct.new( +HealthMonitorRecord = Struct.new( :monitor_id, :monitor_instance_id, :transition_date_time, - :old_state, - :new_state, + :operational_state, :labels, :config, :details diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb new file mode 100644 index 000000000..858b7da4e --- /dev/null +++ b/source/code/plugin/health/health_monitor_state.rb @@ -0,0 +1,174 @@ +module HealthModel + + HealthMonitorInstanceState = Struct.new(:prev_sent_record_time, :old_state, 
:new_state, :state_change_time, :prev_records, :is_state_change_consistent, :should_send) do + end + + # Class that is used to store the last sent state and latest monitors + # provides services like + # get_state -- returns the current state and details + # update_instance -- updates the state of the health monitor history records + # set_state -- sets the last health monitor state + class HealthMonitorState + + + def initialize + @@monitor_states = {} + @@first_record_sent = {} + @@health_signal_timeout = 240 + end + + def get_state(monitor_instance_id) + if @@monitor_states.key?(monitor_instance_id) + return @@monitor_states[monitor_instance_id] + end + end + + def set_state(monitor_instance_id, health_monitor_instance_state) + @@monitor_states[monitor_instance_id] = health_monitor_instance_state + end +=begin + +when do u send? +--------------- +1. if the signal hasnt been sent before +2. if there is a "consistent" state change for monitors +3. if the signal is stale (> 4hrs) +=end + def update_state(monitor, #UnitMonitor/AggregateMonitor + monitor_config #Hash + ) + samples_to_keep = 1 + monitor_instance_id = monitor.monitor_instance_id + + health_monitor_instance_state = get_state(monitor_instance_id) + if !health_monitor_instance_state.nil? + health_monitor_instance_state.is_state_change_consistent = false + health_monitor_instance_state.should_send = false + set_state(monitor_instance_id, health_monitor_instance_state) # reset is_state_change_consistent + end + + if !monitor_config.nil? && !monitor_config['ConsecutiveSamplesForStateTransition'].nil? 
+ samples_to_keep = monitor_config['ConsecutiveSamplesForStateTransition'].to_i + end + + if @@monitor_states.key?(monitor_instance_id) + health_monitor_instance_state = @@monitor_states[monitor_instance_id] + health_monitor_records = health_monitor_instance_state.prev_records #This should be an array + + if health_monitor_records.size == samples_to_keep + health_monitor_records.delete_at(0) + end + health_monitor_records.push(monitor.details) + health_monitor_instance_state.prev_records = health_monitor_records + @@monitor_states[monitor_instance_id] = health_monitor_instance_state + else + # if samples_to_keep == 1, then set new state to be the health_monitor_record state, else set it as none + + old_state = HealthMonitorStates::NONE + new_state = HealthMonitorStates::NONE + if samples_to_keep == 1 + new_state = monitor.operational_state + end + + health_monitor_instance_state = HealthMonitorInstanceState.new( + monitor.transition_date_time, + old_state, + new_state, + monitor.transition_date_time, + [monitor.details]) + + health_monitor_instance_state.should_send = true + @@monitor_states[monitor_instance_id] = health_monitor_instance_state + end + + + # update old and new state based on the history and latest record. + # TODO: this is a little hairy. Simplify + + + health_monitor_records = health_monitor_instance_state.prev_records + monitor_config['ConsecutiveSamplesForStateTransition'].nil? ? 
samples_to_check = 1 : samples_to_check = monitor_config['ConsecutiveSamplesForStateTransition'].to_i + + latest_record = health_monitor_records[health_monitor_records.size-1] #since we push new records to the end, and remove oldest records from the beginning + latest_record_state = latest_record["state"] + latest_record_time = latest_record["timestamp"] #string representation of time + + new_state = health_monitor_instance_state.new_state + prev_sent_time = health_monitor_instance_state.prev_sent_record_time + time_first_observed = health_monitor_instance_state.state_change_time + + # if the last sent state (news_state is different from latest monitor operational_state) + if latest_record_state.downcase == new_state.downcase + time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60 + # check if health signal has "timed out" + if time_elapsed > @@health_signal_timeout # minutes + # update record for last sent record time + health_monitor_instance_state.old_state = health_monitor_instance_state.new_state + health_monitor_instance_state.new_state = latest_record_state + health_monitor_instance_state.prev_sent_record_time = latest_record_time + health_monitor_instance_state.should_send = true + #log.debug "After Updating Monitor State #{health_monitor_instance_state}" + set_state(monitor_instance_id, health_monitor_instance_state) + # check if the first record has been sent + elsif !@@first_record_sent.key?(monitor_instance_id) + @@first_record_sent[monitor_instance_id] = true + health_monitor_instance_state.should_send = true + set_state(monitor_instance_id, health_monitor_instance_state) + end + # latest state is different that last sent state + else + # if it is a monitor that needs to instantly notify on state change, update the state + # mark the monitor to be sent + if samples_to_check == 1 + health_monitor_instance_state.old_state = health_monitor_instance_state.new_state #initially old = new, so when state change occurs, assign old to 
be new, and set new to be the latest record state + health_monitor_instance_state.new_state = latest_record_state + health_monitor_instance_state.state_change_time = latest_record_time + health_monitor_instance_state.prev_sent_record_time = latest_record_time + health_monitor_instance_state.should_send = true + if !@@first_record_sent.key?(monitor_instance_id) + @@first_record_sent[monitor_instance_id] = true + end + set_state(monitor_instance_id, health_monitor_instance_state) + else + # state change from previous sent state to latest record state + #check state of last n records to see if they are all in the same state + if (is_state_change_consistent(health_monitor_records, samples_to_keep)) + first_record = health_monitor_records[0] + latest_record = health_monitor_records[health_monitor_records.size-1] #since we push new records to the end, and remove oldest records from the beginning + latest_record_state = latest_record["state"] + latest_record_time = latest_record["timestamp"] #string representation of time + + health_monitor_instance_state.old_state = health_monitor_instance_state.new_state + health_monitor_instance_state.is_state_change_consistent = true # This way it wont be recomputed in the optimizer. + health_monitor_instance_state.should_send = true + health_monitor_instance_state.new_state = latest_record_state + health_monitor_instance_state.prev_sent_record_time = latest_record_time + health_monitor_instance_state.state_change_time = first_record["timestamp"] + + set_state(monitor_instance_id, health_monitor_instance_state) + + if !@@first_record_sent.key?(monitor_instance_id) + @@first_record_sent[monitor_instance_id] = true + end + end + end + end + end + + private + def is_state_change_consistent(health_monitor_records, samples_to_check) + if health_monitor_records.nil? 
|| health_monitor_records.size == 0 || health_monitor_records.size < samples_to_check + return false + end + i = 0 + while i < health_monitor_records.size - 1 + #log.debug "Prev: #{health_monitor_records[i].state} Current: #{health_monitor_records[i + 1].state}" + if health_monitor_records[i]["state"] != health_monitor_records[i + 1]["state"] + return false + end + i += 1 + end + return true + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb new file mode 100644 index 000000000..f543e9a13 --- /dev/null +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -0,0 +1,23 @@ +module HealthModel + # static class that provides a bunch of utility methods + class HealthMonitorUtils + + @@node_inventory = [] + + class << self + # compute the percentage state given a value and a monitor configuration + def compute_percentage_state(value, config) + (config.nil? || config['WarnThresholdPercentage'].nil?) ? warn_percentage = nil : config['WarnThresholdPercentage'].to_f + fail_percentage = config['FailThresholdPercentage'].to_f + + if value > fail_percentage + return HealthMonitorState::FAIL + elsif !warn_percentage.nil? 
&& value > warn_percentage + return HealthMonitorState::WARNING + else + return HealthMonitorStatePASS + end + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/monitor_factory.rb b/source/code/plugin/health/monitor_factory.rb index f09b9f1a6..18fa2307f 100644 --- a/source/code/plugin/health/monitor_factory.rb +++ b/source/code/plugin/health/monitor_factory.rb @@ -5,23 +5,21 @@ def initialize end - def create_unit_monitor(monitor_state_transition) - return UnitMonitor.new(monitor_state_transition.monitor_id, - monitor_state_transition.monitor_instance_id, - monitor_state_transition.old_state, - monitor_state_transition.new_state, - monitor_state_transition.transition_date_time, - monitor_state_transition.labels, - monitor_state_transition.config, - monitor_state_transition.details) + def create_unit_monitor(monitor_record) + return UnitMonitor.new(monitor_record.monitor_id, + monitor_record.monitor_instance_id, + monitor_record.operational_state, + monitor_record.transition_date_time, + monitor_record.labels, + monitor_record.config, + monitor_record.details) end def create_aggregate_monitor(monitor_id, monitor_instance_id, labels, aggregation_algorithm, aggregation_algorithm_params, child_monitor) return AggregateMonitor.new(monitor_id, monitor_instance_id, - child_monitor.old_state, - child_monitor.new_state, - child_monitor.transition_time, + child_monitor.operational_state, + child_monitor.transition_date_time, aggregation_algorithm, aggregation_algorithm_params, labels) diff --git a/source/code/plugin/health/unit_monitor.rb b/source/code/plugin/health/unit_monitor.rb index 7fd8d201b..49649d6ae 100644 --- a/source/code/plugin/health/unit_monitor.rb +++ b/source/code/plugin/health/unit_monitor.rb @@ -4,16 +4,15 @@ module HealthModel class UnitMonitor - attr_accessor :monitor_id, :monitor_instance_id, :old_state, :new_state, :transition_time, :labels, :config, :details, :is_aggregate_monitor + attr_accessor :monitor_id, 
:monitor_instance_id, :operational_state, :transition_date_time, :labels, :config, :details, :is_aggregate_monitor # constructor - def initialize(monitor_id, monitor_instance_id, old_state, new_state, transition_time, labels, config, details) + def initialize(monitor_id, monitor_instance_id, operational_state, transition_date_time, labels, config, details) @monitor_id = monitor_id @monitor_instance_id = monitor_instance_id - @old_state = old_state - @new_state = new_state - @transition_time = transition_time - @labels = JSON.parse(labels) + @transition_date_time = transition_date_time + @operational_state = operational_state + @labels = labels @config = config @details = details @is_aggregate_monitor = false diff --git a/test/code/plugin/filter_health_model_builder_test.rb b/test/code/plugin/filter_health_model_builder_test.rb index 48e85b7fe..b7a105264 100644 --- a/test/code/plugin/filter_health_model_builder_test.rb +++ b/test/code/plugin/filter_health_model_builder_test.rb @@ -13,11 +13,11 @@ def test_event_stream health_definition_path = 'C:\AzureMonitor\ContainerInsights\Docker-Provider\installer\conf\health_model_definition.json' health_model_definition = HealthModelDefinition.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) monitor_factory = MonitorFactory.new - state_transition_processor = StateTransitionProcessor.new(health_model_definition, monitor_factory) + hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. 
For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side state_finalizers = [AggregateMonitorStateFinalizer.new] monitor_set = MonitorSet.new - model_builder = HealthModelBuilder.new(state_transition_processor, state_finalizers, monitor_set) + model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) i = 1 loop do @@ -25,14 +25,13 @@ def test_event_stream file = File.read(mock_data_path) data = JSON.parse(file) - state_transitions = [] + health_monitor_records = [] data.each do |record| - state_transition = MonitorStateTransition.new( + health_monitor_record = HealthMonitorRecord.new( record[HealthMonitorRecordFields::MONITOR_ID], record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], - record[HealthMonitorRecordFields::OLD_STATE], - record[HealthMonitorRecordFields::NEW_STATE], + record[HealthMonitorRecordFields::DETAILS]["state"], record[HealthMonitorRecordFields::MONITOR_LABELS], record[HealthMonitorRecordFields::MONITOR_CONFIG], record[HealthMonitorRecordFields::DETAILS] From adb8f948eac7249541430fc3cd43b1499fd75858 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 13 Jun 2019 19:56:53 -0700 Subject: [PATCH 47/90] Missing Pod signals/Node Signals send none or unknown based on the inventory --- .../code/plugin/health/aggregate_monitor.rb | 14 +- .../health/health_kube_api_down_handler.rb | 27 ++ .../health/health_kubernetes_resources.rb | 78 ++++- .../health/health_missing_signal_generator.rb | 73 +++++ .../plugin/health/health_monitor_provider.rb | 32 +- .../plugin/health/health_monitor_record.rb | 2 +- .../plugin/health/health_monitor_state.rb | 4 +- .../plugin/health/health_monitor_utils.rb | 306 ++++++++++++++++++ .../plugin/health/health_signal_reducer.rb | 51 +++ source/code/plugin/health/monitor_factory.rb | 4 +- source/code/plugin/health/unit_monitor.rb | 6 +- source/code/plugin/in_kube_health.rb | 2 +- 12 files changed, 564 
insertions(+), 35 deletions(-) create mode 100644 source/code/plugin/health/health_kube_api_down_handler.rb create mode 100644 source/code/plugin/health/health_missing_signal_generator.rb create mode 100644 source/code/plugin/health/health_signal_reducer.rb diff --git a/source/code/plugin/health/aggregate_monitor.rb b/source/code/plugin/health/aggregate_monitor.rb index 1d505542e..9063d146b 100644 --- a/source/code/plugin/health/aggregate_monitor.rb +++ b/source/code/plugin/health/aggregate_monitor.rb @@ -5,14 +5,14 @@ module HealthModel class AggregateMonitor - attr_accessor :monitor_id, :monitor_instance_id, :operational_state, :transition_date_time, :aggregation_algorithm, :aggregation_algorithm_params, :labels, :is_aggregate_monitor, :details + attr_accessor :monitor_id, :monitor_instance_id, :state, :transition_date_time, :aggregation_algorithm, :aggregation_algorithm_params, :labels, :is_aggregate_monitor, :details attr_reader :member_monitors # constructor def initialize( monitor_id, monitor_instance_id, - operational_state, + state, transition_date_time, aggregation_algorithm, aggregation_algorithm_params, @@ -20,7 +20,7 @@ def initialize( ) @monitor_id = monitor_id @monitor_instance_id = monitor_instance_id - @operational_state = operational_state + @state = state @transition_date_time = transition_date_time @aggregation_algorithm = aggregation_algorithm || AggregationAlgorithm::WORSTOF @aggregation_algorithm_params = aggregation_algorithm_params @@ -52,7 +52,7 @@ def get_member_monitors def calculate_state(monitor_set) case @aggregation_algorithm when AggregationAlgorithm::WORSTOF - @operational_state = calculate_worst_of_state(monitor_set) + @state = calculate_worst_of_state(monitor_set) when AggregationAlgorithm::PERCENTAGE @state = calculate_percentage_state(monitor_set) end @@ -61,13 +61,13 @@ def calculate_state(monitor_set) def calculate_details(monitor_set) @details = {} @details['details'] = {} - @details['state'] = operational_state + 
@details['state'] = state @details['timestamp'] = transition_date_time ids = [] member_monitor_instance_ids = get_member_monitors member_monitor_instance_ids.each{|member_monitor_id| member_monitor = monitor_set.get_monitor(member_monitor_id) - member_state = member_monitor.operational_state + member_state = member_monitor.state if @details.key?(member_state) ids = details[member_state] if !ids.include?(member_monitor.monitor_instance_id) @@ -123,7 +123,7 @@ def map_member_monitor_states(monitor_set) member_monitor_instance_ids.each {|monitor_instance_id| member_monitor = monitor_set.get_monitor(monitor_instance_id) - monitor_state = member_monitor.operational_state + monitor_state = member_monitor.state if !state_counts.key?(monitor_state) state_counts[monitor_state] = 1 diff --git a/source/code/plugin/health/health_kube_api_down_handler.rb b/source/code/plugin/health/health_kube_api_down_handler.rb new file mode 100644 index 000000000..7f7ba1bd3 --- /dev/null +++ b/source/code/plugin/health/health_kube_api_down_handler.rb @@ -0,0 +1,27 @@ +module HealthModel + class HealthKubeApiDownHandler + def initialize + @@monitors_to_change = [HealthMonitorConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID, + HealthMonitorConstants::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID, + HealthMonitorConstants::NODE_CONDITION_MONITOR_ID, + HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID, + HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID] + end + + # update kube-api dependent monitors to be 'unknown' if kube-api is down or monitor is unavailable + def handle_kube_api_down(health_monitor_records) + health_monitor_records_map = {} + + health_monitor_records.map{|record| health_monitor_records_map[record.monitor_instance_id] = record} + if !health_monitor_records_map.key?(HealthMonitorConstants::KUBE_API_STATUS) || (health_monitor_records_map.key?(HealthMonitorConstants::KUBE_API_STATUS) && health_monitor_records_map[HealthMonitorConstants::KUBE_API_STATUS].state 
!= 'pass') + #iterate over the map and set the state to unknown for related monitors + health_monitor_records.each{|health_monitor_record| + if @@monitors_to_change.include?(health_monitor_record.monitor_id) + health_monitor_record.state = HealthMonitorStates::UNKNOWN + end + } + end + return health_monitor_records + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_kubernetes_resources.rb b/source/code/plugin/health/health_kubernetes_resources.rb index 035367a44..468836c89 100644 --- a/source/code/plugin/health/health_kubernetes_resources.rb +++ b/source/code/plugin/health/health_kubernetes_resources.rb @@ -2,18 +2,94 @@ module HealthModel class HealthKubernetesResources attr_accessor :node_inventory, :pod_inventory + attr_reader :nodes, :pods - def initialize(node_inventory, pod_inventory) + def initialize(node_inventory, pod_inventory, deployment_inventory) @node_inventory = node_inventory || [] @pod_inventory = pod_inventory || [] + @deployment_inventory = deployment_inventory || [] + @nodes = [] + @pods = [] + @workloads = get_workload_names + + @node_inventory['items'].each {|node| + @nodes.push(node['metadata']['name']) + } end def get_node_inventory return @node_inventory end + def get_nodes + return @nodes + end + def get_pod_inventory return @pod_inventory end + + def get_pods + return @pods + end + + def get_workload_names + workload_names = {} + deployment_lookup = {} + @deployment_inventory['items'].each do |deployment| + match_labels = deployment['spec']['selector']['matchLabels'].to_h + namespace = deployment['metadata']['namespace'] + match_labels.each{|k,v| + deployment_lookup["#{namespace}-#{k}=#{v}"] = "#{deployment['metadata']['namespace']}~~#{deployment['metadata']['name']}" + } + end + @pod_inventory['items'].each do |pod| + begin + has_owner = !pod['metadata']['ownerReferences'].nil? 
+ owner_kind = '' + if has_owner + owner_kind = pod['metadata']['ownerReferences'][0]['kind'] + controller_name = pod['metadata']['ownerReferences'][0]['name'] + else + owner_kind = pod['kind'] + controller_name = pod['metadata']['name'] + end + + namespace = pod['metadata']['namespace'] + status = pod['status']['phase'] + + workload_name = '' + if owner_kind.nil? + owner_kind = 'Pod' + end + case owner_kind.downcase + when 'job' + # we are excluding jobs + next + when 'replicaset' + # get the labels, and see if there is a match. If there is, it is the deployment. If not, use replica set name/controller name + labels = pod['metadata']['labels'].to_h + labels.each {|k,v| + lookup_key = "#{namespace}-#{k}=#{v}" + if deployment_lookup.key?(lookup_key) + workload_name = deployment_lookup[lookup_key] + break + end + } + if workload_name.empty? + workload_name = "#{namespace}~~#{controller_name}" + end + when 'daemonset' + workload_name = "#{namespace}~~#{controller_name}" + else + workload_name = "#{namespace}~~#{pod['metadata']['name']}" + end + rescue => e + @log.info "Error when processing pod #{pod['metadata']['name']} #{e.message}" + end + workload_names[workload_name] = true + end + return workload_names.keys + end end end \ No newline at end of file diff --git a/source/code/plugin/health/health_missing_signal_generator.rb b/source/code/plugin/health/health_missing_signal_generator.rb new file mode 100644 index 000000000..e93541796 --- /dev/null +++ b/source/code/plugin/health/health_missing_signal_generator.rb @@ -0,0 +1,73 @@ +module HealthModel + class HealthMissingSignalGenerator + attr_accessor :last_received_records, :current_received_records + attr_reader :missing_signals + + def initialize() + @last_received_records = {} + end + + def get_missing_signals(health_monitor_records, health_k8s_inventory) + missing_monitor_ids = [] + nodes = health_k8s_inventory.get_nodes + workload_names = health_k8s_inventory.get_workload_names + missing_signals_map = {} + 
missing_signals = [] + health_monitor_records_map = {} + health_monitor_records.map{ + |monitor| health_monitor_records_map[monitor.monitor_instance_id] = monitor + } + @last_received_records.each{|monitor_instance_id, monitor| + if !health_monitor_records_map.key?(monitor_instance_id) + if HealthMonitorUtils.is_node_monitor(monitor.monitor_id) + node_name = monitor.labels['kubernetes.io/hostname'] + new_monitor = HealthMonitorRecord.new( + monitor.monitor_id, + monitor.monitor_instance_id, + Time.now.utc.iso8601, + monitor.state, + monitor.labels, + monitor.config, + monitor.details + ) + if !node_name.nil? && nodes.include?(node_name) + new_monitor.state = HealthMonitorStates::UNKNOWN + elsif !node_name.nil? && !nodes.include?(node_name) + new_monitor.state = HealthMonitorStates::NONE + end + missing_signals_map[monitor_instance_id] = new_monitor + elsif HealthMonitorUtils.is_pods_ready_monitor(monitor.monitor_id) + lookup = "#{monitor.labels['container.azm.ms/namespace']}~~#{monitor.labels['container.azm.ms/workload-name']}" + new_monitor = HealthMonitorRecord.new( + monitor.monitor_id, + monitor.monitor_instance_id, + Time.now.utc.iso8601, + monitor.state, + monitor.labels, + monitor.config, + monitor.details + ) + if !lookup.nil? && workload_names.include?(lookup) + new_monitor.state = HealthMonitorStates::UNKNOWN + elsif !lookup.nil? 
&& !workload_names.include?(lookup) + new_monitor.state = HealthMonitorStates::NONE + end + missing_signals_map[monitor_instance_id] = new_monitor + end + end + } + missing_signals_map.each{|k,v| + missing_signals.push(v) + } + + return missing_signals + end + + def update_last_received_records(last_received_records) + last_received_records_map = {} + last_received_records.map {|record| last_received_records_map[record.monitor_instance_id] = record } + @last_received_records = last_received_records_map + end + end + +end \ No newline at end of file diff --git a/source/code/plugin/health/health_monitor_provider.rb b/source/code/plugin/health/health_monitor_provider.rb index dbcdd1daa..a1b5bfd18 100644 --- a/source/code/plugin/health/health_monitor_provider.rb +++ b/source/code/plugin/health/health_monitor_provider.rb @@ -26,20 +26,18 @@ def get_record(health_monitor_record, health_monitor_state) labels = Hash.new @cluster_labels.each{|k,v| labels[k] = v} - monitor_id = health_monitor_record[HealthMonitorRecordFields::MONITOR_ID] - monitor_instance_id = health_monitor_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] + monitor_id = health_monitor_record.monitor_id + monitor_instance_id = health_monitor_record.monitor_instance_id health_monitor_instance_state = health_monitor_state.get_state(monitor_instance_id) - monitor_labels = get_labels(health_monitor_record, health_monitor_instance_state) - #log.debug "Monitor Labels : #{monitor_labels}" + monitor_labels = health_monitor_record.labels if !monitor_labels.empty? 
monitor_labels.keys.each do |key| labels[key] = monitor_labels[key] end end - #log.debug "Labels after adding Monitor Labels #{labels}" prev_records = health_monitor_instance_state.prev_records time_first_observed = health_monitor_instance_state.state_change_time # the oldest collection time new_state = health_monitor_instance_state.new_state # this is updated before formatRecord is called @@ -56,18 +54,16 @@ def get_record(health_monitor_record, health_monitor_state) time_observed = Time.now.utc.iso8601 monitor_record = {} - monitor_record["ClusterId"] = 'fake_cluster_id' #KubernetesApiClient.getClusterId - monitor_record["MonitorLabels"] = labels.to_json - monitor_record["MonitorId"] = monitor_id - monitor_record["MonitorInstanceId"] = monitor_instance_id - monitor_record["NewState"] = new_state - monitor_record["OldState"] = old_state - monitor_record["Details"] = details - monitor_record["MonitorConfig"] = config.to_json - monitor_record["AgentCollectionTime"] = Time.now.utc.iso8601 - monitor_record["TimeFirstObserved"] = time_first_observed - - #log.debug "HealthMonitor Record #{monitor_record}" + monitor_record[HealthMonitorRecordFields::CLUSTER_ID] = 'fake_cluster_id' #KubernetesApiClient.getClusterId + monitor_record[HealthMonitorRecordFields::MONITOR_LABELS] = labels.to_json + monitor_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + monitor_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + monitor_record[HealthMonitorRecordFields::NEW_STATE] = new_state + monitor_record[HealthMonitorRecordFields::OLD_STATE] = old_state + monitor_record[HealthMonitorRecordFields::DETAILS] = details + monitor_record[HealthMonitorRecordFields::MONITOR_CONFIG] = config.to_json + monitor_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = Time.now.utc.iso8601 + monitor_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_first_observed return monitor_record end @@ -80,7 +76,7 @@ def get_config(monitor_id) end end - def 
get_labels(health_monitor_record, health_monitor_instance_state) + def get_labels(health_monitor_record) monitor_labels = {} monitor_id = health_monitor_record[HealthMonitorRecordFields::MONITOR_ID] case monitor_id diff --git a/source/code/plugin/health/health_monitor_record.rb b/source/code/plugin/health/health_monitor_record.rb index 81e6d51db..873736c3a 100644 --- a/source/code/plugin/health/health_monitor_record.rb +++ b/source/code/plugin/health/health_monitor_record.rb @@ -2,7 +2,7 @@ :monitor_id, :monitor_instance_id, :transition_date_time, - :operational_state, + :state, :labels, :config, :details diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index 858b7da4e..94f186eb4 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -67,7 +67,7 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor old_state = HealthMonitorStates::NONE new_state = HealthMonitorStates::NONE if samples_to_keep == 1 - new_state = monitor.operational_state + new_state = monitor.state end health_monitor_instance_state = HealthMonitorInstanceState.new( @@ -97,7 +97,7 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor prev_sent_time = health_monitor_instance_state.prev_sent_record_time time_first_observed = health_monitor_instance_state.state_change_time - # if the last sent state (news_state is different from latest monitor operational_state) + # if the last sent state (news_state is different from latest monitor state) if latest_record_state.downcase == new_state.downcase time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60 # check if health signal has "timed out" diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index f543e9a13..e3e6621e2 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ 
b/source/code/plugin/health/health_monitor_utils.rb @@ -18,6 +18,312 @@ def compute_percentage_state(value, config) return HealthMonitorStatePASS end end + + def is_node_monitor(monitor_id) + return (monitor_id == HealthMonitorConstants::NODE_CPU_MONITOR_ID || monitor_id == HealthMonitorConstants::NODE_MEMORY_MONITOR_ID || monitor_id == HealthMonitorConstants::NODE_CONDITION_MONITOR_ID) + end + + def is_pods_ready_monitor(monitor_id) + return (monitor_id == HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID || monitor_id == HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) + end + + def is_infra_monitor + + end + + def is_workload_monitor + + end + + + def get_pods_ready_hash(pod_inventory, deployment_inventory) + pods_ready_percentage_hash = {} + deployment_lookup = {} + deployment_inventory['items'].each do |deployment| + match_labels = deployment['spec']['selector']['matchLabels'].to_h + namespace = deployment['metadata']['namespace'] + match_labels.each{|k,v| + deployment_lookup["#{namespace}-#{k}=#{v}"] = "#{deployment['metadata']['namespace']}~~#{deployment['metadata']['name']}" + } + end + pod_inventory['items'].each do |pod| + begin + has_owner = !pod['metadata']['ownerReferences'].nil? + owner_kind = '' + if has_owner + owner_kind = pod['metadata']['ownerReferences'][0]['kind'] + controller_name = pod['metadata']['ownerReferences'][0]['name'] + else + owner_kind = pod['kind'] + controller_name = pod['metadata']['name'] + #@log.info "#{JSON.pretty_generate(pod)}" + end + + namespace = pod['metadata']['namespace'] + status = pod['status']['phase'] + + workload_name = '' + if owner_kind.nil? + owner_kind = 'Pod' + end + case owner_kind.downcase + when 'job' + # we are excluding jobs + next + when 'replicaset' + # get the labels, and see if there is a match. If there is, it is the deployment. 
If not, use replica set name/controller name + labels = pod['metadata']['labels'].to_h + labels.each {|k,v| + lookup_key = "#{namespace}-#{k}=#{v}" + if deployment_lookup.key?(lookup_key) + workload_name = deployment_lookup[lookup_key] + break + end + } + if workload_name.empty? + workload_name = "#{namespace}~~#{controller_name}" + end + when 'daemonset' + workload_name = "#{namespace}~~#{controller_name}" + else + workload_name = "#{namespace}~~#{pod['metadata']['name']}" + end + + if pods_ready_percentage_hash.key?(workload_name) + total_pods = pods_ready_percentage_hash[workload_name]['totalPods'] + pods_ready = pods_ready_percentage_hash[workload_name]['podsReady'] + else + total_pods = 0 + pods_ready = 0 + end + + total_pods += 1 + if status == 'Running' + pods_ready += 1 + end + + pods_ready_percentage_hash[workload_name] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace, 'workload_name' => workload_name, 'kind' => owner_kind} + rescue => e + @log.info "Error when processing pod #{pod['metadata']['name']} #{e.message}" + end + end + return pods_ready_percentage_hash + end + + + def get_node_state_from_node_conditions(node_conditions) + pass = false + node_conditions.each do |condition| + type = condition['type'] + status = condition['status'] + + if ((type == "NetworkUnavailable" || type == "OutOfDisk") && (status == 'True' || status == 'Unknown')) + return "fail" + elsif ((type == "DiskPressure" || type == "MemoryPressure" || type == "PIDPressure") && (status == 'True' || status == 'Unknown')) + return "warn" + elsif type == "Ready" && status == 'True' + pass = true + end + end + + if pass + return "pass" + else + return "fail" + end + end + + def get_resource_subscription(pod_inventory, metric_name, metric_capacity) + subscription = 0.0 + if !pod_inventory.empty? + pod_inventory['items'].each do |pod| + pod['spec']['containers'].each do |container| + if !container['resources']['requests'].nil? 
&& !container['resources']['requests'][metric_name].nil? + subscription += KubernetesApiClient.getMetricNumericValue(metric_name, container['resources']['requests'][metric_name]) + end + end + end + end + #@log.debug "#{metric_name} Subscription #{subscription}" + return subscription + end + + def get_cluster_cpu_memory_capacity + begin + node_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + cluster_cpu_capacity = 0.0 + cluster_memory_capacity = 0.0 + if !node_inventory.empty? + node_inventory['items'].each do |node| + cpu_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "cpu", "cpuCapacityNanoCores") + if !cpu_capacity_json.nil? + cpu_capacity_json.each do |cpu_capacity_node| + if !cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil? + cluster_cpu_capacity += cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'] + end + end + @log.info "Cluster CPU Limit #{cluster_cpu_capacity}" + else + @log.info "Error getting cpu_capacity" + end + memory_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "memory", "memoryCapacityBytes") + if !memory_capacity_json.nil? + memory_capacity_json.each do |memory_capacity_node| + if !memory_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil? 
+ cluster_memory_capacity += memory_capacity_node['DataItems'][0]['Collections'][0]['Value'] + end + end + @log.info "Cluster Memory Limit #{cluster_memory_capacity}" + else + @log.info "Error getting memory_capacity" + end + end + else + @log.info "Unable to get cpu and memory capacity" + return [0.0, 0.0] + end + return [cluster_cpu_capacity, cluster_memory_capacity] + rescue => e + @log.info e + end + end + + + def refresh_kubernetes_api_data(log, hostName, force: false) + #log.debug "refreshKubernetesApiData" + if ( ((Time.now.utc - Time.parse(@@lastRefreshTime)) / 60 ) < 5.0 && !force) + log.debug "Less than 5 minutes since last refresh at #{@@lastRefreshTime}" + return + end + if force + @log.debug "Force Refresh" + end + + begin + + @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + if !hostName.nil? + podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods?fieldSelector=spec.nodeName%3D#{hostName}").body) + else + podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods").body) + end + podInventory['items'].each do |pod| + has_owner = !pod['metadata']['ownerReferences'].nil? + if !has_owner + workload_name = pod['metadata']['name'] + else + workload_name = pod['metadata']['ownerReferences'][0]['name'] + end + namespace = pod['metadata']['namespace'] + @@controllerMapping[workload_name] = namespace + #log.debug "workload_name #{workload_name} namespace #{namespace}" + pod['spec']['containers'].each do |container| + key = [pod['metadata']['uid'], container['name']].join('/') + + if !container['resources'].empty? && !container['resources']['limits'].nil? && !container['resources']['limits']['cpu'].nil? + cpu_limit_value = KubernetesApiClient.getMetricNumericValue('cpu', container['resources']['limits']['cpu']) + else + @log.info "CPU limit not set for container : #{container['name']}. 
Using Node Capacity" + #TODO: Send warning health event #bestpractices + cpu_limit_value = @cpu_capacity + end + + if !container['resources'].empty? && !container['resources']['limits'].nil? && !container['resources']['limits']['memory'].nil? + #@log.info "Raw Memory Value #{container['resources']['limits']['memory']}" + memory_limit_value = KubernetesApiClient.getMetricNumericValue('memory', container['resources']['limits']['memory']) + else + @log.info "Memory limit not set for container : #{container['name']}. Using Node Capacity" + memory_limit_value = @memory_capacity + end + + @@containerMetadata[key] = {"cpuLimit" => cpu_limit_value, "memoryLimit" => memory_limit_value, "controllerName" => workload_name, "namespace" => namespace} + end + end + rescue => e + @log.info "Error Refreshing Container Resource Limits #{e.backtrace}" + end + # log.info "Controller Mapping #{@@controllerMapping}" + # log.info "Node Inventory #{@@nodeInventory}" + # log.info "Container Metadata #{@@containerMetadata}" + # log.info "------------------------------------" + @@lastRefreshTime = Time.now.utc.iso8601 + end + + + def get_monitor_instance_id(monitor_id, args = []) + string_to_hash = args.join("/") + return "#{monitor_id}-#{Digest::MD5.hexdigest(string_to_hash)}" + end + + + def ensure_cpu_memory_capacity_set(cpu_capacity, memory_capacity, hostname) + + @log.info "ensure_cpu_memory_capacity_set cpu_capacity #{cpu_capacity} memory_capacity #{memory_capacity}" + if cpu_capacity != 0.0 && memory_capacity != 0.0 + @log.info "CPU And Memory Capacity are already set" + return [cpu_capacity, memory_capacity] + end + + begin + @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + rescue Exception => e + @log.info "Error when getting nodeInventory from kube API. Exception: #{e.class} Message: #{e.message} " + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + end + if !@@nodeInventory.nil? 
+ cpu_capacity_json = KubernetesApiClient.parseNodeLimits(@@nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") + if !cpu_capacity_json.nil? + cpu_capacity_json.each do |cpu_info_node| + if !cpu_info_node['DataItems'][0]['Host'].nil? && cpu_info_node['DataItems'][0]['Host'] == @@hostName + if !cpu_info_node['DataItems'][0]['Collections'][0]['Value'].nil? + cpu_capacity = cpu_info_node['DataItems'][0]['Collections'][0]['Value'] + end + end + end + @log.info "CPU Limit #{cpu_capacity}" + else + @log.info "Error getting cpu_capacity" + end + memory_capacity_json = KubernetesApiClient.parseNodeLimits(@@nodeInventory, "capacity", "memory", "memoryCapacityBytes") + if !memory_capacity_json.nil? + memory_capacity_json.each do |memory_info_node| + if !memory_info_node['DataItems'][0]['Host'].nil? && memory_info_node['DataItems'][0]['Host'] == @@hostName + if !memory_info_node['DataItems'][0]['Collections'][0]['Value'].nil? + memory_capacity = memory_info_node['DataItems'][0]['Collections'][0]['Value'] + end + end + end + @log.info "memory Limit #{memory_capacity}" + else + @log.info "Error getting memory_capacity" + end + return [cpu_capacity, memory_capacity] + end + end end + + def build_metrics_hash(metrics_to_collect) + @log.debug "Building Hash of Metrics to Collect #{metrics_to_collect}" + metrics_to_collect_arr = metrics_to_collect.split(',').map(&:strip) + metrics_hash = metrics_to_collect_arr.map {|x| [x.downcase,true]}.to_h + @log.info "Metrics Collected : #{metrics_hash}" + return metrics_hash + end + + def get_health_monitor_config + health_monitor_config = {} + begin + file = File.open('/opt/microsoft/omsagent/plugin/healthmonitorconfig.json', "r") + if !file.nil? 
+ fileContents = file.read + health_monitor_config = JSON.parse(fileContents) + file.close + end + rescue => e + @log.info "Error when opening health config file #{e}" + end + return health_monitor_config + end + end end \ No newline at end of file diff --git a/source/code/plugin/health/health_signal_reducer.rb b/source/code/plugin/health/health_signal_reducer.rb new file mode 100644 index 000000000..0b6d51b99 --- /dev/null +++ b/source/code/plugin/health/health_signal_reducer.rb @@ -0,0 +1,51 @@ +module HealthModel + # this class + # 1. dedupes daemonset signals and takes only the latest + # 2. removes signals for objects that are no longer in the inventory e.g. node might have sent signal before being scaled down + class HealthSignalReducer + def initialize + + end + + def reduce_signals(health_monitor_records, health_k8s_inventory) + nodes = health_k8s_inventory.get_nodes + workload_names = health_k8s_inventory.get_workload_names + reduced_signals_map = {} + reduced_signals = [] + health_monitor_records.each{|health_monitor_record| + monitor_instance_id = health_monitor_record.monitor_instance_id + monitor_id = health_monitor_record.monitor_id + if reduced_signals_map.key?(monitor_instance_id) + record = reduced_signals_map[monitor_instance_id] + if health_monitor_record.transition_date_time > record.transition_date_time # always take the latest record for a monitor instance id + puts 'Duplicate Daemon Set signal' + reduced_signals_map[monitor_instance_id] = health_monitor_record + end + elsif HealthMonitorUtils.is_node_monitor(monitor_id) + node_name = health_monitor_record.labels['kubernetes.io/hostname'] + if (node_name.nil? 
|| !nodes.include?(node_name)) # only add daemon set records if node is present in the inventory + next + end + reduced_signals_map[monitor_instance_id] = health_monitor_record + elsif HealthMonitorUtils.is_pods_ready_monitor(monitor_id) + workload_name = health_monitor_record.labels['container.azm.ms/workload-name'] + namespace = health_monitor_record.labels['container.azm.ms/namespace'] + lookup = "#{namespace}~~#{workload_name}" + if (workload_name.nil? || !workload_names.include?(lookup)) #only add pod record if present in the inventory + next + end + reduced_signals_map[monitor_instance_id] = health_monitor_record + else + reduced_signals_map[monitor_instance_id] = health_monitor_record + end + } + + reduced_signals_map.each{|k,v| + reduced_signals.push(v) + } + + return reduced_signals + end + + end +end \ No newline at end of file diff --git a/source/code/plugin/health/monitor_factory.rb b/source/code/plugin/health/monitor_factory.rb index 18fa2307f..e6ec9d2c3 100644 --- a/source/code/plugin/health/monitor_factory.rb +++ b/source/code/plugin/health/monitor_factory.rb @@ -8,7 +8,7 @@ def initialize def create_unit_monitor(monitor_record) return UnitMonitor.new(monitor_record.monitor_id, monitor_record.monitor_instance_id, - monitor_record.operational_state, + monitor_record.state, monitor_record.transition_date_time, monitor_record.labels, monitor_record.config, @@ -18,7 +18,7 @@ def create_unit_monitor(monitor_record) def create_aggregate_monitor(monitor_id, monitor_instance_id, labels, aggregation_algorithm, aggregation_algorithm_params, child_monitor) return AggregateMonitor.new(monitor_id, monitor_instance_id, - child_monitor.operational_state, + child_monitor.state, child_monitor.transition_date_time, aggregation_algorithm, aggregation_algorithm_params, diff --git a/source/code/plugin/health/unit_monitor.rb b/source/code/plugin/health/unit_monitor.rb index 49649d6ae..9af599321 100644 --- a/source/code/plugin/health/unit_monitor.rb +++ 
b/source/code/plugin/health/unit_monitor.rb @@ -4,14 +4,14 @@ module HealthModel class UnitMonitor - attr_accessor :monitor_id, :monitor_instance_id, :operational_state, :transition_date_time, :labels, :config, :details, :is_aggregate_monitor + attr_accessor :monitor_id, :monitor_instance_id, :state, :transition_date_time, :labels, :config, :details, :is_aggregate_monitor # constructor - def initialize(monitor_id, monitor_instance_id, operational_state, transition_date_time, labels, config, details) + def initialize(monitor_id, monitor_instance_id, state, transition_date_time, labels, config, details) @monitor_id = monitor_id @monitor_instance_id = monitor_instance_id @transition_date_time = transition_date_time - @operational_state = operational_state + @state = state @labels = labels @config = config @details = details diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 931dd16e5..891db0832 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -195,7 +195,7 @@ def process_kube_api_up_monitor(state, response) hmlog = HealthMonitorUtils.getLogHandle #hmlog.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId]) + monitor_instance_id = HealthMonitorConstants::KUBE_API_STATUS #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) #record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) From 876bb3c06f99b365dd64e34ad1049893029ab5f3 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 13 Jun 2019 20:39:54 -0700 Subject: [PATCH 48/90] serialization and deserialization of state --- .../code/plugin/health/health_monitor_state.rb | 12 +++++++++++- .../plugin/health/health_state_deserializer.rb | 18 ++++++++++++++++++ 
.../plugin/health/health_state_serializer.rb | 15 +++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 source/code/plugin/health/health_state_deserializer.rb create mode 100644 source/code/plugin/health/health_state_serializer.rb diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index 94f186eb4..cb01c5dfb 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -10,7 +10,6 @@ module HealthModel # set_state -- sets the last health monitor state class HealthMonitorState - def initialize @@monitor_states = {} @@first_record_sent = {} @@ -26,6 +25,17 @@ def get_state(monitor_instance_id) def set_state(monitor_instance_id, health_monitor_instance_state) @@monitor_states[monitor_instance_id] = health_monitor_instance_state end + + def to_h + return @@monitor_states + end + + def initialize_state(deserialized_state) + @@monitor_states = {} + deserialized_state.each{|k,v| + @@monitor_states[k] = v + } + end =begin when do u send? diff --git a/source/code/plugin/health/health_state_deserializer.rb b/source/code/plugin/health/health_state_deserializer.rb new file mode 100644 index 000000000..8bfcd4480 --- /dev/null +++ b/source/code/plugin/health/health_state_deserializer.rb @@ -0,0 +1,18 @@ +module HealthModel + class HealthStateDeserializer + + attr_reader :deserialize_path + + def initialize(path) + @deserialize_path = path + end + + def deserialize + file = File.read("C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/state.json") #File.read(@deserialize_path) + records = JSON.parse(file) + + #TODO: even though we call JSON.parse, records is still a string. 
Do JSON.parse again to return it as a hash + return JSON.parse(records) + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_state_serializer.rb b/source/code/plugin/health/health_state_serializer.rb new file mode 100644 index 000000000..c3706e05f --- /dev/null +++ b/source/code/plugin/health/health_state_serializer.rb @@ -0,0 +1,15 @@ +module HealthModel + class HealthStateSerializer + + attr_reader :serialized_path + def initialize(path) + @serialized_path = path + end + + def serialize(state) + File.open("C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/state.json", 'w') do |f| #File.open(@serialized_path, 'w') + f.write(JSON.pretty_generate(state.to_h.to_json)) + end + end + end +end \ No newline at end of file From 7c459c49aa2e723eeea090c03ad7066211d825a2 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Mon, 17 Jun 2019 11:45:52 -0700 Subject: [PATCH 49/90] Working cadvisor_health_node filter --- .../filter_cadvisor_health_container.rb | 21 +-- .../plugin/filter_cadvisor_health_node.rb | 46 +++--- .../plugin/filter_health_model_builder.rb | 142 ++++++++++------ .../health/health_kubernetes_resources.rb | 24 +-- .../plugin/health/health_monitor_utils.rb | 151 +++++++++++------- source/code/plugin/in_kube_health.rb | 69 ++++---- 6 files changed, 255 insertions(+), 198 deletions(-) diff --git a/source/code/plugin/filter_cadvisor_health_container.rb b/source/code/plugin/filter_cadvisor_health_container.rb index eb9c3dcdc..dbfc71388 100644 --- a/source/code/plugin/filter_cadvisor_health_container.rb +++ b/source/code/plugin/filter_cadvisor_health_container.rb @@ -49,18 +49,18 @@ def start super @metrics_to_collect_hash = HealthMonitorUtils.build_metrics_hash(@metrics_to_collect) @log.debug "Calling ensure_cpu_memory_capacity_set cpu_capacity #{@cpu_capacity} memory_capacity #{@memory_capacity}" - node_capacity = HealthMonitorUtils.ensure_cpu_memory_capacity_set(@cpu_capacity, @memory_capacity, @@hostName) + node_capacity 
= HealthMonitorUtils.ensure_cpu_memory_capacity_set(@@hm_log, @cpu_capacity, @memory_capacity, @@hostName) @cpu_capacity = node_capacity[0] @memory_capacity = node_capacity[1] @log.info "CPU Capacity #{@cpu_capacity} Memory Capacity #{@memory_capacity}" - HealthMonitorUtils.refreshKubernetesApiData(@log, @@hostName) + HealthMonitorUtils.refresh_kubernetes_api_data(@log, @@hostName) @@health_monitor_config = HealthMonitorUtils.getHealthMonitorConfig ApplicationInsightsUtility.sendCustomEvent("filter_cadvisor_health Plugin Start", {}) end def filter_stream(tag, es) new_es = MultiEventStream.new - HealthMonitorUtils.refreshKubernetesApiData(@log, @hostName) + HealthMonitorUtils.refresh_kubernetes_api_data(@log, @hostName) records_count = 0 es.each { |time, record| begin @@ -143,10 +143,8 @@ def process_container_cpu_record(record, metric_value) health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} #@log.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(@log, monitor_id, [@@clusterId, @@hostName, key]) #@log.info "Monitor Instance Id: #{monitor_instance_id}" - HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] @log.info "Processed Container CPU #{temp}" return record @@ -182,10 +180,8 @@ def process_container_memory_record(record, metric_value) health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} #@log.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(@log, monitor_id, [@@clusterId, @@hostName, key]) #@log.info "Monitor Instance Id: #{monitor_instance_id}" - HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) temp = record.nil? ? "Nil" : record["MonitorInstanceId"] @log.info "Processed Container Memory #{temp}" return record @@ -209,8 +205,7 @@ def process_node_cpu_record(record, metric_value) timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) - HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(@log, monitor_id, [@@clusterId, @@hostName]) # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) # temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] health_record = {} @@ -244,10 +239,8 @@ def process_node_memory_record(record, metric_value) health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} #@log.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName]) #@log.info "Monitor Instance Id: #{monitor_instance_id}" - # HealthMonitorState.updateHealthMonitorState(@log, monitor_instance_id, health_monitor_record, @@health_monitor_config[monitor_id]) - # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) # temp = record.nil? ? "Nil" : record["MonitorInstanceId"] health_record = {} time_now = Time.now.utc.iso8601 diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb index 7d7a751f3..08449bdb3 100644 --- a/source/code/plugin/filter_cadvisor_health_node.rb +++ b/source/code/plugin/filter_cadvisor_health_node.rb @@ -5,17 +5,19 @@ module Fluent require 'logger' require 'json' require_relative 'oms_common' - require_relative 'HealthMonitorUtils' - require_relative 'HealthMonitorState' require_relative "ApplicationInsightsUtility" - + require_relative "KubernetesApiClient" + Dir[File.join(__dir__, './health', '*.rb')].each { |file| require file } class CAdvisor2NodeHealthFilter < Filter + include HealthModel Fluent::Plugin.register_filter('filter_cadvisor_health_node', self) - config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/health_monitors.log' + attr_accessor :provider, :resources + config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryRssBytes' config_param 
:container_resource_refresh_interval_minutes, :integer, :default => 5 + config_param :health_monitor_config_path, :default => '/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json' @@object_name_k8s_node = 'K8SNode' @@object_name_k8s_container = 'K8SContainer' @@ -23,8 +25,7 @@ class CAdvisor2NodeHealthFilter < Filter @@counter_name_cpu = 'cpuusagenanocores' @@counter_name_memory_rss = 'memoryrssbytes' - @@health_monitor_config = {} - + @@hm_log = HealthMonitorUtils.get_log_handle @@hostName = (OMS::Common.get_hostname) @@clusterName = KubernetesApiClient.getClusterName @@clusterId = KubernetesApiClient.getClusterId @@ -37,11 +38,13 @@ def initialize @memory_capacity = 0.0 @last_resource_refresh = DateTime.now.to_time.to_i @metrics_to_collect_hash = {} + @resources = HealthKubernetesResources.instance # this doesnt require node and pod inventory. So no need to populate them + @provider = HealthMonitorProvider.new(HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) end def configure(conf) super - @log = HealthMonitorUtils.getLogHandle + @log = HealthMonitorUtils.get_log_handle @log.debug {'Starting filter_cadvisor2health plugin'} end @@ -49,23 +52,23 @@ def start super @metrics_to_collect_hash = HealthMonitorUtils.build_metrics_hash(@metrics_to_collect) @log.debug "Calling ensure_cpu_memory_capacity_set cpu_capacity #{@cpu_capacity} memory_capacity #{@memory_capacity}" - node_capacity = HealthMonitorUtils.ensure_cpu_memory_capacity_set(@cpu_capacity, @memory_capacity, @@hostName) + node_capacity = HealthMonitorUtils.ensure_cpu_memory_capacity_set(@@hm_log, @cpu_capacity, @memory_capacity, @@hostName) @cpu_capacity = node_capacity[0] @memory_capacity = node_capacity[1] @log.info "CPU Capacity #{@cpu_capacity} Memory Capacity #{@memory_capacity}" - HealthMonitorUtils.refreshKubernetesApiData(@log, @@hostName) - @@health_monitor_config = HealthMonitorUtils.getHealthMonitorConfig + 
HealthMonitorUtils.refresh_kubernetes_api_data(@log, @@hostName) ApplicationInsightsUtility.sendCustomEvent("filter_cadvisor_health Plugin Start", {}) end def filter_stream(tag, es) new_es = MultiEventStream.new - HealthMonitorUtils.refreshKubernetesApiData(@log, @hostName) + HealthMonitorUtils.refresh_kubernetes_api_data(@log, @hostName) records_count = 0 es.each { |time, record| begin filtered_record = filter(tag, time, record) if !filtered_record.nil? + @@hm_log.debug "#{JSON.pretty_generate(filtered_record)}" new_es.add(time, filtered_record) records_count += 1 end @@ -137,13 +140,13 @@ def process_container_cpu_record(record, metric_value) percent = (metric_value.to_f/cpu_limit*100).round(2) #@log.debug "Container #{key} | Percentage of CPU limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::CONTAINER_CPU_MONITOR_ID]) + state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(monitor_id)) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} #@log.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName, key]) #@log.info "Monitor Instance Id: #{monitor_instance_id}" temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] @log.info "Processed Container CPU #{temp}" @@ -174,15 +177,14 @@ def process_container_memory_record(record, metric_value) percent = (metric_value.to_f/memory_limit*100).round(2) #@log.debug "Container #{key} | Percentage of Memory limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID]) + state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID)) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} #@log.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName, key]) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName, key]) #@log.info "Monitor Instance Id: #{monitor_instance_id}" - record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], key: key) temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] @log.info "Processed Container Memory #{temp}" return record @@ -201,13 +203,12 @@ def process_node_cpu_record(record, metric_value) percent = (metric_value.to_f/@cpu_capacity*100).round(2) #@log.debug "Percentage of CPU limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::NODE_CPU_MONITOR_ID]) + state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(HealthMonitorConstants::NODE_CPU_MONITOR_ID)) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) - # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName]) # temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] health_record = {} time_now = Time.now.utc.iso8601 @@ -234,16 +235,13 @@ def process_node_memory_record(record, metric_value) percent = (metric_value.to_f/@memory_capacity*100).round(2) #@log.debug "Percentage of Memory limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::NODE_MEMORY_MONITOR_ID]) + state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(HealthMonitorConstants::NODE_MEMORY_MONITOR_ID)) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} #@log.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@log, monitor_id, [@@clusterId, @@hostName]) - #@log.info "Monitor Instance Id: #{monitor_instance_id}" - # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) - # temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName]) health_record = {} time_now = Time.now.utc.iso8601 health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index fd1efadd1..385396adb 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -13,12 +13,13 @@ class FilterHealthModelBuilder < Filter config_param :enable_log, :integer, :default => 0 config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log' - config_param :model_definition_path, :default => '/etc/opt/microsoft/docker-cimprov/health_model_definition.json' + config_param :model_definition_path, :default => '/etc/opt/microsoft/docker-cimprov/health/health_model_definition.json' + config_param :health_monitor_config_path, :default => '/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json' + config_param :health_state_serialized_path, :default => '/mnt/azure/health_state.json' config_param :health_signal_timeout, :default => 240 - attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_transition_processor, :state_finalizers, :monitor_set, :model_builder + attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_finalizers, :monitor_set, :model_builder, :hierarchy_builder, :resources, :kube_api_down_handler, :provider, :reducer, :state, :generator, :serializer, :deserializer include HealthModel - @@healthMonitorConfig = HealthMonitorUtils.getHealthMonitorConfig @@rewrite_tag = 'oms.api.KubeHealth.AgentCollectionTime' def initialize @@ -26,11 +27,29 @@ def initialize @buffer = HealthModel::HealthModelBuffer.new @health_model_definition = 
HealthModel::HealthModelDefinition.new(HealthModel::HealthModelDefinitionParser.new(@model_definition_path).parse_file) @monitor_factory = HealthModel::MonitorFactory.new - @state_transition_processor = HealthModel::StateTransitionProcessor.new(@health_model_definition, @monitor_factory) + @hierarchy_builder = HealthHierarchyBuilder.new(@health_model_definition, @monitor_factory) # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side @state_finalizers = [HealthModel::AggregateMonitorStateFinalizer.new] @monitor_set = HealthModel::MonitorSet.new - @model_builder = HealthModel::HealthModelBuilder.new(@state_transition_processor, @state_finalizers, @monitor_set) + @model_builder = HealthModel::HealthModelBuilder.new(@hierarchy_builder, @state_finalizers, @monitor_set) + @kube_api_down_handler = HealthKubeApiDownHandler.new + @resources = HealthKubernetesResources.instance + @reducer = HealthSignalReducer.new + @state = HealthMonitorState.new + @generator = HealthMissingSignalGenerator.new + #TODO: cluster_labels needs to be initialized + @provider = HealthMonitorProvider.new(HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) + @serializer = HealthStateSerializer.new(@health_state_serialized_path) + @deserializer = HealthStateDeserializer.new(@health_state_serialized_path) + # TODO: in_kube_api_health should set these values + # resources.node_inventory = node_inventory + # resources.pod_inventory = pod_inventory + # resources.deployment_inventory = deployment_inventory + #TODO: check if the path exists + deserialized_state_info = @deserializer.deserialize + @state = HealthMonitorState.new + @state.initialize_state(deserialized_state_info) + end def configure(conf) @@ -59,10 +78,6 @@ def filter_stream(tag, es) records = [] if !es.nil? 
es.each{|time, record| - HealthMonitorState.updateHealthMonitorState(@log, - record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], - record[HealthMonitorRecordFields::DETAILS], - @@healthMonitorConfig[record[HealthMonitorRecordFields::MONITOR_ID]]) records.push(record) } @buffer.add_to_buffer(records) @@ -76,56 +91,81 @@ def filter_stream(tag, es) @buffer.add_to_buffer(records) records_to_process = @buffer.get_buffer @buffer.reset_buffer - filtered_records = [] - raw_records = [] - records_to_process.each{|record| + + health_monitor_records = [] + records_to_process.each do |record| + monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] - filtered_record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, + health_monitor_record = HealthMonitorRecord.new( + record[HealthMonitorRecordFields::MONITOR_ID], record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], - @@healthMonitorConfig[monitor_id], - @health_signal_timeout, - node_name: record[HealthMonitorRecordFields::NODE_NAME] + record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], + record[HealthMonitorRecordFields::DETAILS]["state"], + @provider.get_labels(record), + @provider.get_config(monitor_id), + record[HealthMonitorRecordFields::DETAILS] + ) + + @state.update_state(health_monitor_record, + @provider.get_config(health_monitor_record.monitor_id) ) - filtered_records.push(HealthMonitorRecord.new( - filtered_record[HealthMonitorRecordFields::MONITOR_ID], - filtered_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], - filtered_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], - filtered_record[HealthMonitorRecordFields::OLD_STATE], - filtered_record[HealthMonitorRecordFields::NEW_STATE], - filtered_record[HealthMonitorRecordFields::MONITOR_LABELS], - filtered_record[HealthMonitorRecordFields::MONITOR_CONFIG], - filtered_record[HealthMonitorRecordFields::DETAILS] - )) if filtered_record - - 
raw_records.push(filtered_record) if filtered_record - } - - @log.info "Filtered Records size = #{filtered_records.size}" - - # File.open("/tmp/mock_data-#{Time.now.to_i}.json", "w") do |f| - # f.write(JSON.pretty_generate(raw_records)) - # end - @model_builder.process_state_transitions(filtered_records) - monitors = @model_builder.finalize_model - @log.debug "monitors map size = #{monitors.size}" - - monitors.map {|monitor_instance_id, monitor| - record = {} + # get the health state based on the monitor's operational state + # update state calls updates the state of the monitor based on configuration and history of the the monitor records + health_monitor_record.state = @state.get_state(monitor_instance_id).new_state + health_monitor_records.push(health_monitor_record) + instance_state = @state.get_state(monitor_instance_id) + #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + end - record[HealthMonitorRecordFields::MONITOR_ID] = monitor.monitor_id - record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor.monitor_instance_id - record[HealthMonitorRecordFields::MONITOR_LABELS] = monitor.labels.to_json - record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId - #record[HealthMonitorRecordFields::OLD_STATE] = monitor.old_state - #record[HealthMonitorRecordFields::NEW_STATE] = monitor.new_state - record[HealthMonitorRecordFields::DETAILS] = monitor.details.to_json if monitor.methods.include? :details - record[HealthMonitorRecordFields::MONITOR_CONFIG] = monitor.config if monitor.methods.include? 
:config - record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = Time.now.utc.iso8601 - record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = monitor.transition_date_time + health_monitor_records = @kube_api_down_handler.handle_kube_api_down(health_monitor_records) + # Dedupe daemonset signals + # Remove unit monitor signals for “gone” objects + reduced_records = @reducer.reduce_signals(health_monitor_records, resources) + + #get the list of 'none' and 'unknown' signals + missing_signals = @generator.get_missing_signals(reduced_records, resources) + #update state for missing signals + missing_signals.each{|signal| + @state.update_state(signal, + @provider.get_config(signal.monitor_id) + ) + } + @generator.update_last_received_records(reduced_records) + reduced_records.push(*missing_signals) + + # build the health model + all_records = reduced_records + @model_builder.process_records(all_records) + all_monitors = @model_builder.finalize_model + + # update the state for aggregate monitors (unit monitors are updated above) + all_monitors.each{|monitor_instance_id, monitor| + if monitor.is_aggregate_monitor + @state.update_state(monitor, + @provider.get_config(monitor.monitor_id) + ) + end + + instance_state = @state.get_state(monitor_instance_id) + #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + should_send = instance_state.should_send + + # always send cluster monitor as a heartbeat + if !should_send && monitor_instance_id != MonitorId::CLUSTER + all_monitors.delete(monitor_instance_id) + end + } + # for each key in monitor.keys, + # get the state from health_monitor_state + # generate the record to send + all_monitors.keys.each{|key| + record = @provider.get_record(all_monitors[key], state) + puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" new_es.add(time, record) } + @serializer.serialize(@state) router.emit_stream(@@rewrite_tag, new_es) # return an empty 
event stream, else the match will throw a NoMethodError diff --git a/source/code/plugin/health/health_kubernetes_resources.rb b/source/code/plugin/health/health_kubernetes_resources.rb index 468836c89..b1a13be1f 100644 --- a/source/code/plugin/health/health_kubernetes_resources.rb +++ b/source/code/plugin/health/health_kubernetes_resources.rb @@ -1,20 +1,19 @@ +require 'singleton' + module HealthModel class HealthKubernetesResources - attr_accessor :node_inventory, :pod_inventory - attr_reader :nodes, :pods + include Singleton + attr_accessor :node_inventory, :pod_inventory, :deployment_inventory + attr_reader :nodes, :pods, :workloads - def initialize(node_inventory, pod_inventory, deployment_inventory) - @node_inventory = node_inventory || [] - @pod_inventory = pod_inventory || [] - @deployment_inventory = deployment_inventory || [] + def initialize + @node_inventory = [] + @pod_inventory = [] + @deployment_inventory = [] @nodes = [] @pods = [] - @workloads = get_workload_names - - @node_inventory['items'].each {|node| - @nodes.push(node['metadata']['name']) - } + @workloads = [] end def get_node_inventory @@ -22,6 +21,9 @@ def get_node_inventory end def get_nodes + @node_inventory['items'].each {|node| + @nodes.push(node['metadata']['name']) + } return @nodes end diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index e3e6621e2..2da93245d 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -2,8 +2,21 @@ module HealthModel # static class that provides a bunch of utility methods class HealthMonitorUtils + begin + + if !Gem.win_platform? 
+ require_relative '../KubernetesApiClient' + end + rescue => e + puts "#{e.message}" + end + @@node_inventory = [] + @log_path = "/var/opt/microsoft/docker-cimprov/log/health_monitors.log" + @log = Logger.new(@log_path, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M + @@last_refresh_time = '2019-01-01T00:00:00Z' + class << self # compute the percentage state given a value and a monitor configuration def compute_percentage_state(value, config) @@ -11,11 +24,11 @@ def compute_percentage_state(value, config) fail_percentage = config['FailThresholdPercentage'].to_f if value > fail_percentage - return HealthMonitorState::FAIL + return HealthMonitorStates::FAIL elsif !warn_percentage.nil? && value > warn_percentage - return HealthMonitorState::WARNING + return HealthMonitorStates::WARNING else - return HealthMonitorStatePASS + return HealthMonitorStates::PASS end end @@ -35,7 +48,6 @@ def is_workload_monitor end - def get_pods_ready_hash(pod_inventory, deployment_inventory) pods_ready_percentage_hash = {} deployment_lookup = {} @@ -56,7 +68,7 @@ def get_pods_ready_hash(pod_inventory, deployment_inventory) else owner_kind = pod['kind'] controller_name = pod['metadata']['name'] - #@log.info "#{JSON.pretty_generate(pod)}" + #log.info "#{JSON.pretty_generate(pod)}" end namespace = pod['metadata']['namespace'] @@ -104,13 +116,12 @@ def get_pods_ready_hash(pod_inventory, deployment_inventory) pods_ready_percentage_hash[workload_name] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace, 'workload_name' => workload_name, 'kind' => owner_kind} rescue => e - @log.info "Error when processing pod #{pod['metadata']['name']} #{e.message}" + log.info "Error when processing pod #{pod['metadata']['name']} #{e.message}" end end return pods_ready_percentage_hash end - def get_node_state_from_node_conditions(node_conditions) pass = false node_conditions.each do |condition| @@ -144,11 +155,11 @@ def get_resource_subscription(pod_inventory, 
metric_name, metric_capacity) end end end - #@log.debug "#{metric_name} Subscription #{subscription}" + #log.debug "#{metric_name} Subscription #{subscription}" return subscription end - def get_cluster_cpu_memory_capacity + def get_cluster_cpu_memory_capacity(log) begin node_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) cluster_cpu_capacity = 0.0 @@ -162,9 +173,9 @@ def get_cluster_cpu_memory_capacity cluster_cpu_capacity += cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'] end end - @log.info "Cluster CPU Limit #{cluster_cpu_capacity}" + log.info "Cluster CPU Limit #{cluster_cpu_capacity}" else - @log.info "Error getting cpu_capacity" + log.info "Error getting cpu_capacity" end memory_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "memory", "memoryCapacityBytes") if !memory_capacity_json.nil? @@ -173,34 +184,32 @@ def get_cluster_cpu_memory_capacity cluster_memory_capacity += memory_capacity_node['DataItems'][0]['Collections'][0]['Value'] end end - @log.info "Cluster Memory Limit #{cluster_memory_capacity}" + log.info "Cluster Memory Limit #{cluster_memory_capacity}" else - @log.info "Error getting memory_capacity" + log.info "Error getting memory_capacity" end end else - @log.info "Unable to get cpu and memory capacity" + log.info "Unable to get cpu and memory capacity" return [0.0, 0.0] end return [cluster_cpu_capacity, cluster_memory_capacity] rescue => e - @log.info e + log.info e end end - def refresh_kubernetes_api_data(log, hostName, force: false) - #log.debug "refreshKubernetesApiData" - if ( ((Time.now.utc - Time.parse(@@lastRefreshTime)) / 60 ) < 5.0 && !force) - log.debug "Less than 5 minutes since last refresh at #{@@lastRefreshTime}" + #log.debug "refresh_kubernetes_api_data" + if ( ((Time.now.utc - Time.parse(@@last_refresh_time)) / 60 ) < 5.0 && !force) + log.debug "Less than 5 minutes since last refresh at #{@@last_refresh_time}" return end if force - @log.debug "Force 
Refresh" + log.debug "Force Refresh" end begin - @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) if !hostName.nil? podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods?fieldSelector=spec.nodeName%3D#{hostName}").body) @@ -215,7 +224,8 @@ def refresh_kubernetes_api_data(log, hostName, force: false) workload_name = pod['metadata']['ownerReferences'][0]['name'] end namespace = pod['metadata']['namespace'] - @@controllerMapping[workload_name] = namespace + #TODO: Figure this out for container cpu/memory + #@@controllerMapping[workload_name] = namespace #log.debug "workload_name #{workload_name} namespace #{namespace}" pod['spec']['containers'].each do |container| key = [pod['metadata']['uid'], container['name']].join('/') @@ -223,107 +233,128 @@ def refresh_kubernetes_api_data(log, hostName, force: false) if !container['resources'].empty? && !container['resources']['limits'].nil? && !container['resources']['limits']['cpu'].nil? cpu_limit_value = KubernetesApiClient.getMetricNumericValue('cpu', container['resources']['limits']['cpu']) else - @log.info "CPU limit not set for container : #{container['name']}. Using Node Capacity" + log.info "CPU limit not set for container : #{container['name']}. Using Node Capacity" #TODO: Send warning health event #bestpractices cpu_limit_value = @cpu_capacity end if !container['resources'].empty? && !container['resources']['limits'].nil? && !container['resources']['limits']['memory'].nil? - #@log.info "Raw Memory Value #{container['resources']['limits']['memory']}" + #log.info "Raw Memory Value #{container['resources']['limits']['memory']}" memory_limit_value = KubernetesApiClient.getMetricNumericValue('memory', container['resources']['limits']['memory']) else - @log.info "Memory limit not set for container : #{container['name']}. Using Node Capacity" + log.info "Memory limit not set for container : #{container['name']}. 
Using Node Capacity" memory_limit_value = @memory_capacity end - @@containerMetadata[key] = {"cpuLimit" => cpu_limit_value, "memoryLimit" => memory_limit_value, "controllerName" => workload_name, "namespace" => namespace} + #TODO: Figure this out for container cpu/memory + #@@containerMetadata[key] = {"cpuLimit" => cpu_limit_value, "memoryLimit" => memory_limit_value, "controllerName" => workload_name, "namespace" => namespace} end end rescue => e - @log.info "Error Refreshing Container Resource Limits #{e.backtrace}" + log.info "Error Refreshing Container Resource Limits #{e.backtrace}" end # log.info "Controller Mapping #{@@controllerMapping}" # log.info "Node Inventory #{@@nodeInventory}" # log.info "Container Metadata #{@@containerMetadata}" # log.info "------------------------------------" - @@lastRefreshTime = Time.now.utc.iso8601 + @@last_refresh_time = Time.now.utc.iso8601 end - def get_monitor_instance_id(monitor_id, args = []) string_to_hash = args.join("/") return "#{monitor_id}-#{Digest::MD5.hexdigest(string_to_hash)}" end + def ensure_cpu_memory_capacity_set(log, cpu_capacity, memory_capacity, hostname) - def ensure_cpu_memory_capacity_set(cpu_capacity, memory_capacity, hostname) - - @log.info "ensure_cpu_memory_capacity_set cpu_capacity #{cpu_capacity} memory_capacity #{memory_capacity}" + log.info "ensure_cpu_memory_capacity_set cpu_capacity #{cpu_capacity} memory_capacity #{memory_capacity}" if cpu_capacity != 0.0 && memory_capacity != 0.0 - @log.info "CPU And Memory Capacity are already set" + log.info "CPU And Memory Capacity are already set" return [cpu_capacity, memory_capacity] end begin @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) rescue Exception => e - @log.info "Error when getting nodeInventory from kube API. Exception: #{e.class} Message: #{e.message} " + log.info "Error when getting nodeInventory from kube API. 
Exception: #{e.class} Message: #{e.message} " ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) end if !@@nodeInventory.nil? cpu_capacity_json = KubernetesApiClient.parseNodeLimits(@@nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") if !cpu_capacity_json.nil? cpu_capacity_json.each do |cpu_info_node| - if !cpu_info_node['DataItems'][0]['Host'].nil? && cpu_info_node['DataItems'][0]['Host'] == @@hostName + if !cpu_info_node['DataItems'][0]['Host'].nil? && cpu_info_node['DataItems'][0]['Host'] == hostname if !cpu_info_node['DataItems'][0]['Collections'][0]['Value'].nil? cpu_capacity = cpu_info_node['DataItems'][0]['Collections'][0]['Value'] end end end - @log.info "CPU Limit #{cpu_capacity}" + log.info "CPU Limit #{cpu_capacity}" else - @log.info "Error getting cpu_capacity" + log.info "Error getting cpu_capacity" end memory_capacity_json = KubernetesApiClient.parseNodeLimits(@@nodeInventory, "capacity", "memory", "memoryCapacityBytes") if !memory_capacity_json.nil? memory_capacity_json.each do |memory_info_node| - if !memory_info_node['DataItems'][0]['Host'].nil? && memory_info_node['DataItems'][0]['Host'] == @@hostName + if !memory_info_node['DataItems'][0]['Host'].nil? && memory_info_node['DataItems'][0]['Host'] == hostname if !memory_info_node['DataItems'][0]['Collections'][0]['Value'].nil? 
memory_capacity = memory_info_node['DataItems'][0]['Collections'][0]['Value'] end end end - @log.info "memory Limit #{memory_capacity}" + log.info "memory Limit #{memory_capacity}" else - @log.info "Error getting memory_capacity" + log.info "Error getting memory_capacity" end return [cpu_capacity, memory_capacity] end end - end - def build_metrics_hash(metrics_to_collect) - @log.debug "Building Hash of Metrics to Collect #{metrics_to_collect}" - metrics_to_collect_arr = metrics_to_collect.split(',').map(&:strip) - metrics_hash = metrics_to_collect_arr.map {|x| [x.downcase,true]}.to_h - @log.info "Metrics Collected : #{metrics_hash}" - return metrics_hash - end + def build_metrics_hash(metrics_to_collect) + metrics_to_collect_arr = metrics_to_collect.split(',').map(&:strip) + metrics_hash = metrics_to_collect_arr.map {|x| [x.downcase,true]}.to_h + return metrics_hash + end - def get_health_monitor_config - health_monitor_config = {} - begin - file = File.open('/opt/microsoft/omsagent/plugin/healthmonitorconfig.json', "r") - if !file.nil? - fileContents = file.read - health_monitor_config = JSON.parse(fileContents) - file.close + def get_health_monitor_config + health_monitor_config = {} + begin + file = File.open('/opt/microsoft/omsagent/plugin/healthmonitorconfig.json', "r") + if !file.nil? + fileContents = file.read + health_monitor_config = JSON.parse(fileContents) + file.close + end + rescue => e + log.info "Error when opening health config file #{e}" end - rescue => e - @log.info "Error when opening health config file #{e}" + return health_monitor_config end - return health_monitor_config - end + def get_cluster_labels + labels = {} + cluster_id = KubernetesApiClient.getClusterId + region = KubernetesApiClient.getClusterRegion + labels['container.azm.ms/cluster-region'] = region + if !cluster_id.nil? 
+ cluster_id_elements = cluster_id.split('/') + azure_sub_id = cluster_id_elements[2] + resource_group = cluster_id_elements[4] + cluster_name = cluster_id_elements[8] + labels['container.azm.ms/cluster-subscription-id'] = azure_sub_id + labels['container.azm.ms/cluster-resource-group'] = resource_group + labels['container.azm.ms/cluster-name'] = cluster_name + end + return labels + end + + def get_log_handle + if Gem.win_platform? + log_path = "C:\Temp\health_monitors.log" + @log = Logger.new(@log_path, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M + end + return @log + end + end end end \ No newline at end of file diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 891db0832..66ef305c0 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -1,12 +1,19 @@ #!/usr/local/bin/ruby # frozen_string_literal: true -include HealthModel +require_relative "KubernetesApiClient" +require_relative "oms_common" +require_relative "omslog" +require_relative "ApplicationInsightsUtility" module Fluent + + Dir[File.join(__dir__, './health', '*.rb')].each { |file| require file } class KubeHealthInput < Input Plugin.register_input("kubehealth", self) + config_param :health_monitor_config_path, :default => '/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json' + @@clusterCpuCapacity = 0.0 @@clusterMemoryCapacity = 0.0 @@ -15,14 +22,10 @@ def initialize require "yaml" require "json" - require_relative "KubernetesApiClient" - require_relative "oms_common" - require_relative "omslog" - require_relative "ApplicationInsightsUtility" - require_relative 'HealthMonitorUtils' - require_relative 'HealthMonitorState' - require_relative 'health/health_model_constants' + @resources = HealthKubernetesResources.instance + @provider = HealthMonitorProvider.new(HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) end + include HealthModel config_param :run_interval, 
:time, :default => "1m" config_param :tag, :string, :default => "oms.api.KubeHealth.ReplicaSet" @@ -38,14 +41,13 @@ def start @mutex = Mutex.new @thread = Thread.new(&method(:run_periodic)) + @@hmlog = HealthMonitorUtils.get_log_handle @@clusterName = KubernetesApiClient.getClusterName @@clusterId = KubernetesApiClient.getClusterId @@clusterRegion = KubernetesApiClient.getClusterRegion - cluster_capacity = HealthMonitorUtils.getClusterCpuMemoryCapacity + cluster_capacity = HealthMonitorUtils.get_cluster_cpu_memory_capacity(@@hmlog) @@clusterCpuCapacity = cluster_capacity[0] @@clusterMemoryCapacity = cluster_capacity[1] - @@healthMonitorConfig = HealthMonitorUtils.getHealthMonitorConfig - @@hmlog = HealthMonitorUtils.getLogHandle @@hmlog.info "Cluster CPU Capacity: #{@@clusterCpuCapacity} Memory Capacity: #{@@clusterMemoryCapacity}" ApplicationInsightsUtility.sendCustomEvent("in_kube_health Plugin Start", {}) end @@ -70,15 +72,18 @@ def enumerate eventStream = MultiEventStream.new hmlog = HealthMonitorUtils.getLogHandle - HealthMonitorUtils.refreshKubernetesApiData(@@hmlog, nil) + HealthMonitorUtils.refresh_kubernetes_api_data(@@hmlog, nil) # we do this so that if the call fails, we get a response code/header etc. 
node_inventory_response = KubernetesApiClient.getKubeResourceInfo("nodes") node_inventory = JSON.parse(node_inventory_response.body) pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods") pod_inventory = JSON.parse(pod_inventory_response.body) - deployment_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("deployments", api_version: "extensions/v1beta1").body) + @resources.node_inventory = node_inventory + @resources.pod_inventory = pod_inventory + @resources.deployment_inventory = deployment_inventory + if node_inventory_response.code.to_i != 200 record = process_kube_api_up_monitor("fail", node_inventory_response) health_monitor_records.push(record) if record @@ -92,7 +97,7 @@ def enumerate health_monitor_records.push(record) if record record = process_memory_oversubscribed_monitor(pod_inventory) health_monitor_records.push(record) if record - pods_ready_hash = HealthMonitorUtils.getPodsReadyHash(pod_inventory, deployment_inventory) + pods_ready_hash = HealthMonitorUtils.get_pods_ready_hash(pod_inventory, deployment_inventory) system_pods = pods_ready_hash.select{|k,v| v['namespace'] == 'kube-system'} workload_pods = pods_ready_hash.select{|k,v| v['namespace'] != 'kube-system'} @@ -119,8 +124,6 @@ def enumerate hmlog.info "NODE INVENTORY IS NIL" end - #@@hmlog.debug "Health Monitor Records Size #{health_monitor_records.size}" - health_monitor_records.each do |record| eventStream.add(emitTime, record) end @@ -134,7 +137,7 @@ def enumerate def process_cpu_oversubscribed_monitor(pod_inventory) timestamp = Time.now.utc.iso8601 - subscription = HealthMonitorUtils.getResourceSubscription(pod_inventory,"cpu", @@clusterCpuCapacity) + subscription = HealthMonitorUtils.get_resource_subscription(pod_inventory,"cpu", @@clusterCpuCapacity) state = subscription > @@clusterCpuCapacity ? 
"fail" : "pass" #@@hmlog.debug "CPU Oversubscribed Monitor State : #{state}" @@ -143,10 +146,8 @@ def process_cpu_oversubscribed_monitor(pod_inventory) health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}} # @@hmlog.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId]) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(@@hmlog, monitor_id, [@@clusterId]) #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" - HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) - #record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) health_record = {} time_now = Time.now.utc.iso8601 health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id @@ -161,18 +162,16 @@ def process_cpu_oversubscribed_monitor(pod_inventory) def process_memory_oversubscribed_monitor(pod_inventory) timestamp = Time.now.utc.iso8601 - subscription = HealthMonitorUtils.getResourceSubscription(pod_inventory,"memory", @@clusterMemoryCapacity) + subscription = HealthMonitorUtils.get_resource_subscription(pod_inventory,"memory", @@clusterMemoryCapacity) state = subscription > @@clusterMemoryCapacity ? 
"fail" : "pass" #@@hmlog.debug "Memory Oversubscribed Monitor State : #{state}" #CPU monitor_id = HealthMonitorConstants::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}} - hmlog = HealthMonitorUtils.getLogHandle + hmlog = HealthMonitorUtils.get_log_handle - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId]) - HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) - #record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId]) health_record = {} time_now = Time.now.utc.iso8601 health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id @@ -192,13 +191,11 @@ def process_kube_api_up_monitor(state, response) details = response.each_header.to_h details['ResponseCode'] = response.code health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} - hmlog = HealthMonitorUtils.getLogHandle + hmlog = HealthMonitorUtils.get_log_handle #hmlog.info health_monitor_record monitor_instance_id = HealthMonitorConstants::KUBE_API_STATUS #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" - HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, @@healthMonitorConfig[monitor_id]) - #record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, @@healthMonitorConfig[monitor_id]) health_record = {} time_now = Time.now.utc.iso8601 health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id @@ -212,8 +209,8 @@ def process_kube_api_up_monitor(state, response) end def process_pods_ready_percentage(pods_hash, 
config_monitor_id) - monitor_config = @@healthMonitorConfig[config_monitor_id] - hmlog = HealthMonitorUtils.getLogHandle + monitor_config = @@provider.get_config(config_monitor_id) + hmlog = HealthMonitorUtils.get_log_handle records = [] pods_hash.keys.each do |key| @@ -225,10 +222,9 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) percent = pods_ready / total_pods * 100 timestamp = Time.now.utc.iso8601 - state = HealthMonitorState.getState(@@hmlog, (100-percent), monitor_config) + state = HealthMonitorUtils.compute_percentage_state((100-percent), monitor_config) health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "workloadName" => workload_name, "namespace" => namespace, "workloadKind" => workload_kind}} - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, config_monitor_id, [@@clusterId, namespace, workload_name]) - HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(config_monitor_id, [@@clusterId, namespace, workload_name]) health_record = {} time_now = Time.now.utc.iso8601 health_record[HealthMonitorRecordFields::MONITOR_ID] = config_monitor_id @@ -244,7 +240,6 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) end def process_node_condition_monitor(node_inventory) - hmlog = HealthMonitorUtils.getLogHandle monitor_id = HealthMonitorConstants::NODE_CONDITION_MONITOR_ID timestamp = Time.now.utc.iso8601 monitor_config = @@healthMonitorConfig[monitor_id] @@ -253,16 +248,14 @@ def process_node_condition_monitor(node_inventory) node_inventory['items'].each do |node| node_name = node['metadata']['name'] conditions = node['status']['conditions'] - state = HealthMonitorUtils.getNodeStateFromNodeConditions(conditions) + state = HealthMonitorUtils.get_node_state_from_node_conditions(conditions) #hmlog.debug 
"Node Name = #{node_name} State = #{state}" details = {} conditions.each do |condition| details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message']} end health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} - monitor_instance_id = HealthMonitorUtils.getMonitorInstanceId(@@hmlog, monitor_id, [@@clusterId, node_name]) - HealthMonitorState.updateHealthMonitorState(@@hmlog, monitor_instance_id, health_monitor_record, monitor_config) - #record = HealthMonitorSignalReducer.reduceSignal(@@hmlog, monitor_id, monitor_instance_id, monitor_config, node_name: node_name) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(@@hmlog, monitor_id, [@@clusterId, node_name]) health_record = {} time_now = Time.now.utc.iso8601 health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id From 497c26a5ec93f70e03cc2161707acb969d676bcd Mon Sep 17 00:00:00 2001 From: r-dilip Date: Mon, 17 Jun 2019 19:12:31 -0700 Subject: [PATCH 50/90] working version E2E with state serialization and deserialization --- .../plugin/filter_health_model_builder.rb | 24 ++++++-- .../health/health_missing_monitor_handler.rb | 56 ------------------- .../health/health_missing_signal_generator.rb | 44 ++++++++++++++- .../plugin/health/health_monitor_provider.rb | 12 ++++ .../plugin/health/health_monitor_utils.rb | 12 ++-- .../health/health_state_deserializer.rb | 26 +++++++-- .../plugin/health/health_state_serializer.rb | 9 ++- source/code/plugin/in_cadvisor_perf.rb | 4 +- source/code/plugin/in_kube_health.rb | 9 ++- 9 files changed, 116 insertions(+), 80 deletions(-) delete mode 100644 source/code/plugin/health/health_missing_monitor_handler.rb diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 385396adb..127005c9b 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -15,8 +15,7 @@ class 
FilterHealthModelBuilder < Filter config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log' config_param :model_definition_path, :default => '/etc/opt/microsoft/docker-cimprov/health/health_model_definition.json' config_param :health_monitor_config_path, :default => '/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json' - config_param :health_state_serialized_path, :default => '/mnt/azure/health_state.json' - config_param :health_signal_timeout, :default => 240 + config_param :health_state_serialized_path, :default => '/mnt/azure/health_model_state.json' attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_finalizers, :monitor_set, :model_builder, :hierarchy_builder, :resources, :kube_api_down_handler, :provider, :reducer, :state, :generator, :serializer, :deserializer include HealthModel @@ -48,8 +47,7 @@ def initialize #TODO: check if the path exists deserialized_state_info = @deserializer.deserialize @state = HealthMonitorState.new - @state.initialize_state(deserialized_state_info) - + #@state.initialize_state(deserialized_state_info) end def configure(conf) @@ -84,6 +82,7 @@ def filter_stream(tag, es) end return [] elsif tag.start_with?("oms.api.KubeHealth.ReplicaSet") + @log.info "TAG #{tag}" records = [] es.each{|time, record| records.push(record) @@ -118,13 +117,19 @@ def filter_stream(tag, es) #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" end + @log.info "health_monitor_records.size #{health_monitor_records.size}" + health_monitor_records = @kube_api_down_handler.handle_kube_api_down(health_monitor_records) + @log.info " after kube api down handler health_monitor_records.size #{health_monitor_records.size}" # Dedupe daemonset signals # Remove unit monitor signals for “gone” objects - reduced_records = @reducer.reduce_signals(health_monitor_records, resources) + reduced_records = 
@reducer.reduce_signals(health_monitor_records, @resources) + @log.info "after deduping and removing gone objects reduced_records.size #{reduced_records.size}" #get the list of 'none' and 'unknown' signals - missing_signals = @generator.get_missing_signals(reduced_records, resources) + missing_signals = @generator.get_missing_signals(KubernetesApiClient.getClusterId, reduced_records, @resources, @provider) + + @log.info "after getting missing signals missing_signals.size #{missing_signals.size}" #update state for missing signals missing_signals.each{|signal| @state.update_state(signal, @@ -134,11 +139,15 @@ def filter_stream(tag, es) @generator.update_last_received_records(reduced_records) reduced_records.push(*missing_signals) + @log.info "after Adding missing signals reduced_records.size #{reduced_records.size}" + # build the health model all_records = reduced_records @model_builder.process_records(all_records) all_monitors = @model_builder.finalize_model + @log.info "after building health_model #{all_monitors.size}" + # update the state for aggregate monitors (unit monitors are updated above) all_monitors.each{|monitor_instance_id, monitor| if monitor.is_aggregate_monitor @@ -157,6 +166,8 @@ def filter_stream(tag, es) end } + @log.info "after optimizing health signals all_monitors.size #{all_monitors.size}" + # for each key in monitor.keys, # get the state from health_monitor_state # generate the record to send @@ -165,6 +176,7 @@ def filter_stream(tag, es) puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" new_es.add(time, record) } + @serializer.serialize(@state) router.emit_stream(@@rewrite_tag, new_es) diff --git a/source/code/plugin/health/health_missing_monitor_handler.rb b/source/code/plugin/health/health_missing_monitor_handler.rb deleted file mode 100644 index aad3c2980..000000000 --- a/source/code/plugin/health/health_missing_monitor_handler.rb +++ /dev/null @@ -1,56 +0,0 @@ -module HealthModel - class 
HealthMissingMonitorHandler - - attr_accessor :last_sent_monitors, :unknown_state_candidates - - def initialize(last_sent_monitors, unknown_state_candidates) - @last_sent_monitors = {} - @unknown_state_candidates = {} - @node_inventory = {} - @workload_inventory ={} - end - - def detect_missing_signals(received_records) - nodes = get_node_inventory(received_records) - workloads = get_workload_inventory(received_records) - - received_records.each{|record| - monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] - case monitor_id - when HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID - # node monitor processing - # check if present in last_sent_monitors - # if not present - when HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID - # pods ready percentage processing - when HealthMonitorConstants::KUBE_API_STATUS - # kube api status - end - } - end - - def get_node_inventory(received_records) - @node_inventory = [] - node_records = received_records.select {|record| record[HealthMonitorRecordFields::MONITOR_ID] == HealthMonitorConstants::NODE_CONDITION_MONITOR_ID} - node_records.each{|node_record| - node_name = JSON.parse(node_record[HealthMonitorRecordFields::MONITOR_LABELS])['kubernetes.io/hostname'] - @node_inventory.push(node_name) if node_name - } - end - - def get_workload_inventory(received_records) - @workload_inventory = [] - workload_records = received_records.select {|record| - (record[HealthMonitorRecordFields::MONITOR_ID] == HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID || - record[HealthMonitorRecordFields::MONITOR_ID] == HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID - ) - } - workload_records.each{|workload_record| - workload_name = JSON.parse(workload_record[HealthMonitorRecordFields::MONITOR_LABELS])['container.azm.ms/workload-name'] - @workload_inventory.push(workload_name) - } - end - - - end -end \ No 
newline at end of file diff --git a/source/code/plugin/health/health_missing_signal_generator.rb b/source/code/plugin/health/health_missing_signal_generator.rb index e93541796..f3c6f7328 100644 --- a/source/code/plugin/health/health_missing_signal_generator.rb +++ b/source/code/plugin/health/health_missing_signal_generator.rb @@ -7,7 +7,7 @@ def initialize() @last_received_records = {} end - def get_missing_signals(health_monitor_records, health_k8s_inventory) + def get_missing_signals(cluster_id, health_monitor_records, health_k8s_inventory, provider) missing_monitor_ids = [] nodes = health_k8s_inventory.get_nodes workload_names = health_k8s_inventory.get_workload_names @@ -17,6 +17,12 @@ def get_missing_signals(health_monitor_records, health_k8s_inventory) health_monitor_records.map{ |monitor| health_monitor_records_map[monitor.monitor_instance_id] = monitor } + + node_signals_hash = {} + nodes.each{|node| + node_signals_hash[node] = [HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID] + } + @last_received_records.each{|monitor_instance_id, monitor| if !health_monitor_records_map.key?(monitor_instance_id) if HealthMonitorUtils.is_node_monitor(monitor.monitor_id) @@ -56,6 +62,42 @@ def get_missing_signals(health_monitor_records, health_k8s_inventory) end end } + + # remove signals from the list of expected signals if we see them in the list of current signals + health_monitor_records.each{|health_monitor_record| + if HealthMonitorUtils.is_node_monitor(health_monitor_record.monitor_id) + node_name = health_monitor_record.labels['kubernetes.io/hostname'] + if node_signals_hash.key?(node_name) + signals = node_signals_hash[node_name] + signals.delete(health_monitor_record.monitor_id) + if signals.size == 0 + node_signals_hash.delete(node_name) + end + end + end + } + + # if the hash is not empty, means we have missing signals + if node_signals_hash.size > 0 + # these signals 
were not sent previously + # these signals need to be assigned an unknown state + node_signals_hash.each{|node, monitor_ids| + monitor_ids.each{|monitor_id| + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [cluster_id, node]) + new_monitor = HealthMonitorRecord.new( + monitor_id, + monitor_instance_id, + Time.now.utc.iso8601, + HealthMonitorStates::UNKNOWN, + provider.get_node_labels(node), + {}, + {"timestamp" => Time.now.utc.iso8601, "state" => HealthMonitorStates::UNKNOWN, "details" => "no signal received from node #{node}"} + ) + missing_signals_map[monitor_instance_id] = new_monitor + } + } + end + missing_signals_map.each{|k,v| missing_signals.push(v) } diff --git a/source/code/plugin/health/health_monitor_provider.rb b/source/code/plugin/health/health_monitor_provider.rb index a1b5bfd18..654ca3cfb 100644 --- a/source/code/plugin/health/health_monitor_provider.rb +++ b/source/code/plugin/health/health_monitor_provider.rb @@ -102,5 +102,17 @@ def get_labels(health_monitor_record) end return monitor_labels end + + def get_node_labels(node_name) + monitor_labels = {} + @health_kubernetes_resources.get_node_inventory['items'].each do |node| + if !node_name.nil? && !node['metadata']['name'].nil? && node_name == node['metadata']['name'] + if !node["metadata"].nil? && !node["metadata"]["labels"].nil? 
+ monitor_labels = node["metadata"]["labels"] + end + end + end + return monitor_labels + end end end \ No newline at end of file diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index 2da93245d..e4d1d1f34 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -1,3 +1,6 @@ +require 'logger' +require 'digest' + module HealthModel # static class that provides a bunch of utility methods class HealthMonitorUtils @@ -14,6 +17,11 @@ class HealthMonitorUtils @@node_inventory = [] @log_path = "/var/opt/microsoft/docker-cimprov/log/health_monitors.log" + + if Gem.win_platform? + @log_path = "C:\Temp\health_monitors.log" + end + @log = Logger.new(@log_path, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M @@last_refresh_time = '2019-01-01T00:00:00Z' @@ -349,10 +357,6 @@ def get_cluster_labels end def get_log_handle - if Gem.win_platform? - log_path = "C:\Temp\health_monitors.log" - @log = Logger.new(@log_path, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M - end return @log end end diff --git a/source/code/plugin/health/health_state_deserializer.rb b/source/code/plugin/health/health_state_deserializer.rb index 8bfcd4480..0a3ac3563 100644 --- a/source/code/plugin/health/health_state_deserializer.rb +++ b/source/code/plugin/health/health_state_deserializer.rb @@ -8,11 +8,29 @@ def initialize(path) end def deserialize - file = File.read("C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/state.json") #File.read(@deserialize_path) - records = JSON.parse(file) + if !File.file?(@deserialize_path) + return {} + end - #TODO: even though we call JSON.parse, records is still a string. Do JSON.parse again to return it as a hash - return JSON.parse(records) + file = File.read(@deserialize_path) #File.read(@deserialize_path) + + deserialized_state = {} + if !file.nil? || !file.empty? 
+ records = JSON.parse(file) + + records.each{|monitor_instance_id, health_monitor_instance_state_hash| + state = HealthMonitorInstanceState.new(*health_monitor_instance_state_hash.values_at(*HealthMonitorInstanceState.members)) + state.prev_sent_record_time = health_monitor_instance_state_hash["prev_sent_record_time"] + state.old_state = health_monitor_instance_state_hash["old_state"] + state.new_state = health_monitor_instance_state_hash["new_state"] + state.state_change_time = health_monitor_instance_state_hash["state_change_time"] + state.prev_records = health_monitor_instance_state_hash["prev_records"] + state.is_state_change_consistent = health_monitor_instance_state_hash["is_state_change_consistent"] || false + state.should_send = health_monitor_instance_state_hash["should_send"] + deserialized_state[monitor_instance_id] = state + } + return deserialized_state + end end end end \ No newline at end of file diff --git a/source/code/plugin/health/health_state_serializer.rb b/source/code/plugin/health/health_state_serializer.rb index c3706e05f..83e115d3c 100644 --- a/source/code/plugin/health/health_state_serializer.rb +++ b/source/code/plugin/health/health_state_serializer.rb @@ -7,8 +7,13 @@ def initialize(path) end def serialize(state) - File.open("C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/state.json", 'w') do |f| #File.open(@serialized_path, 'w') - f.write(JSON.pretty_generate(state.to_h.to_json)) + File.open(@serialized_path, 'w') do |f| #File.open(@serialized_path, 'w') + states = state.to_h + states_hash = {} + states.each{|id, value| + states_hash[id] = value.to_h + } + f.write(JSON.pretty_generate(states_hash)) end end end diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index 7fcdd3349..d3bfd791f 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -19,7 +19,7 @@ def initialize config_param :tag, :string, :default => "oms.api.cadvisorperf" 
config_param :mdmtag, :string, :default => "mdm.cadvisorperf" config_param :nodehealthtag, :string, :default => "oms.api.KubeHealth.DaemonSet.Node" - config_param :containerhealthtag, :string, :default => "oms.api.KubeHealth.DaemonSet.Container" + #config_param :containerhealthtag, :string, :default => "oms.api.KubeHealth.DaemonSet.Container" def configure(conf) @@ -59,7 +59,7 @@ def enumerate() router.emit_stream(@tag, eventStream) if eventStream router.emit_stream(@mdmtag, eventStream) if eventStream - router.emit_stream(@containerhealthtag, eventStream) if eventStream + #router.emit_stream(@containerhealthtag, eventStream) if eventStream router.emit_stream(@nodehealthtag, eventStream) if eventStream @@istestvar = ENV["ISTEST"] diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 66ef305c0..1c031fb71 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -71,7 +71,6 @@ def enumerate health_monitor_records = [] eventStream = MultiEventStream.new - hmlog = HealthMonitorUtils.getLogHandle HealthMonitorUtils.refresh_kubernetes_api_data(@@hmlog, nil) # we do this so that if the call fails, we get a response code/header etc. 
node_inventory_response = KubernetesApiClient.getKubeResourceInfo("nodes") @@ -146,7 +145,7 @@ def process_cpu_oversubscribed_monitor(pod_inventory) health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}} # @@hmlog.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(@@hmlog, monitor_id, [@@clusterId]) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId]) #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" health_record = {} time_now = Time.now.utc.iso8601 @@ -209,7 +208,7 @@ def process_kube_api_up_monitor(state, response) end def process_pods_ready_percentage(pods_hash, config_monitor_id) - monitor_config = @@provider.get_config(config_monitor_id) + monitor_config = @provider.get_config(config_monitor_id) hmlog = HealthMonitorUtils.get_log_handle records = [] @@ -242,7 +241,7 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) def process_node_condition_monitor(node_inventory) monitor_id = HealthMonitorConstants::NODE_CONDITION_MONITOR_ID timestamp = Time.now.utc.iso8601 - monitor_config = @@healthMonitorConfig[monitor_id] + monitor_config = @provider.get_config(monitor_id) node_condition_monitor_records = [] if !node_inventory.nil? 
node_inventory['items'].each do |node| @@ -255,7 +254,7 @@ def process_node_condition_monitor(node_inventory) details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message']} end health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} - monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(@@hmlog, monitor_id, [@@clusterId, node_name]) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, node_name]) health_record = {} time_now = Time.now.utc.iso8601 health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id From f3520fe540e1a95f103840d61ddd4c42cf93e3d6 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Mon, 17 Jun 2019 19:27:30 -0700 Subject: [PATCH 51/90] adding source, health config to base_container.data --- installer/datafiles/base_container.data | 44 ++++++++++++++++++++----- source/code/plugin/in_cadvisor_perf.rb | 1 - 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 6824da684..9d7a5b914 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -50,14 +50,6 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/out_mdm.rb; source/code/plugin/out_mdm.rb; 644; root; root /opt/microsoft/omsagent/plugin/filter_cadvisor2mdm.rb; source/code/plugin/filter_cadvisor2mdm.rb; 644; root; root -/opt/microsoft/omsagent/plugin/filter_cadvisor_health.rb; source/code/plugin/filter_cadvisor_health.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_kube_health.rb; source/code/plugin/in_kube_health.rb; 644; root; root -/opt/microsoft/omsagent/plugin/HealthMonitorConstants.rb; source/code/plugin/HealthMonitorConstants.rb; 644; root; root -/opt/microsoft/omsagent/plugin/HealthMonitorSignalReducer.rb; source/code/plugin/HealthMonitorSignalReducer.rb; 644; root; root 
-/opt/microsoft/omsagent/plugin/HealthMonitorState.rb; source/code/plugin/HealthMonitorState.rb; 644; root; root -/opt/microsoft/omsagent/plugin/HealthMonitorUtils.rb; source/code/plugin/HealthMonitorUtils.rb; 644; root; root -/opt/microsoft/omsagent/plugin/healthmonitorconfig.json; installer/conf/healthmonitorconfig.json; 644; root; root - /opt/microsoft/omsagent/plugin/lib/application_insights/version.rb; source/code/plugin/lib/application_insights/version.rb; 644; root; root /opt/microsoft/omsagent/plugin/lib/application_insights/rack/track_request.rb; source/code/plugin/lib/application_insights/rack/track_request.rb; 644; root; root /opt/microsoft/omsagent/plugin/lib/application_insights/unhandled_exception.rb; source/code/plugin/lib/application_insights/unhandled_exception.rb; 644; root; root @@ -111,6 +103,40 @@ MAINTAINER: 'Microsoft Corporation' +/opt/microsoft/omsagent/plugin/filter_cadvisor_health_node.rb; source/code/plugin/filter_cadvisor_health_node.rb; 644; root; root +/opt/microsoft/omsagent/plugin/filter_health_model_builder.rb; source/code/plugin/filter_health_model_builder.rb; 644; root; root +/opt/microsoft/omsagent/plugin/in_kube_health.rb; source/code/plugin/in_kube_health.rb; 644; root; root +/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json; installer/conf/healthmonitorconfig.json; 644; root; root +/etc/opt/microsoft/docker-cimprov/health/health_model_definition.json; installer/conf/health_model_definition.json; 644; root; root + + +/opt/microsoft/omsagent/plugin/health/aggregate_monitor.rb source/code/plugin/health/aggregate_monitor.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/aggregate_monitor_instance_id_labels.rb source/code/plugin/health/aggregate_monitor_instance_id_labels.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/aggregate_monitor_state_finalizer.rb source/code/plugin/health/aggregate_monitor_state_finalizer.rb; 644; root; root 
+/opt/microsoft/omsagent/plugin/health/health_hierarchy_builder.rb source/code/plugin/health/health_hierarchy_builder.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_kubernetes_resources.rb source/code/plugin/health/health_kubernetes_resources.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_kube_api_down_handler.rb source/code/plugin/health/health_kube_api_down_handler.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_missing_signal_generator.rb source/code/plugin/health/health_missing_signal_generator.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_model_buffer.rb source/code/plugin/health/health_model_buffer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_model_builder.rb source/code/plugin/health/health_model_builder.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_model_constants.rb source/code/plugin/health/health_model_constants.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_model_definition.rb source/code/plugin/health/health_model_definition.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_model_definition_parser.rb source/code/plugin/health/health_model_definition_parser.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_optimizer.rb source/code/plugin/health/health_monitor_optimizer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_provider.rb source/code/plugin/health/health_monitor_provider.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_record.rb source/code/plugin/health/health_monitor_record.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_state.rb source/code/plugin/health/health_monitor_state.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_utils.rb source/code/plugin/health/health_monitor_utils.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_signal_reducer.rb 
source/code/plugin/health/health_signal_reducer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_state_deserializer.rb source/code/plugin/health/health_state_deserializer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_state_serializer.rb source/code/plugin/health/health_state_serializer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/monitor_factory.rb source/code/plugin/health/monitor_factory.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/monitor_set.rb source/code/plugin/health/monitor_set.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/node_missing_signal_state.rb source/code/plugin/health/node_missing_signal_state.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/node_monitor_hierarchy_reducer.rb source/code/plugin/health/node_monitor_hierarchy_reducer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/unit_monitor.rb source/code/plugin/health/unit_monitor.rb; 644; root; root + + %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root @@ -124,6 +150,7 @@ MAINTAINER: 'Microsoft Corporation' /etc/opt/microsoft; 755; root; root; sysdir /etc/opt/microsoft/docker-cimprov; 755; root; root /etc/opt/microsoft/docker-cimprov/conf; 755; root; root +/etc/opt/microsoft/docker-cimprov/health; 755; root; root /etc/opt/omi; 755; root; root; sysdir /etc/opt/omi/conf; 755; root; root; sysdir @@ -137,6 +164,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent; 755; root; root; sysdir /opt/microsoft/omsagent/plugin; 755; root; root; sysdir +/opt/microsoft/omsagent/plugin/health; 755; root; root; sysdir /opt/omi; 755; root; root; sysdir /opt/omi/lib; 755; root; root; sysdir diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index d3bfd791f..f203d426d 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -21,7 +21,6 @@ def 
initialize config_param :nodehealthtag, :string, :default => "oms.api.KubeHealth.DaemonSet.Node" #config_param :containerhealthtag, :string, :default => "oms.api.KubeHealth.DaemonSet.Container" - def configure(conf) super end From bc57eb29d85059d9add16f953337c9ba2a6e9b7d Mon Sep 17 00:00:00 2001 From: r-dilip Date: Mon, 17 Jun 2019 19:46:20 -0700 Subject: [PATCH 52/90] Container conf changes, permissions for log files etc. --- installer/conf/container.conf | 34 +++++++++++---------- installer/conf/kube.conf | 39 ++++++++++++++++--------- installer/datafiles/base_container.data | 8 +++++ 3 files changed, 52 insertions(+), 29 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 4ffb3a7a6..ce33ebbc6 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -28,10 +28,6 @@ log_level debug - - type filter_cadvisor_health_container - log_level debug - #custom_metrics_mdm filter plugin @@ -71,17 +67,25 @@ max_retry_wait 9m - - type out_oms_api - log_level debug - buffer_chunk_limit 10m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_kubehealth*.buffer - buffer_queue_limit 10 - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m + + + @type forward + send_timeout 60s + recover_wait 10s + hard_timeout 60s + #phi_failure_detector false + #phi_threshold 32 + heartbeat_type tcp + + + host replicaset-service.kube-system + port 25235 + + + + @type file + path /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log + diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index a43e6ec8f..11b1ce841 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -1,4 +1,9 @@ # Fluentd config file for OMS Docker - cluster components (kubeAPI) + + type forward + port 25235 + bind 0.0.0.0 + #Kubernetes pod inventory @@ -47,14 +52,15 @@ log_level debug -#cluster health +#Kubernetes health type kubehealth - tag oms.api.KubeHealth.AgentCollectionTime + tag 
oms.api.KubeHealth.ReplicaSet run_interval 60s log_level debug +#cadvisor perf- Windows nodes type wincadvisorperf tag oms.api.wincadvisorperf @@ -76,6 +82,9 @@ log_level info + + type filter_health_model_builder + type out_mdm log_level debug @@ -125,7 +134,7 @@ type out_oms_api log_level debug - buffer_chunk_limit 10m + buffer_chunk_limit 10m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_api_kubernetes_logs*.buffer buffer_queue_limit 10 @@ -134,17 +143,7 @@ retry_wait 30s - - type out_oms_api - log_level debug - buffer_chunk_limit 10m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_KubeHealth*.buffer - buffer_queue_limit 10 - flush_interval 20s - retry_limit 10 - retry_wait 30s - + type out_oms @@ -233,4 +232,16 @@ retry_limit 10 retry_wait 30s max_retry_wait 9m + + + + type out_oms_api + log_level debug + buffer_chunk_limit 10m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_api_KubeHealth*.buffer + buffer_queue_limit 10 + flush_interval 20s + retry_limit 10 + retry_wait 30s \ No newline at end of file diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 9d7a5b914..580f5afe2 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -230,6 +230,14 @@ touch /var/opt/microsoft/docker-cimprov/log/health_monitors.log chmod 666 /var/opt/microsoft/docker-cimprov/log/health_monitors.log chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/health_monitors.log +touch /var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log +chmod 666 /var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log +chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log + +touch /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log +chmod 666 /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log +chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log + mv 
/etc/opt/microsoft/docker-cimprov/container.conf /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf chown omsagent:omsagent /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf From 88621c73a1318e33b5c66642bbb3adf15679f988 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 18 Jun 2019 14:44:51 -0700 Subject: [PATCH 53/90] Reinstate run_interval that was removed accidentally --- source/code/plugin/in_cadvisor_perf.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index f203d426d..1702877a2 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -16,6 +16,7 @@ def initialize require_relative "omslog" end + config_param :run_interval, :time, :default => "1m" config_param :tag, :string, :default => "oms.api.cadvisorperf" config_param :mdmtag, :string, :default => "mdm.cadvisorperf" config_param :nodehealthtag, :string, :default => "oms.api.KubeHealth.DaemonSet.Node" From 966a0b16b1b50ee57dcc6dd6d1456778e5644ce4 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 18 Jun 2019 22:39:31 -0700 Subject: [PATCH 54/90] Remove single sample flip configs, fixed details.to_json bug, pass in cluster_id for testability, make cluster_id a class variable in all plugins --- installer/conf/healthmonitorconfig.json | 12 ---------- .../plugin/filter_cadvisor_health_node.rb | 2 +- .../plugin/filter_health_model_builder.rb | 7 +++--- .../code/plugin/health/aggregate_monitor.rb | 4 ++-- .../plugin/health/health_monitor_provider.rb | 17 +++++++++----- .../plugin/health/health_monitor_utils.rb | 2 +- source/code/plugin/in_kube_health.rb | 22 +++++++++---------- 7 files changed, 30 insertions(+), 36 deletions(-) diff --git a/installer/conf/healthmonitorconfig.json b/installer/conf/healthmonitorconfig.json index 1ad28fcee..28d562652 100644 --- a/installer/conf/healthmonitorconfig.json +++ b/installer/conf/healthmonitorconfig.json @@ -1,16 +1,4 @@ { - 
"node_condition": { - "ConsecutiveSamplesForStateTransition": 1 - }, - "subscribed_capacity_cpu": { - "ConsecutiveSamplesForStateTransition": 1 - }, - "subscribed_capacity_memory": { - "ConsecutiveSamplesForStateTransition": 1 - }, - "kube_api_status": { - "ConsecutiveSamplesForStateTransition": 1 - }, "node_cpu_utilization": { "WarnThresholdPercentage": 80.0, "FailThresholdPercentage": 90.0, diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb index 08449bdb3..29d6eb822 100644 --- a/source/code/plugin/filter_cadvisor_health_node.rb +++ b/source/code/plugin/filter_cadvisor_health_node.rb @@ -39,7 +39,7 @@ def initialize @last_resource_refresh = DateTime.now.to_time.to_i @metrics_to_collect_hash = {} @resources = HealthKubernetesResources.instance # this doesnt require node and pod inventory. So no need to populate them - @provider = HealthMonitorProvider.new(HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) + @provider = HealthMonitorProvider.new(@@clusterId, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) end def configure(conf) diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 127005c9b..70420dd9a 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -20,6 +20,7 @@ class FilterHealthModelBuilder < Filter include HealthModel @@rewrite_tag = 'oms.api.KubeHealth.AgentCollectionTime' + @@cluster_id = KubernetesApiClient.getClusterId def initialize super @@ -37,7 +38,7 @@ def initialize @state = HealthMonitorState.new @generator = HealthMissingSignalGenerator.new #TODO: cluster_labels needs to be initialized - @provider = HealthMonitorProvider.new(HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) + @provider = HealthMonitorProvider.new(@@cluster_id, 
HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) @serializer = HealthStateSerializer.new(@health_state_serialized_path) @deserializer = HealthStateDeserializer.new(@health_state_serialized_path) # TODO: in_kube_api_health should set these values @@ -127,7 +128,7 @@ def filter_stream(tag, es) @log.info "after deduping and removing gone objects reduced_records.size #{reduced_records.size}" #get the list of 'none' and 'unknown' signals - missing_signals = @generator.get_missing_signals(KubernetesApiClient.getClusterId, reduced_records, @resources, @provider) + missing_signals = @generator.get_missing_signals(@@cluster_id, reduced_records, @resources, @provider) @log.info "after getting missing signals missing_signals.size #{missing_signals.size}" #update state for missing signals @@ -173,7 +174,7 @@ def filter_stream(tag, es) # generate the record to send all_monitors.keys.each{|key| record = @provider.get_record(all_monitors[key], state) - puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" + #@log.info "#{record["Details"]} #{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" new_es.add(time, record) } diff --git a/source/code/plugin/health/aggregate_monitor.rb b/source/code/plugin/health/aggregate_monitor.rb index 9063d146b..a0086601d 100644 --- a/source/code/plugin/health/aggregate_monitor.rb +++ b/source/code/plugin/health/aggregate_monitor.rb @@ -68,8 +68,8 @@ def calculate_details(monitor_set) member_monitor_instance_ids.each{|member_monitor_id| member_monitor = monitor_set.get_monitor(member_monitor_id) member_state = member_monitor.state - if @details.key?(member_state) - ids = details[member_state] + if @details['details'].key?(member_state) + ids = @details['details'][member_state] if !ids.include?(member_monitor.monitor_instance_id) ids.push(member_monitor.monitor_instance_id) end diff --git a/source/code/plugin/health/health_monitor_provider.rb 
b/source/code/plugin/health/health_monitor_provider.rb index 654ca3cfb..5a20ba31f 100644 --- a/source/code/plugin/health/health_monitor_provider.rb +++ b/source/code/plugin/health/health_monitor_provider.rb @@ -1,12 +1,13 @@ module HealthModel class HealthMonitorProvider - attr_accessor :cluster_labels, :health_kubernetes_resources, :monitor_configuration_path + attr_accessor :cluster_labels, :health_kubernetes_resources, :monitor_configuration_path, :cluster_id attr_reader :monitor_configuration - def initialize(cluster_labels, health_kubernetes_resources, monitor_configuration_path) + def initialize(cluster_id, cluster_labels, health_kubernetes_resources, monitor_configuration_path) @cluster_labels = Hash.new cluster_labels.each{|k,v| @cluster_labels[k] = v} + @cluster_id = cluster_id @health_kubernetes_resources = health_kubernetes_resources @monitor_configuration_path = monitor_configuration_path begin @@ -54,13 +55,14 @@ def get_record(health_monitor_record, health_monitor_state) time_observed = Time.now.utc.iso8601 monitor_record = {} - monitor_record[HealthMonitorRecordFields::CLUSTER_ID] = 'fake_cluster_id' #KubernetesApiClient.getClusterId + + monitor_record[HealthMonitorRecordFields::CLUSTER_ID] = @cluster_id monitor_record[HealthMonitorRecordFields::MONITOR_LABELS] = labels.to_json monitor_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id monitor_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id monitor_record[HealthMonitorRecordFields::NEW_STATE] = new_state monitor_record[HealthMonitorRecordFields::OLD_STATE] = old_state - monitor_record[HealthMonitorRecordFields::DETAILS] = details + monitor_record[HealthMonitorRecordFields::DETAILS] = details.to_json monitor_record[HealthMonitorRecordFields::MONITOR_CONFIG] = config.to_json monitor_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = Time.now.utc.iso8601 monitor_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_first_observed @@ -77,7 +79,10 @@ 
def get_config(monitor_id) end def get_labels(health_monitor_record) - monitor_labels = {} + monitor_labels = Hash.new + @cluster_labels.keys.each{|key| + monitor_labels[key] = @cluster_labels[key] + } monitor_id = health_monitor_record[HealthMonitorRecordFields::MONITOR_ID] case monitor_id when HealthMonitorConstants::CONTAINER_CPU_MONITOR_ID, HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID, HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID @@ -95,7 +100,7 @@ def get_labels(health_monitor_record) @health_kubernetes_resources.get_node_inventory['items'].each do |node| if !node_name.nil? && !node['metadata']['name'].nil? && node_name == node['metadata']['name'] if !node["metadata"].nil? && !node["metadata"]["labels"].nil? - monitor_labels = node["metadata"]["labels"] + monitor_labels = monitor_labels.merge(node["metadata"]["labels"]) end end end diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index e4d1d1f34..bb1728095 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -11,7 +11,7 @@ class HealthMonitorUtils require_relative '../KubernetesApiClient' end rescue => e - puts "#{e.message}" + $log.info "Error loading KubernetesApiClient #{e.message}" end @@node_inventory = [] diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 1c031fb71..3650f6efa 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -22,8 +22,9 @@ def initialize require "yaml" require "json" + @@cluster_id = KubernetesApiClient.getClusterId @resources = HealthKubernetesResources.instance - @provider = HealthMonitorProvider.new(HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) + @provider = HealthMonitorProvider.new(@@cluster_id, HealthMonitorUtils.get_cluster_labels, @resources, 
@health_monitor_config_path) end include HealthModel @@ -43,7 +44,6 @@ def start @@hmlog = HealthMonitorUtils.get_log_handle @@clusterName = KubernetesApiClient.getClusterName - @@clusterId = KubernetesApiClient.getClusterId @@clusterRegion = KubernetesApiClient.getClusterRegion cluster_capacity = HealthMonitorUtils.get_cluster_cpu_memory_capacity(@@hmlog) @@clusterCpuCapacity = cluster_capacity[0] @@ -145,7 +145,7 @@ def process_cpu_oversubscribed_monitor(pod_inventory) health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}} # @@hmlog.info health_monitor_record - monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId]) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@cluster_id]) #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" health_record = {} time_now = Time.now.utc.iso8601 @@ -154,7 +154,7 @@ def process_cpu_oversubscribed_monitor(pod_inventory) health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now - health_record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId + health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id @@hmlog.info "Successfully processed process_cpu_oversubscribed_monitor" return health_record end @@ -170,7 +170,7 @@ def process_memory_oversubscribed_monitor(pod_inventory) health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}} hmlog = HealthMonitorUtils.get_log_handle - monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId]) + monitor_instance_id = 
HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@cluster_id]) health_record = {} time_now = Time.now.utc.iso8601 health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id @@ -178,7 +178,7 @@ def process_memory_oversubscribed_monitor(pod_inventory) health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now - health_record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId + health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id @@hmlog.info "Successfully processed process_memory_oversubscribed_monitor" return health_record end @@ -202,7 +202,7 @@ def process_kube_api_up_monitor(state, response) health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now - health_record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId + health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id @@hmlog.info "Successfully processed process_kube_api_up_monitor" return health_record end @@ -223,7 +223,7 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) state = HealthMonitorUtils.compute_percentage_state((100-percent), monitor_config) health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "workloadName" => workload_name, "namespace" => namespace, "workloadKind" => workload_kind}} - monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(config_monitor_id, [@@clusterId, namespace, workload_name]) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(config_monitor_id, [@@cluster_id, namespace, workload_name]) health_record = {} time_now = Time.now.utc.iso8601 
health_record[HealthMonitorRecordFields::MONITOR_ID] = config_monitor_id @@ -231,7 +231,7 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now - health_record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId + health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id records.push(health_record) end @@hmlog.info "Successfully processed pods_ready_percentage for #{config_monitor_id} #{records.size}" @@ -254,7 +254,7 @@ def process_node_condition_monitor(node_inventory) details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message']} end health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} - monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, node_name]) + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@cluster_id, node_name]) health_record = {} time_now = Time.now.utc.iso8601 health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id @@ -262,7 +262,7 @@ def process_node_condition_monitor(node_inventory) health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now - health_record[HealthMonitorRecordFields::CLUSTER_ID] = KubernetesApiClient.getClusterId + health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id health_record[HealthMonitorRecordFields::NODE_NAME] = node_name node_condition_monitor_records.push(health_record) end From 5edf6161f7ef85158d0a65ee6d4a2a577d823278 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 18 Jun 2019 22:42:16 -0700 Subject: [PATCH 55/90] Remove 
unnecessary logging --- source/code/plugin/filter_cadvisor_health_node.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb index 29d6eb822..4605091c6 100644 --- a/source/code/plugin/filter_cadvisor_health_node.rb +++ b/source/code/plugin/filter_cadvisor_health_node.rb @@ -68,7 +68,6 @@ def filter_stream(tag, es) begin filtered_record = filter(tag, time, record) if !filtered_record.nil? - @@hm_log.debug "#{JSON.pretty_generate(filtered_record)}" new_es.add(time, filtered_record) records_count += 1 end From 09063ba7b23e954d00d35c7370bdd2d01f7c4ca3 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 19 Jun 2019 18:54:02 -0700 Subject: [PATCH 56/90] Fix Aggregation logic for 'percentage' agg algorithm monitors --- installer/conf/health_model_definition.json | 10 +++++++- .../code/plugin/health/aggregate_monitor.rb | 24 +++++++++++++++---- .../plugin/health/health_hierarchy_builder.rb | 2 +- .../health/health_model_definition_parser.rb | 7 +++--- 4 files changed, 34 insertions(+), 9 deletions(-) diff --git a/installer/conf/health_model_definition.json b/installer/conf/health_model_definition.json index 17c82f5ef..aaa7e133d 100644 --- a/installer/conf/health_model_definition.json +++ b/installer/conf/health_model_definition.json @@ -166,6 +166,10 @@ { "monitor_id": "master_node_pool", "aggregation_algorithm": "percentage", + "aggregation_algorithm_params": { + "critical_threshold": 80.0, + "warning_threshold": 90.0 + }, "parent_monitor_id": "all_nodes", "labels": [ "container.azm.ms/cluster-region", @@ -176,7 +180,11 @@ }, { "monitor_id": "agent_node_pool", - "aggregation_algorithm": "worstOf", + "aggregation_algorithm": "percentage", + "aggregation_algorithm_params": { + "critical_threshold": 80.0, + "warning_threshold": 90.0 + }, "labels": [ "agentpool", "container.azm.ms/cluster-region", diff --git a/source/code/plugin/health/aggregate_monitor.rb 
b/source/code/plugin/health/aggregate_monitor.rb index a0086601d..bf4e01bbc 100644 --- a/source/code/plugin/health/aggregate_monitor.rb +++ b/source/code/plugin/health/aggregate_monitor.rb @@ -6,7 +6,7 @@ module HealthModel class AggregateMonitor attr_accessor :monitor_id, :monitor_instance_id, :state, :transition_date_time, :aggregation_algorithm, :aggregation_algorithm_params, :labels, :is_aggregate_monitor, :details - attr_reader :member_monitors + attr_reader :member_monitors, :member_state_counts # constructor def initialize( @@ -26,6 +26,7 @@ def initialize( @aggregation_algorithm_params = aggregation_algorithm_params @labels = labels @member_monitors = {} + @member_state_counts = {} @is_aggregate_monitor = true end @@ -83,7 +84,7 @@ def calculate_details(monitor_set) # calculates the worst of state, given the member monitors def calculate_worst_of_state(monitor_set) - member_state_counts = map_member_monitor_states(monitor_set) + @member_state_counts = map_member_monitor_states(monitor_set) if member_state_counts.length === 0 return MonitorState::NONE @@ -100,15 +101,30 @@ def calculate_worst_of_state(monitor_set) end if member_state_counts.key?(MonitorState::NONE) && member_state_counts[MonitorState::NONE] > 0 - return MonitorState::NONE + return MonitorState::HEALTHY #none should win over healthy in aggregation end return MonitorState::HEALTHY end # calculates a percentage state, given the aggregation algorithm parameters - def calculate_percentage_state + def calculate_percentage_state(monitor_set) + + if @member_state_counts.nil? || @member_state_counts.empty? 
+ @member_state_counts = map_member_monitor_states(monitor_set) + end + member_state_counts_percentage = {} + @member_state_counts.map{|k,v| member_state_counts_percentage[k] = @member_state_counts[k] / @member_monitors.size * 100 } + healthy = (member_state_counts_percentage[MonitorState::HEALTHY] || 0) + (member_state_counts_percentage[MonitorState::NONE] || 0) + + if healthy > @aggregation_algorithm_params['warning_threshold'] + @state = MonitorState::HEALTHY + elsif healthy > @aggregation_algorithm_params['critical_threshold'] + @state = MonitorState::WARNING + else + @state = MonitorState::CRITICAL + end end # maps states of member monitors to counts diff --git a/source/code/plugin/health/health_hierarchy_builder.rb b/source/code/plugin/health/health_hierarchy_builder.rb index b01759d52..bfd66ebd9 100644 --- a/source/code/plugin/health/health_hierarchy_builder.rb +++ b/source/code/plugin/health/health_hierarchy_builder.rb @@ -48,7 +48,7 @@ def process_record(health_monitor_record, monitor_set) # if present, update the state based on the aggregation algorithm parent_monitor = nil if !monitor_set.contains?(parent_monitor_instance_id) - parent_monitor = @monitor_factory.create_aggregate_monitor(parent_monitor_id, parent_monitor_instance_id, parent_monitor_labels, parent_monitor_configuration['aggregation_algorithm'], nil, child_monitor) + parent_monitor = @monitor_factory.create_aggregate_monitor(parent_monitor_id, parent_monitor_instance_id, parent_monitor_labels, parent_monitor_configuration['aggregation_algorithm'], parent_monitor_configuration['aggregation_algorithm_params'], child_monitor) parent_monitor.add_member_monitor(child_monitor.monitor_instance_id) else parent_monitor = monitor_set.get_monitor(parent_monitor_instance_id) diff --git a/source/code/plugin/health/health_model_definition_parser.rb b/source/code/plugin/health/health_model_definition_parser.rb index ee424c265..f6c7a781d 100644 --- 
a/source/code/plugin/health/health_model_definition_parser.rb +++ b/source/code/plugin/health/health_model_definition_parser.rb @@ -27,6 +27,7 @@ def parse_file parent_monitor_id = entry['parent_monitor_id'] labels = entry['labels'] if entry['labels'] aggregation_algorithm = entry['aggregation_algorithm'] if entry['aggregation_algorithm'] + aggregation_algorithm_params = entry['aggregation_algorithm_params'] if entry['aggregation_algorithm_params'] if parent_monitor_id.is_a?(Array) conditions = [] parent_monitor_id.each{|condition| @@ -36,11 +37,11 @@ def parse_file parent_id = condition['id'] conditions.push({"key" => key, "operator" => operator, "value" => value, "parent_id" => parent_id}) } - @health_model_definition[monitor_id] = {"conditions" => conditions, "labels" => labels, "aggregation_algorithm" => aggregation_algorithm} + @health_model_definition[monitor_id] = {"conditions" => conditions, "labels" => labels, "aggregation_algorithm" => aggregation_algorithm, "aggregation_algorithm_params" =>aggregation_algorithm_params} elsif parent_monitor_id.is_a?(String) - @health_model_definition[monitor_id] = {"parent_monitor_id" => parent_monitor_id, "labels" => labels, "aggregation_algorithm" => aggregation_algorithm} + @health_model_definition[monitor_id] = {"parent_monitor_id" => parent_monitor_id, "labels" => labels, "aggregation_algorithm" => aggregation_algorithm, "aggregation_algorithm_params" =>aggregation_algorithm_params} elsif parent_monitor_id.nil? 
- @health_model_definition[monitor_id] = {"parent_monitor_id" => nil, "labels" => labels, "aggregation_algorithm" => aggregation_algorithm} + @health_model_definition[monitor_id] = {"parent_monitor_id" => nil, "labels" => labels, "aggregation_algorithm" => aggregation_algorithm, "aggregation_algorithm_params" =>aggregation_algorithm_params} end } @health_model_definition From aba0d17d9236eace7a2061f62852e938c5da41e5 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 20 Jun 2019 23:04:55 -0700 Subject: [PATCH 57/90] Scale up Scale down bugs fixed, sending none signal on first occurence, cache parent instance id and parent monitor instance id, healthy takes precedence over none --- .../plugin/filter_health_model_builder.rb | 14 ++++--- .../code/plugin/health/aggregate_monitor.rb | 11 ++++-- .../plugin/health/health_hierarchy_builder.rb | 2 +- .../health/health_kubernetes_resources.rb | 7 +++- .../health/health_missing_signal_generator.rb | 39 ++++++++++++++++--- .../plugin/health/health_model_builder.rb | 2 - .../plugin/health/health_model_definition.rb | 23 +++++++++-- .../plugin/health/health_monitor_state.rb | 16 +++++++- 8 files changed, 89 insertions(+), 25 deletions(-) diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 70420dd9a..138a2f4cc 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -133,17 +133,17 @@ def filter_stream(tag, es) @log.info "after getting missing signals missing_signals.size #{missing_signals.size}" #update state for missing signals missing_signals.each{|signal| - @state.update_state(signal, - @provider.get_config(signal.monitor_id) - ) + + @state.update_state(signal, @provider.get_config(signal.monitor_id)) + @log.info "After Updating #{@state.get_state(signal.monitor_instance_id)} #{@state.get_state(signal.monitor_instance_id).new_state}" } @generator.update_last_received_records(reduced_records) - 
reduced_records.push(*missing_signals) + all_records = reduced_records.clone + all_records.push(*missing_signals) - @log.info "after Adding missing signals reduced_records.size #{reduced_records.size}" + @log.info "after Adding missing signals all_records.size #{all_records.size}" # build the health model - all_records = reduced_records @model_builder.process_records(all_records) all_monitors = @model_builder.finalize_model @@ -179,6 +179,8 @@ def filter_stream(tag, es) } @serializer.serialize(@state) + @monitor_set = HealthModel::MonitorSet.new + @model_builder = HealthModel::HealthModelBuilder.new(@hierarchy_builder, @state_finalizers, @monitor_set) router.emit_stream(@@rewrite_tag, new_es) # return an empty event stream, else the match will throw a NoMethodError diff --git a/source/code/plugin/health/aggregate_monitor.rb b/source/code/plugin/health/aggregate_monitor.rb index bf4e01bbc..248cfa565 100644 --- a/source/code/plugin/health/aggregate_monitor.rb +++ b/source/code/plugin/health/aggregate_monitor.rb @@ -100,11 +100,16 @@ def calculate_worst_of_state(monitor_set) return MonitorState::WARNING end - if member_state_counts.key?(MonitorState::NONE) && member_state_counts[MonitorState::NONE] > 0 - return MonitorState::HEALTHY #none should win over healthy in aggregation + if member_state_counts.key?(MonitorState::UNKNOWN) && member_state_counts[MonitorState::UNKNOWN] > 0 + return MonitorState::UNKNOWN end - return MonitorState::HEALTHY + if member_state_counts.key?(MonitorState::HEALTHY) && member_state_counts[MonitorState::HEALTHY] > 0 + return MonitorState::HEALTHY #healthy should win over none in aggregation + end + + return MonitorState::NONE + end # calculates a percentage state, given the aggregation algorithm parameters diff --git a/source/code/plugin/health/health_hierarchy_builder.rb b/source/code/plugin/health/health_hierarchy_builder.rb index bfd66ebd9..c73616399 100644 --- a/source/code/plugin/health/health_hierarchy_builder.rb +++ 
b/source/code/plugin/health/health_hierarchy_builder.rb @@ -42,7 +42,7 @@ def process_record(health_monitor_record, monitor_set) # get the parent monitor configuration parent_monitor_configuration = @health_model_definition.get_parent_monitor_config(parent_monitor_id) #get monitor instance id for parent monitor. Does this belong in HealthModelDefinition? - parent_monitor_instance_id = @health_model_definition.get_parent_monitor_instance_id(parent_monitor_id, parent_monitor_labels) + parent_monitor_instance_id = @health_model_definition.get_parent_monitor_instance_id(child_monitor.monitor_instance_id, parent_monitor_id, parent_monitor_labels) # check if monitor set has the parent monitor id # if not present, add # if present, update the state based on the aggregation algorithm diff --git a/source/code/plugin/health/health_kubernetes_resources.rb b/source/code/plugin/health/health_kubernetes_resources.rb index b1a13be1f..b9600101c 100644 --- a/source/code/plugin/health/health_kubernetes_resources.rb +++ b/source/code/plugin/health/health_kubernetes_resources.rb @@ -21,8 +21,12 @@ def get_node_inventory end def get_nodes + @nodes = [] @node_inventory['items'].each {|node| - @nodes.push(node['metadata']['name']) + if !@nodes.include?(node['metadata']['name']) + @nodes.push(node['metadata']['name']) + end + } return @nodes end @@ -36,6 +40,7 @@ def get_pods end def get_workload_names + @pods = [] workload_names = {} deployment_lookup = {} @deployment_inventory['items'].each do |deployment| diff --git a/source/code/plugin/health/health_missing_signal_generator.rb b/source/code/plugin/health/health_missing_signal_generator.rb index f3c6f7328..589dfa234 100644 --- a/source/code/plugin/health/health_missing_signal_generator.rb +++ b/source/code/plugin/health/health_missing_signal_generator.rb @@ -1,10 +1,11 @@ module HealthModel class HealthMissingSignalGenerator attr_accessor :last_received_records, :current_received_records - attr_reader :missing_signals + attr_reader 
:missing_signals, :unknown_signals_hash def initialize() @last_received_records = {} + @unknown_signals_hash = {} end def get_missing_signals(cluster_id, health_monitor_records, health_k8s_inventory, provider) @@ -22,7 +23,8 @@ def get_missing_signals(cluster_id, health_monitor_records, health_k8s_inventory nodes.each{|node| node_signals_hash[node] = [HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID] } - + log = HealthMonitorUtils.get_log_handle + log.info "last_received_records #{@last_received_records.size} nodes #{nodes}" @last_received_records.each{|monitor_instance_id, monitor| if !health_monitor_records_map.key?(monitor_instance_id) if HealthMonitorUtils.is_node_monitor(monitor.monitor_id) @@ -34,14 +36,20 @@ def get_missing_signals(cluster_id, health_monitor_records, health_k8s_inventory monitor.state, monitor.labels, monitor.config, - monitor.details + {"timestamp" => Time.now.utc.iso8601, "state" => HealthMonitorStates::UNKNOWN, "details" => ""} ) if !node_name.nil? && nodes.include?(node_name) new_monitor.state = HealthMonitorStates::UNKNOWN + new_monitor.details["state"] = HealthMonitorStates::UNKNOWN + new_monitor.details["details"] = "Node present in inventory but no signal for #{monitor.monitor_id} from node #{node_name}" + @unknown_signals_hash[monitor_instance_id] = new_monitor elsif !node_name.nil? && !nodes.include?(node_name) new_monitor.state = HealthMonitorStates::NONE + new_monitor.details["state"] = HealthMonitorStates::NONE + new_monitor.details["details"] = "Node NOT present in inventory. 
node: #{node_name}" end missing_signals_map[monitor_instance_id] = new_monitor + log.info "Added missing signal #{new_monitor.monitor_instance_id} #{new_monitor.state}" elsif HealthMonitorUtils.is_pods_ready_monitor(monitor.monitor_id) lookup = "#{monitor.labels['container.azm.ms/namespace']}~~#{monitor.labels['container.azm.ms/workload-name']}" new_monitor = HealthMonitorRecord.new( @@ -51,20 +59,26 @@ def get_missing_signals(cluster_id, health_monitor_records, health_k8s_inventory monitor.state, monitor.labels, monitor.config, - monitor.details + {"timestamp" => Time.now.utc.iso8601, "state" => HealthMonitorStates::UNKNOWN, "details" => ""} ) if !lookup.nil? && workload_names.include?(lookup) new_monitor.state = HealthMonitorStates::UNKNOWN + new_monitor.details["state"] = HealthMonitorStates::UNKNOWN + new_monitor.details["details"] = "Workload present in inventory. But no signal for #{lookup}" + @unknown_signals_hash[monitor_instance_id] = new_monitor elsif !lookup.nil? && !workload_names.include?(lookup) new_monitor.state = HealthMonitorStates::NONE + new_monitor.details["state"] = HealthMonitorStates::NONE + new_monitor.details["details"] = "Workload #{lookup} NOT present in inventory" end missing_signals_map[monitor_instance_id] = new_monitor end end } - # remove signals from the list of expected signals if we see them in the list of current signals + health_monitor_records.each{|health_monitor_record| + # remove signals from the list of expected signals if we see them in the list of current signals if HealthMonitorUtils.is_node_monitor(health_monitor_record.monitor_id) node_name = health_monitor_record.labels['kubernetes.io/hostname'] if node_signals_hash.key?(node_name) @@ -94,14 +108,27 @@ def get_missing_signals(cluster_id, health_monitor_records, health_k8s_inventory {"timestamp" => Time.now.utc.iso8601, "state" => HealthMonitorStates::UNKNOWN, "details" => "no signal received from node #{node}"} ) missing_signals_map[monitor_instance_id] = new_monitor 
+ log.info "Added missing signal when node_signals_hash was not empty #{new_monitor.monitor_instance_id} #{new_monitor.state}" } } end missing_signals_map.each{|k,v| - missing_signals.push(v) + missing_signals.push(v) } + # if an unknown signal is present neither in missing signals or the incoming signals, change its state to none, and remove from unknown_signals + # in update_state of HealthMonitorState, send if latest_record_state is none + @unknown_signals_hash.each{|k,v| + if !missing_signals_map.key?(k) && !health_monitor_records_map.key?(k) + monitor_record = @unknown_signals_hash[k] + monitor_record.details["state"] = HealthMonitorStates::NONE # used for calculating the old and new states in update_state + monitor_record.state = HealthMonitorStates::NONE #used for calculating the aggregate monitor state + missing_signals.push(monitor_record) + @unknown_signals_hash.delete(k) + log.info "Updating state from unknown to none for #{k}" + end + } return missing_signals end diff --git a/source/code/plugin/health/health_model_builder.rb b/source/code/plugin/health/health_model_builder.rb index ecc1ad8d4..4cf802798 100644 --- a/source/code/plugin/health/health_model_builder.rb +++ b/source/code/plugin/health/health_model_builder.rb @@ -4,13 +4,11 @@ module HealthModel class HealthModelBuilder attr_accessor :hierarchy_builder, :state_finalizers, :monitor_set - attr_reader :last_sent_monitors def initialize(hierarchy_builder, state_finalizers, monitor_set) @hierarchy_builder = hierarchy_builder @state_finalizers = state_finalizers @monitor_set = monitor_set - @last_sent_monitors = {} end def process_records(health_records) diff --git a/source/code/plugin/health/health_model_definition.rb b/source/code/plugin/health/health_model_definition.rb index f290d8b60..f0ba57344 100644 --- a/source/code/plugin/health/health_model_definition.rb +++ b/source/code/plugin/health/health_model_definition.rb @@ -1,20 +1,28 @@ module HealthModel class HealthModelDefinition - attr_reader 
:health_model_definition + attr_reader :health_model_definition, :parent_monitor_mapping, :parent_monitor_instance_mapping def initialize(definition) @health_model_definition = definition + @parent_monitor_mapping = {} #monitorId --> parent_monitor_id mapping + @parent_monitor_instance_mapping = {} #child monitor id -- > parent monitor instance mapping. Used in instances when the node no longer exists and impossible to compute from kube api results end # gets the parent monitor id given the state transition. It requires the monitor id and labels to determine the parent id def get_parent_monitor_id(monitor) monitor_id = monitor.monitor_id + # cache the parent monitor id so it is not recomputed every time + if @parent_monitor_mapping.key?(monitor.monitor_instance_id) + return @parent_monitor_mapping[monitor.monitor_instance_id] + end + if @health_model_definition.key?(monitor_id) parent_monitor_id = @health_model_definition[monitor_id]['parent_monitor_id'] # check parent_monitor_id is an array, then evaluate the conditions, else return the parent_monitor_id if parent_monitor_id.is_a?(String) + @parent_monitor_mapping[monitor.monitor_instance_id] = parent_monitor_id return parent_monitor_id end if parent_monitor_id.nil? 
@@ -28,9 +36,9 @@ def get_parent_monitor_id(monitor) cond = left.send(op.to_sym, right) if cond + @parent_monitor_mapping[monitor.monitor_instance_id] = condition['parent_id'] return condition['parent_id'] end - } end end @@ -55,16 +63,23 @@ def get_parent_monitor_config(parent_monitor_id) return @health_model_definition[parent_monitor_id] end - def get_parent_monitor_instance_id(parent_monitor_id, parent_monitor_labels) + def get_parent_monitor_instance_id(monitor_instance_id, parent_monitor_id, parent_monitor_labels) + if @parent_monitor_instance_mapping.key?(monitor_instance_id) + return @parent_monitor_instance_mapping[monitor_instance_id] + end + labels = AggregateMonitorInstanceIdLabels.get_labels_for(parent_monitor_id) if !labels.is_a?(Array) raise "Expected #{labels} to be an Array for #{parent_monitor_id}" end values = labels.map{|label| parent_monitor_labels[label]} if values.nil? || values.empty? || values.size == 0 + @parent_monitor_instance_mapping[monitor_instance_id] = parent_monitor_id return parent_monitor_id end - return "#{parent_monitor_id}-#{values.join('-')}" + parent_monitor_instance_id = "#{parent_monitor_id}-#{values.join('-')}" + @parent_monitor_instance_mapping[monitor_instance_id] = parent_monitor_instance_id + return parent_monitor_instance_id end end end \ No newline at end of file diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index cb01c5dfb..0bf683b61 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -36,13 +36,14 @@ def initialize_state(deserialized_state) @@monitor_states[k] = v } end -=begin +=begin when do u send? --------------- 1. if the signal hasnt been sent before 2. if there is a "consistent" state change for monitors 3. if the signal is stale (> 4hrs) +4. 
If the latest state is none =end def update_state(monitor, #UnitMonitor/AggregateMonitor monitor_config #Hash @@ -127,9 +128,20 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor end # latest state is different that last sent state else + #if latest_record_state is none, send + if latest_record_state.downcase == HealthMonitorStates::NONE + health_monitor_instance_state.old_state = health_monitor_instance_state.new_state #initially old = new, so when state change occurs, assign old to be new, and set new to be the latest record state + health_monitor_instance_state.new_state = latest_record_state + health_monitor_instance_state.state_change_time = latest_record_time + health_monitor_instance_state.prev_sent_record_time = latest_record_time + health_monitor_instance_state.should_send = true + if !@@first_record_sent.key?(monitor_instance_id) + @@first_record_sent[monitor_instance_id] = true + end + set_state(monitor_instance_id, health_monitor_instance_state) # if it is a monitor that needs to instantly notify on state change, update the state # mark the monitor to be sent - if samples_to_check == 1 + elsif samples_to_check == 1 health_monitor_instance_state.old_state = health_monitor_instance_state.new_state #initially old = new, so when state change occurs, assign old to be new, and set new to be the latest record state health_monitor_instance_state.new_state = latest_record_state health_monitor_instance_state.state_change_time = latest_record_time From 24b04791f14f563c56cf8bf917bfc553272a0228 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Fri, 21 Jun 2019 11:26:44 -0700 Subject: [PATCH 58/90] Enable state initialization, fix bug where records are always sent the first time even when should_send = false --- source/code/plugin/filter_health_model_builder.rb | 4 ++-- source/code/plugin/health/health_monitor_state.rb | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/source/code/plugin/filter_health_model_builder.rb 
b/source/code/plugin/filter_health_model_builder.rb index 138a2f4cc..49281e0c0 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -48,7 +48,7 @@ def initialize #TODO: check if the path exists deserialized_state_info = @deserializer.deserialize @state = HealthMonitorState.new - #@state.initialize_state(deserialized_state_info) + @state.initialize_state(deserialized_state_info) end def configure(conf) @@ -133,7 +133,7 @@ def filter_stream(tag, es) @log.info "after getting missing signals missing_signals.size #{missing_signals.size}" #update state for missing signals missing_signals.each{|signal| - + @state.update_state(signal, @provider.get_config(signal.monitor_id)) @log.info "After Updating #{@state.get_state(signal.monitor_instance_id)} #{@state.get_state(signal.monitor_instance_id).new_state}" } diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index 0bf683b61..41c4a0a77 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -34,6 +34,7 @@ def initialize_state(deserialized_state) @@monitor_states = {} deserialized_state.each{|k,v| @@monitor_states[k] = v + @@first_record_sent[k] = true } end From d3d267a32d86c7e2b8dd6e3079178820ab4debf2 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Fri, 21 Jun 2019 18:19:25 -0700 Subject: [PATCH 59/90] Fix percentage agg algorithm state calculation --- installer/conf/health_model_definition.json | 3 +- .../code/plugin/health/aggregate_monitor.rb | 53 +++++++++++++++---- .../plugin/health/health_model_constants.rb | 2 +- 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/installer/conf/health_model_definition.json b/installer/conf/health_model_definition.json index aaa7e133d..1112fe158 100644 --- a/installer/conf/health_model_definition.json +++ b/installer/conf/health_model_definition.json @@ -182,8 +182,7 @@ 
"monitor_id": "agent_node_pool", "aggregation_algorithm": "percentage", "aggregation_algorithm_params": { - "critical_threshold": 80.0, - "warning_threshold": 90.0 + "state_threshold": 80.0 }, "labels": [ "agentpool", diff --git a/source/code/plugin/health/aggregate_monitor.rb b/source/code/plugin/health/aggregate_monitor.rb index 248cfa565..4e58f6f97 100644 --- a/source/code/plugin/health/aggregate_monitor.rb +++ b/source/code/plugin/health/aggregate_monitor.rb @@ -8,6 +8,14 @@ class AggregateMonitor attr_accessor :monitor_id, :monitor_instance_id, :state, :transition_date_time, :aggregation_algorithm, :aggregation_algorithm_params, :labels, :is_aggregate_monitor, :details attr_reader :member_monitors, :member_state_counts + @@sort_key_order = { + MonitorState::UNKNOWN => 1, + MonitorState::CRITICAL => 2, + MonitorState::WARNING => 3, + MonitorState::HEALTHY => 4, + MonitorState::NONE => 5 + } + # constructor def initialize( monitor_id, @@ -115,20 +123,19 @@ def calculate_worst_of_state(monitor_set) # calculates a percentage state, given the aggregation algorithm parameters def calculate_percentage_state(monitor_set) - if @member_state_counts.nil? || @member_state_counts.empty? - @member_state_counts = map_member_monitor_states(monitor_set) - end - member_state_counts_percentage = {} - @member_state_counts.map{|k,v| member_state_counts_percentage[k] = @member_state_counts[k] / @member_monitors.size * 100 } + #sort + #TODO: What if sorted_filtered is empty? is that even possible? 
+ sorted_filtered = sort_filter_member_monitors(monitor_set) - healthy = (member_state_counts_percentage[MonitorState::HEALTHY] || 0) + (member_state_counts_percentage[MonitorState::NONE] || 0) + state_threshold = @aggregation_algorithm_params['state_threshold'].to_f - if healthy > @aggregation_algorithm_params['warning_threshold'] - @state = MonitorState::HEALTHY - elsif healthy > @aggregation_algorithm_params['critical_threshold'] - @state = MonitorState::WARNING + size = sorted_filtered.size + if size == 1 + @state = sorted_filtered[0].state else - @state = MonitorState::CRITICAL + count = ((state_threshold*size)/100).ceil + index = size - count + @state = sorted_filtered[index].state end end @@ -156,5 +163,29 @@ def map_member_monitor_states(monitor_set) return state_counts; end + + # Sort the member monitors in the following order +=begin + 1. Error + 2. Unknown + 3. Critical + 4. Warning + 5. Healthy + Remove 'none' state monitors +=end + def sort_filter_member_monitors(monitor_set) + member_monitor_instance_ids = get_member_monitors + member_monitors = [] + + member_monitor_instance_ids.each {|monitor_instance_id| + member_monitor = monitor_set.get_monitor(monitor_instance_id) + member_monitors.push(member_monitor) + } + + filtered = member_monitors.select{|monitor| monitor.state != MonitorState::NONE} + sorted = filtered.sort_by{ |monitor| [@@sort_key_order[monitor.state]] } + + return sorted + end end end diff --git a/source/code/plugin/health/health_model_constants.rb b/source/code/plugin/health/health_model_constants.rb index fe5757369..9598e3a9c 100644 --- a/source/code/plugin/health/health_model_constants.rb +++ b/source/code/plugin/health/health_model_constants.rb @@ -1,7 +1,7 @@ module HealthModel class MonitorState CRITICAL = "fail" - ERROR = "fail" + ERROR = "err" WARNING = "warn" NONE = "none" HEALTHY = "pass" From d0f4a7b0a8d7bcd95152942fe359772dca9b33b9 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Fri, 21 Jun 2019 19:56:55 -0700 Subject: [PATCH 
60/90] Fix the bug where if signal is unknown state, its state is not updated to be the new_state of the monitor instance --- source/code/plugin/filter_health_model_builder.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 49281e0c0..fe211a93a 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -96,6 +96,7 @@ def filter_stream(tag, es) records_to_process.each do |record| monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] + #HealthMonitorRecord health_monitor_record = HealthMonitorRecord.new( record[HealthMonitorRecordFields::MONITOR_ID], record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], @@ -114,7 +115,6 @@ def filter_stream(tag, es) # update state calls updates the state of the monitor based on configuration and history of the the monitor records health_monitor_record.state = @state.get_state(monitor_instance_id).new_state health_monitor_records.push(health_monitor_record) - instance_state = @state.get_state(monitor_instance_id) #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" end @@ -136,7 +136,10 @@ def filter_stream(tag, es) @state.update_state(signal, @provider.get_config(signal.monitor_id)) @log.info "After Updating #{@state.get_state(signal.monitor_instance_id)} #{@state.get_state(signal.monitor_instance_id).new_state}" + # for unknown/none records, update the "monitor state" to be the latest state (new_state) of the monitor instance from the state + signal.state = @state.get_state(monitor_instance_id).new_state } + @generator.update_last_received_records(reduced_records) all_records = reduced_records.clone all_records.push(*missing_signals) From 990f70c641c2ce69ae5e2139839fee7daaf32116 Mon Sep 17 00:00:00 
2001 From: r-dilip Date: Fri, 21 Jun 2019 20:25:07 -0700 Subject: [PATCH 61/90] fix compute percentage bug when value is in warning state --- source/code/plugin/health/health_monitor_utils.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index bb1728095..4edcf5265 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -28,7 +28,11 @@ class HealthMonitorUtils class << self # compute the percentage state given a value and a monitor configuration def compute_percentage_state(value, config) - (config.nil? || config['WarnThresholdPercentage'].nil?) ? warn_percentage = nil : config['WarnThresholdPercentage'].to_f + if config.nil? || config['WarnThresholdPercentage'].nil? + warn_percentage = nil + else + warn_percentage = config['WarnThresholdPercentage'].to_f + end fail_percentage = config['FailThresholdPercentage'].to_f if value > fail_percentage From 275fcf399b4ca3f002d234595060ee4591bdfae8 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Fri, 21 Jun 2019 21:38:45 -0700 Subject: [PATCH 62/90] Update state_transition_time to current time whenever state change happens --- .../code/plugin/health/health_monitor_state.rb | 16 ++++++++-------- .../code/plugin/health/health_monitor_utils.rb | 1 + 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index 41c4a0a77..59d0f5e0c 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -52,6 +52,7 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor samples_to_keep = 1 monitor_instance_id = monitor.monitor_instance_id + current_time = Time.now.utc.iso8601 health_monitor_instance_state = get_state(monitor_instance_id) if !health_monitor_instance_state.nil? 
health_monitor_instance_state.is_state_change_consistent = false @@ -107,7 +108,6 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor new_state = health_monitor_instance_state.new_state prev_sent_time = health_monitor_instance_state.prev_sent_record_time - time_first_observed = health_monitor_instance_state.state_change_time # if the last sent state (news_state is different from latest monitor state) if latest_record_state.downcase == new_state.downcase @@ -117,7 +117,7 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor # update record for last sent record time health_monitor_instance_state.old_state = health_monitor_instance_state.new_state health_monitor_instance_state.new_state = latest_record_state - health_monitor_instance_state.prev_sent_record_time = latest_record_time + health_monitor_instance_state.prev_sent_record_time = current_time health_monitor_instance_state.should_send = true #log.debug "After Updating Monitor State #{health_monitor_instance_state}" set_state(monitor_instance_id, health_monitor_instance_state) @@ -133,8 +133,8 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor if latest_record_state.downcase == HealthMonitorStates::NONE health_monitor_instance_state.old_state = health_monitor_instance_state.new_state #initially old = new, so when state change occurs, assign old to be new, and set new to be the latest record state health_monitor_instance_state.new_state = latest_record_state - health_monitor_instance_state.state_change_time = latest_record_time - health_monitor_instance_state.prev_sent_record_time = latest_record_time + health_monitor_instance_state.state_change_time = current_time + health_monitor_instance_state.prev_sent_record_time = current_time health_monitor_instance_state.should_send = true if !@@first_record_sent.key?(monitor_instance_id) @@first_record_sent[monitor_instance_id] = true @@ -145,8 +145,8 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor elsif samples_to_check == 1 
health_monitor_instance_state.old_state = health_monitor_instance_state.new_state #initially old = new, so when state change occurs, assign old to be new, and set new to be the latest record state health_monitor_instance_state.new_state = latest_record_state - health_monitor_instance_state.state_change_time = latest_record_time - health_monitor_instance_state.prev_sent_record_time = latest_record_time + health_monitor_instance_state.state_change_time = current_time + health_monitor_instance_state.prev_sent_record_time = current_time health_monitor_instance_state.should_send = true if !@@first_record_sent.key?(monitor_instance_id) @@first_record_sent[monitor_instance_id] = true @@ -165,8 +165,8 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor health_monitor_instance_state.is_state_change_consistent = true # This way it wont be recomputed in the optimizer. health_monitor_instance_state.should_send = true health_monitor_instance_state.new_state = latest_record_state - health_monitor_instance_state.prev_sent_record_time = latest_record_time - health_monitor_instance_state.state_change_time = first_record["timestamp"] + health_monitor_instance_state.prev_sent_record_time = current_time + health_monitor_instance_state.state_change_time = current_time set_state(monitor_instance_id, health_monitor_instance_state) diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index 4edcf5265..466c4120a 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -28,6 +28,7 @@ class HealthMonitorUtils class << self # compute the percentage state given a value and a monitor configuration def compute_percentage_state(value, config) + if config.nil? || config['WarnThresholdPercentage'].nil? 
warn_percentage = nil else From 2901e9981724a4ea4706352dbb2d398145747ce1 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Mon, 24 Jun 2019 06:57:28 -0700 Subject: [PATCH 63/90] Update missing signal state to be the instance state for correct rollup --- source/code/plugin/filter_health_model_builder.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index fe211a93a..840258017 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -137,7 +137,7 @@ def filter_stream(tag, es) @state.update_state(signal, @provider.get_config(signal.monitor_id)) @log.info "After Updating #{@state.get_state(signal.monitor_instance_id)} #{@state.get_state(signal.monitor_instance_id).new_state}" # for unknown/none records, update the "monitor state" to be the latest state (new_state) of the monitor instance from the state - signal.state = @state.get_state(monitor_instance_id).new_state + signal.state = @state.get_state(signal.monitor_instance_id).new_state } @generator.update_last_received_records(reduced_records) From bd7cf0ae03cd0699f7a6ebf1be70ce8a65779512 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 25 Jun 2019 10:46:16 -0700 Subject: [PATCH 64/90] 1. Remove some unnecessary logging 2. Added logs for state change 3. 
Update the state after reducing the records --- .../plugin/filter_health_model_builder.rb | 22 ++++++++++--------- .../plugin/health/health_monitor_state.rb | 13 ++++++++--- .../plugin/health/health_monitor_utils.rb | 10 +-------- source/code/plugin/in_kube_health.rb | 10 ++++----- 4 files changed, 28 insertions(+), 27 deletions(-) diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 840258017..d6612132a 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -107,26 +107,28 @@ def filter_stream(tag, es) record[HealthMonitorRecordFields::DETAILS] ) - @state.update_state(health_monitor_record, - @provider.get_config(health_monitor_record.monitor_id) - ) - - # get the health state based on the monitor's operational state - # update state calls updates the state of the monitor based on configuration and history of the the monitor records - health_monitor_record.state = @state.get_state(monitor_instance_id).new_state health_monitor_records.push(health_monitor_record) #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" end @log.info "health_monitor_records.size #{health_monitor_records.size}" - - health_monitor_records = @kube_api_down_handler.handle_kube_api_down(health_monitor_records) - @log.info " after kube api down handler health_monitor_records.size #{health_monitor_records.size}" # Dedupe daemonset signals # Remove unit monitor signals for “gone” objects + # update state for the reduced set of signals reduced_records = @reducer.reduce_signals(health_monitor_records, @resources) + reduced_records.each{|record| + @state.update_state(record, + @provider.get_config(record.monitor_id) + ) + # get the health state based on the monitor's operational state + # update state calls updates the state of the monitor based on configuration and history of the the monitor records + 
record.state = @state.get_state(record.monitor_instance_id).new_state + } @log.info "after deduping and removing gone objects reduced_records.size #{reduced_records.size}" + reduced_records = @kube_api_down_handler.handle_kube_api_down(reduced_records) + @log.info "after kube api down handler health_monitor_records.size #{health_monitor_records.size}" + #get the list of 'none' and 'unknown' signals missing_signals = @generator.get_missing_signals(@@cluster_id, reduced_records, @resources, @provider) diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index 59d0f5e0c..1a2378283 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -51,7 +51,7 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor ) samples_to_keep = 1 monitor_instance_id = monitor.monitor_instance_id - + log = HealthMonitorUtils.get_log_handle current_time = Time.now.utc.iso8601 health_monitor_instance_state = get_state(monitor_instance_id) if !health_monitor_instance_state.nil? @@ -98,9 +98,12 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor # update old and new state based on the history and latest record. # TODO: this is a little hairy. Simplify - health_monitor_records = health_monitor_instance_state.prev_records - monitor_config['ConsecutiveSamplesForStateTransition'].nil? ? samples_to_check = 1 : samples_to_check = monitor_config['ConsecutiveSamplesForStateTransition'].to_i + if monitor_config['ConsecutiveSamplesForStateTransition'].nil? 
+ samples_to_check = 1 + else + samples_to_check = monitor_config['ConsecutiveSamplesForStateTransition'].to_i + end latest_record = health_monitor_records[health_monitor_records.size-1] #since we push new records to the end, and remove oldest records from the beginning latest_record_state = latest_record["state"] @@ -121,6 +124,7 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor health_monitor_instance_state.should_send = true #log.debug "After Updating Monitor State #{health_monitor_instance_state}" set_state(monitor_instance_id, health_monitor_instance_state) + log.debug "#{monitor_instance_id} condition: signal timeout should_send #{health_monitor_instance_state.should_send} #{health_monitor_instance_state.old_state} --> #{health_monitor_instance_state.new_state}" # check if the first record has been sent elsif !@@first_record_sent.key?(monitor_instance_id) @@first_record_sent[monitor_instance_id] = true @@ -140,6 +144,7 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor @@first_record_sent[monitor_instance_id] = true end set_state(monitor_instance_id, health_monitor_instance_state) + log.debug "#{monitor_instance_id} condition: NONE state should_send #{health_monitor_instance_state.should_send} #{health_monitor_instance_state.old_state} --> #{health_monitor_instance_state.new_state}" # if it is a monitor that needs to instantly notify on state change, update the state # mark the monitor to be sent elsif samples_to_check == 1 @@ -152,6 +157,7 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor @@first_record_sent[monitor_instance_id] = true end set_state(monitor_instance_id, health_monitor_instance_state) + log.debug "#{monitor_instance_id} condition: state change, samples_to_check = #{samples_to_check} should_send #{health_monitor_instance_state.should_send} #{health_monitor_instance_state.old_state} --> #{health_monitor_instance_state.new_state}" else # state change from previous sent state to latest record state #check state of last n 
records to see if they are all in the same state @@ -173,6 +179,7 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor if !@@first_record_sent.key?(monitor_instance_id) @@first_record_sent[monitor_instance_id] = true end + log.debug "#{monitor_instance_id} condition: consistent state change, samples_to_check = #{samples_to_check} should_send #{health_monitor_instance_state.should_send} #{health_monitor_instance_state.old_state} --> #{health_monitor_instance_state.new_state}" end end end diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index 466c4120a..947770075 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -18,7 +18,7 @@ class HealthMonitorUtils @log_path = "/var/opt/microsoft/docker-cimprov/log/health_monitors.log" - if Gem.win_platform? + if Gem.win_platform? #unit testing on windows dev machine @log_path = "C:\Temp\health_monitors.log" end @@ -53,14 +53,6 @@ def is_pods_ready_monitor(monitor_id) return (monitor_id == HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID || monitor_id == HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) end - def is_infra_monitor - - end - - def is_workload_monitor - - end - def get_pods_ready_hash(pod_inventory, deployment_inventory) pods_ready_percentage_hash = {} deployment_lookup = {} diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 3650f6efa..5a3b847dd 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -155,7 +155,7 @@ def process_cpu_oversubscribed_monitor(pod_inventory) health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id - @@hmlog.info "Successfully processed process_cpu_oversubscribed_monitor" + 
#@@hmlog.info "Successfully processed process_cpu_oversubscribed_monitor" return health_record end @@ -179,7 +179,7 @@ def process_memory_oversubscribed_monitor(pod_inventory) health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id - @@hmlog.info "Successfully processed process_memory_oversubscribed_monitor" + #@@hmlog.info "Successfully processed process_memory_oversubscribed_monitor" return health_record end @@ -203,7 +203,7 @@ def process_kube_api_up_monitor(state, response) health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id - @@hmlog.info "Successfully processed process_kube_api_up_monitor" + #@@hmlog.info "Successfully processed process_kube_api_up_monitor" return health_record end @@ -234,7 +234,7 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id records.push(health_record) end - @@hmlog.info "Successfully processed pods_ready_percentage for #{config_monitor_id} #{records.size}" + #@@hmlog.info "Successfully processed pods_ready_percentage for #{config_monitor_id} #{records.size}" return records end @@ -267,7 +267,7 @@ def process_node_condition_monitor(node_inventory) node_condition_monitor_records.push(health_record) end end - @@hmlog.info "Successfully processed process_node_condition_monitor #{node_condition_monitor_records.size}" + #@@hmlog.info "Successfully processed process_node_condition_monitor #{node_condition_monitor_records.size}" return node_condition_monitor_records end From 23fa7a2aa4420718452c3acb880eb9e183c415d6 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 25 Jun 2019 12:00:07 -0700 Subject: [PATCH 65/90] Removing calls to kube api 
since they are not required as of now. Will reinstante when we do container level resource utilization --- source/code/plugin/filter_cadvisor_health_container.rb | 4 ++-- source/code/plugin/filter_cadvisor_health_node.rb | 4 ++-- source/code/plugin/in_kube_health.rb | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/source/code/plugin/filter_cadvisor_health_container.rb b/source/code/plugin/filter_cadvisor_health_container.rb index dbfc71388..448a03885 100644 --- a/source/code/plugin/filter_cadvisor_health_container.rb +++ b/source/code/plugin/filter_cadvisor_health_container.rb @@ -53,14 +53,14 @@ def start @cpu_capacity = node_capacity[0] @memory_capacity = node_capacity[1] @log.info "CPU Capacity #{@cpu_capacity} Memory Capacity #{@memory_capacity}" - HealthMonitorUtils.refresh_kubernetes_api_data(@log, @@hostName) + #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @@hostName) @@health_monitor_config = HealthMonitorUtils.getHealthMonitorConfig ApplicationInsightsUtility.sendCustomEvent("filter_cadvisor_health Plugin Start", {}) end def filter_stream(tag, es) new_es = MultiEventStream.new - HealthMonitorUtils.refresh_kubernetes_api_data(@log, @hostName) + #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @hostName) records_count = 0 es.each { |time, record| begin diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb index 4605091c6..fcfa1cb1a 100644 --- a/source/code/plugin/filter_cadvisor_health_node.rb +++ b/source/code/plugin/filter_cadvisor_health_node.rb @@ -56,13 +56,13 @@ def start @cpu_capacity = node_capacity[0] @memory_capacity = node_capacity[1] @log.info "CPU Capacity #{@cpu_capacity} Memory Capacity #{@memory_capacity}" - HealthMonitorUtils.refresh_kubernetes_api_data(@log, @@hostName) + #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @@hostName) ApplicationInsightsUtility.sendCustomEvent("filter_cadvisor_health Plugin Start", {}) end def filter_stream(tag, 
es) new_es = MultiEventStream.new - HealthMonitorUtils.refresh_kubernetes_api_data(@log, @hostName) + #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @hostName) records_count = 0 es.each { |time, record| begin diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 5a3b847dd..7f8e5d523 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -71,7 +71,7 @@ def enumerate health_monitor_records = [] eventStream = MultiEventStream.new - HealthMonitorUtils.refresh_kubernetes_api_data(@@hmlog, nil) + #HealthMonitorUtils.refresh_kubernetes_api_data(@@hmlog, nil) # we do this so that if the call fails, we get a response code/header etc. node_inventory_response = KubernetesApiClient.getKubeResourceInfo("nodes") node_inventory = JSON.parse(node_inventory_response.body) From 1697f40a3b3a72a1aed6697e620f168853465304 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 27 Jun 2019 06:55:56 -0700 Subject: [PATCH 66/90] Send telemetry for cluster level state changes --- source/code/plugin/filter_health_model_builder.rb | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index d6612132a..10adbbf5e 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -49,6 +49,8 @@ def initialize deserialized_state_info = @deserializer.deserialize @state = HealthMonitorState.new @state.initialize_state(deserialized_state_info) + @cluster_old_state = 'none' + @cluster_new_state = 'none' end def configure(conf) @@ -179,6 +181,16 @@ def filter_stream(tag, es) # generate the record to send all_monitors.keys.each{|key| record = @provider.get_record(all_monitors[key], state) + if record[HealthMonitorRecordFields::MONITOR_ID] == MonitorId::CLUSTER && all_monitors.size > 1 + old_state = record[HealthMonitorRecordFields::OLD_STATE] + new_state = 
record[HealthMonitorRecordFields::NEW_STATE] + if old_state != new_state && @cluster_old_state != old_state && @cluster_new_state != new_state + ApplicationInsightsUtility.sendCustomEvent("HealthModel_ClusterStateChanged",{"old_state" => old_state , "new_state" => new_state, "monitor_count" => all_monitors.size}) + @log.info "sent telemetry for cluster state change from #{record['OldState']} to #{record['NewState']}" + @cluster_old_state = old_state + @cluster_new_state = new_state + end + end #@log.info "#{record["Details"]} #{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" new_es.add(time, record) } From ec65d492ad2400b459d1398263bed7493ee803d3 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Mon, 8 Jul 2019 13:21:34 -0700 Subject: [PATCH 67/90] Testing Rake --- Rakefile | 4 +++- test/code/plugin/health/unit_monitor_test.rb | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 test/code/plugin/health/unit_monitor_test.rb diff --git a/Rakefile b/Rakefile index d48941cc1..dd6044313 100644 --- a/Rakefile +++ b/Rakefile @@ -1,7 +1,9 @@ require 'rake/testtask' +task default: "test" + Rake::TestTask.new do |task| task.libs << "test" - task.pattern = './test/code/plugin/*_test.rb' + task.pattern = './test/code/plugin/health/*_test.rb' task.verbose = true end \ No newline at end of file diff --git a/test/code/plugin/health/unit_monitor_test.rb b/test/code/plugin/health/unit_monitor_test.rb new file mode 100644 index 000000000..656e79430 --- /dev/null +++ b/test/code/plugin/health/unit_monitor_test.rb @@ -0,0 +1,12 @@ +require_relative '../../../../source/code/plugin/health/unit_monitor' +require 'minitest/autorun' +require 'time' + +class UnitMonitorTest < Minitest::Test + include HealthModel + + def test_is_aggregate_monitor_false + monitor = UnitMonitor.new(:monitor_id, :monitor_instance_id, :pass, Time.now.utc.iso8601, {}, {}, {}) + assert_equal monitor.is_aggregate_monitor, false + end +end From 
d0a62d328cd4388317c9a91df064a7c801122504 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 16 Jul 2019 18:23:39 -0700 Subject: [PATCH 68/90] First Round of Tests --- Rakefile | 4 +- .../plugin/filter_health_model_builder.rb | 2 +- .../code/plugin/health/aggregate_monitor.rb | 4 +- .../aggregate_monitor_state_finalizer.rb | 4 - .../plugin/health/health_hierarchy_builder.rb | 8 +- .../health/health_kubernetes_resources.rb | 1 - .../code/plugin/health/health_model_buffer.rb | 9 - .../plugin/health/health_model_definition.rb | 85 ----- .../plugin/health/health_monitor_state.rb | 2 +- .../plugin/health/health_monitor_utils.rb | 1 - .../plugin/health/health_state_serializer.rb | 2 +- source/code/plugin/health/newfiles.txt | 15 - test/code/plugin/aggregate_monitor_test.rb | 10 - .../filter_health_model_builder_test.rb | 2 +- .../plugin/health/aggregate_monitor_spec.rb | 257 +++++++++++++ .../aggregate_monitor_state_finalizer_spec.rb | 59 +++ .../health/health_hierarchy_builder_spec.rb | 11 + .../health/health_kubernetes_resource_spec.rb | 222 ++++++++++++ .../health_missing_signal_generator_spec.rb | 79 ++++ .../plugin/health/health_model_buffer_spec.rb | 25 ++ .../health/health_model_builder_spec.rb | 37 ++ .../health/health_model_builder_test.rb | 338 ++++++++++++++++++ .../health_model_definition_parser_spec.rb | 23 ++ .../health/health_monitor_state_spec.rb | 129 +++++++ .../health/health_signal_reducer_spec.rb | 96 +++++ .../health/kube_api_down_handler_spec.rb | 26 ++ .../plugin/health/monitor_factory_spec.rb | 28 ++ test/code/plugin/health/monitor_set_spec.rb | 58 +++ .../health/parent_monitor_provider_spec.rb | 144 ++++++++ .../health/test_health_model_definition.json | 42 +++ test/code/plugin/health/tests/.gitignore | 8 + test/code/plugin/health/tests/.travis.yml | 7 + .../plugin/health/tests/CODE_OF_CONDUCT.md | 74 ++++ test/code/plugin/health/tests/Gemfile | 4 + test/code/plugin/health/tests/LICENSE.txt | 21 ++ test/code/plugin/health/tests/README.md | 43 +++ 
test/code/plugin/health/tests/Rakefile | 10 + test/code/plugin/health/tests/bin/console | 14 + test/code/plugin/health/tests/bin/setup | 8 + test/code/plugin/health/tests/lib/tests.rb | 6 + .../plugin/health/tests/lib/tests/version.rb | 3 + .../plugin/health/tests/test/test_helper.rb | 4 + .../plugin/health/tests/test/tests_test.rb | 11 + test/code/plugin/health/tests/tests.gemspec | 34 ++ test/code/plugin/health/unit_monitor_spec.rb | 20 ++ test/code/plugin/health/unit_monitor_test.rb | 10 +- 46 files changed, 1861 insertions(+), 139 deletions(-) delete mode 100644 source/code/plugin/health/health_model_definition.rb delete mode 100644 source/code/plugin/health/newfiles.txt delete mode 100644 test/code/plugin/aggregate_monitor_test.rb create mode 100644 test/code/plugin/health/aggregate_monitor_spec.rb create mode 100644 test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb create mode 100644 test/code/plugin/health/health_hierarchy_builder_spec.rb create mode 100644 test/code/plugin/health/health_kubernetes_resource_spec.rb create mode 100644 test/code/plugin/health/health_missing_signal_generator_spec.rb create mode 100644 test/code/plugin/health/health_model_buffer_spec.rb create mode 100644 test/code/plugin/health/health_model_builder_spec.rb create mode 100644 test/code/plugin/health/health_model_builder_test.rb create mode 100644 test/code/plugin/health/health_model_definition_parser_spec.rb create mode 100644 test/code/plugin/health/health_monitor_state_spec.rb create mode 100644 test/code/plugin/health/health_signal_reducer_spec.rb create mode 100644 test/code/plugin/health/kube_api_down_handler_spec.rb create mode 100644 test/code/plugin/health/monitor_factory_spec.rb create mode 100644 test/code/plugin/health/monitor_set_spec.rb create mode 100644 test/code/plugin/health/parent_monitor_provider_spec.rb create mode 100644 test/code/plugin/health/test_health_model_definition.json create mode 100644 test/code/plugin/health/tests/.gitignore 
create mode 100644 test/code/plugin/health/tests/.travis.yml create mode 100644 test/code/plugin/health/tests/CODE_OF_CONDUCT.md create mode 100644 test/code/plugin/health/tests/Gemfile create mode 100644 test/code/plugin/health/tests/LICENSE.txt create mode 100644 test/code/plugin/health/tests/README.md create mode 100644 test/code/plugin/health/tests/Rakefile create mode 100644 test/code/plugin/health/tests/bin/console create mode 100644 test/code/plugin/health/tests/bin/setup create mode 100644 test/code/plugin/health/tests/lib/tests.rb create mode 100644 test/code/plugin/health/tests/lib/tests/version.rb create mode 100644 test/code/plugin/health/tests/test/test_helper.rb create mode 100644 test/code/plugin/health/tests/test/tests_test.rb create mode 100644 test/code/plugin/health/tests/tests.gemspec create mode 100644 test/code/plugin/health/unit_monitor_spec.rb diff --git a/Rakefile b/Rakefile index dd6044313..3733e71a3 100644 --- a/Rakefile +++ b/Rakefile @@ -4,6 +4,6 @@ task default: "test" Rake::TestTask.new do |task| task.libs << "test" - task.pattern = './test/code/plugin/health/*_test.rb' - task.verbose = true + task.pattern = './test/code/plugin/health/*_spec.rb' + task.warning = false end \ No newline at end of file diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 10adbbf5e..8c6e82520 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -25,7 +25,7 @@ class FilterHealthModelBuilder < Filter def initialize super @buffer = HealthModel::HealthModelBuffer.new - @health_model_definition = HealthModel::HealthModelDefinition.new(HealthModel::HealthModelDefinitionParser.new(@model_definition_path).parse_file) + @health_model_definition = HealthModel::ParentMonitorProvider.new(HealthModel::HealthModelDefinitionParser.new(@model_definition_path).parse_file) @monitor_factory = HealthModel::MonitorFactory.new 
@hierarchy_builder = HealthHierarchyBuilder.new(@health_model_definition, @monitor_factory) # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side diff --git a/source/code/plugin/health/aggregate_monitor.rb b/source/code/plugin/health/aggregate_monitor.rb index 4e58f6f97..794f716ce 100644 --- a/source/code/plugin/health/aggregate_monitor.rb +++ b/source/code/plugin/health/aggregate_monitor.rb @@ -64,6 +64,8 @@ def calculate_state(monitor_set) @state = calculate_worst_of_state(monitor_set) when AggregationAlgorithm::PERCENTAGE @state = calculate_percentage_state(monitor_set) + else + raise 'No aggregation algorithm specified' end end @@ -184,7 +186,7 @@ def sort_filter_member_monitors(monitor_set) filtered = member_monitors.select{|monitor| monitor.state != MonitorState::NONE} sorted = filtered.sort_by{ |monitor| [@@sort_key_order[monitor.state]] } - + return sorted end end diff --git a/source/code/plugin/health/aggregate_monitor_state_finalizer.rb b/source/code/plugin/health/aggregate_monitor_state_finalizer.rb index e7140375c..74e780924 100644 --- a/source/code/plugin/health/aggregate_monitor_state_finalizer.rb +++ b/source/code/plugin/health/aggregate_monitor_state_finalizer.rb @@ -14,14 +14,12 @@ def finalize(monitor_set) end private - def calculate_subtree_state(monitor, monitor_set) if monitor.nil? 
|| !monitor.is_aggregate_monitor raise 'AggregateMonitorStateFinalizer:calculateSubtreeState Parameter monitor must be non-null AggregateMonitor' end member_monitor_instance_ids = monitor.get_member_monitors # monitor_instance_ids - member_monitor_instance_ids.each{|member_monitor_instance_id| member_monitor = monitor_set.get_monitor(member_monitor_instance_id) @@ -29,9 +27,7 @@ def calculate_subtree_state(monitor, monitor_set) calculate_subtree_state(member_monitor, monitor_set) end } - monitor.calculate_state(monitor_set) end - end end \ No newline at end of file diff --git a/source/code/plugin/health/health_hierarchy_builder.rb b/source/code/plugin/health/health_hierarchy_builder.rb index c73616399..8643f34a4 100644 --- a/source/code/plugin/health/health_hierarchy_builder.rb +++ b/source/code/plugin/health/health_hierarchy_builder.rb @@ -6,13 +6,13 @@ class HealthHierarchyBuilder def initialize(health_model_definition, monitor_factory) - if !health_model_definition.is_a?(HealthModelDefinition) - raise 'Invalid Type Expected: HealthModelDefinition Actual: #{@health_model_definition.class.name}' + if !health_model_definition.is_a?(ParentMonitorProvider) + raise 'Invalid Type Expected: ParentMonitorProvider Actual: #{@health_model_definition.class.name}' end @health_model_definition = health_model_definition if !monitor_factory.is_a?(MonitorFactory) - raise 'Invalid Type Expected: HealthModelDefinition Actual: #{@monitor_factory.class.name}' + raise 'Invalid Type Expected: ParentMonitorProvider Actual: #{@monitor_factory.class.name}' end @monitor_factory = monitor_factory end @@ -41,7 +41,7 @@ def process_record(health_monitor_record, monitor_set) parent_monitor_labels = @health_model_definition.get_parent_monitor_labels(monitor_id, monitor_labels, parent_monitor_id) # get the parent monitor configuration parent_monitor_configuration = @health_model_definition.get_parent_monitor_config(parent_monitor_id) - #get monitor instance id for parent monitor. 
Does this belong in HealthModelDefinition? + #get monitor instance id for parent monitor. Does this belong in ParentMonitorProvider? parent_monitor_instance_id = @health_model_definition.get_parent_monitor_instance_id(child_monitor.monitor_instance_id, parent_monitor_id, parent_monitor_labels) # check if monitor set has the parent monitor id # if not present, add diff --git a/source/code/plugin/health/health_kubernetes_resources.rb b/source/code/plugin/health/health_kubernetes_resources.rb index b9600101c..b11bfafc5 100644 --- a/source/code/plugin/health/health_kubernetes_resources.rb +++ b/source/code/plugin/health/health_kubernetes_resources.rb @@ -63,7 +63,6 @@ def get_workload_names end namespace = pod['metadata']['namespace'] - status = pod['status']['phase'] workload_name = '' if owner_kind.nil? diff --git a/source/code/plugin/health/health_model_buffer.rb b/source/code/plugin/health/health_model_buffer.rb index 402f699cc..1ccfe7349 100644 --- a/source/code/plugin/health/health_model_buffer.rb +++ b/source/code/plugin/health/health_model_buffer.rb @@ -25,14 +25,5 @@ def add_to_buffer(records) def reset_buffer @records_buffer = [] end - - # prints the buffer - def print_buffer - end - - # gets the number of records in the buffer - def get_buffer_length - @records_buffer.length - end end end \ No newline at end of file diff --git a/source/code/plugin/health/health_model_definition.rb b/source/code/plugin/health/health_model_definition.rb deleted file mode 100644 index f0ba57344..000000000 --- a/source/code/plugin/health/health_model_definition.rb +++ /dev/null @@ -1,85 +0,0 @@ -module HealthModel - class HealthModelDefinition - - attr_reader :health_model_definition, :parent_monitor_mapping, :parent_monitor_instance_mapping - - def initialize(definition) - @health_model_definition = definition - @parent_monitor_mapping = {} #monitorId --> parent_monitor_id mapping - @parent_monitor_instance_mapping = {} #child monitor id -- > parent monitor instance mapping. 
Used in instances when the node no longer exists and impossible to compute from kube api results - end - - # gets the parent monitor id given the state transition. It requires the monitor id and labels to determine the parent id - def get_parent_monitor_id(monitor) - monitor_id = monitor.monitor_id - - # cache the parent monitor id so it is not recomputed every time - if @parent_monitor_mapping.key?(monitor.monitor_instance_id) - return @parent_monitor_mapping[monitor.monitor_instance_id] - end - - if @health_model_definition.key?(monitor_id) - parent_monitor_id = @health_model_definition[monitor_id]['parent_monitor_id'] - # check parent_monitor_id is an array, then evaluate the conditions, else return the parent_monitor_id - if parent_monitor_id.is_a?(String) - @parent_monitor_mapping[monitor.monitor_instance_id] = parent_monitor_id - return parent_monitor_id - end - if parent_monitor_id.nil? - conditions = @health_model_definition[monitor_id]['conditions'] - if !conditions.nil? && conditions.is_a?(Array) - labels = monitor.labels - conditions.each{|condition| - left = "#{labels[condition['key']]}" - op = "#{condition['operator']}" - right = "#{condition['value']}" - cond = left.send(op.to_sym, right) - - if cond - @parent_monitor_mapping[monitor.monitor_instance_id] = condition['parent_id'] - return condition['parent_id'] - end - } - end - end - else - raise "Invalid Monitor Id #{monitor_id} in get_parent_monitor_id" - end - end - - def get_parent_monitor_labels(monitor_id, monitor_labels, parent_monitor_id) - labels_to_copy = @health_model_definition[monitor_id]['labels'] - if labels_to_copy.nil? 
- return {} - end - parent_monitor_labels = {} - labels_to_copy.each{|label| - parent_monitor_labels[label] = monitor_labels[label] - } - return parent_monitor_labels - end - - def get_parent_monitor_config(parent_monitor_id) - return @health_model_definition[parent_monitor_id] - end - - def get_parent_monitor_instance_id(monitor_instance_id, parent_monitor_id, parent_monitor_labels) - if @parent_monitor_instance_mapping.key?(monitor_instance_id) - return @parent_monitor_instance_mapping[monitor_instance_id] - end - - labels = AggregateMonitorInstanceIdLabels.get_labels_for(parent_monitor_id) - if !labels.is_a?(Array) - raise "Expected #{labels} to be an Array for #{parent_monitor_id}" - end - values = labels.map{|label| parent_monitor_labels[label]} - if values.nil? || values.empty? || values.size == 0 - @parent_monitor_instance_mapping[monitor_instance_id] = parent_monitor_id - return parent_monitor_id - end - parent_monitor_instance_id = "#{parent_monitor_id}-#{values.join('-')}" - @parent_monitor_instance_mapping[monitor_instance_id] = parent_monitor_instance_id - return parent_monitor_instance_id - end - end -end \ No newline at end of file diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index 1a2378283..c78effe2e 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -112,7 +112,7 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor new_state = health_monitor_instance_state.new_state prev_sent_time = health_monitor_instance_state.prev_sent_record_time - # if the last sent state (news_state is different from latest monitor state) + # if the last sent state (new_state is different from latest monitor state) if latest_record_state.downcase == new_state.downcase time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60 # check if health signal has "timed out" diff --git 
a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index 947770075..b450ee3e0 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -6,7 +6,6 @@ module HealthModel class HealthMonitorUtils begin - if !Gem.win_platform? require_relative '../KubernetesApiClient' end diff --git a/source/code/plugin/health/health_state_serializer.rb b/source/code/plugin/health/health_state_serializer.rb index 83e115d3c..7ffb445c2 100644 --- a/source/code/plugin/health/health_state_serializer.rb +++ b/source/code/plugin/health/health_state_serializer.rb @@ -7,7 +7,7 @@ def initialize(path) end def serialize(state) - File.open(@serialized_path, 'w') do |f| #File.open(@serialized_path, 'w') + File.open(@serialized_path, 'w') do |f| states = state.to_h states_hash = {} states.each{|id, value| diff --git a/source/code/plugin/health/newfiles.txt b/source/code/plugin/health/newfiles.txt deleted file mode 100644 index cf52faf59..000000000 --- a/source/code/plugin/health/newfiles.txt +++ /dev/null @@ -1,15 +0,0 @@ -aggregate_monitor.rb -aggregate_monitor_instance_id_labels.rb -aggregate_monitor_state_finalizer.rb -health_model_buffer.rb -health_model_builder.rb -health_model_constants.rb -health_model_definition.rb -health_model_definition_parser.rb -hierarchy_finalizer.rb -monitor_factory.rb -monitor_set.rb -monitor_state_transition.rb -node_monitor_hierarchy_reducer.rb -state_transition_processor.rb -unit_monitor.rb \ No newline at end of file diff --git a/test/code/plugin/aggregate_monitor_test.rb b/test/code/plugin/aggregate_monitor_test.rb deleted file mode 100644 index 631cda175..000000000 --- a/test/code/plugin/aggregate_monitor_test.rb +++ /dev/null @@ -1,10 +0,0 @@ -require 'test/unit' -require_relative '../../../source/code/plugin/aggregate_monitor' - -class AggregateMonitor_Test < Test::Unit::TestCase - def test_get_name - puts 'starting test_get_name' - agg = 
AggregateMonitor.new('Cluster', 'Cluster') - assert_equal(agg.name, 'Cluster') - end -end diff --git a/test/code/plugin/filter_health_model_builder_test.rb b/test/code/plugin/filter_health_model_builder_test.rb index b7a105264..f4dba11ed 100644 --- a/test/code/plugin/filter_health_model_builder_test.rb +++ b/test/code/plugin/filter_health_model_builder_test.rb @@ -11,7 +11,7 @@ class FilterHealthModelBuilderTest < Test::Unit::TestCase def test_event_stream health_definition_path = 'C:\AzureMonitor\ContainerInsights\Docker-Provider\installer\conf\health_model_definition.json' - health_model_definition = HealthModelDefinition.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) + health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) monitor_factory = MonitorFactory.new hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. 
For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side diff --git a/test/code/plugin/health/aggregate_monitor_spec.rb b/test/code/plugin/health/aggregate_monitor_spec.rb new file mode 100644 index 000000000..08cfafacd --- /dev/null +++ b/test/code/plugin/health/aggregate_monitor_spec.rb @@ -0,0 +1,257 @@ +require_relative '../test_helpers' + +Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } + +include HealthModel + +describe "AggregateMonitor Spec" do + it "is_aggregate_monitor is true for AggregateMonitor" do + # Arrange/Act + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "worstOf", [], {}) + # Assert + assert_equal monitor.is_aggregate_monitor, true + end + + it "add_member_monitor tests -- adds a member monitor as a child monitor" do + # Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "worstOf", [], {}) + #Act + monitor.add_member_monitor("child_monitor_1") + #Assert + assert_equal monitor.get_member_monitors.include?("child_monitor_1"), true + + #Act + monitor.add_member_monitor("child_monitor_1") + #Assert + assert_equal monitor.get_member_monitors.size, 1 + end + + it "remove_member_monitor tests -- removes a member monitor as a child monitor" do + # Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "worstOf", [], {}) + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + + #Act + monitor.remove_member_monitor("child_monitor_1") + #Assert + assert_equal monitor.get_member_monitors.size, 1 + + #Act + monitor.remove_member_monitor("unknown_child") + #Assert + assert_equal monitor.get_member_monitors.size, 1 + end + + it "calculate_details tests -- calculates rollup details based on member monitor states" do + # Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, 
"worstOf", [], {}) + + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "fail", "time", {}, {}, {}) + + monitor_set = MonitorSet.new + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + + #Act + monitor.calculate_details(monitor_set) + #Assert + assert_equal monitor.details["details"], {"pass"=>["child_monitor_1"], "fail"=>["child_monitor_2"]} + + #Arrange + child_monitor_3 = UnitMonitor.new("monitor_3", "child_monitor_3", "pass", "time", {}, {}, {}) + monitor_set.add_or_update(child_monitor_3) + monitor.add_member_monitor("child_monitor_3") + + #Act + monitor.calculate_details(monitor_set) + #Assert + assert_equal monitor.details["details"], {"pass"=>["child_monitor_1", "child_monitor_3"], "fail"=>["child_monitor_2"]} + end + + it "calculate_state tests -- raises when right aggregation_algorithm NOT specified" do + # Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "", [], {}) + #Assert + assert_raises do + monitor.calculate_state(monitor_set) + end + end + + it "calculate_state tests -- calculate_worst_of_state " do + # Arrange -- pass, fail = fail + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "worstOf", [], {}) + + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "fail", "time", {}, {}, {}) + + monitor_set = MonitorSet.new + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "fail" + + #Arrange -- pass, pass = 
pass + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "pass", "time", {}, {}, {}) + monitor_set.add_or_update(child_monitor_2) + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "pass" + + #Arrange -- pass, warn = warn + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "warn", "time", {}, {}, {}) + monitor_set.add_or_update(child_monitor_2) + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "warn" + + #Arrange -- warn, fail = fail + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "warn", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "fail", "time", {}, {}, {}) + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "fail" + + #Arrange -- warn, unknown = unknown + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "warn", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "unknown", "time", {}, {}, {}) + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "warn" + + #Arrange -- pass, unknown = unknown + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "unknown", "time", {}, {}, {}) + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "unknown" + end + + it "calculate_state tests -- calculate_percentage_state " do + # Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "percentage", {"state_threshold" => 90.0}, {}) + + child_monitor_1 = 
UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "fail", "time", {}, {}, {}) + + monitor_set = MonitorSet.new + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "fail" + + #Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "percentage", {"state_threshold" => 50.0}, {}) + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "fail", "time", {}, {}, {}) + + monitor_set = MonitorSet.new + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "pass" + + #Arrange -- single child monitor + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "percentage", {"state_threshold" => 33.3}, {}) + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + monitor_set = MonitorSet.new + monitor_set.add_or_update(child_monitor_1) + monitor.add_member_monitor("child_monitor_1") + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "pass" + + + #Arrange -- remove none state + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :none, :time, "percentage", {"state_threshold" => 100.0}, {}) + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "none", "time", {}, {}, {}) + + monitor_set = 
MonitorSet.new + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "pass" + + + # Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "percentage", {"state_threshold" => 50.0}, {}) + + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "fail", "time", {}, {}, {}) + child_monitor_3 = UnitMonitor.new("monitor_3", "child_monitor_3", "fail", "time", {}, {}, {}) + + monitor_set = MonitorSet.new + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + monitor_set.add_or_update(child_monitor_3) + + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + monitor.add_member_monitor("child_monitor_3") + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "fail" + + + # Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "percentage", {"state_threshold" => 90.0}, {}) + + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "pass", "time", {}, {}, {}) + child_monitor_3 = UnitMonitor.new("monitor_3", "child_monitor_3", "pass", "time", {}, {}, {}) + + monitor_set = MonitorSet.new + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + monitor_set.add_or_update(child_monitor_3) + + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + monitor.add_member_monitor("child_monitor_3") + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "pass" + end +end \ No 
newline at end of file diff --git a/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb b/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb new file mode 100644 index 000000000..2d0873849 --- /dev/null +++ b/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb @@ -0,0 +1,59 @@ +require_relative '../test_helpers' +Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +include HealthModel +include Minitest + +describe "AggregateMonitorStateFinalizer spec" do + it 'computes the right state and details' do + #arrange + monitor_set = Mock.new + + #mock unit monitors + child1 = Mock.new + def child1.state; "pass"; end + def child1.monitor_id; "child1";end + def child1.monitor_instance_id; "child1"; end + def child1.nil?; false; end + def child1.is_aggregate_monitor; false; end + + child2 = Mock.new + def child2.state; "fail"; end + def child2.monitor_id; "child2";end + def child2.monitor_instance_id; "child2"; end + def child2.nil?; false; end + def child2.is_aggregate_monitor; false; end + + parent_monitor = AggregateMonitor.new("parent_monitor", "parent_monitor", :none, :time, "worstOf", nil, {}) + parent_monitor.add_member_monitor("child1") + parent_monitor.add_member_monitor("child2") + + top_level_monitor = AggregateMonitor.new("cluster", "cluster", :none, :time, "worstOf", nil, {}) + top_level_monitor.add_member_monitor("parent_monitor") + + monitor_set.expect(:get_map, {"cluster" => top_level_monitor, "parent_monitor" => parent_monitor, "child1" => child1, "child2" => child2}) + monitor_set.expect(:get_monitor, top_level_monitor, ["cluster"]) + monitor_set.expect(:get_monitor, parent_monitor, ["parent_monitor"]) + monitor_set.expect(:get_monitor, child1, ["child1"]) + monitor_set.expect(:get_monitor, child2, ["child2"]) + monitor_set.expect(:get_monitor, child1, ["child1"]) + monitor_set.expect(:get_monitor, child2, ["child2"]) + monitor_set.expect(:get_monitor, 
parent_monitor, ["parent_monitor"]) + + + monitor_set.expect(:get_monitor, parent_monitor, ["parent_monitor"]) + monitor_set.expect(:get_monitor, child1, ["child1"]) + monitor_set.expect(:get_monitor, child2, ["child2"]) + + #act + finalizer = AggregateMonitorStateFinalizer.new + finalizer.finalize(monitor_set) + #assert + + assert_equal parent_monitor.state, "fail" + assert_equal parent_monitor.details, {"details"=>{"pass"=>["child1"], "fail"=>["child2"]}, "state"=>"fail", "timestamp"=>:time} + + assert_equal top_level_monitor.state, "fail" + assert_equal top_level_monitor.details, {"details"=>{"fail"=>["parent_monitor"]}, "state"=>"fail", "timestamp"=>:time} + + end +end \ No newline at end of file diff --git a/test/code/plugin/health/health_hierarchy_builder_spec.rb b/test/code/plugin/health/health_hierarchy_builder_spec.rb new file mode 100644 index 000000000..701fab691 --- /dev/null +++ b/test/code/plugin/health/health_hierarchy_builder_spec.rb @@ -0,0 +1,11 @@ +require_relative '../test_helpers' +Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +include HealthModel +include Minitest + +describe "HealthHierarchyBuilder spec" do + it 'builds right hierarchy given a child monitor and a parent monitor provider' do + + end + +end \ No newline at end of file diff --git a/test/code/plugin/health/health_kubernetes_resource_spec.rb b/test/code/plugin/health/health_kubernetes_resource_spec.rb new file mode 100644 index 000000000..093c685df --- /dev/null +++ b/test/code/plugin/health/health_kubernetes_resource_spec.rb @@ -0,0 +1,222 @@ +require_relative '../test_helpers' +Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +include HealthModel + +describe "HealthKubernetesResources spec" do + it "returns the right set of nodes and workloads given node and pod inventory" do + + #arrange + nodes_json = '{ + "items": [ + { + "metadata": { + "name": 
"aks-nodepool1-19574989-0" + } + }, + { + "metadata": { + "name": "aks-nodepool1-19574989-1" + } + } + ] + }' + + pods_json = '{ + "items": [ + { + "metadata": { + "name": "diliprdeploymentnodeapps-c4fdfb446-mzcsr", + "generateName": "diliprdeploymentnodeapps-c4fdfb446-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/diliprdeploymentnodeapps-c4fdfb446-mzcsr", + "uid": "ee31a9ce-526e-11e9-a899-6a5520730c61", + "resourceVersion": "4597573", + "creationTimestamp": "2019-03-29T22:06:40Z", + "labels": { + "app": "diliprsnodeapppod", + "diliprPodLabel1": "p1", + "diliprPodLabel2": "p2", + "pod-template-hash": "709896002" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "diliprdeploymentnodeapps-c4fdfb446", + "uid": "ee1e78e0-526e-11e9-a899-6a5520730c61", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "pi-m8ccw", + "generateName": "pi-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/pi-m8ccw", + "uid": "9fb16aaa-7ccc-11e9-8d23-32c49ee6f300", + "resourceVersion": "7940877", + "creationTimestamp": "2019-05-22T20:03:10Z", + "labels": { + "controller-uid": "9fad836f-7ccc-11e9-8d23-32c49ee6f300", + "job-name": "pi" + }, + "ownerReferences": [ + { + "apiVersion": "batch/v1", + "kind": "Job", + "name": "pi", + "uid": "9fad836f-7ccc-11e9-8d23-32c49ee6f300", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "rss-site", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/rss-site", + "uid": "68a34ea4-7ce4-11e9-8d23-32c49ee6f300", + "resourceVersion": "7954135", + "creationTimestamp": "2019-05-22T22:53:26Z", + "labels": { + "app": "web" + }, + "annotations": { + "kubectl.kubernetes.io/last-applied-configuration": 
"{\"apiVersion\":\"v1\",\"kind\":\"Pod\",\"metadata\":{\"annotations\":{},\"labels\":{\"app\":\"web\"},\"name\":\"rss-site\",\"namespace\":\"default\"},\"spec\":{\"containers\":[{\"image\":\"nginx\",\"name\":\"front-end\",\"ports\":[{\"containerPort\":80}]},{\"image\":\"nickchase/rss-php-nginx:v1\",\"name\":\"rss-reader\",\"ports\":[{\"containerPort\":88}]}]}}\n" + } + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kube-proxy-4hjws", + "generateName": "kube-proxy-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-4hjws", + "uid": "8cf7c410-88f4-11e9-b1b0-5eb4a3e9de7d", + "resourceVersion": "9661065", + "creationTimestamp": "2019-06-07T07:19:12Z", + "labels": { + "component": "kube-proxy", + "controller-revision-hash": "1271944371", + "pod-template-generation": "16", + "tier": "node" + }, + "annotations": { + "aks.microsoft.com/release-time": "seconds:1559735217 nanos:797729016 ", + "remediator.aks.microsoft.com/kube-proxy-restart": "7" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "kube-proxy", + "uid": "45640bf6-44e5-11e9-9920-423525a6b683", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "apiVersion": "v1", + "kind": "Pod" + } + ] + }' + deployments_json = '{ + "items": [ + { + "metadata": { + "name": "diliprdeploymentnodeapps", + "namespace": "default", + "selfLink": "/apis/extensions/v1beta1/namespaces/default/deployments/diliprdeploymentnodeapps", + "uid": "ee1b111d-526e-11e9-a899-6a5520730c61", + "resourceVersion": "4597575", + "generation": 1, + "creationTimestamp": "2019-03-29T22:06:40Z", + "labels": { + "diliprdeploymentLabel1": "d1", + "diliprdeploymentLabel2": "d2" + }, + "annotations": { + "deployment.kubernetes.io/revision": "1", + "kubectl.kubernetes.io/last-applied-configuration": 
"{\"apiVersion\":\"apps/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"diliprdeploymentLabel1\":\"d1\",\"diliprdeploymentLabel2\":\"d2\"},\"name\":\"diliprdeploymentnodeapps\",\"namespace\":\"default\"},\"spec\":{\"replicas\":1,\"selector\":{\"matchLabels\":{\"app\":\"diliprsnodeapppod\"}},\"template\":{\"metadata\":{\"labels\":{\"app\":\"diliprsnodeapppod\",\"diliprPodLabel1\":\"p1\",\"diliprPodLabel2\":\"p2\"}},\"spec\":{\"containers\":[{\"image\":\"rdilip83/logeverysecond:v2\",\"name\":\"diliprcontainerhelloapp\"}]}}}}\n" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "app": "diliprsnodeapppod" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "app": "diliprsnodeapppod", + "diliprPodLabel1": "p1", + "diliprPodLabel2": "p2" + } + }, + "spec": { + "containers": [ + { + "name": "diliprcontainerhelloapp", + "image": "rdilip83/logeverysecond:v2", + "resources": {}, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "securityContext": {}, + "schedulerName": "default-scheduler" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": "25%", + "maxSurge": "25%" + } + }, + "revisionHistoryLimit": 2, + "progressDeadlineSeconds": 600 + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + } + ] + }' + nodes = JSON.parse(nodes_json) + pods = JSON.parse(pods_json) + deployments = JSON.parse(deployments_json) + resources = HealthKubernetesResources.instance + resources.node_inventory = nodes + resources.pod_inventory = pods + resources.deployment_inventory = deployments + #act + parsed_nodes = resources.get_nodes + parsed_workloads = resources.get_workload_names + + #assert + assert_equal parsed_nodes.size, 2 + assert_equal parsed_workloads.size, 3 + 
+ assert_equal parsed_nodes, ['aks-nodepool1-19574989-0', 'aks-nodepool1-19574989-1'] + assert_equal parsed_workloads, ['default~~diliprdeploymentnodeapps', 'default~~rss-site', 'kube-system~~kube-proxy'] + end +end \ No newline at end of file diff --git a/test/code/plugin/health/health_missing_signal_generator_spec.rb b/test/code/plugin/health/health_missing_signal_generator_spec.rb new file mode 100644 index 000000000..01cba8028 --- /dev/null +++ b/test/code/plugin/health/health_missing_signal_generator_spec.rb @@ -0,0 +1,79 @@ +require_relative '../test_helpers' +Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +include HealthModel +include Minitest + +describe "HealthMissingSignalGenerator spec" do + it 'generates missing node signals' do + #arrange + resources = Mock.new + resources.expect(:get_nodes, ["node1"]) + resources.expect(:get_workload_names, ["default~~workload1"]) + + provider = Mock.new + provider.expect(:get_node_labels, {"kubernetes.io/hostname" => "node1"}, ["node1"]) + + node1_cpu_record = Mock.new + def node1_cpu_record.monitor_id; "node_cpu_utilization"; end + def node1_cpu_record.monitor_instance_id; "node_cpu_utilization"; end + def node1_cpu_record.labels; {"kubernetes.io/hostname" => "node1"}; end + def node1_cpu_record.config; {}; end + def node1_cpu_record.state; "pass"; end + + node1_memory_record = Mock.new + def node1_memory_record.monitor_id; "node_memory_utilization"; end + def node1_memory_record.monitor_instance_id; "node_memory_utilization"; end + def node1_memory_record.labels; {"kubernetes.io/hostname" => "node1"}; end + def node1_memory_record.config; {}; end + def node1_memory_record.state; "pass"; end + + node1_condition_record = Mock.new + def node1_condition_record.monitor_id; "node_condition"; end + def node1_condition_record.monitor_instance_id; "node_condition-0c593682737a955dc8e0947ad12754fe"; end + def node1_condition_record.labels; 
{"kubernetes.io/hostname" => "node1"}; end + def node1_condition_record.config; {}; end + def node1_condition_record.state; "pass"; end + + + workload1_pods_ready_record = Mock.new + def workload1_pods_ready_record.monitor_id; "user_workload_pods_ready"; end + def workload1_pods_ready_record.monitor_instance_id; "user_workload_pods_ready-workload1"; end + def workload1_pods_ready_record.labels; {"container.azm.ms/namespace" => "default", "container.azm.ms/workload-name" => "workload1"}; end + def workload1_pods_ready_record.config; {}; end + def workload1_pods_ready_record.state; "pass"; end + + generator = HealthMissingSignalGenerator.new + generator.update_last_received_records([node1_cpu_record, node1_memory_record, node1_condition_record, workload1_pods_ready_record]) + + #act + missing = generator.get_missing_signals('fake_cluster_id', [node1_cpu_record, node1_memory_record], resources, provider) + + #assert + assert_equal missing.size, 2 + + assert_equal missing[0].monitor_id, "node_condition" + assert_equal missing[0].state, "unknown" + assert_equal missing[0].monitor_instance_id, "node_condition-0c593682737a955dc8e0947ad12754fe" + + assert_equal missing[1].monitor_id, "user_workload_pods_ready" + assert_equal missing[1].state, "unknown" + assert_equal missing[1].monitor_instance_id, "user_workload_pods_ready-workload1" + + #arrange + resources.expect(:get_nodes, ["node1"]) + resources.expect(:get_workload_names, ["default~~workload1"]) + provider.expect(:get_node_labels, {"kubernetes.io/hostname" => "node1"}, ["node1"]) + generator.update_last_received_records([node1_cpu_record, node1_memory_record]) + #act + missing = generator.get_missing_signals('fake_cluster_id', [node1_cpu_record, node1_memory_record], resources, provider) + #assert + assert_equal missing.size, 2 + assert_equal missing[0].monitor_id, "node_condition" + assert_equal missing[0].state, "unknown" + assert_equal missing[0].monitor_instance_id, 
"node_condition-0c593682737a955dc8e0947ad12754fe" + + assert_equal missing[1].monitor_id, "user_workload_pods_ready" + assert_equal missing[1].state, "none" + assert_equal missing[1].monitor_instance_id, "user_workload_pods_ready-workload1" + end +end \ No newline at end of file diff --git a/test/code/plugin/health/health_model_buffer_spec.rb b/test/code/plugin/health/health_model_buffer_spec.rb new file mode 100644 index 000000000..259513c08 --- /dev/null +++ b/test/code/plugin/health/health_model_buffer_spec.rb @@ -0,0 +1,25 @@ +require_relative '../../../../source/code/plugin/health/health_model_buffer' +require_relative '../test_helpers' + +include HealthModel + +describe "HealthModelBuffer Spec" do + it "get_buffer returns the correct buffer data" do + # Arrange + buffer = HealthModelBuffer.new + # Act + buffer.add_to_buffer(['mockRecord']) + # Assert + assert_equal buffer.get_buffer.length, 1 + + #Act + buffer.add_to_buffer(['mockRecord1', 'mockRecord2']) + #Assert + assert_equal buffer.get_buffer.length, 3 + + #Act + buffer.reset_buffer + #Assert + assert_equal buffer.get_buffer.length, 0 + end +end \ No newline at end of file diff --git a/test/code/plugin/health/health_model_builder_spec.rb b/test/code/plugin/health/health_model_builder_spec.rb new file mode 100644 index 000000000..4a98695c5 --- /dev/null +++ b/test/code/plugin/health/health_model_builder_spec.rb @@ -0,0 +1,37 @@ +require_relative '../test_helpers' +# consider doing this in test_helpers.rb so that this code is common +Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +include HealthModel +include Minitest + +describe "HealthModelBuilder spec" do + it "Verify hierarchy builder and finalizer public methods are called" do + #arrange + mock_hierarchy_builder = Mock::new + health_record = Mock::new + mock_monitor_set = Mock::new + mock_state_finalizer = Mock::new + mock_hierarchy_builder.expect(:process_record, nil, [health_record, 
mock_monitor_set]) + mock_state_finalizer.expect(:finalize, {}, [mock_monitor_set]) + def mock_monitor_set.get_map; {}; end + + #act + builder = HealthModelBuilder.new(mock_hierarchy_builder, [mock_state_finalizer], mock_monitor_set) + builder.process_records([health_record]) + builder.finalize_model + #assert + assert mock_hierarchy_builder.verify + assert mock_state_finalizer.verify + end + + it "Verify finalize_model raises if state_finalizers is empty" do + #arrange + mock_hierarchy_builder = Mock.new + mock_monitor_set = Mock.new + builder = HealthModelBuilder.new(mock_hierarchy_builder, [], mock_monitor_set) + #act and assert + assert_raises do + builder.finalize_model + end + end +end \ No newline at end of file diff --git a/test/code/plugin/health/health_model_builder_test.rb b/test/code/plugin/health/health_model_builder_test.rb new file mode 100644 index 000000000..ac51893da --- /dev/null +++ b/test/code/plugin/health/health_model_builder_test.rb @@ -0,0 +1,338 @@ +require 'test/unit' +require 'json' +# require_relative '../../../source/code/plugin/health' + +Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require file } + +class FilterHealthModelBuilderTest < Test::Unit::TestCase + include HealthModel + + # def test_event_stream + # #setup + # health_definition_path = 'C:\AzureMonitor\ContainerInsights\Docker-Provider\installer\conf\health_model_definition.json' + # health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) + # monitor_factory = MonitorFactory.new + # hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) + # # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. 
For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side + # state_finalizers = [AggregateMonitorStateFinalizer.new] + # monitor_set = MonitorSet.new + # model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) + + # nodes_file_map = { + # "extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json", + # "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # "first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # "fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # "missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # "kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # } + + # pods_file_map = { + # "extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json", + # "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # "first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # "fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # "missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # "kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # } + + # cluster_labels = { + # 'container.azm.ms/cluster-region' => 'eastus', + # 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + # 
'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', + # 'container.azm.ms/cluster-name' => 'dilipr-health-test' + # } + + # cluster_id = 'fake_cluster_id' + + # #test + # state = HealthMonitorState.new() + # generator = HealthMissingSignalGenerator.new + + # for scenario in ["first", "second", "third"] + # mock_data_path = "C:/AzureMonitor/ContainerInsights/Docker-Provider/health_records/#{scenario}_daemon_set_signals.json" + # file = File.read(mock_data_path) + # records = JSON.parse(file) + + # node_inventory = JSON.parse(File.read(nodes_file_map[scenario])) + # pod_inventory = JSON.parse(File.read(pods_file_map[scenario])) + # deployment_inventory = JSON.parse(File.read("C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/deployments.json")) + # resources = HealthKubernetesResources.instance + # resources.node_inventory = node_inventory + # resources.pod_inventory = pod_inventory + # resources.deployment_inventory = deployment_inventory + + # workload_names = resources.get_workload_names + # provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, "C:/AzureMonitor/ContainerInsights/Docker-Provider/installer/conf/healthmonitorconfig.json") + + # health_monitor_records = [] + # records.each do |record| + # monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] + # monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] + # health_monitor_record = HealthMonitorRecord.new( + # record[HealthMonitorRecordFields::MONITOR_ID], + # record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + # record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], + # record[HealthMonitorRecordFields::DETAILS]["state"], + # provider.get_labels(record), + # provider.get_config(monitor_id), + # record[HealthMonitorRecordFields::DETAILS] + # ) + + # state.update_state(health_monitor_record, + # provider.get_config(health_monitor_record.monitor_id) + # ) + + # # get the health state based on the monitor's operational state + # 
# update state calls updates the state of the monitor based on configuration and history of the the monitor records + # health_monitor_record.state = state.get_state(monitor_instance_id).new_state + # health_monitor_records.push(health_monitor_record) + # instance_state = state.get_state(monitor_instance_id) + # #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + # end + + + # #handle kube api down + # kube_api_down_handler = HealthKubeApiDownHandler.new + # health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records) + + # # Dedupe daemonset signals + # # Remove unit monitor signals for “gone” objects + # reducer = HealthSignalReducer.new() + # reduced_records = reducer.reduce_signals(health_monitor_records, resources) + + # cluster_id = 'fake_cluster_id' + + # #get the list of 'none' and 'unknown' signals + # missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider) + # #update state for missing signals + # missing_signals.each{|signal| + # state.update_state(signal, + # provider.get_config(signal.monitor_id) + # ) + # } + # generator.update_last_received_records(reduced_records) + # reduced_records.push(*missing_signals) + + # # build the health model + # all_records = reduced_records + # model_builder.process_records(all_records) + # all_monitors = model_builder.finalize_model + + # # update the state for aggregate monitors (unit monitors are updated above) + # all_monitors.each{|monitor_instance_id, monitor| + # if monitor.is_aggregate_monitor + # state.update_state(monitor, + # provider.get_config(monitor.monitor_id) + # ) + # end + + # instance_state = state.get_state(monitor_instance_id) + # #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + # should_send = instance_state.should_send + + # # always send cluster monitor as a heartbeat + # if !should_send 
&& monitor_instance_id != MonitorId::CLUSTER + # all_monitors.delete(monitor_instance_id) + # end + # } + + # records_to_send = [] + # all_monitors.keys.each{|key| + # record = provider.get_record(all_monitors[key], state) + # puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" + # } + # # for each key in monitor.keys, + # # get the state from health_monitor_state + # # generate the record to send + # serializer = HealthStateSerializer.new('C:\AzureMonitor\ContainerInsights\Docker-Provider\health_records\health_model_state.json') + # serializer.serialize(state) + + # deserializer = HealthStateDeserializer.new('C:\AzureMonitor\ContainerInsights\Docker-Provider\health_records\health_model_state.json') + # deserialized_state = deserializer.deserialize + + # after_state = HealthMonitorState.new + # # deserialized_state.each{|k,v| + # # attrs = v.gsub(/(\n|# "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json", + "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + "first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + "fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + "missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + "kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + "aks-engine-1" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", + "aks-engine-2" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", + "aks-engine-3" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", + } + + pods_file_map = { + "extra" => 
"C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json", + "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + "first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + "fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + "missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + "kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + "aks-engine-1" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", + "aks-engine-2" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", + "aks-engine-3" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", + } + + cluster_labels = { + 'container.azm.ms/cluster-region' => 'eastus', + 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + 'container.azm.ms/cluster-resource-group' => 'aks-engine-health', + 'container.azm.ms/cluster-name' => 'aks-engine-health' + } + + cluster_id = 'fake_cluster_id' + + #test + state = HealthMonitorState.new() + generator = HealthMissingSignalGenerator.new + + for scenario in 1..3 + mock_data_path = "C:/AzureMonitor/ContainerInsights/Docker-Provider/health_records/aks-engine/aks-engine-#{scenario}.json" + file = File.read(mock_data_path) + records = JSON.parse(file) + + node_inventory = JSON.parse(File.read(nodes_file_map["aks-engine-#{scenario}"])) + pod_inventory = JSON.parse(File.read(pods_file_map["aks-engine-#{scenario}"])) + deployment_inventory = JSON.parse(File.read("C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/deployments.json")) + resources = 
HealthKubernetesResources.instance + resources.node_inventory = node_inventory + resources.pod_inventory = pod_inventory + resources.deployment_inventory = deployment_inventory + + workload_names = resources.get_workload_names + provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, "C:/AzureMonitor/ContainerInsights/Docker-Provider/installer/conf/healthmonitorconfig.json") + + health_monitor_records = [] + records.each do |record| + monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] + monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] + health_monitor_record = HealthMonitorRecord.new( + record[HealthMonitorRecordFields::MONITOR_ID], + record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], + record[HealthMonitorRecordFields::DETAILS]["state"], + provider.get_labels(record), + provider.get_config(monitor_id), + record[HealthMonitorRecordFields::DETAILS] + ) + + state.update_state(health_monitor_record, + provider.get_config(health_monitor_record.monitor_id) + ) + + # get the health state based on the monitor's operational state + # the update_state call updates the state of the monitor based on configuration and history of the monitor records + health_monitor_record.state = state.get_state(monitor_instance_id).new_state + health_monitor_records.push(health_monitor_record) + instance_state = state.get_state(monitor_instance_id) + #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + end + + + #handle kube api down + kube_api_down_handler = HealthKubeApiDownHandler.new + health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records) + + # Dedupe daemonset signals + # Remove unit monitor signals for “gone” objects + reducer = HealthSignalReducer.new() + reduced_records = reducer.reduce_signals(health_monitor_records, resources) + + cluster_id = 'fake_cluster_id' + 
+ #get the list of 'none' and 'unknown' signals + missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider) + #update state for missing signals + missing_signals.each{|signal| + state.update_state(signal, + provider.get_config(signal.monitor_id) + ) + } + generator.update_last_received_records(reduced_records) + reduced_records.push(*missing_signals) + + # build the health model + all_records = reduced_records + model_builder.process_records(all_records) + all_monitors = model_builder.finalize_model + + # update the state for aggregate monitors (unit monitors are updated above) + all_monitors.each{|monitor_instance_id, monitor| + if monitor.is_aggregate_monitor + state.update_state(monitor, + provider.get_config(monitor.monitor_id) + ) + end + + instance_state = state.get_state(monitor_instance_id) + #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + should_send = instance_state.should_send + + # always send cluster monitor as a heartbeat + if !should_send && monitor_instance_id != MonitorId::CLUSTER + all_monitors.delete(monitor_instance_id) + end + } + + records_to_send = [] + all_monitors.keys.each{|key| + record = provider.get_record(all_monitors[key], state) + puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" + } + # for each key in monitor.keys, + # get the state from health_monitor_state + # generate the record to send + serializer = HealthStateSerializer.new('C:\AzureMonitor\ContainerInsights\Docker-Provider\health_records\health_model_state.json') + serializer.serialize(state) + + deserializer = HealthStateDeserializer.new('C:\AzureMonitor\ContainerInsights\Docker-Provider\health_records\health_model_state.json') + deserialized_state = deserializer.deserialize + + after_state = HealthMonitorState.new + after_state.initialize_state(deserialized_state) + + puts "#{state.to_h == after_state.to_h}" + puts scenario + end + 
end +end \ No newline at end of file diff --git a/test/code/plugin/health/health_model_definition_parser_spec.rb b/test/code/plugin/health/health_model_definition_parser_spec.rb new file mode 100644 index 000000000..5b74c29a4 --- /dev/null +++ b/test/code/plugin/health/health_model_definition_parser_spec.rb @@ -0,0 +1,23 @@ +require_relative '../test_helpers' +# consider doing this in test_helpers.rb so that this code is common +Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +include HealthModel + +describe "HealthModelDefinitionParser spec " do + it "parses the definition file correctly with the right conditions" do + #arrange + parser = HealthModelDefinitionParser.new(File.join(__dir__, 'test_health_model_definition.json')) + #act + model_definition = parser.parse_file + + #assert + assert_equal model_definition['conditional_monitor_id'].key?("conditions"), true + assert_equal model_definition['conditional_monitor_id']["conditions"].size, 2 + assert_equal model_definition['conditional_monitor_id'].key?("parent_monitor_id"), false + + #assert + assert_equal model_definition['monitor_id'].key?("conditions"), false + assert_equal model_definition['monitor_id'].key?("parent_monitor_id"), true + end + +end \ No newline at end of file diff --git a/test/code/plugin/health/health_monitor_state_spec.rb b/test/code/plugin/health/health_monitor_state_spec.rb new file mode 100644 index 000000000..b68291cda --- /dev/null +++ b/test/code/plugin/health/health_monitor_state_spec.rb @@ -0,0 +1,129 @@ +require_relative '../test_helpers' +# consider doing this in test_helpers.rb so that this code is common +Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +include HealthModel +include Minitest + +describe "HealthMonitorState spec" do + it 'updates should_send to true for monitors which hasnt been sent before' do + #arrange + state = HealthMonitorState.new + 
mock_monitor = Mock.new + def mock_monitor.state; "pass"; end + def mock_monitor.monitor_id; "monitor_id"; end + def mock_monitor.monitor_instance_id; "monitor_instance_id"; end + def mock_monitor.transition_date_time; Time.now.utc.iso8601; end + def mock_monitor.details; {"state" => "pass", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + + #act + state.update_state(mock_monitor, {}) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal true + monitor_state.old_state.must_equal "none" + monitor_state.new_state.must_equal "pass" + end + + it 'updates should_send to true for monitors which need no consistent state change' do + #arrange + state = HealthMonitorState.new + mock_monitor = Mock.new + def mock_monitor.state; "pass"; end + def mock_monitor.monitor_id; "monitor_id"; end + def mock_monitor.monitor_instance_id; "monitor_instance_id"; end + def mock_monitor.transition_date_time; Time.now.utc.iso8601; end + def mock_monitor.details; {"state" => "pass", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + + #act + state.update_state(mock_monitor, {}) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal true + monitor_state.old_state.must_equal "none" + monitor_state.new_state.must_equal "pass" + + #arrange + def mock_monitor.state; "fail"; end + def mock_monitor.details; {"state" => "fail", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + #act + state.update_state(mock_monitor, {}) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal true + monitor_state.old_state.must_equal "pass" + monitor_state.new_state.must_equal "fail" + end + + it 'updates should_send to false for monitors which need consistent state change and has no consistent state change' do + #arrange + state = HealthMonitorState.new + mock_monitor = Mock.new + def mock_monitor.state; "pass"; end + def 
mock_monitor.monitor_id; "monitor_id"; end + def mock_monitor.monitor_instance_id; "monitor_instance_id"; end + def mock_monitor.transition_date_time; Time.now.utc.iso8601; end + def mock_monitor.details; {"state" => "pass", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + + config = JSON.parse('{ + "WarnThresholdPercentage": 80.0, + "FailThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 + }') + #act + state.update_state(mock_monitor, config) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal true + + #arrange + def mock_monitor.state; "fail"; end + def mock_monitor.details; {"state" => "fail", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + #act + state.update_state(mock_monitor, config) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal false + end + + it 'updates should_send to true for monitors which need consistent state change and has a consistent state change' do + #arrange + state = HealthMonitorState.new + mock_monitor = Mock.new + def mock_monitor.state; "pass"; end + def mock_monitor.monitor_id; "monitor_id"; end + def mock_monitor.monitor_instance_id; "monitor_instance_id"; end + def mock_monitor.transition_date_time; Time.now.utc.iso8601; end + def mock_monitor.details; {"state" => "pass", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + + config = JSON.parse('{ + "WarnThresholdPercentage": 80.0, + "FailThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 + }') + #act + state.update_state(mock_monitor, config) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal true + + #arrange + def mock_monitor.state; "fail"; end + def mock_monitor.details; {"state" => "fail", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + #act + state.update_state(mock_monitor, config) + monitor_state = 
state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal false + + #act + state.update_state(mock_monitor, config) + state.update_state(mock_monitor, config) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal true + monitor_state.old_state.must_equal "none" + monitor_state.new_state.must_equal "fail" + end + + +end \ No newline at end of file diff --git a/test/code/plugin/health/health_signal_reducer_spec.rb b/test/code/plugin/health/health_signal_reducer_spec.rb new file mode 100644 index 000000000..667649876 --- /dev/null +++ b/test/code/plugin/health/health_signal_reducer_spec.rb @@ -0,0 +1,96 @@ +require_relative '../test_helpers' +# consider doing this in test_helpers.rb so that this code is common +Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +include HealthModel +include Minitest + +describe "HealthSignalReducer spec" do + it "returns the right set of records -- no reduction" do + #arrange + record1 = Mock.new + def record1.monitor_id; "node_cpu_utilization"; end + def record1.monitor_instance_id; "node_cpu_utilization-node1"; end + def record1.labels; {"kubernetes.io/hostname" => "node1"}; end + inventory = Mock.new + def inventory.get_nodes; ["node1"]; end + def inventory.get_workload_names; []; end + reducer = HealthSignalReducer.new + #act + reduced = reducer.reduce_signals([record1], inventory) + #Assert + assert_equal reduced.size, 1 + end + + it "returns only the latest record if multiple records are present for the same monitor" do + #arrange + record1 = Mock.new + def record1.monitor_id; "node_cpu_utilization"; end + def record1.monitor_instance_id; "node_cpu_utilization-node1"; end + def record1.labels; {"kubernetes.io/hostname" => "node1"}; end + def record1.transition_date_time; Time.now.utc.iso8601 ; end + + + record2 = Mock.new + def record2.monitor_id; "node_cpu_utilization"; end + def 
record2.monitor_instance_id; "node_cpu_utilization-node1"; end + def record2.labels; {"kubernetes.io/hostname" => "node1"}; end + def record2.transition_date_time; "#{Time.now.utc.iso8601}" ; end + + inventory = Mock.new + def inventory.get_nodes; ["node1"]; end + def inventory.get_workload_names; []; end + reducer = HealthSignalReducer.new + #act + reduced = reducer.reduce_signals([record1, record2], inventory) + #Assert + assert_equal reduced.size, 1 + end + + it "returns only those records if the node is present in the inventory" do + #arrange + record1 = Mock.new + def record1.monitor_id; "node_cpu_utilization"; end + def record1.monitor_instance_id; "node_cpu_utilization-node1"; end + def record1.labels; {"kubernetes.io/hostname" => "node1"}; end + inventory = Mock.new + def inventory.get_nodes; ["node2"]; end + def inventory.get_workload_names; []; end + + #act + reducer = HealthSignalReducer.new + #assert + assert_equal reducer.reduce_signals([record1], inventory).size, 0 + end + + it "returns only those records if the workdload name is present in the inventory" do + #arrange + record1 = Mock.new + def record1.monitor_id; "user_workload_pods_ready"; end + def record1.monitor_instance_id; "user_workload_pods_ready-workload1"; end + def record1.labels; {"container.azm.ms/namespace" => "default", "container.azm.ms/workload-name" => "workload1"}; end + def record1.transition_date_time; Time.now.utc.iso8601 ; end + + inventory = Mock.new + def inventory.get_nodes; ["node2"]; end + def inventory.get_workload_names; ["default~~workload1"]; end + reducer = HealthSignalReducer.new + + #act + reduced = reducer.reduce_signals([record1], inventory) + + #assert + assert_equal reduced.size, 1 + + #arrange + record2 = Mock.new + def record2.monitor_id; "user_workload_pods_ready"; end + def record2.monitor_instance_id; "user_workload_pods_ready-workload2"; end + def record2.labels; {"container.azm.ms/namespace" => "default1", "container.azm.ms/workload-name" => 
"workload2"}; end + def record1.transition_date_time; Time.now.utc.iso8601 ; end + #act + reduced = reducer.reduce_signals([record1, record2], inventory) + #assert + assert_equal reduced.size, 1 + end + +end diff --git a/test/code/plugin/health/kube_api_down_handler_spec.rb b/test/code/plugin/health/kube_api_down_handler_spec.rb new file mode 100644 index 000000000..f0be88feb --- /dev/null +++ b/test/code/plugin/health/kube_api_down_handler_spec.rb @@ -0,0 +1,26 @@ +require_relative '../test_helpers' +Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +include HealthModel + +describe "KubeApiDownHandler spec" do + it "updates states for monitors in monitors_to_change" do + #arrange + record1 = HealthMonitorRecord.new("node_condition", "node_condition-node1", Time.now.utc.iso8601, "pass", {}, {}, {}) + record2 = HealthMonitorRecord.new("kube_api_status", "kube_api_status", Time.now.utc.iso8601, "fail", {}, {}, {}) + record3 = HealthMonitorRecord.new("user_workload_pods_ready", "user_workload_pods_ready-workload1", Time.now.utc.iso8601, "pass", {}, {}, {}) + record4 = HealthMonitorRecord.new("system_workload_pods_ready", "system_workload_pods_ready-workload2", Time.now.utc.iso8601, "pass", {}, {}, {}) + record5 = HealthMonitorRecord.new("subscribed_capacity_cpu", "subscribed_capacity_cpu", Time.now.utc.iso8601, "pass", {}, {}, {}) + record6 = HealthMonitorRecord.new("subscribed_capacity_memory", "subscribed_capacity_memory", Time.now.utc.iso8601, "pass", {}, {}, {}) + handler = HealthKubeApiDownHandler.new + + #act + handler.handle_kube_api_down([record1, record2, record3, record4, record5, record6]) + #assert + assert_equal record1.state, HealthMonitorStates::UNKNOWN + assert_equal record3.state, HealthMonitorStates::UNKNOWN + assert_equal record4.state, HealthMonitorStates::UNKNOWN + assert_equal record5.state, HealthMonitorStates::UNKNOWN + assert_equal record6.state, HealthMonitorStates::UNKNOWN + + end 
+end diff --git a/test/code/plugin/health/monitor_factory_spec.rb b/test/code/plugin/health/monitor_factory_spec.rb new file mode 100644 index 000000000..69dcaf914 --- /dev/null +++ b/test/code/plugin/health/monitor_factory_spec.rb @@ -0,0 +1,28 @@ +require_relative '../test_helpers' +# consider doing this in test_helpers.rb so that this code is common +Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +include HealthModel + +describe "MonitorFactory Spec" do + it "returns UnitMonitor for create_unit_monitor" do + #Arrange + factory = MonitorFactory.new() + monitor_record = HealthMonitorRecord.new(:monitor_id, :monitor_instance_id, :time, :pass, {}, {}, {}) + #act + monitor = factory.create_unit_monitor(monitor_record) + # assert + monitor.must_be_kind_of(UnitMonitor) + end + + it "returns AggregateMonitor for create_aggregate_monitor" do + #arrange + factory = MonitorFactory.new() + mock = Minitest::Mock.new + def mock.state; :pass; end + def mock.transition_date_time; :time; end + #act + monitor = factory.create_aggregate_monitor(:monitor_id, :monitor_instance_id, :pass, {}, {}, mock) + #assert + monitor.must_be_kind_of(AggregateMonitor) + end +end \ No newline at end of file diff --git a/test/code/plugin/health/monitor_set_spec.rb b/test/code/plugin/health/monitor_set_spec.rb new file mode 100644 index 000000000..5d6efb4eb --- /dev/null +++ b/test/code/plugin/health/monitor_set_spec.rb @@ -0,0 +1,58 @@ +require_relative '../test_helpers' +# consider doing this in test_helpers.rb so that this code is common +Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +include HealthModel + +describe "MonitorSet Spec" do + it "add_or_update -- adds a monitor" do + #arrange + set = MonitorSet.new + mock_monitor = MiniTest::Mock.new + def mock_monitor.monitor_instance_id; "monitor_instance_id_1"; end + def mock_monitor.state; :pass;end + #act + 
set.add_or_update(mock_monitor) + #assert + assert_equal set.get_map.size, 1 + assert_equal set.get_map.key?("monitor_instance_id_1"), true + end + + it "add_or_update -- updates a monitor" do + #arrange + set = MonitorSet.new + mock_monitor = MiniTest::Mock.new + def mock_monitor.monitor_instance_id; "monitor_instance_id_1"; end + def mock_monitor.state; :pass;end + #act + set.add_or_update(mock_monitor) + #assert + assert_equal set.get_map["monitor_instance_id_1"].state, :pass + + #act + def mock_monitor.state; :fail;end + set.add_or_update(mock_monitor) + #assert + assert_equal set.get_map["monitor_instance_id_1"].state, :fail + end + + it "delete -- delete a monitor" do + #arrange + set = MonitorSet.new + mock_monitor = MiniTest::Mock.new + def mock_monitor.monitor_instance_id; "monitor_instance_id_1"; end + def mock_monitor.state; :pass;end + set.add_or_update(mock_monitor) + + #act + set.delete("monitor_instance_id_1") + #assert + assert_equal set.get_map.size, 0 + end + + it "get_map -- returns a hash" do + #arrange + set = MonitorSet.new + #act and assert + set.get_map.must_be_kind_of(Hash) + end +end diff --git a/test/code/plugin/health/parent_monitor_provider_spec.rb b/test/code/plugin/health/parent_monitor_provider_spec.rb new file mode 100644 index 000000000..d8cef316a --- /dev/null +++ b/test/code/plugin/health/parent_monitor_provider_spec.rb @@ -0,0 +1,144 @@ +require_relative '../test_helpers' +Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +include HealthModel +include Minitest + +describe "ParentMonitorProvider spec" do + it 'returns correct parent_monitor_id for a non-condition case' do + #arrange + definition = JSON.parse('{ + "monitor_id" : { + "parent_monitor_id": "parent_monitor_id", + "labels": [ + "label_1", + "label_2" + ] + } + }' + ) + health_model_definition = ParentMonitorProvider.new(definition) + + monitor = Mock.new + def monitor.monitor_id; "monitor_id"; end + def 
monitor.monitor_instance_id; "monitor_instance_id"; end + + #act + parent_id = health_model_definition.get_parent_monitor_id(monitor) + #assert + assert_equal parent_id, "parent_monitor_id" + end + + it 'returns raises for an incorrect monitor id' do + #arrange + definition = JSON.parse('{ + "monitor_id" : { + "parent_monitor_id": "parent_monitor_id", + "labels": [ + "label_1", + "label_2" + ] + } + }' + ) + health_model_definition = ParentMonitorProvider.new(definition) + + monitor = Mock.new + def monitor.monitor_id; "monitor_id_!"; end + def monitor.monitor_instance_id; "monitor_instance_id"; end + + #act and assert + assert_raises do + parent_id = health_model_definition.get_parent_monitor_id(monitor) + end + end + + it 'returns correct parent_monitor_id for a conditional case' do + #arrange + definition = JSON.parse('{"conditional_monitor_id": { + "conditions": [ + { + "key": "kubernetes.io/role", + "operator": "==", + "value": "master", + "parent_id": "master_node_pool" + }, + { + "key": "kubernetes.io/role", + "operator": "==", + "value": "agent", + "parent_id": "agent_node_pool" + } + ], + "labels": [ + "kubernetes.io/hostname", + "agentpool", + "kubernetes.io/role", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ], + "aggregation_algorithm": "worstOf", + "aggregation_algorithm_params": null + } + + }' + ) + health_model_definition = ParentMonitorProvider.new(definition) + + monitor = Mock.new + def monitor.monitor_id; "conditional_monitor_id"; end + def monitor.monitor_instance_id; "conditional_monitor_instance_id"; end + def monitor.labels; {"kubernetes.io/role" => "master"}; end + + #act + parent_id = health_model_definition.get_parent_monitor_id(monitor) + #assert + assert_equal parent_id, "master_node_pool" + end + + it 'raises if conditions are not met' do + #arrange + definition = JSON.parse('{"conditional_monitor_id": { + 
"conditions": [ + { + "key": "kubernetes.io/role", + "operator": "==", + "value": "master", + "parent_id": "master_node_pool" + }, + { + "key": "kubernetes.io/role", + "operator": "==", + "value": "agent", + "parent_id": "agent_node_pool" + } + ], + "labels": [ + "kubernetes.io/hostname", + "agentpool", + "kubernetes.io/role", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ], + "aggregation_algorithm": "worstOf", + "aggregation_algorithm_params": null + } + + }' + ) + health_model_definition = ParentMonitorProvider.new(definition) + + monitor = Mock.new + def monitor.monitor_id; "conditional_monitor_id"; end + def monitor.monitor_instance_id; "conditional_monitor_instance_id"; end + def monitor.labels; {"kubernetes.io/role" => "master1"}; end + + #act and assert + assert_raises do + parent_id = health_model_definition.get_parent_monitor_id(monitor) + end + end +end diff --git a/test/code/plugin/health/test_health_model_definition.json b/test/code/plugin/health/test_health_model_definition.json new file mode 100644 index 000000000..31d219705 --- /dev/null +++ b/test/code/plugin/health/test_health_model_definition.json @@ -0,0 +1,42 @@ +[ + { + "monitor_id": "monitor_id", + "parent_monitor_id": "parent_monitor_id", + "labels": [ + "container.azm.ms/namespace", + "container.azm.ms/workload-name", + "container.azm.ms/workload-kind", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "conditional_monitor_id", + "aggregation_algorithm": "worstOf", + "labels": [ + "kubernetes.io/hostname", + "agentpool", + "kubernetes.io/role", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ], + "parent_monitor_id": [ + { 
+ "label": "kubernetes.io/role", + "operator": "==", + "value": "master", + "id": "master_node_pool" + }, + { + "label": "kubernetes.io/role", + "operator": "==", + "value": "agent", + "id": "agent_node_pool" + } + ] + } +] \ No newline at end of file diff --git a/test/code/plugin/health/tests/.gitignore b/test/code/plugin/health/tests/.gitignore new file mode 100644 index 000000000..9106b2a34 --- /dev/null +++ b/test/code/plugin/health/tests/.gitignore @@ -0,0 +1,8 @@ +/.bundle/ +/.yardoc +/_yardoc/ +/coverage/ +/doc/ +/pkg/ +/spec/reports/ +/tmp/ diff --git a/test/code/plugin/health/tests/.travis.yml b/test/code/plugin/health/tests/.travis.yml new file mode 100644 index 000000000..686d6c428 --- /dev/null +++ b/test/code/plugin/health/tests/.travis.yml @@ -0,0 +1,7 @@ +--- +sudo: false +language: ruby +cache: bundler +rvm: + - 2.5.1 +before_install: gem install bundler -v 2.0.2 diff --git a/test/code/plugin/health/tests/CODE_OF_CONDUCT.md b/test/code/plugin/health/tests/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..36f2de801 --- /dev/null +++ b/test/code/plugin/health/tests/CODE_OF_CONDUCT.md @@ -0,0 +1,74 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, gender identity and expression, level of experience, +nationality, personal appearance, race, religion, or sexual identity and +orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or +advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at dilip.rangarajan@gmail.com. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at [http://contributor-covenant.org/version/1/4][version] + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/4/ diff --git a/test/code/plugin/health/tests/Gemfile b/test/code/plugin/health/tests/Gemfile new file mode 100644 index 000000000..b2501d8c1 --- /dev/null +++ b/test/code/plugin/health/tests/Gemfile @@ -0,0 +1,4 @@ +source "https://rubygems.org" + +# Specify your gem's dependencies in tests.gemspec +gemspec diff --git a/test/code/plugin/health/tests/LICENSE.txt b/test/code/plugin/health/tests/LICENSE.txt new file mode 100644 index 000000000..d45d69768 --- /dev/null +++ b/test/code/plugin/health/tests/LICENSE.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2019 r-dilip + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + 
+The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/test/code/plugin/health/tests/README.md b/test/code/plugin/health/tests/README.md new file mode 100644 index 000000000..dcd05bc99 --- /dev/null +++ b/test/code/plugin/health/tests/README.md @@ -0,0 +1,43 @@ +# Tests + +Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/tests`. To experiment with that code, run `bin/console` for an interactive prompt. + +TODO: Delete this and the text above, and describe your gem + +## Installation + +Add this line to your application's Gemfile: + +```ruby +gem 'tests' +``` + +And then execute: + + $ bundle + +Or install it yourself as: + + $ gem install tests + +## Usage + +TODO: Write usage instructions here + +## Development + +After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. + +To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). 
+ +## Contributing + +Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/tests. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct. + +## License + +The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT). + +## Code of Conduct + +Everyone interacting in the Tests project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/tests/blob/master/CODE_OF_CONDUCT.md). diff --git a/test/code/plugin/health/tests/Rakefile b/test/code/plugin/health/tests/Rakefile new file mode 100644 index 000000000..d433a1edc --- /dev/null +++ b/test/code/plugin/health/tests/Rakefile @@ -0,0 +1,10 @@ +require "bundler/gem_tasks" +require "rake/testtask" + +Rake::TestTask.new(:test) do |t| + t.libs << "test" + t.libs << "lib" + t.test_files = FileList["test/**/*_test.rb"] +end + +task :default => :test diff --git a/test/code/plugin/health/tests/bin/console b/test/code/plugin/health/tests/bin/console new file mode 100644 index 000000000..59eb02ef2 --- /dev/null +++ b/test/code/plugin/health/tests/bin/console @@ -0,0 +1,14 @@ +#!/usr/bin/env ruby + +require "bundler/setup" +require "tests" + +# You can add fixtures and/or initialization code here to make experimenting +# with your gem easier. You can also use a different console, if you like. + +# (If you use this, don't forget to add pry to your Gemfile!) 
+# require "pry" +# Pry.start + +require "irb" +IRB.start(__FILE__) diff --git a/test/code/plugin/health/tests/bin/setup b/test/code/plugin/health/tests/bin/setup new file mode 100644 index 000000000..dce67d860 --- /dev/null +++ b/test/code/plugin/health/tests/bin/setup @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail +IFS=$'\n\t' +set -vx + +bundle install + +# Do any other automated setup that you need to do here diff --git a/test/code/plugin/health/tests/lib/tests.rb b/test/code/plugin/health/tests/lib/tests.rb new file mode 100644 index 000000000..ed7795e48 --- /dev/null +++ b/test/code/plugin/health/tests/lib/tests.rb @@ -0,0 +1,6 @@ +require "tests/version" + +module Tests + class Error < StandardError; end + # Your code goes here... +end diff --git a/test/code/plugin/health/tests/lib/tests/version.rb b/test/code/plugin/health/tests/lib/tests/version.rb new file mode 100644 index 000000000..c1945fbde --- /dev/null +++ b/test/code/plugin/health/tests/lib/tests/version.rb @@ -0,0 +1,3 @@ +module Tests + VERSION = "0.1.0" +end diff --git a/test/code/plugin/health/tests/test/test_helper.rb b/test/code/plugin/health/tests/test/test_helper.rb new file mode 100644 index 000000000..ca750dd40 --- /dev/null +++ b/test/code/plugin/health/tests/test/test_helper.rb @@ -0,0 +1,4 @@ +$LOAD_PATH.unshift File.expand_path("../lib", __dir__) +require "tests" + +require "minitest/autorun" diff --git a/test/code/plugin/health/tests/test/tests_test.rb b/test/code/plugin/health/tests/test/tests_test.rb new file mode 100644 index 000000000..7bb30bfd8 --- /dev/null +++ b/test/code/plugin/health/tests/test/tests_test.rb @@ -0,0 +1,11 @@ +require "test_helper" + +class TestsTest < Minitest::Test + def test_that_it_has_a_version_number + refute_nil ::Tests::VERSION + end + + def test_it_does_something_useful + assert false + end +end diff --git a/test/code/plugin/health/tests/tests.gemspec b/test/code/plugin/health/tests/tests.gemspec new file mode 100644 index 
000000000..b551315cc --- /dev/null +++ b/test/code/plugin/health/tests/tests.gemspec @@ -0,0 +1,34 @@ +lib = File.expand_path("lib", __dir__) +$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) +require "tests/version" + +Gem::Specification.new do |spec| + spec.name = "tests" + spec.version = Tests::VERSION + spec.authors = ["r-dilip"] + spec.email = ["dilip.rangarajan@gmail.com"] + + spec.summary = %q{TODO: Write a short summary, because RubyGems requires one.} + spec.description = %q{TODO: Write a longer description or delete this line.} + spec.homepage = "TODO: Put your gem's website or public repo URL here." + spec.license = "MIT" + + spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'" + + spec.metadata["homepage_uri"] = spec.homepage + spec.metadata["source_code_uri"] = "TODO: Put your gem's public repo URL here." + spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here." + + # Specify which files should be added to the gem when it is released. + # The `git ls-files -z` loads the files in the RubyGem that have been added into git. 
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do + `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } + end + spec.bindir = "exe" + spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } + spec.require_paths = ["lib"] + + spec.add_development_dependency "bundler", "~> 2.0" + spec.add_development_dependency "rake", "~> 10.0" + spec.add_development_dependency "minitest", "~> 5.0" +end diff --git a/test/code/plugin/health/unit_monitor_spec.rb b/test/code/plugin/health/unit_monitor_spec.rb new file mode 100644 index 000000000..4cbf794db --- /dev/null +++ b/test/code/plugin/health/unit_monitor_spec.rb @@ -0,0 +1,20 @@ +require_relative '../../../../source/code/plugin/health/unit_monitor' +require_relative '../test_helpers' + +include HealthModel + +describe "UnitMonitor Spec" do + it "is_aggregate_monitor is false for UnitMonitor" do + # Arrange/Act + monitor = UnitMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, {}, {}, {}) + # Assert + assert_equal monitor.is_aggregate_monitor, false + end + + it "get_member_monitors is nil for UnitMonitor" do + # Arrange/Act + monitor = UnitMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, {}, {}, {}) + #Assert + assert_nil monitor.get_member_monitors + end +end \ No newline at end of file diff --git a/test/code/plugin/health/unit_monitor_test.rb b/test/code/plugin/health/unit_monitor_test.rb index 656e79430..e53617c99 100644 --- a/test/code/plugin/health/unit_monitor_test.rb +++ b/test/code/plugin/health/unit_monitor_test.rb @@ -1,12 +1,16 @@ require_relative '../../../../source/code/plugin/health/unit_monitor' -require 'minitest/autorun' -require 'time' +require_relative '../test_helpers' class UnitMonitorTest < Minitest::Test include HealthModel def test_is_aggregate_monitor_false - monitor = UnitMonitor.new(:monitor_id, :monitor_instance_id, :pass, Time.now.utc.iso8601, {}, {}, {}) + monitor = UnitMonitor.new(:monitor_id, :monitor_instance_id, 
:pass, :time, {}, {}, {}) assert_equal monitor.is_aggregate_monitor, false end + + def test_get_member_monitors_nil + monitor = UnitMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, {}, {}, {}) + assert_nil monitor.get_member_monitors + end end From 2e50407703aafdb01572a41a4a2021e9862f3c83 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 17 Jul 2019 11:19:29 -0700 Subject: [PATCH 69/90] added integration tests for aks and aks-engine --- health_records/aks-engine/aks-engine-1.json | 549 ++ health_records/aks-engine/aks-engine-2.json | 549 ++ health_records/aks-engine/aks-engine-3.json | 549 ++ health_records/first_daemon_set_signals.json | 456 ++ health_records/health_model_state.json | 1272 +++ .../health_model_state_aks-engine.json | 1497 ++++ health_records/second_daemon_set_signals.json | 456 ++ health_records/third_daemon_set_signals.json | 456 ++ inventory/aks-engine/deployments.json | 1139 +++ inventory/aks-engine/nodes.json | 1439 ++++ inventory/aks-engine/pods.json | 6622 ++++++++++++++++ inventory/deployments.json | 1702 ++++ inventory/nodes.json | 964 +++ inventory/pods.json | 6971 +++++++++++++++++ .../health/health_model_builder_test.rb | 383 +- .../health/health_monitor_state_spec.rb | 47 + 16 files changed, 24859 insertions(+), 192 deletions(-) create mode 100644 health_records/aks-engine/aks-engine-1.json create mode 100644 health_records/aks-engine/aks-engine-2.json create mode 100644 health_records/aks-engine/aks-engine-3.json create mode 100644 health_records/first_daemon_set_signals.json create mode 100644 health_records/health_model_state.json create mode 100644 health_records/health_model_state_aks-engine.json create mode 100644 health_records/second_daemon_set_signals.json create mode 100644 health_records/third_daemon_set_signals.json create mode 100644 inventory/aks-engine/deployments.json create mode 100644 inventory/aks-engine/nodes.json create mode 100644 inventory/aks-engine/pods.json create mode 100644 
inventory/deployments.json create mode 100644 inventory/nodes.json create mode 100644 inventory/pods.json diff --git a/health_records/aks-engine/aks-engine-1.json b/health_records/aks-engine/aks-engine-1.json new file mode 100644 index 000000000..593cba634 --- /dev/null +++ b/health_records/aks-engine/aks-engine-1.json @@ -0,0 +1,549 @@ +[ + { + "MonitorId": "node_memory_utilization", + "MonitorInstanceId": "node_memory_utilization-c74648e5c2362fcdc295a88737fdb134", + "Details": { + "timestamp": "2019-06-19T19:41:34Z", + "state": "pass", + "details": { + "memoryRssBytes": 693760000.0, + "memoryUtilizationPercentage": 9.52 + } + }, + "AgentCollectionTime": "2019-06-19T19:41:44Z", + "TimeFirstObserved": "2019-06-19T19:41:44Z", + "NodeName": "k8s-agentpool1-15159885-vmss000001" + }, + { + "MonitorId": "node_cpu_utilization", + "MonitorInstanceId": "node_cpu_utilization-c74648e5c2362fcdc295a88737fdb134", + "Details": { + "timestamp": "2019-06-19T19:41:34Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 116.89870505, + "cpuUtilizationPercentage": 5.84 + } + }, + "AgentCollectionTime": "2019-06-19T19:41:44Z", + "TimeFirstObserved": "2019-06-19T19:41:44Z", + "NodeName": "k8s-agentpool1-15159885-vmss000001" + }, + { + "MonitorId": "node_memory_utilization", + "MonitorInstanceId": "node_memory_utilization-fbf8e2b103dce1d6b0adefda04bfc87c", + "Details": { + "timestamp": "2019-06-19T19:41:38Z", + "state": "fail", + "details": { + "memoryRssBytes": 578871296.0, + "memoryUtilizationPercentage": 7.92 + } + }, + "AgentCollectionTime": "2019-06-19T19:41:39Z", + "TimeFirstObserved": "2019-06-19T19:41:39Z", + "NodeName": "k8s-agentpool1-15159885-vmss000000" + }, + { + "MonitorId": "node_cpu_utilization", + "MonitorInstanceId": "node_cpu_utilization-fbf8e2b103dce1d6b0adefda04bfc87c", + "Details": { + "timestamp": "2019-06-19T19:41:38Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 68.11712868852459, + "cpuUtilizationPercentage": 3.41 + } + }, + 
"AgentCollectionTime": "2019-06-19T19:41:39Z", + "TimeFirstObserved": "2019-06-19T19:41:39Z", + "NodeName": "k8s-agentpool1-15159885-vmss000000" + }, + { + "MonitorId": "node_memory_utilization", + "MonitorInstanceId": "node_memory_utilization-d31bde3d37d8ad276db34f17aa1ec71a", + "Details": { + "timestamp": "2019-06-19T19:41:53Z", + "state": "pass", + "details": { + "memoryRssBytes": 1582518272.0, + "memoryUtilizationPercentage": 21.65 + } + }, + "AgentCollectionTime": "2019-06-19T19:42:02Z", + "TimeFirstObserved": "2019-06-19T19:42:02Z", + "NodeName": "k8s-master-15159885-0" + }, + { + "MonitorId": "node_cpu_utilization", + "MonitorInstanceId": "node_cpu_utilization-d31bde3d37d8ad276db34f17aa1ec71a", + "Details": { + "timestamp": "2019-06-19T19:41:53Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 191.90451009836065, + "cpuUtilizationPercentage": 9.6 + } + }, + "AgentCollectionTime": "2019-06-19T19:42:02Z", + "TimeFirstObserved": "2019-06-19T19:42:02Z", + "NodeName": "k8s-master-15159885-0" + }, + { + "MonitorId": "kube_api_status", + "MonitorInstanceId": "kube_api_status", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "audit-id": "2abd071d-9c29-477b-9cb8-8457fc0e2d48", + "content-type": "application/json", + "date": "Wed, 19 Jun 2019 19:43:13 GMT", + "connection": "close", + "transfer-encoding": "chunked", + "ResponseCode": "200" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "subscribed_capacity_cpu", + "MonitorInstanceId": "subscribed_capacity_cpu-e07de265a0a132be38e486491b78067c", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "clusterCpuCapacity": 18000.0, + "clusterCpuRequests": 1606.0 + } + }, + 
"AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "subscribed_capacity_memory", + "MonitorInstanceId": "subscribed_capacity_memory-e07de265a0a132be38e486491b78067c", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "clusterMemoryCapacity": 65701564416.0, + "clusterMemoryRequests": 2493513728.0 + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-df01a18f5517897dad95f0b999cc9d7c", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~azure-cni-networkmonitor", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-68739da95e81bcf8ab005699ca388a4f", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~azure-ip-masq-agent", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": 
"2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-391488c4aaa8dcbd64beca1405a617ad", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~blobfuse-flexvol-installer", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-88bb1b25c5f62cca4c5e335b1aa0a006", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~coredns", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-ffb237f52acc4a1cca7b61a080ad0bc7", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~heapster", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": 
"2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-dbd2d551aaa5aa496c8828e1561fc877", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~keyvault-flexvolume", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-35ec6eb563b285b32803cc13fe31ac62", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-addon-manager-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-047a539a54ed0dba1c4d839202a66e71", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-apiserver-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + 
"TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-27e541d084fab4b79e80381168eead29", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-controller-manager-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-b5563ebf777cfd7eba6b60219cc6290a", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~kube-proxy", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-1f3148b833758c579f539036b695c2f0", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-scheduler-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + "AgentCollectionTime": 
"2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-7c98478057c9066c415f39d201d13455", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kubernetes-dashboard", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-ed7573dae1701088a1d0cd3a8f492ed0", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~metrics-server", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-d886e78b79832e7337133cafd1a21bcf", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~omsagent", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": 
"2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-d32a394b18c639dd53e936d042fe36a4", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~omsagent-rs", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-76eff81892d597e12372a28b77a66a73", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~tiller-deploy", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-ba138a5fed12485c414a6ce00a1d2626", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "totalPods": 5, + "podsReady": 5, + "workloadName": "default~~diliprnodejsonlog", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + 
"TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-fbf8e2b103dce1d6b0adefda04bfc87c", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. AppArmor enabled" + } + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", + "NodeName": "k8s-agentpool1-15159885-vmss000000" + }, + { + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-c74648e5c2362fcdc295a88737fdb134", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient 
PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. AppArmor enabled" + } + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", + "NodeName": "k8s-agentpool1-15159885-vmss000001" + }, + { + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-d31bde3d37d8ad276db34f17aa1ec71a", + "Details": { + "timestamp": "2019-06-19T19:43:13Z", + "state": "pass", + "details": { + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + }, + "AgentCollectionTime": "2019-06-19T19:43:13Z", + "TimeFirstObserved": "2019-06-19T19:43:13Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", + "NodeName": "k8s-master-15159885-0" + } +] \ No newline at end of file diff --git a/health_records/aks-engine/aks-engine-2.json b/health_records/aks-engine/aks-engine-2.json new file mode 100644 index 000000000..383594a9e --- /dev/null +++ b/health_records/aks-engine/aks-engine-2.json @@ -0,0 +1,549 @@ +[ + { + "MonitorId": "node_memory_utilization", + "MonitorInstanceId": "node_memory_utilization-c74648e5c2362fcdc295a88737fdb134", + "Details": { + "timestamp": "2019-06-19T19:42:34Z", + "state": "pass", + "details": { + "memoryRssBytes": 656310272.0, + "memoryUtilizationPercentage": 9.01 + } + }, + "AgentCollectionTime": "2019-06-19T19:42:45Z", + "TimeFirstObserved": "2019-06-19T19:42:45Z", + "NodeName": "k8s-agentpool1-15159885-vmss000001" + }, + { + "MonitorId": "node_cpu_utilization", + "MonitorInstanceId": "node_cpu_utilization-c74648e5c2362fcdc295a88737fdb134", + "Details": { + "timestamp": "2019-06-19T19:42:34Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 110.81541546666666, + "cpuUtilizationPercentage": 5.54 + } + }, + "AgentCollectionTime": "2019-06-19T19:42:45Z", + "TimeFirstObserved": "2019-06-19T19:42:45Z", + "NodeName": "k8s-agentpool1-15159885-vmss000001" + }, + { + "MonitorId": "node_memory_utilization", + "MonitorInstanceId": "node_memory_utilization-fbf8e2b103dce1d6b0adefda04bfc87c", + "Details": { + "timestamp": "2019-06-19T19:42:38Z", + "state": "pass", + "details": { + "memoryRssBytes": 579223552.0, + "memoryUtilizationPercentage": 7.93 + } + }, + "AgentCollectionTime": "2019-06-19T19:42:39Z", + "TimeFirstObserved": "2019-06-19T19:42:39Z", + "NodeName": "k8s-agentpool1-15159885-vmss000000" + }, + { + "MonitorId": 
"node_cpu_utilization", + "MonitorInstanceId": "node_cpu_utilization-fbf8e2b103dce1d6b0adefda04bfc87c", + "Details": { + "timestamp": "2019-06-19T19:42:38Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 69.03265261666667, + "cpuUtilizationPercentage": 3.45 + } + }, + "AgentCollectionTime": "2019-06-19T19:42:39Z", + "TimeFirstObserved": "2019-06-19T19:42:39Z", + "NodeName": "k8s-agentpool1-15159885-vmss000000" + }, + { + "MonitorId": "node_memory_utilization", + "MonitorInstanceId": "node_memory_utilization-d31bde3d37d8ad276db34f17aa1ec71a", + "Details": { + "timestamp": "2019-06-19T19:42:53Z", + "state": "pass", + "details": { + "memoryRssBytes": 1583378432.0, + "memoryUtilizationPercentage": 21.67 + } + }, + "AgentCollectionTime": "2019-06-19T19:43:02Z", + "TimeFirstObserved": "2019-06-19T19:43:02Z", + "NodeName": "k8s-master-15159885-0" + }, + { + "MonitorId": "node_cpu_utilization", + "MonitorInstanceId": "node_cpu_utilization-d31bde3d37d8ad276db34f17aa1ec71a", + "Details": { + "timestamp": "2019-06-19T19:42:53Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 192.56217935, + "cpuUtilizationPercentage": 9.63 + } + }, + "AgentCollectionTime": "2019-06-19T19:43:02Z", + "TimeFirstObserved": "2019-06-19T19:43:02Z", + "NodeName": "k8s-master-15159885-0" + }, + { + "MonitorId": "kube_api_status", + "MonitorInstanceId": "kube_api_status", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "audit-id": "475b1f63-6d5f-40ba-88bc-161d5ac8de85", + "content-type": "application/json", + "date": "Wed, 19 Jun 2019 19:44:14 GMT", + "connection": "close", + "transfer-encoding": "chunked", + "ResponseCode": "200" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": 
"subscribed_capacity_cpu", + "MonitorInstanceId": "subscribed_capacity_cpu-e07de265a0a132be38e486491b78067c", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "clusterCpuCapacity": 18000.0, + "clusterCpuRequests": 1606.0 + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "subscribed_capacity_memory", + "MonitorInstanceId": "subscribed_capacity_memory-e07de265a0a132be38e486491b78067c", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "clusterMemoryCapacity": 65701564416.0, + "clusterMemoryRequests": 2493513728.0 + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-df01a18f5517897dad95f0b999cc9d7c", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~azure-cni-networkmonitor", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-68739da95e81bcf8ab005699ca388a4f", + "Details": { + "timestamp": 
"2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~azure-ip-masq-agent", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-391488c4aaa8dcbd64beca1405a617ad", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~blobfuse-flexvol-installer", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-88bb1b25c5f62cca4c5e335b1aa0a006", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~coredns", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-ffb237f52acc4a1cca7b61a080ad0bc7", + "Details": { + "timestamp": 
"2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~heapster", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-dbd2d551aaa5aa496c8828e1561fc877", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~keyvault-flexvolume", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-35ec6eb563b285b32803cc13fe31ac62", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-addon-manager-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-047a539a54ed0dba1c4d839202a66e71", + "Details": { + "timestamp": 
"2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-apiserver-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-27e541d084fab4b79e80381168eead29", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-controller-manager-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-b5563ebf777cfd7eba6b60219cc6290a", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~kube-proxy", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-1f3148b833758c579f539036b695c2f0", + "Details": { 
+ "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-scheduler-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-7c98478057c9066c415f39d201d13455", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kubernetes-dashboard", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-ed7573dae1701088a1d0cd3a8f492ed0", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~metrics-server", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-d886e78b79832e7337133cafd1a21bcf", + "Details": 
{ + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~omsagent", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-d32a394b18c639dd53e936d042fe36a4", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~omsagent-rs", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-76eff81892d597e12372a28b77a66a73", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~tiller-deploy", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-ba138a5fed12485c414a6ce00a1d2626", + "Details": { + "timestamp": 
"2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 5, + "podsReady": 5, + "workloadName": "default~~diliprnodejsonlog", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-fbf8e2b103dce1d6b0adefda04bfc87c", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", + "NodeName": "k8s-agentpool1-15159885-vmss000000" + }, + { + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-c74648e5c2362fcdc295a88737fdb134", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", + "NodeName": "k8s-agentpool1-15159885-vmss000001" + }, + { + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-d31bde3d37d8ad276db34f17aa1ec71a", + "Details": { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + }, + "AgentCollectionTime": "2019-06-19T19:44:14Z", + "TimeFirstObserved": "2019-06-19T19:44:14Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", + "NodeName": "k8s-master-15159885-0" + } +] \ No newline at end of file diff --git a/health_records/aks-engine/aks-engine-3.json b/health_records/aks-engine/aks-engine-3.json new file mode 100644 index 000000000..674a87a9c --- /dev/null +++ b/health_records/aks-engine/aks-engine-3.json @@ -0,0 +1,549 @@ +[ + { + "MonitorId": "node_memory_utilization", + "MonitorInstanceId": "node_memory_utilization-c74648e5c2362fcdc295a88737fdb134", + "Details": { + "timestamp": "2019-06-19T19:43:44Z", + "state": "pass", + "details": { + "memoryRssBytes": 668811264.0, + "memoryUtilizationPercentage": 9.18 + } + }, + "AgentCollectionTime": "2019-06-19T19:43:45Z", + "TimeFirstObserved": "2019-06-19T19:43:45Z", + "NodeName": "k8s-agentpool1-15159885-vmss000001" + }, + { + "MonitorId": "node_cpu_utilization", + "MonitorInstanceId": "node_cpu_utilization-c74648e5c2362fcdc295a88737fdb134", + "Details": { + "timestamp": "2019-06-19T19:43:44Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 91.65759084285713, + "cpuUtilizationPercentage": 4.58 + } + }, + "AgentCollectionTime": "2019-06-19T19:43:45Z", + "TimeFirstObserved": "2019-06-19T19:43:45Z", + "NodeName": "k8s-agentpool1-15159885-vmss000001" + }, + { + "MonitorId": "node_memory_utilization", + "MonitorInstanceId": "node_memory_utilization-fbf8e2b103dce1d6b0adefda04bfc87c", + "Details": { + "timestamp": "2019-06-19T19:43:38Z", + "state": "pass", + "details": { + "memoryRssBytes": 577175552.0, + "memoryUtilizationPercentage": 7.9 + } + }, + "AgentCollectionTime": "2019-06-19T19:43:39Z", + "TimeFirstObserved": "2019-06-19T19:43:39Z", + "NodeName": "k8s-agentpool1-15159885-vmss000000" + }, + { + "MonitorId": 
"node_cpu_utilization", + "MonitorInstanceId": "node_cpu_utilization-fbf8e2b103dce1d6b0adefda04bfc87c", + "Details": { + "timestamp": "2019-06-19T19:43:38Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 69.51127465, + "cpuUtilizationPercentage": 3.48 + } + }, + "AgentCollectionTime": "2019-06-19T19:43:39Z", + "TimeFirstObserved": "2019-06-19T19:43:39Z", + "NodeName": "k8s-agentpool1-15159885-vmss000000" + }, + { + "MonitorId": "node_memory_utilization", + "MonitorInstanceId": "node_memory_utilization-d31bde3d37d8ad276db34f17aa1ec71a", + "Details": { + "timestamp": "2019-06-19T19:43:53Z", + "state": "pass", + "details": { + "memoryRssBytes": 1582354432.0, + "memoryUtilizationPercentage": 21.65 + } + }, + "AgentCollectionTime": "2019-06-19T19:44:02Z", + "TimeFirstObserved": "2019-06-19T19:44:02Z", + "NodeName": "k8s-master-15159885-0" + }, + { + "MonitorId": "node_cpu_utilization", + "MonitorInstanceId": "node_cpu_utilization-d31bde3d37d8ad276db34f17aa1ec71a", + "Details": { + "timestamp": "2019-06-19T19:43:53Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 192.88827258333333, + "cpuUtilizationPercentage": 9.64 + } + }, + "AgentCollectionTime": "2019-06-19T19:44:02Z", + "TimeFirstObserved": "2019-06-19T19:44:02Z", + "NodeName": "k8s-master-15159885-0" + }, + { + "MonitorId": "kube_api_status", + "MonitorInstanceId": "kube_api_status", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "audit-id": "2116a117-abc9-4a72-a4d9-ea1bd111b127", + "content-type": "application/json", + "date": "Wed, 19 Jun 2019 19:45:15 GMT", + "connection": "close", + "transfer-encoding": "chunked", + "ResponseCode": "200" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": 
"subscribed_capacity_cpu", + "MonitorInstanceId": "subscribed_capacity_cpu-e07de265a0a132be38e486491b78067c", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "clusterCpuCapacity": 18000.0, + "clusterCpuRequests": 1606.0 + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "subscribed_capacity_memory", + "MonitorInstanceId": "subscribed_capacity_memory-e07de265a0a132be38e486491b78067c", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "clusterMemoryCapacity": 65701564416.0, + "clusterMemoryRequests": 2493513728.0 + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-df01a18f5517897dad95f0b999cc9d7c", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~azure-cni-networkmonitor", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-68739da95e81bcf8ab005699ca388a4f", + "Details": { + "timestamp": 
"2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~azure-ip-masq-agent", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-391488c4aaa8dcbd64beca1405a617ad", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~blobfuse-flexvol-installer", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-88bb1b25c5f62cca4c5e335b1aa0a006", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~coredns", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-ffb237f52acc4a1cca7b61a080ad0bc7", + "Details": { + "timestamp": 
"2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~heapster", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-dbd2d551aaa5aa496c8828e1561fc877", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~keyvault-flexvolume", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-35ec6eb563b285b32803cc13fe31ac62", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-addon-manager-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-047a539a54ed0dba1c4d839202a66e71", + "Details": { + "timestamp": 
"2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-apiserver-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-27e541d084fab4b79e80381168eead29", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-controller-manager-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-b5563ebf777cfd7eba6b60219cc6290a", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~kube-proxy", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-1f3148b833758c579f539036b695c2f0", + "Details": { 
+ "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-scheduler-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-7c98478057c9066c415f39d201d13455", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kubernetes-dashboard", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-ed7573dae1701088a1d0cd3a8f492ed0", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~metrics-server", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-d886e78b79832e7337133cafd1a21bcf", + "Details": 
{ + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~omsagent", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-d32a394b18c639dd53e936d042fe36a4", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~omsagent-rs", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-76eff81892d597e12372a28b77a66a73", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~tiller-deploy", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-ba138a5fed12485c414a6ce00a1d2626", + "Details": { + "timestamp": 
"2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 5, + "podsReady": 5, + "workloadName": "default~~diliprnodejsonlog", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-fbf8e2b103dce1d6b0adefda04bfc87c", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", + "NodeName": "k8s-agentpool1-15159885-vmss000000" + }, + { + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-c74648e5c2362fcdc295a88737fdb134", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", + "NodeName": "k8s-agentpool1-15159885-vmss000001" + }, + { + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-d31bde3d37d8ad276db34f17aa1ec71a", + "Details": { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + }, + "AgentCollectionTime": "2019-06-19T19:45:15Z", + "TimeFirstObserved": "2019-06-19T19:45:15Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", + "NodeName": "k8s-master-15159885-0" + } +] \ No newline at end of file diff --git a/health_records/first_daemon_set_signals.json b/health_records/first_daemon_set_signals.json new file mode 100644 index 000000000..b540294f5 --- /dev/null +++ b/health_records/first_daemon_set_signals.json @@ -0,0 +1,456 @@ +[ + { + "MonitorId": "node_memory_utilization", + "MonitorInstanceId": "node_memory_utilization-af2f3c986ea63b47fc7d59b71abb37b8", + "Details": { + "timestamp": "2019-06-08T00:31:03Z", + "state": "pass", + "details": { + "memoryRssBytes": 953540608.0, + "memoryUtilizationPercentage": 26.42 + } + }, + "AgentCollectionTime": "2019-06-08T00:31:05Z", + "TimeFirstObserved": "2019-06-08T00:31:05Z", + "NodeName": "aks-nodepool1-19574989-1" + }, + { + "MonitorId": "node_cpu_utilization", + "MonitorInstanceId": "node_cpu_utilization-af2f3c986ea63b47fc7d59b71abb37b8", + "Details": { + "timestamp": "2019-06-08T00:31:03Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 253.0181856885246, + "cpuUtilizationPercentage": 25.3 + } + }, + "AgentCollectionTime": "2019-06-08T00:31:05Z", + "TimeFirstObserved": "2019-06-08T00:31:05Z", + "NodeName": "aks-nodepool1-19574989-1" + }, + { + "MonitorId": "node_memory_utilization", + "MonitorInstanceId": "node_memory_utilization-2b129a9a5633c0cf8f621601c6f8bb32", + "Details": { + "timestamp": "2019-06-08T00:31:36Z", + "state": "pass", + "details": { + "memoryRssBytes": 786239488.0, + "memoryUtilizationPercentage": 21.78 + } + }, + "AgentCollectionTime": "2019-06-08T00:31:42Z", + "TimeFirstObserved": "2019-06-08T00:31:42Z", + "NodeName": "aks-nodepool1-19574989-0" + }, + { + "MonitorId": "node_cpu_utilization", + 
"MonitorInstanceId": "node_cpu_utilization-2b129a9a5633c0cf8f621601c6f8bb32", + "Details": { + "timestamp": "2019-06-08T00:31:36Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 216.61802946666666, + "cpuUtilizationPercentage": 21.66 + } + }, + "AgentCollectionTime": "2019-06-08T00:31:42Z", + "TimeFirstObserved": "2019-06-08T00:31:42Z", + "NodeName": "aks-nodepool1-19574989-0" + }, + { + "MonitorId": "kube_api_status", + "MonitorInstanceId": "kube_api_status", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "content-type": "application/json", + "date": "Sat, 08 Jun 2019 00:31:49 GMT", + "connection": "close", + "transfer-encoding": "chunked", + "ResponseCode": "200" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "subscribed_capacity_cpu", + "MonitorInstanceId": "subscribed_capacity_cpu-bef5af9d919a51c49ba49d07f5784471", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "clusterCpuCapacity": 4000.0, + "clusterCpuRequests": 878.0 + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "subscribed_capacity_memory", + "MonitorInstanceId": "subscribed_capacity_memory-bef5af9d919a51c49ba49d07f5784471", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "clusterMemoryCapacity": 14436810752.0, + "clusterMemoryRequests": 1379926016.0 + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + 
"ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-1cab3e34a98cbb58a619846d017333e6", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~heapster", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-a0ed056caebdc40d1c78a49e87306eb8", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-dns-autoscaler", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-2fa184d5953cc675f553f22e673a3112", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~kube-dns-v20", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-3a08ca7a85c5f7b2eb912692aa0fe576", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~kube-proxy", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-b187658b280129d5d55e778e90bc76e7", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~kube-svc-redirect", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-3cd871884683b44eaf3d4bcbac980fa9", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kubernetes-dashboard", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-e22a2867e6f46868e1ff14b04d05400a", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~metrics-server", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-e1a23f69ff6f2d20614f3253f1be61c6", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~omsagent", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-b6bdfa57cc73499c0163debad116ea9e", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~omsagent-rs", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-a69167a8c8a128025d2a1b7adbbbf251", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~tunnelfront", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-18c49e512366cd770ea58fa6f2fbb72e", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "default~~diliprdeploymentnodeapps", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-c36b41dc3ddfd8830e5cef31b5c2738b", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "default~~nginx-deployment", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-8be6589a87324bd8010f0583b6e3c0a1", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "default~~rss-site", + "namespace": "default", + "workloadKind": "Pod" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-d8ce32d942df13b9f4b96ed0fef8efd9", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "totalPods": 10, + "podsReady": 10, + "workloadName": "default~~vishwadeploymentnodeapps", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-ea6e90712222c453f12f326f9a382e8e", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "test~~nginx-deployment", + "namespace": "test", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-2b129a9a5633c0cf8f621601c6f8bb32", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "NetworkUnavailable": { + "Reason": "RouteCreated", + "Message": "RouteController created a route" + }, + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "NodeName": "aks-nodepool1-19574989-0" + }, + { + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-af2f3c986ea63b47fc7d59b71abb37b8", + "Details": { + "timestamp": "2019-06-08T00:31:49Z", + "state": "pass", + "details": { + "NetworkUnavailable": { + "Reason": "RouteCreated", + "Message": "RouteController created a route" + }, + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + }, + "AgentCollectionTime": "2019-06-08T00:31:49Z", + "TimeFirstObserved": "2019-06-08T00:31:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "NodeName": "aks-nodepool1-19574989-1" + } +] \ No newline at end of file diff --git a/health_records/health_model_state.json b/health_records/health_model_state.json new file mode 100644 index 000000000..8efa173c3 --- /dev/null +++ b/health_records/health_model_state.json @@ -0,0 +1,1272 @@ +{ + "node_memory_utilization-af2f3c986ea63b47fc7d59b71abb37b8": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:31:03Z", + "state": "pass", + "details": { + "memoryRssBytes": 953540608.0, + "memoryUtilizationPercentage": 26.42 + } + }, + { + "timestamp": "2019-06-08T00:32:03Z", + "state": "pass", + "details": { + "memoryRssBytes": 927559680.0, + "memoryUtilizationPercentage": 25.7 + } + }, + { + "timestamp": "2019-06-08T00:33:03Z", + "state": "pass", + "details": { + "memoryRssBytes": 930779136.0, + "memoryUtilizationPercentage": 25.79 + } + } + ], + "is_state_change_consistent": true, + "should_send": true + }, + "node_cpu_utilization-af2f3c986ea63b47fc7d59b71abb37b8": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:31:03Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 253.0181856885246, + "cpuUtilizationPercentage": 25.3 + } + }, + { + "timestamp": "2019-06-08T00:32:03Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 251.33592345, + "cpuUtilizationPercentage": 25.13 + } + }, + { + "timestamp": "2019-06-08T00:33:03Z", + "state": "pass", + "details": { + 
"cpuUsageMillicores": 254.57922858333333, + "cpuUtilizationPercentage": 25.46 + } + } + ], + "is_state_change_consistent": true, + "should_send": true + }, + "node_memory_utilization-2b129a9a5633c0cf8f621601c6f8bb32": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:31:36Z", + "state": "pass", + "details": { + "memoryRssBytes": 786239488.0, + "memoryUtilizationPercentage": 21.78 + } + }, + { + "timestamp": "2019-06-08T00:32:37Z", + "state": "pass", + "details": { + "memoryRssBytes": 748142592.0, + "memoryUtilizationPercentage": 20.73 + } + }, + { + "timestamp": "2019-06-08T00:33:37Z", + "state": "pass", + "details": { + "memoryRssBytes": 749514752.0, + "memoryUtilizationPercentage": 20.77 + } + } + ], + "is_state_change_consistent": true, + "should_send": true + }, + "node_cpu_utilization-2b129a9a5633c0cf8f621601c6f8bb32": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:31:36Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 216.61802946666666, + "cpuUtilizationPercentage": 21.66 + } + }, + { + "timestamp": "2019-06-08T00:32:37Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 207.61622447540984, + "cpuUtilizationPercentage": 20.76 + } + }, + { + "timestamp": "2019-06-08T00:33:37Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 214.84893721666666, + "cpuUtilizationPercentage": 21.48 + } + } + ], + "is_state_change_consistent": true, + "should_send": true + }, + "kube_api_status": { + "prev_sent_record_time": "2019-06-08T00:31:49Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-08T00:31:49Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + 
"content-type": "application/json", + "date": "Sat, 08 Jun 2019 00:33:50 GMT", + "connection": "close", + "transfer-encoding": "chunked", + "ResponseCode": "200" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "subscribed_capacity_cpu-bef5af9d919a51c49ba49d07f5784471": { + "prev_sent_record_time": "2019-06-08T00:31:49Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-08T00:31:49Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "clusterCpuCapacity": 4000.0, + "clusterCpuRequests": 878.0 + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "subscribed_capacity_memory-bef5af9d919a51c49ba49d07f5784471": { + "prev_sent_record_time": "2019-06-08T00:31:49Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-08T00:31:49Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "clusterMemoryCapacity": 14436810752.0, + "clusterMemoryRequests": 1379926016.0 + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-1cab3e34a98cbb58a619846d017333e6": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~heapster", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~heapster", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-a0ed056caebdc40d1c78a49e87306eb8": { + 
"prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-dns-autoscaler", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-dns-autoscaler", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-2fa184d5953cc675f553f22e673a3112": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~kube-dns-v20", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~kube-dns-v20", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-3a08ca7a85c5f7b2eb912692aa0fe576": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~kube-proxy", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + 
"details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~kube-proxy", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-b187658b280129d5d55e778e90bc76e7": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~kube-svc-redirect", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~kube-svc-redirect", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-3cd871884683b44eaf3d4bcbac980fa9": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kubernetes-dashboard", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kubernetes-dashboard", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-e22a2867e6f46868e1ff14b04d05400a": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + 
"prev_records": [ + { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~metrics-server", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~metrics-server", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-e1a23f69ff6f2d20614f3253f1be61c6": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~omsagent", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~omsagent", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-b6bdfa57cc73499c0163debad116ea9e": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~omsagent-rs", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~omsagent-rs", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + } + ], + 
"is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-a69167a8c8a128025d2a1b7adbbbf251": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~tunnelfront", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~tunnelfront", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "user_workload_pods_ready-18c49e512366cd770ea58fa6f2fbb72e": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "default~~diliprdeploymentnodeapps", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "default~~diliprdeploymentnodeapps", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "user_workload_pods_ready-c36b41dc3ddfd8830e5cef31b5c2738b": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "default~~nginx-deployment", + "namespace": 
"default", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "default~~nginx-deployment", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "user_workload_pods_ready-8be6589a87324bd8010f0583b6e3c0a1": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "default~~rss-site", + "namespace": "default", + "workloadKind": "Pod" + } + }, + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "default~~rss-site", + "namespace": "default", + "workloadKind": "Pod" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "user_workload_pods_ready-d8ce32d942df13b9f4b96ed0fef8efd9": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 10, + "podsReady": 10, + "workloadName": "default~~vishwadeploymentnodeapps", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 10, + "podsReady": 10, + "workloadName": "default~~vishwadeploymentnodeapps", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "user_workload_pods_ready-ea6e90712222c453f12f326f9a382e8e": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": 
"pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "test~~nginx-deployment", + "namespace": "test", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "test~~nginx-deployment", + "namespace": "test", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "node_condition-2b129a9a5633c0cf8f621601c6f8bb32": { + "prev_sent_record_time": "2019-06-08T00:31:49Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-08T00:31:49Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "NetworkUnavailable": { + "Reason": "RouteCreated", + "Message": "RouteController created a route" + }, + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "node_condition-af2f3c986ea63b47fc7d59b71abb37b8": { + "prev_sent_record_time": "2019-06-08T00:31:49Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-08T00:31:49Z", + "prev_records": [ + { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "NetworkUnavailable": { + "Reason": "RouteCreated", + "Message": "RouteController created a route" + }, + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "node-nodepool1-agent-aks-nodepool1-19574989-1": { + "prev_sent_record_time": "2019-06-08T00:31:05Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-08T00:31:05Z", + "prev_records": [ + { + "details": { + "pass": [ + "node_memory_utilization-af2f3c986ea63b47fc7d59b71abb37b8", + "node_cpu_utilization-af2f3c986ea63b47fc7d59b71abb37b8", + "node_condition-af2f3c986ea63b47fc7d59b71abb37b8" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:33:05Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "agent_node_pool-nodepool1": { + "prev_sent_record_time": "2019-06-08T00:31:05Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-08T00:31:05Z", + "prev_records": [ + { + "details": { + "pass": [ + "node-nodepool1-agent-aks-nodepool1-19574989-1", + "node-nodepool1-agent-aks-nodepool1-19574989-0" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:33:05Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "all_nodes": { + "prev_sent_record_time": "2019-06-08T00:31:05Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-08T00:31:05Z", + "prev_records": [ + { + "details": { + "pass": [ + "agent_node_pool-nodepool1" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:33:05Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "cluster": { + "prev_sent_record_time": "2019-06-08T00:31:05Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-08T00:31:05Z", + "prev_records": [ + { + "details": { + "pass": [ + "all_nodes", + "k8s_infrastructure", + "all_workloads" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:05Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "node-nodepool1-agent-aks-nodepool1-19574989-0": { + 
"prev_sent_record_time": "2019-06-08T00:31:42Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-08T00:31:42Z", + "prev_records": [ + { + "details": { + "pass": [ + "node_memory_utilization-2b129a9a5633c0cf8f621601c6f8bb32", + "node_cpu_utilization-2b129a9a5633c0cf8f621601c6f8bb32", + "node_condition-2b129a9a5633c0cf8f621601c6f8bb32" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:33:42Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "k8s_infrastructure": { + "prev_sent_record_time": "2019-06-08T00:31:49Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-08T00:31:49Z", + "prev_records": [ + { + "details": { + "pass": [ + "kube_api_status", + "system_workload-kube-system-heapster", + "system_workload-kube-system-kube-dns-autoscaler", + "system_workload-kube-system-kube-dns-v20", + "system_workload-kube-system-kube-proxy", + "system_workload-kube-system-kube-svc-redirect", + "system_workload-kube-system-kubernetes-dashboard", + "system_workload-kube-system-metrics-server", + "system_workload-kube-system-omsagent", + "system_workload-kube-system-omsagent-rs", + "system_workload-kube-system-tunnelfront" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "capacity": { + "prev_sent_record_time": "2019-06-08T00:31:49Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-08T00:31:49Z", + "prev_records": [ + { + "details": { + "pass": [ + "subscribed_capacity_cpu-bef5af9d919a51c49ba49d07f5784471", + "subscribed_capacity_memory-bef5af9d919a51c49ba49d07f5784471" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:33:51Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "all_workloads": { + "prev_sent_record_time": "2019-06-08T00:31:49Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": 
"2019-06-08T00:31:49Z", + "prev_records": [ + { + "details": { + "pass": [ + "capacity", + "all_namespaces" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-heapster": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-1cab3e34a98cbb58a619846d017333e6" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-kube-dns-autoscaler": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-a0ed056caebdc40d1c78a49e87306eb8" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-kube-dns-v20": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-2fa184d5953cc675f553f22e673a3112" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-kube-proxy": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-3a08ca7a85c5f7b2eb912692aa0fe576" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + 
"is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-kube-svc-redirect": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-b187658b280129d5d55e778e90bc76e7" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-kubernetes-dashboard": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-3cd871884683b44eaf3d4bcbac980fa9" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-metrics-server": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-e22a2867e6f46868e1ff14b04d05400a" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-omsagent": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-e1a23f69ff6f2d20614f3253f1be61c6" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-omsagent-rs": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + 
"new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-b6bdfa57cc73499c0163debad116ea9e" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-tunnelfront": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-a69167a8c8a128025d2a1b7adbbbf251" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "user_workload-default-diliprdeploymentnodeapps": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "user_workload_pods_ready-18c49e512366cd770ea58fa6f2fbb72e" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "namespace-default": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "user_workload-default-diliprdeploymentnodeapps", + "user_workload-default-nginx-deployment", + "user_workload-default-rss-site", + "user_workload-default-vishwadeploymentnodeapps" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "all_namespaces": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + 
"namespace-default", + "namespace-test" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "user_workload-default-nginx-deployment": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "user_workload_pods_ready-c36b41dc3ddfd8830e5cef31b5c2738b" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "user_workload-default-rss-site": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "user_workload_pods_ready-8be6589a87324bd8010f0583b6e3c0a1" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "user_workload-default-vishwadeploymentnodeapps": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "user_workload_pods_ready-d8ce32d942df13b9f4b96ed0fef8efd9" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "user_workload-test-nginx-deployment": { + "prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "user_workload_pods_ready-ea6e90712222c453f12f326f9a382e8e" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "namespace-test": { + 
"prev_sent_record_time": "2019-07-17T18:02:10Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:10Z", + "prev_records": [ + { + "details": { + "pass": [ + "user_workload-test-nginx-deployment" + ] + }, + "state": "pass", + "timestamp": "2019-06-08T00:31:49Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + } +} \ No newline at end of file diff --git a/health_records/health_model_state_aks-engine.json b/health_records/health_model_state_aks-engine.json new file mode 100644 index 000000000..a72cf67b3 --- /dev/null +++ b/health_records/health_model_state_aks-engine.json @@ -0,0 +1,1497 @@ +{ + "node_memory_utilization-c74648e5c2362fcdc295a88737fdb134": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:41:34Z", + "state": "pass", + "details": { + "memoryRssBytes": 693760000.0, + "memoryUtilizationPercentage": 9.52 + } + }, + { + "timestamp": "2019-06-19T19:42:34Z", + "state": "pass", + "details": { + "memoryRssBytes": 656310272.0, + "memoryUtilizationPercentage": 9.01 + } + }, + { + "timestamp": "2019-06-19T19:43:44Z", + "state": "pass", + "details": { + "memoryRssBytes": 668811264.0, + "memoryUtilizationPercentage": 9.18 + } + } + ], + "is_state_change_consistent": true, + "should_send": true + }, + "node_cpu_utilization-c74648e5c2362fcdc295a88737fdb134": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:41:34Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 116.89870505, + "cpuUtilizationPercentage": 5.84 + } + }, + { + "timestamp": "2019-06-19T19:42:34Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 110.81541546666666, + "cpuUtilizationPercentage": 5.54 + } + }, + { + "timestamp": 
"2019-06-19T19:43:44Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 91.65759084285713, + "cpuUtilizationPercentage": 4.58 + } + } + ], + "is_state_change_consistent": true, + "should_send": true + }, + "node_memory_utilization-fbf8e2b103dce1d6b0adefda04bfc87c": { + "prev_sent_record_time": "2019-06-19T19:41:39Z", + "old_state": "none", + "new_state": "none", + "state_change_time": "2019-06-19T19:41:39Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:41:38Z", + "state": "fail", + "details": { + "memoryRssBytes": 578871296.0, + "memoryUtilizationPercentage": 7.92 + } + }, + { + "timestamp": "2019-06-19T19:42:38Z", + "state": "pass", + "details": { + "memoryRssBytes": 579223552.0, + "memoryUtilizationPercentage": 7.93 + } + }, + { + "timestamp": "2019-06-19T19:43:38Z", + "state": "pass", + "details": { + "memoryRssBytes": 577175552.0, + "memoryUtilizationPercentage": 7.9 + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "node_cpu_utilization-fbf8e2b103dce1d6b0adefda04bfc87c": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:41:38Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 68.11712868852459, + "cpuUtilizationPercentage": 3.41 + } + }, + { + "timestamp": "2019-06-19T19:42:38Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 69.03265261666667, + "cpuUtilizationPercentage": 3.45 + } + }, + { + "timestamp": "2019-06-19T19:43:38Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 69.51127465, + "cpuUtilizationPercentage": 3.48 + } + } + ], + "is_state_change_consistent": true, + "should_send": true + }, + "node_memory_utilization-d31bde3d37d8ad276db34f17aa1ec71a": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + 
"timestamp": "2019-06-19T19:41:53Z", + "state": "pass", + "details": { + "memoryRssBytes": 1582518272.0, + "memoryUtilizationPercentage": 21.65 + } + }, + { + "timestamp": "2019-06-19T19:42:53Z", + "state": "pass", + "details": { + "memoryRssBytes": 1583378432.0, + "memoryUtilizationPercentage": 21.67 + } + }, + { + "timestamp": "2019-06-19T19:43:53Z", + "state": "pass", + "details": { + "memoryRssBytes": 1582354432.0, + "memoryUtilizationPercentage": 21.65 + } + } + ], + "is_state_change_consistent": true, + "should_send": true + }, + "node_cpu_utilization-d31bde3d37d8ad276db34f17aa1ec71a": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:41:53Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 191.90451009836065, + "cpuUtilizationPercentage": 9.6 + } + }, + { + "timestamp": "2019-06-19T19:42:53Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 192.56217935, + "cpuUtilizationPercentage": 9.63 + } + }, + { + "timestamp": "2019-06-19T19:43:53Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 192.88827258333333, + "cpuUtilizationPercentage": 9.64 + } + } + ], + "is_state_change_consistent": true, + "should_send": true + }, + "kube_api_status": { + "prev_sent_record_time": "2019-06-19T19:43:13Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:43:13Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "audit-id": "2116a117-abc9-4a72-a4d9-ea1bd111b127", + "content-type": "application/json", + "date": "Wed, 19 Jun 2019 19:45:15 GMT", + "connection": "close", + "transfer-encoding": "chunked", + "ResponseCode": "200" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "subscribed_capacity_cpu-e07de265a0a132be38e486491b78067c": { + "prev_sent_record_time": "2019-06-19T19:43:13Z", 
+ "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:43:13Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "clusterCpuCapacity": 18000.0, + "clusterCpuRequests": 1606.0 + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "subscribed_capacity_memory-e07de265a0a132be38e486491b78067c": { + "prev_sent_record_time": "2019-06-19T19:43:13Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:43:13Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "clusterMemoryCapacity": 65701564416.0, + "clusterMemoryRequests": 2493513728.0 + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-df01a18f5517897dad95f0b999cc9d7c": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~azure-cni-networkmonitor", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~azure-cni-networkmonitor", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-68739da95e81bcf8ab005699ca388a4f": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~azure-ip-masq-agent", + 
"namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~azure-ip-masq-agent", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-391488c4aaa8dcbd64beca1405a617ad": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~blobfuse-flexvol-installer", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~blobfuse-flexvol-installer", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-88bb1b25c5f62cca4c5e335b1aa0a006": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~coredns", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~coredns", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-ffb237f52acc4a1cca7b61a080ad0bc7": { + 
"prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~heapster", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~heapster", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-dbd2d551aaa5aa496c8828e1561fc877": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~keyvault-flexvolume", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~keyvault-flexvolume", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-35ec6eb563b285b32803cc13fe31ac62": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-addon-manager-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": 
"pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-addon-manager-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-047a539a54ed0dba1c4d839202a66e71": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-apiserver-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-apiserver-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-27e541d084fab4b79e80381168eead29": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-controller-manager-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-controller-manager-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-b5563ebf777cfd7eba6b60219cc6290a": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + 
"old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~kube-proxy", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~kube-proxy", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-1f3148b833758c579f539036b695c2f0": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-scheduler-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-scheduler-k8s-master-15159885-0", + "namespace": "kube-system", + "workloadKind": "Pod" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-7c98478057c9066c415f39d201d13455": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kubernetes-dashboard", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + 
"podsReady": 1, + "workloadName": "kube-system~~kubernetes-dashboard", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-ed7573dae1701088a1d0cd3a8f492ed0": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~metrics-server", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~metrics-server", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-d886e78b79832e7337133cafd1a21bcf": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~omsagent", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "kube-system~~omsagent", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-d32a394b18c639dd53e936d042fe36a4": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": 
"2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~omsagent-rs", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~omsagent-rs", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload_pods_ready-76eff81892d597e12372a28b77a66a73": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~tiller-deploy", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~tiller-deploy", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "user_workload_pods_ready-ba138a5fed12485c414a6ce00a1d2626": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:44:14Z", + "state": "pass", + "details": { + "totalPods": 5, + "podsReady": 5, + "workloadName": "default~~diliprnodejsonlog", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + }, + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "totalPods": 5, + "podsReady": 5, + "workloadName": "default~~diliprnodejsonlog", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + } + ], + "is_state_change_consistent": false, 
+ "should_send": false + }, + "node_condition-fbf8e2b103dce1d6b0adefda04bfc87c": { + "prev_sent_record_time": "2019-06-19T19:43:13Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:43:13Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. AppArmor enabled" + } + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "node_condition-c74648e5c2362fcdc295a88737fdb134": { + "prev_sent_record_time": "2019-06-19T19:43:13Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:43:13Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "node_condition-d31bde3d37d8ad276db34f17aa1ec71a": { + "prev_sent_record_time": "2019-06-19T19:43:13Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:43:13Z", + "prev_records": [ + { + "timestamp": "2019-06-19T19:45:15Z", + "state": "pass", + "details": { + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. AppArmor enabled" + } + } + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "node-agentpool1-agent-k8s-agentpool1-15159885-vmss000001": { + "prev_sent_record_time": "2019-06-19T19:41:44Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:41:44Z", + "prev_records": [ + { + "details": { + "pass": [ + "node_memory_utilization-c74648e5c2362fcdc295a88737fdb134", + "node_cpu_utilization-c74648e5c2362fcdc295a88737fdb134", + "node_condition-c74648e5c2362fcdc295a88737fdb134" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:45Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "agent_node_pool-agentpool1": { + "prev_sent_record_time": "2019-06-19T19:41:39Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:41:39Z", + "prev_records": [ + { + "details": { + "pass": [ + "node-agentpool1-agent-k8s-agentpool1-15159885-vmss000001", + "node-agentpool1-agent-k8s-agentpool1-15159885-vmss000000" + ] + }, + 
"state": "pass", + "timestamp": "2019-06-19T19:43:39Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "all_nodes": { + "prev_sent_record_time": "2019-06-19T19:41:39Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:41:39Z", + "prev_records": [ + { + "details": { + "pass": [ + "agent_node_pool-agentpool1", + "master_node_pool" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:39Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "cluster": { + "prev_sent_record_time": "2019-06-19T19:41:39Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:41:39Z", + "prev_records": [ + { + "details": { + "pass": [ + "all_nodes", + "k8s_infrastructure", + "all_workloads" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:41:39Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "node-agentpool1-agent-k8s-agentpool1-15159885-vmss000000": { + "prev_sent_record_time": "2019-06-19T19:41:39Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:41:39Z", + "prev_records": [ + { + "details": { + "none": [ + "node_memory_utilization-fbf8e2b103dce1d6b0adefda04bfc87c" + ], + "pass": [ + "node_cpu_utilization-fbf8e2b103dce1d6b0adefda04bfc87c", + "node_condition-fbf8e2b103dce1d6b0adefda04bfc87c" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:39Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "node--master-k8s-master-15159885-0": { + "prev_sent_record_time": "2019-06-19T19:42:02Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:42:02Z", + "prev_records": [ + { + "details": { + "pass": [ + "node_memory_utilization-d31bde3d37d8ad276db34f17aa1ec71a", + "node_cpu_utilization-d31bde3d37d8ad276db34f17aa1ec71a", + "node_condition-d31bde3d37d8ad276db34f17aa1ec71a" + ] + }, + "state": "pass", + "timestamp": 
"2019-06-19T19:44:02Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "master_node_pool": { + "prev_sent_record_time": "2019-06-19T19:42:02Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:42:02Z", + "prev_records": [ + { + "details": { + "pass": [ + "node--master-k8s-master-15159885-0" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:44:02Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "k8s_infrastructure": { + "prev_sent_record_time": "2019-06-19T19:43:13Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:43:13Z", + "prev_records": [ + { + "details": { + "pass": [ + "kube_api_status", + "system_workload-kube-system-azure-cni-networkmonitor", + "system_workload-kube-system-azure-ip-masq-agent", + "system_workload-kube-system-blobfuse-flexvol-installer", + "system_workload-kube-system-coredns", + "system_workload-kube-system-heapster", + "system_workload-kube-system-keyvault-flexvolume", + "system_workload-kube-system-kube-addon-manager-k8s-master-15159885-0", + "system_workload-kube-system-kube-apiserver-k8s-master-15159885-0", + "system_workload-kube-system-kube-controller-manager-k8s-master-15159885-0", + "system_workload-kube-system-kube-proxy", + "system_workload-kube-system-kube-scheduler-k8s-master-15159885-0", + "system_workload-kube-system-kubernetes-dashboard", + "system_workload-kube-system-metrics-server", + "system_workload-kube-system-omsagent", + "system_workload-kube-system-omsagent-rs", + "system_workload-kube-system-tiller-deploy" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "capacity": { + "prev_sent_record_time": "2019-06-19T19:43:13Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:43:13Z", + "prev_records": [ + { + "details": { + "pass": [ + 
"subscribed_capacity_cpu-e07de265a0a132be38e486491b78067c", + "subscribed_capacity_memory-e07de265a0a132be38e486491b78067c" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:45:15Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "all_workloads": { + "prev_sent_record_time": "2019-06-19T19:43:13Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-06-19T19:43:13Z", + "prev_records": [ + { + "details": { + "pass": [ + "capacity", + "all_namespaces" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-azure-cni-networkmonitor": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-df01a18f5517897dad95f0b999cc9d7c" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-azure-ip-masq-agent": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-68739da95e81bcf8ab005699ca388a4f" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-blobfuse-flexvol-installer": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-391488c4aaa8dcbd64beca1405a617ad" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + 
"is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-coredns": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-88bb1b25c5f62cca4c5e335b1aa0a006" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-heapster": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-ffb237f52acc4a1cca7b61a080ad0bc7" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-keyvault-flexvolume": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-dbd2d551aaa5aa496c8828e1561fc877" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-kube-addon-manager-k8s-master-15159885-0": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-35ec6eb563b285b32803cc13fe31ac62" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-kube-apiserver-k8s-master-15159885-0": { + "prev_sent_record_time": 
"2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-047a539a54ed0dba1c4d839202a66e71" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-kube-controller-manager-k8s-master-15159885-0": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-27e541d084fab4b79e80381168eead29" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-kube-proxy": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-b5563ebf777cfd7eba6b60219cc6290a" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-kube-scheduler-k8s-master-15159885-0": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-1f3148b833758c579f539036b695c2f0" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-kubernetes-dashboard": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { 
+ "details": { + "pass": [ + "system_workload_pods_ready-7c98478057c9066c415f39d201d13455" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-metrics-server": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-ed7573dae1701088a1d0cd3a8f492ed0" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-omsagent": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-d886e78b79832e7337133cafd1a21bcf" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-omsagent-rs": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-d32a394b18c639dd53e936d042fe36a4" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "system_workload-kube-system-tiller-deploy": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "system_workload_pods_ready-76eff81892d597e12372a28b77a66a73" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, 
+ "should_send": false + }, + "user_workload-default-diliprnodejsonlog": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "user_workload_pods_ready-ba138a5fed12485c414a6ce00a1d2626" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "namespace-default": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "user_workload-default-diliprnodejsonlog" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + }, + "all_namespaces": { + "prev_sent_record_time": "2019-07-17T18:02:15Z", + "old_state": "none", + "new_state": "pass", + "state_change_time": "2019-07-17T18:02:15Z", + "prev_records": [ + { + "details": { + "pass": [ + "namespace-default" + ] + }, + "state": "pass", + "timestamp": "2019-06-19T19:43:13Z" + } + ], + "is_state_change_consistent": false, + "should_send": false + } +} \ No newline at end of file diff --git a/health_records/second_daemon_set_signals.json b/health_records/second_daemon_set_signals.json new file mode 100644 index 000000000..ba97c51f2 --- /dev/null +++ b/health_records/second_daemon_set_signals.json @@ -0,0 +1,456 @@ +[ + { + "MonitorId": "node_memory_utilization", + "MonitorInstanceId": "node_memory_utilization-af2f3c986ea63b47fc7d59b71abb37b8", + "Details": { + "timestamp": "2019-06-08T00:32:03Z", + "state": "pass", + "details": { + "memoryRssBytes": 927559680.0, + "memoryUtilizationPercentage": 25.7 + } + }, + "AgentCollectionTime": "2019-06-08T00:32:05Z", + "TimeFirstObserved": "2019-06-08T00:32:05Z", + "NodeName": "aks-nodepool1-19574989-1" + }, + { + "MonitorId": 
"node_cpu_utilization", + "MonitorInstanceId": "node_cpu_utilization-af2f3c986ea63b47fc7d59b71abb37b8", + "Details": { + "timestamp": "2019-06-08T00:32:03Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 251.33592345, + "cpuUtilizationPercentage": 25.13 + } + }, + "AgentCollectionTime": "2019-06-08T00:32:05Z", + "TimeFirstObserved": "2019-06-08T00:32:05Z", + "NodeName": "aks-nodepool1-19574989-1" + }, + { + "MonitorId": "node_memory_utilization", + "MonitorInstanceId": "node_memory_utilization-2b129a9a5633c0cf8f621601c6f8bb32", + "Details": { + "timestamp": "2019-06-08T00:32:37Z", + "state": "pass", + "details": { + "memoryRssBytes": 748142592.0, + "memoryUtilizationPercentage": 20.73 + } + }, + "AgentCollectionTime": "2019-06-08T00:32:42Z", + "TimeFirstObserved": "2019-06-08T00:32:42Z", + "NodeName": "aks-nodepool1-19574989-0" + }, + { + "MonitorId": "node_cpu_utilization", + "MonitorInstanceId": "node_cpu_utilization-2b129a9a5633c0cf8f621601c6f8bb32", + "Details": { + "timestamp": "2019-06-08T00:32:37Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 207.61622447540984, + "cpuUtilizationPercentage": 20.76 + } + }, + "AgentCollectionTime": "2019-06-08T00:32:42Z", + "TimeFirstObserved": "2019-06-08T00:32:42Z", + "NodeName": "aks-nodepool1-19574989-0" + }, + { + "MonitorId": "kube_api_status", + "MonitorInstanceId": "kube_api_status", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "content-type": "application/json", + "date": "Sat, 08 Jun 2019 00:32:49 GMT", + "connection": "close", + "transfer-encoding": "chunked", + "ResponseCode": "200" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "subscribed_capacity_cpu", + "MonitorInstanceId": 
"subscribed_capacity_cpu-bef5af9d919a51c49ba49d07f5784471", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "clusterCpuCapacity": 4000.0, + "clusterCpuRequests": 878.0 + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "subscribed_capacity_memory", + "MonitorInstanceId": "subscribed_capacity_memory-bef5af9d919a51c49ba49d07f5784471", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "clusterMemoryCapacity": 14436810752.0, + "clusterMemoryRequests": 1379926016.0 + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-1cab3e34a98cbb58a619846d017333e6", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~heapster", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-a0ed056caebdc40d1c78a49e87306eb8", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + 
"workloadName": "kube-system~~kube-dns-autoscaler", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-2fa184d5953cc675f553f22e673a3112", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~kube-dns-v20", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-3a08ca7a85c5f7b2eb912692aa0fe576", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~kube-proxy", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-b187658b280129d5d55e778e90bc76e7", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": 
"kube-system~~kube-svc-redirect", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-3cd871884683b44eaf3d4bcbac980fa9", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kubernetes-dashboard", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-e22a2867e6f46868e1ff14b04d05400a", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~metrics-server", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-e1a23f69ff6f2d20614f3253f1be61c6", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~omsagent", + 
"namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-b6bdfa57cc73499c0163debad116ea9e", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~omsagent-rs", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-a69167a8c8a128025d2a1b7adbbbf251", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~tunnelfront", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-18c49e512366cd770ea58fa6f2fbb72e", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "default~~diliprdeploymentnodeapps", + "namespace": "default", + "workloadKind": 
"ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-c36b41dc3ddfd8830e5cef31b5c2738b", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "default~~nginx-deployment", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-8be6589a87324bd8010f0583b6e3c0a1", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "default~~rss-site", + "namespace": "default", + "workloadKind": "Pod" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-d8ce32d942df13b9f4b96ed0fef8efd9", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 10, + "podsReady": 10, + "workloadName": "default~~vishwadeploymentnodeapps", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + 
"TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-ea6e90712222c453f12f326f9a382e8e", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "test~~nginx-deployment", + "namespace": "test", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-2b129a9a5633c0cf8f621601c6f8bb32", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "NetworkUnavailable": { + "Reason": "RouteCreated", + "Message": "RouteController created a route" + }, + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "NodeName": "aks-nodepool1-19574989-0" + }, + { + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-af2f3c986ea63b47fc7d59b71abb37b8", + "Details": { + "timestamp": "2019-06-08T00:32:49Z", + "state": "pass", + "details": { + "NetworkUnavailable": { + "Reason": "RouteCreated", + "Message": "RouteController created a route" + }, + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + }, + "AgentCollectionTime": "2019-06-08T00:32:49Z", + "TimeFirstObserved": "2019-06-08T00:32:49Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "NodeName": "aks-nodepool1-19574989-1" + } +] \ No newline at end of file diff --git a/health_records/third_daemon_set_signals.json b/health_records/third_daemon_set_signals.json new file mode 100644 index 000000000..cae617283 --- /dev/null +++ b/health_records/third_daemon_set_signals.json @@ -0,0 +1,456 @@ +[ + { + "MonitorId": "node_memory_utilization", + "MonitorInstanceId": "node_memory_utilization-af2f3c986ea63b47fc7d59b71abb37b8", + "Details": { + "timestamp": "2019-06-08T00:33:03Z", + "state": "pass", + "details": { + "memoryRssBytes": 930779136.0, + "memoryUtilizationPercentage": 25.79 + } + }, + "AgentCollectionTime": "2019-06-08T00:33:05Z", + "TimeFirstObserved": "2019-06-08T00:33:05Z", + "NodeName": "aks-nodepool1-19574989-1" + }, + { + "MonitorId": "node_cpu_utilization", + "MonitorInstanceId": "node_cpu_utilization-af2f3c986ea63b47fc7d59b71abb37b8", + "Details": { + "timestamp": "2019-06-08T00:33:03Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 254.57922858333333, + "cpuUtilizationPercentage": 25.46 + } + }, + "AgentCollectionTime": "2019-06-08T00:33:05Z", + "TimeFirstObserved": "2019-06-08T00:33:05Z", + "NodeName": "aks-nodepool1-19574989-1" + }, + { + "MonitorId": "node_memory_utilization", + "MonitorInstanceId": "node_memory_utilization-2b129a9a5633c0cf8f621601c6f8bb32", + "Details": { + "timestamp": "2019-06-08T00:33:37Z", + "state": "pass", + "details": { + "memoryRssBytes": 749514752.0, + "memoryUtilizationPercentage": 20.77 + } + }, + "AgentCollectionTime": "2019-06-08T00:33:42Z", + "TimeFirstObserved": "2019-06-08T00:33:42Z", + "NodeName": "aks-nodepool1-19574989-0" + }, + { + "MonitorId": "node_cpu_utilization", + 
"MonitorInstanceId": "node_cpu_utilization-2b129a9a5633c0cf8f621601c6f8bb32", + "Details": { + "timestamp": "2019-06-08T00:33:37Z", + "state": "pass", + "details": { + "cpuUsageMillicores": 214.84893721666666, + "cpuUtilizationPercentage": 21.48 + } + }, + "AgentCollectionTime": "2019-06-08T00:33:42Z", + "TimeFirstObserved": "2019-06-08T00:33:42Z", + "NodeName": "aks-nodepool1-19574989-0" + }, + { + "MonitorId": "kube_api_status", + "MonitorInstanceId": "kube_api_status", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "content-type": "application/json", + "date": "Sat, 08 Jun 2019 00:33:50 GMT", + "connection": "close", + "transfer-encoding": "chunked", + "ResponseCode": "200" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "subscribed_capacity_cpu", + "MonitorInstanceId": "subscribed_capacity_cpu-bef5af9d919a51c49ba49d07f5784471", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "clusterCpuCapacity": 4000.0, + "clusterCpuRequests": 878.0 + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "subscribed_capacity_memory", + "MonitorInstanceId": "subscribed_capacity_memory-bef5af9d919a51c49ba49d07f5784471", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "clusterMemoryCapacity": 14436810752.0, + "clusterMemoryRequests": 1379926016.0 + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + 
"ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-1cab3e34a98cbb58a619846d017333e6", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~heapster", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-a0ed056caebdc40d1c78a49e87306eb8", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kube-dns-autoscaler", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-2fa184d5953cc675f553f22e673a3112", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~kube-dns-v20", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-3a08ca7a85c5f7b2eb912692aa0fe576", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~kube-proxy", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-b187658b280129d5d55e778e90bc76e7", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~kube-svc-redirect", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-3cd871884683b44eaf3d4bcbac980fa9", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~kubernetes-dashboard", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-e22a2867e6f46868e1ff14b04d05400a", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~metrics-server", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-e1a23f69ff6f2d20614f3253f1be61c6", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "kube-system~~omsagent", + "namespace": "kube-system", + "workloadKind": "DaemonSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-b6bdfa57cc73499c0163debad116ea9e", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~omsagent-rs", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "system_workload_pods_ready", + "MonitorInstanceId": "system_workload_pods_ready-a69167a8c8a128025d2a1b7adbbbf251", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "kube-system~~tunnelfront", + "namespace": "kube-system", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-18c49e512366cd770ea58fa6f2fbb72e", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "default~~diliprdeploymentnodeapps", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-c36b41dc3ddfd8830e5cef31b5c2738b", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 3, + "podsReady": 3, + "workloadName": "default~~nginx-deployment", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-8be6589a87324bd8010f0583b6e3c0a1", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 1, + "podsReady": 1, + "workloadName": "default~~rss-site", + "namespace": "default", + "workloadKind": "Pod" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-d8ce32d942df13b9f4b96ed0fef8efd9", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 10, + "podsReady": 10, + "workloadName": "default~~vishwadeploymentnodeapps", + "namespace": "default", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "user_workload_pods_ready", + "MonitorInstanceId": "user_workload_pods_ready-ea6e90712222c453f12f326f9a382e8e", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "totalPods": 2, + "podsReady": 2, + "workloadName": "test~~nginx-deployment", + "namespace": "test", + "workloadKind": "ReplicaSet" + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-2b129a9a5633c0cf8f621601c6f8bb32", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "NetworkUnavailable": { + "Reason": "RouteCreated", + "Message": "RouteController created a route" + }, + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "NodeName": "aks-nodepool1-19574989-0" + }, + { + "MonitorId": "node_condition", + "MonitorInstanceId": "node_condition-af2f3c986ea63b47fc7d59b71abb37b8", + "Details": { + "timestamp": "2019-06-08T00:33:51Z", + "state": "pass", + "details": { + "NetworkUnavailable": { + "Reason": "RouteCreated", + "Message": "RouteController created a route" + }, + "OutOfDisk": { + "Reason": "KubeletHasSufficientDisk", + "Message": "kubelet has sufficient disk space available" + }, + "MemoryPressure": { + "Reason": "KubeletHasSufficientMemory", + "Message": "kubelet has sufficient memory available" + }, + "DiskPressure": { + "Reason": "KubeletHasNoDiskPressure", + "Message": "kubelet has no disk pressure" + }, + "PIDPressure": { + "Reason": "KubeletHasSufficientPID", + "Message": "kubelet has sufficient PID available" + }, + "Ready": { + "Reason": "KubeletReady", + "Message": "kubelet is posting ready status. 
AppArmor enabled" + } + } + }, + "AgentCollectionTime": "2019-06-08T00:33:51Z", + "TimeFirstObserved": "2019-06-08T00:33:51Z", + "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", + "NodeName": "aks-nodepool1-19574989-1" + } +] \ No newline at end of file diff --git a/inventory/aks-engine/deployments.json b/inventory/aks-engine/deployments.json new file mode 100644 index 000000000..a356331b0 --- /dev/null +++ b/inventory/aks-engine/deployments.json @@ -0,0 +1,1139 @@ +{ + "items": [ + { + "metadata": { + "name": "diliprnodejsonlog", + "namespace": "default", + "selfLink": "/apis/extensions/v1beta1/namespaces/default/deployments/diliprnodejsonlog", + "uid": "ecc14148-66ec-11e9-a358-000d3a53d49f", + "resourceVersion": "7362279", + "generation": 1, + "creationTimestamp": "2019-04-24T23:58:58Z", + "labels": { + "VishwadeploymentLabel1": "d1", + "VishwadeploymentLabel2": "d2" + }, + "annotations": { + "deployment.kubernetes.io/revision": "1", + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"apps/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"VishwadeploymentLabel1\":\"d1\",\"VishwadeploymentLabel2\":\"d2\"},\"name\":\"diliprnodejsonlog\",\"namespace\":\"default\"},\"spec\":{\"replicas\":5,\"selector\":{\"matchLabels\":{\"app\":\"diliprnodejsonlog\"}},\"template\":{\"metadata\":{\"labels\":{\"VishwaPodLabel1\":\"p1\",\"VishwaPodLabel2\":\"p2\",\"app\":\"diliprnodejsonlog\"}},\"spec\":{\"containers\":[{\"image\":\"rdilip83/jsonlogger:v12\",\"name\":\"diliprnodejsonlogpodapp\",\"resources\":{\"limits\":{\"cpu\":\"20m\",\"memory\":\"60Mi\"},\"requests\":{\"cpu\":\"10m\",\"memory\":\"30Mi\"}}}]}}}}\n" + } + }, + "spec": { + "replicas": 5, + "selector": { + "matchLabels": { + "app": "diliprnodejsonlog" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + 
"VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "diliprnodejsonlog" + } + }, + "spec": { + "containers": [ + { + "name": "diliprnodejsonlogpodapp", + "image": "rdilip83/jsonlogger:v12", + "resources": { + "limits": { + "cpu": "20m", + "memory": "60Mi" + }, + "requests": { + "cpu": "10m", + "memory": "30Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "securityContext": {}, + "schedulerName": "default-scheduler" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": "25%", + "maxSurge": "25%" + } + }, + "revisionHistoryLimit": 2, + "progressDeadlineSeconds": 600 + }, + "status": { + "observedGeneration": 1, + "replicas": 5, + "updatedReplicas": 5, + "readyReplicas": 5, + "availableReplicas": 5, + "conditions": [ + { + "type": "Progressing", + "status": "True", + "lastUpdateTime": "2019-04-24T23:59:36Z", + "lastTransitionTime": "2019-04-24T23:58:58Z", + "reason": "NewReplicaSetAvailable", + "message": "ReplicaSet \"diliprnodejsonlog-5959b88bf7\" has successfully progressed." + }, + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-06-14T07:59:50Z", + "lastTransitionTime": "2019-06-14T07:59:50Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum availability." 
+ } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "coredns", + "namespace": "kube-system", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/coredns", + "uid": "3c60dd90-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "675", + "generation": 1, + "creationTimestamp": "2019-04-17T04:28:22Z", + "labels": { + "addonmanager.kubernetes.io/mode": "Reconcile", + "k8s-app": "kube-dns", + "kubernetes.io/cluster-service": "true", + "kubernetes.io/name": "CoreDNS" + }, + "annotations": { + "deployment.kubernetes.io/revision": "1", + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"kube-dns\",\"kubernetes.io/cluster-service\":\"true\",\"kubernetes.io/name\":\"CoreDNS\"},\"name\":\"coredns\",\"namespace\":\"kube-system\"},\"spec\":{\"selector\":{\"matchLabels\":{\"k8s-app\":\"kube-dns\"}},\"strategy\":{\"rollingUpdate\":{\"maxUnavailable\":1},\"type\":\"RollingUpdate\"},\"template\":{\"metadata\":{\"annotations\":{\"seccomp.security.alpha.kubernetes.io/pod\":\"docker/default\"},\"labels\":{\"k8s-app\":\"kube-dns\"}},\"spec\":{\"affinity\":{\"podAntiAffinity\":{\"preferredDuringSchedulingIgnoredDuringExecution\":[{\"podAffinityTerm\":{\"labelSelector\":{\"matchExpressions\":[{\"key\":\"k8s-app\",\"operator\":\"In\",\"values\":[\"kube-dns\"]}]},\"topologyKey\":\"failure-domain.beta.kubernetes.io/zone\"},\"weight\":10},{\"podAffinityTerm\":{\"labelSelector\":{\"matchExpressions\":[{\"key\":\"k8s-app\",\"operator\":\"In\",\"values\":[\"kube-dns\"]}]},\"topologyKey\":\"kubernetes.io/hostname\"},\"weight\":5}]}},\"containers\":[{\"args\":[\"-conf\",\"/etc/coredns/Corefile\"],\"image\":\"k8s.gcr.io/coredns:1.2.2\",\"imagePullPolicy\":\"IfNotPresent\",\"livenessProbe\":{\"failureThreshold\":5,\"httpGet\":{\"path\":\"/health\",\"p
ort\":8080,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":60,\"successThreshold\":1,\"timeoutSeconds\":5},\"name\":\"coredns\",\"ports\":[{\"containerPort\":53,\"name\":\"dns\",\"protocol\":\"UDP\"},{\"containerPort\":53,\"name\":\"dns-tcp\",\"protocol\":\"TCP\"},{\"containerPort\":9153,\"name\":\"metrics\",\"protocol\":\"TCP\"}],\"resources\":{\"limits\":{\"memory\":\"170Mi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"70Mi\"}},\"securityContext\":{\"allowPrivilegeEscalation\":false,\"capabilities\":{\"add\":[\"NET_BIND_SERVICE\"],\"drop\":[\"all\"]},\"readOnlyRootFilesystem\":true},\"volumeMounts\":[{\"mountPath\":\"/etc/coredns\",\"name\":\"config-volume\",\"readOnly\":true},{\"mountPath\":\"/tmp\",\"name\":\"tmp\"}]}],\"dnsPolicy\":\"Default\",\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"coredns\",\"tolerations\":[{\"effect\":\"NoSchedule\",\"key\":\"node-role.kubernetes.io/master\"},{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Exists\"},{\"effect\":\"NoExecute\",\"operator\":\"Exists\"},{\"effect\":\"NoSchedule\",\"operator\":\"Exists\"}],\"volumes\":[{\"emptyDir\":{},\"name\":\"tmp\"},{\"configMap\":{\"items\":[{\"key\":\"Corefile\",\"path\":\"Corefile\"}],\"name\":\"coredns\"},\"name\":\"config-volume\"}]}}}}\n" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "k8s-app": "kube-dns" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "k8s-app": "kube-dns" + }, + "annotations": { + "seccomp.security.alpha.kubernetes.io/pod": "docker/default" + } + }, + "spec": { + "volumes": [ + { + "name": "tmp", + "emptyDir": {} + }, + { + "name": "config-volume", + "configMap": { + "name": "coredns", + "items": [ + { + "key": "Corefile", + "path": "Corefile" + } + ], + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "coredns", + "image": "k8s.gcr.io/coredns:1.2.2", + "args": [ + "-conf", + "/etc/coredns/Corefile" + ], 
+ "ports": [ + { + "name": "dns", + "containerPort": 53, + "protocol": "UDP" + }, + { + "name": "dns-tcp", + "containerPort": 53, + "protocol": "TCP" + }, + { + "name": "metrics", + "containerPort": 9153, + "protocol": "TCP" + } + ], + "resources": { + "limits": { + "memory": "170Mi" + }, + "requests": { + "cpu": "100m", + "memory": "70Mi" + } + }, + "volumeMounts": [ + { + "name": "config-volume", + "readOnly": true, + "mountPath": "/etc/coredns" + }, + { + "name": "tmp", + "mountPath": "/tmp" + } + ], + "livenessProbe": { + "httpGet": { + "path": "/health", + "port": 8080, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 5 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "capabilities": { + "add": [ + "NET_BIND_SERVICE" + ], + "drop": [ + "all" + ] + }, + "readOnlyRootFilesystem": true, + "allowPrivilegeEscalation": false, + "procMount": "Default" + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "Default", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "coredns", + "serviceAccount": "coredns", + "securityContext": {}, + "affinity": { + "podAntiAffinity": { + "preferredDuringSchedulingIgnoredDuringExecution": [ + { + "weight": 10, + "podAffinityTerm": { + "labelSelector": { + "matchExpressions": [ + { + "key": "k8s-app", + "operator": "In", + "values": [ + "kube-dns" + ] + } + ] + }, + "topologyKey": "failure-domain.beta.kubernetes.io/zone" + } + }, + { + "weight": 5, + "podAffinityTerm": { + "labelSelector": { + "matchExpressions": [ + { + "key": "k8s-app", + "operator": "In", + "values": [ + "kube-dns" + ] + } + ] + }, + "topologyKey": "kubernetes.io/hostname" + } + } + ] + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": 
"node-role.kubernetes.io/master", + "effect": "NoSchedule" + }, + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "operator": "Exists", + "effect": "NoExecute" + }, + { + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priorityClassName": "system-node-critical" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 1 + } + }, + "revisionHistoryLimit": 10, + "progressDeadlineSeconds": 2147483647 + }, + "status": { + "observedGeneration": 1, + "replicas": 1, + "updatedReplicas": 1, + "readyReplicas": 1, + "availableReplicas": 1, + "conditions": [ + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-04-17T04:28:23Z", + "lastTransitionTime": "2019-04-17T04:28:23Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum availability." + } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "heapster", + "namespace": "kube-system", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/heapster", + "uid": "3981201f-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "7362276", + "generation": 1, + "creationTimestamp": "2019-04-17T04:28:18Z", + "labels": { + "addonmanager.kubernetes.io/mode": "EnsureExists", + "k8s-app": "heapster", + "kubernetes.io/cluster-service": "true" + }, + "annotations": { + "deployment.kubernetes.io/revision": "1" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "k8s-app": "heapster" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "k8s-app": "heapster" + }, + "annotations": { + "scheduler.alpha.kubernetes.io/critical-pod": "" + } + }, + "spec": { + "volumes": [ + { + "name": "heapster-config-volume", + "configMap": { + "name": "heapster-config", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "heapster", + "image": "k8s.gcr.io/heapster-amd64:v1.5.4", + "command": [ + 
"/heapster", + "--source=kubernetes.summary_api:''" + ], + "resources": { + "limits": { + "cpu": "88m", + "memory": "204Mi" + }, + "requests": { + "cpu": "88m", + "memory": "204Mi" + } + }, + "livenessProbe": { + "httpGet": { + "path": "/healthz", + "port": 8082, + "scheme": "HTTP" + }, + "initialDelaySeconds": 180, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + }, + { + "name": "heapster-nanny", + "image": "k8s.gcr.io/addon-resizer:1.8.4", + "command": [ + "/pod_nanny", + "--config-dir=/etc/config", + "--cpu=80m", + "--extra-cpu=0.5m", + "--memory=140Mi", + "--extra-memory=4Mi", + "--threshold=5", + "--deployment=heapster", + "--container=heapster", + "--poll-period=300000", + "--estimator=exponential" + ], + "env": [ + { + "name": "MY_POD_NAME", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "metadata.name" + } + } + }, + { + "name": "MY_POD_NAMESPACE", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "metadata.namespace" + } + } + } + ], + "resources": { + "limits": { + "cpu": "88m", + "memory": "204Mi" + }, + "requests": { + "cpu": "88m", + "memory": "204Mi" + } + }, + "volumeMounts": [ + { + "name": "heapster-config-volume", + "mountPath": "/etc/config" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "heapster", + "serviceAccount": "heapster", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + } + ], + "priorityClassName": "system-node-critical" + } + }, + "strategy": { + 
"type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 1 + } + }, + "revisionHistoryLimit": 10, + "progressDeadlineSeconds": 2147483647 + }, + "status": { + "observedGeneration": 1, + "replicas": 1, + "updatedReplicas": 1, + "readyReplicas": 1, + "availableReplicas": 1, + "conditions": [ + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-04-17T04:28:18Z", + "lastTransitionTime": "2019-04-17T04:28:18Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum availability." + } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "kubernetes-dashboard", + "namespace": "kube-system", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/kubernetes-dashboard", + "uid": "3cef280a-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "5110361", + "generation": 1, + "creationTimestamp": "2019-04-17T04:28:23Z", + "labels": { + "addonmanager.kubernetes.io/mode": "Reconcile", + "k8s-app": "kubernetes-dashboard", + "kubernetes.io/cluster-service": "true" + }, + "annotations": { + "deployment.kubernetes.io/revision": "1", + "kubectl.kubernetes.io/last-applied-configuration": 
"{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"kubernetes-dashboard\",\"kubernetes.io/cluster-service\":\"true\"},\"name\":\"kubernetes-dashboard\",\"namespace\":\"kube-system\"},\"spec\":{\"replicas\":1,\"selector\":{\"matchLabels\":{\"k8s-app\":\"kubernetes-dashboard\"}},\"template\":{\"metadata\":{\"labels\":{\"k8s-app\":\"kubernetes-dashboard\"}},\"spec\":{\"containers\":[{\"args\":[\"--auto-generate-certificates\",\"--heapster-host=http://heapster.kube-system:80\"],\"image\":\"k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1\",\"imagePullPolicy\":\"IfNotPresent\",\"livenessProbe\":{\"httpGet\":{\"path\":\"/\",\"port\":8443,\"scheme\":\"HTTPS\"},\"initialDelaySeconds\":30,\"timeoutSeconds\":30},\"name\":\"kubernetes-dashboard\",\"ports\":[{\"containerPort\":8443,\"protocol\":\"TCP\"}],\"resources\":{\"limits\":{\"cpu\":\"300m\",\"memory\":\"150Mi\"},\"requests\":{\"cpu\":\"300m\",\"memory\":\"150Mi\"}},\"volumeMounts\":[{\"mountPath\":\"/certs\",\"name\":\"kubernetes-dashboard-certs\"}]}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"serviceAccountName\":\"kubernetes-dashboard\",\"volumes\":[{\"emptyDir\":{},\"name\":\"kubernetes-dashboard-certs\"}]}}}}\n" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "k8s-app": "kubernetes-dashboard" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "k8s-app": "kubernetes-dashboard" + } + }, + "spec": { + "volumes": [ + { + "name": "kubernetes-dashboard-certs", + "emptyDir": {} + } + ], + "containers": [ + { + "name": "kubernetes-dashboard", + "image": "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1", + "args": [ + "--auto-generate-certificates", + "--heapster-host=http://heapster.kube-system:80" + ], + "ports": [ + { + "containerPort": 8443, + "protocol": "TCP" + } + ], + "resources": { + "limits": { + "cpu": "300m", + "memory": 
"150Mi" + }, + "requests": { + "cpu": "300m", + "memory": "150Mi" + } + }, + "volumeMounts": [ + { + "name": "kubernetes-dashboard-certs", + "mountPath": "/certs" + } + ], + "livenessProbe": { + "httpGet": { + "path": "/", + "port": 8443, + "scheme": "HTTPS" + }, + "initialDelaySeconds": 30, + "timeoutSeconds": 30, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "kubernetes-dashboard", + "serviceAccount": "kubernetes-dashboard", + "securityContext": {}, + "schedulerName": "default-scheduler" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 1 + } + }, + "revisionHistoryLimit": 10, + "progressDeadlineSeconds": 2147483647 + }, + "status": { + "observedGeneration": 1, + "replicas": 1, + "updatedReplicas": 1, + "readyReplicas": 1, + "availableReplicas": 1, + "conditions": [ + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-04-17T04:28:24Z", + "lastTransitionTime": "2019-04-17T04:28:24Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum availability." 
+ } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "metrics-server", + "namespace": "kube-system", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/metrics-server", + "uid": "3caeca05-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "5110358", + "generation": 1, + "creationTimestamp": "2019-04-17T04:28:23Z", + "labels": { + "addonmanager.kubernetes.io/mode": "Reconcile", + "k8s-app": "metrics-server", + "kubernetes.io/cluster-service": "true" + }, + "annotations": { + "deployment.kubernetes.io/revision": "1", + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"metrics-server\",\"kubernetes.io/cluster-service\":\"true\"},\"name\":\"metrics-server\",\"namespace\":\"kube-system\"},\"spec\":{\"selector\":{\"matchLabels\":{\"k8s-app\":\"metrics-server\"}},\"template\":{\"metadata\":{\"labels\":{\"k8s-app\":\"metrics-server\"},\"name\":\"metrics-server\"},\"spec\":{\"containers\":[{\"command\":[\"/metrics-server\",\"--source=kubernetes.summary_api:''\"],\"image\":\"k8s.gcr.io/metrics-server-amd64:v0.2.1\",\"imagePullPolicy\":\"IfNotPresent\",\"name\":\"metrics-server\"}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"serviceAccountName\":\"metrics-server\"}}}}\n" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "k8s-app": "metrics-server" + } + }, + "template": { + "metadata": { + "name": "metrics-server", + "creationTimestamp": null, + "labels": { + "k8s-app": "metrics-server" + } + }, + "spec": { + "containers": [ + { + "name": "metrics-server", + "image": "k8s.gcr.io/metrics-server-amd64:v0.2.1", + "command": [ + "/metrics-server", + "--source=kubernetes.summary_api:''" + ], + "resources": {}, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": 
"File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "metrics-server", + "serviceAccount": "metrics-server", + "securityContext": {}, + "schedulerName": "default-scheduler" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 1 + } + }, + "revisionHistoryLimit": 10, + "progressDeadlineSeconds": 2147483647 + }, + "status": { + "observedGeneration": 1, + "replicas": 1, + "updatedReplicas": 1, + "readyReplicas": 1, + "availableReplicas": 1, + "conditions": [ + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-04-17T04:28:23Z", + "lastTransitionTime": "2019-04-17T04:28:23Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum availability." + } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "omsagent-rs", + "namespace": "kube-system", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/omsagent-rs", + "uid": "83ab437f-925b-11e9-a358-000d3a53d49f", + "resourceVersion": "7987241", + "generation": 1, + "creationTimestamp": "2019-06-19T06:28:56Z", + "labels": { + "rsName": "omsagent-rs" + }, + "annotations": { + "deployment.kubernetes.io/revision": "1", + "kubectl.kubernetes.io/last-applied-configuration": 
"{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"name\":\"omsagent-rs\",\"namespace\":\"kube-system\"},\"spec\":{\"replicas\":1,\"selector\":{\"matchLabels\":{\"rsName\":\"omsagent-rs\"}},\"strategy\":{\"type\":\"RollingUpdate\"},\"template\":{\"metadata\":{\"annotations\":{\"agentVersion\":\"1.10.0.1\",\"dockerProviderVersion\":\"5.0.0-1\",\"schema-versions\":\"v1\"},\"labels\":{\"rsName\":\"omsagent-rs\"}},\"spec\":{\"containers\":[{\"env\":[{\"name\":\"AKS_RESOURCE_ID\",\"value\":\"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview\"},{\"name\":\"AKS_REGION\",\"value\":\"eastus\"},{\"name\":\"DISABLE_KUBE_SYSTEM_LOG_COLLECTION\",\"value\":\"true\"},{\"name\":\"CONTROLLER_TYPE\",\"value\":\"ReplicaSet\"},{\"name\":\"NODE_IP\",\"valueFrom\":{\"fieldRef\":{\"fieldPath\":\"status.hostIP\"}}}],\"image\":\"rdilip83/healthpreview06192019\",\"imagePullPolicy\":\"IfNotPresent\",\"livenessProbe\":{\"exec\":{\"command\":[\"/bin/bash\",\"-c\",\"ps -ef | grep 
main\"]},\"initialDelaySeconds\":60,\"periodSeconds\":60},\"name\":\"omsagent\",\"ports\":[{\"containerPort\":25225,\"protocol\":\"TCP\"},{\"containerPort\":25224,\"protocol\":\"UDP\"},{\"containerPort\":25235,\"name\":\"in-rs-tcp\",\"protocol\":\"TCP\"}],\"resources\":{\"limits\":{\"cpu\":\"150m\",\"memory\":\"500Mi\"},\"requests\":{\"cpu\":\"50m\",\"memory\":\"175Mi\"}},\"securityContext\":{\"privileged\":true},\"volumeMounts\":[{\"mountPath\":\"/var/run/host\",\"name\":\"docker-sock\"},{\"mountPath\":\"/var/log\",\"name\":\"host-log\"},{\"mountPath\":\"/var/lib/docker/containers\",\"name\":\"containerlog-path\"},{\"mountPath\":\"/etc/kubernetes/host\",\"name\":\"azure-json-path\"},{\"mountPath\":\"/etc/omsagent-secret\",\"name\":\"omsagent-secret\",\"readOnly\":true},{\"mountPath\":\"/etc/config\",\"name\":\"omsagent-rs-config\"},{\"mountPath\":\"/etc/config/settings\",\"name\":\"settings-vol-config\",\"readOnly\":true},{\"mountPath\":\"/mnt/azure\",\"name\":\"azurefile-pv\"}]}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\",\"kubernetes.io/role\":\"agent\"},\"serviceAccountName\":\"omsagent\",\"volumes\":[{\"hostPath\":{\"path\":\"/var/run\"},\"name\":\"docker-sock\"},{\"hostPath\":{\"path\":\"/etc/hostname\"},\"name\":\"container-hostname\"},{\"hostPath\":{\"path\":\"/var/log\"},\"name\":\"host-log\"},{\"hostPath\":{\"path\":\"/var/lib/docker/containers\"},\"name\":\"containerlog-path\"},{\"hostPath\":{\"path\":\"/etc/kubernetes\"},\"name\":\"azure-json-path\"},{\"name\":\"omsagent-secret\",\"secret\":{\"secretName\":\"omsagent-secret\"}},{\"configMap\":{\"name\":\"omsagent-rs-config\"},\"name\":\"omsagent-rs-config\"},{\"configMap\":{\"name\":\"container-azm-ms-agentconfig\",\"optional\":true},\"name\":\"settings-vol-config\"},{\"name\":\"azurefile-pv\",\"persistentVolumeClaim\":{\"claimName\":\"azurefile\"}}]}}}}\n" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "rsName": "omsagent-rs" + } + }, + "template": { + "metadata": 
{ + "creationTimestamp": null, + "labels": { + "rsName": "omsagent-rs" + }, + "annotations": { + "agentVersion": "1.10.0.1", + "dockerProviderVersion": "5.0.0-1", + "schema-versions": "v1" + } + }, + "spec": { + "volumes": [ + { + "name": "docker-sock", + "hostPath": { + "path": "/var/run", + "type": "" + } + }, + { + "name": "container-hostname", + "hostPath": { + "path": "/etc/hostname", + "type": "" + } + }, + { + "name": "host-log", + "hostPath": { + "path": "/var/log", + "type": "" + } + }, + { + "name": "containerlog-path", + "hostPath": { + "path": "/var/lib/docker/containers", + "type": "" + } + }, + { + "name": "azure-json-path", + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + } + }, + { + "name": "omsagent-secret", + "secret": { + "secretName": "omsagent-secret", + "defaultMode": 420 + } + }, + { + "name": "omsagent-rs-config", + "configMap": { + "name": "omsagent-rs-config", + "defaultMode": 420 + } + }, + { + "name": "settings-vol-config", + "configMap": { + "name": "container-azm-ms-agentconfig", + "defaultMode": 420, + "optional": true + } + }, + { + "name": "azurefile-pv", + "persistentVolumeClaim": { + "claimName": "azurefile" + } + } + ], + "containers": [ + { + "name": "omsagent", + "image": "rdilip83/healthpreview06192019", + "ports": [ + { + "containerPort": 25225, + "protocol": "TCP" + }, + { + "containerPort": 25224, + "protocol": "UDP" + }, + { + "name": "in-rs-tcp", + "containerPort": 25235, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "AKS_RESOURCE_ID", + "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "name": "AKS_REGION", + "value": "eastus" + }, + { + "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", + "value": "true" + }, + { + "name": "CONTROLLER_TYPE", + "value": "ReplicaSet" + }, + { + "name": "NODE_IP", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": 
"status.hostIP" + } + } + } + ], + "resources": { + "limits": { + "cpu": "150m", + "memory": "500Mi" + }, + "requests": { + "cpu": "50m", + "memory": "175Mi" + } + }, + "volumeMounts": [ + { + "name": "docker-sock", + "mountPath": "/var/run/host" + }, + { + "name": "host-log", + "mountPath": "/var/log" + }, + { + "name": "containerlog-path", + "mountPath": "/var/lib/docker/containers" + }, + { + "name": "azure-json-path", + "mountPath": "/etc/kubernetes/host" + }, + { + "name": "omsagent-secret", + "readOnly": true, + "mountPath": "/etc/omsagent-secret" + }, + { + "name": "omsagent-rs-config", + "mountPath": "/etc/config" + }, + { + "name": "settings-vol-config", + "readOnly": true, + "mountPath": "/etc/config/settings" + }, + { + "name": "azurefile-pv", + "mountPath": "/mnt/azure" + } + ], + "livenessProbe": { + "exec": { + "command": [ + "/bin/bash", + "-c", + "ps -ef | grep main" + ] + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 1, + "periodSeconds": 60, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true, + "procMount": "Default" + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux", + "kubernetes.io/role": "agent" + }, + "serviceAccountName": "omsagent", + "serviceAccount": "omsagent", + "securityContext": {}, + "schedulerName": "default-scheduler" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 1 + } + }, + "revisionHistoryLimit": 10, + "progressDeadlineSeconds": 2147483647 + }, + "status": { + "observedGeneration": 1, + "replicas": 1, + "updatedReplicas": 1, + "readyReplicas": 1, + "availableReplicas": 1, + "conditions": [ + { + "type": "Available", + "status": "True", + "lastUpdateTime": 
"2019-06-19T06:28:56Z", + "lastTransitionTime": "2019-06-19T06:28:56Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum availability." + } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "tiller-deploy", + "namespace": "kube-system", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/tiller-deploy", + "uid": "39b60b02-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "7362285", + "generation": 1, + "creationTimestamp": "2019-04-17T04:28:18Z", + "labels": { + "addonmanager.kubernetes.io/mode": "EnsureExists", + "app": "helm", + "kubernetes.io/cluster-service": "true", + "name": "tiller" + }, + "annotations": { + "deployment.kubernetes.io/revision": "1" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "app": "helm", + "name": "tiller" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "app": "helm", + "name": "tiller" + } + }, + "spec": { + "containers": [ + { + "name": "tiller", + "image": "gcr.io/kubernetes-helm/tiller:v2.11.0", + "ports": [ + { + "name": "tiller", + "containerPort": 44134, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "TILLER_NAMESPACE", + "value": "kube-system" + }, + { + "name": "TILLER_HISTORY_MAX", + "value": "0" + } + ], + "resources": { + "limits": { + "cpu": "50m", + "memory": "150Mi" + }, + "requests": { + "cpu": "50m", + "memory": "150Mi" + } + }, + "livenessProbe": { + "httpGet": { + "path": "/liveness", + "port": 44135, + "scheme": "HTTP" + }, + "initialDelaySeconds": 1, + "timeoutSeconds": 1, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "readinessProbe": { + "httpGet": { + "path": "/readiness", + "port": 44135, + "scheme": "HTTP" + }, + "initialDelaySeconds": 1, + "timeoutSeconds": 1, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + 
"terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "tiller", + "serviceAccount": "tiller", + "securityContext": {}, + "schedulerName": "default-scheduler" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 1 + } + }, + "revisionHistoryLimit": 10, + "progressDeadlineSeconds": 2147483647 + }, + "status": { + "observedGeneration": 1, + "replicas": 1, + "updatedReplicas": 1, + "readyReplicas": 1, + "availableReplicas": 1, + "conditions": [ + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-04-17T04:28:18Z", + "lastTransitionTime": "2019-04-17T04:28:18Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum availability." + } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + } + ] +} \ No newline at end of file diff --git a/inventory/aks-engine/nodes.json b/inventory/aks-engine/nodes.json new file mode 100644 index 000000000..8e913ec20 --- /dev/null +++ b/inventory/aks-engine/nodes.json @@ -0,0 +1,1439 @@ +{ + "items": [ + { + "metadata": { + "name": "k8s-agentpool1-15159885-vmss000000", + "selfLink": "/api/v1/nodes/k8s-agentpool1-15159885-vmss000000", + "uid": "2f606f53-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "8059379", + "creationTimestamp": "2019-04-17T04:28:01Z", + "labels": { + "agentpool": "agentpool1", + "beta.kubernetes.io/arch": "amd64", + "beta.kubernetes.io/instance-type": "Standard_D2_v2", + "beta.kubernetes.io/os": "linux", + "failure-domain.beta.kubernetes.io/region": "eastus", + "failure-domain.beta.kubernetes.io/zone": "0", + "kubernetes.azure.com/cluster": "aks-engine-health", + "kubernetes.io/hostname": "k8s-agentpool1-15159885-vmss000000", + "kubernetes.io/role": "agent", + "node-role.kubernetes.io/agent": 
"", + "storageprofile": "managed", + "storagetier": "Standard_LRS" + }, + "annotations": { + "node.alpha.kubernetes.io/ttl": "0", + "volumes.kubernetes.io/controller-managed-attach-detach": "true" + } + }, + "spec": { + "providerID": "azure:///subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/aks-engine-health/providers/Microsoft.Compute/virtualMachineScaleSets/k8s-agentpool1-15159885-vmss/virtualMachines/0" + }, + "status": { + "capacity": { + "attachable-volumes-azure-disk": "8", + "cpu": "2", + "ephemeral-storage": "30428648Ki", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "7137036Ki", + "pods": "30" + }, + "allocatable": { + "attachable-volumes-azure-disk": "8", + "cpu": "2", + "ephemeral-storage": "28043041951", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "6369036Ki", + "pods": "30" + }, + "conditions": [ + { + "type": "OutOfDisk", + "status": "False", + "lastHeartbeatTime": "2019-06-19T20:09:57Z", + "lastTransitionTime": "2019-05-27T13:06:02Z", + "reason": "KubeletHasSufficientDisk", + "message": "kubelet has sufficient disk space available" + }, + { + "type": "MemoryPressure", + "status": "False", + "lastHeartbeatTime": "2019-06-19T20:09:57Z", + "lastTransitionTime": "2019-05-27T13:06:02Z", + "reason": "KubeletHasSufficientMemory", + "message": "kubelet has sufficient memory available" + }, + { + "type": "DiskPressure", + "status": "False", + "lastHeartbeatTime": "2019-06-19T20:09:57Z", + "lastTransitionTime": "2019-05-27T13:06:02Z", + "reason": "KubeletHasNoDiskPressure", + "message": "kubelet has no disk pressure" + }, + { + "type": "PIDPressure", + "status": "False", + "lastHeartbeatTime": "2019-06-19T20:09:57Z", + "lastTransitionTime": "2019-04-17T04:27:58Z", + "reason": "KubeletHasSufficientPID", + "message": "kubelet has sufficient PID available" + }, + { + "type": "Ready", + "status": "True", + "lastHeartbeatTime": "2019-06-19T20:09:57Z", + "lastTransitionTime": "2019-05-27T13:06:02Z", + "reason": 
"KubeletReady", + "message": "kubelet is posting ready status. AppArmor enabled" + } + ], + "addresses": [ + { + "type": "Hostname", + "address": "k8s-agentpool1-15159885-vmss000000" + }, + { + "type": "InternalIP", + "address": "10.240.0.34" + } + ], + "daemonEndpoints": { + "kubeletEndpoint": { + "Port": 10250 + } + }, + "nodeInfo": { + "machineID": "01beaa0c1a9c4687b859844363b7c73a", + "systemUUID": "32977732-40C8-5F45-A56F-043EB7178A9A", + "bootID": "08a81d2d-1223-4ce7-82f2-0fe218610052", + "kernelVersion": "4.15.0-1041-azure", + "osImage": "Ubuntu 16.04.6 LTS", + "containerRuntimeVersion": "docker://3.0.4", + "kubeletVersion": "v1.12.7", + "kubeProxyVersion": "v1.12.7", + "operatingSystem": "linux", + "architecture": "amd64" + }, + "images": [ + { + "names": [ + "rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", + "rdilip83/jsonlogger:v12" + ], + "sizeBytes": 676594134 + }, + { + "names": [ + "k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", + "k8s.gcr.io/hyperkube-amd64:v1.12.7" + ], + "sizeBytes": 635452178 + }, + { + "names": [ + "rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", + "rdilip83/healthpreview06192019:latest" + ], + "sizeBytes": 493799437 + }, + { + "names": [ + "microsoft/oms@sha256:7164890d6c2ec47a3588ee801e08a2e90e7ae650f5b4da3a1baf17de95e745b0", + "rdilip83/hp@sha256:7164890d6c2ec47a3588ee801e08a2e90e7ae650f5b4da3a1baf17de95e745b0", + "microsoft/oms:healthpreview04172019", + "rdilip83/hp:0417" + ], + "sizeBytes": 458385124 + }, + { + "names": [ + "microsoft/oms@sha256:042f0216394fd0709e384dceffc8ab0f8c983474313b63e9e27f98cf50825cee", + "microsoft/oms:healthpreview04152019" + ], + "sizeBytes": 458383728 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:b6834bb69e8fad88110b1dc57097a45bc79e6f2c5f2c2773c871d07389794771", + "k8s.gcr.io/cluster-autoscaler:v1.12.3" + ], + "sizeBytes": 
232229241 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:dc5744fd8c22aebfe40d6b62ab97d18d7bfbfc7ab1782509d69a5a9ec514df2c", + "k8s.gcr.io/cluster-autoscaler:v1.12.2" + ], + "sizeBytes": 232167833 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:e71851267764a068fbb091a4ef3bb874b5ce34db48cb757fcf77779f30ef0207", + "k8s.gcr.io/cluster-autoscaler:v1.3.7" + ], + "sizeBytes": 217353965 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:156b7b9bcba24ed474f67d0feaf27f2506013f15b030341bbd41c630283161b8", + "k8s.gcr.io/cluster-autoscaler:v1.3.4" + ], + "sizeBytes": 217264129 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:97896235bf66bde573d6f2ee150e212ea7010d314eb5d2cfb2ff1af93335db30", + "k8s.gcr.io/cluster-autoscaler:v1.3.3" + ], + "sizeBytes": 217259793 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:36a369ca4643542d501bce0addf8b903f2141ae9e2608662b77a3d24f01d7780", + "k8s.gcr.io/cluster-autoscaler:v1.2.2" + ], + "sizeBytes": 208688449 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:9a71e04fdb0be61f2087847b6c5d223db3de4768e0cf8941b550fe9d4a971f58", + "k8s.gcr.io/cluster-autoscaler:v1.1.2" + ], + "sizeBytes": 198265853 + }, + { + "names": [ + "containernetworking/azure-npm@sha256:4735da6dc0d5393d68be72498f5ce563cb930fa21b26faec8fdc844001057a56", + "containernetworking/azure-npm:v1.0.18" + ], + "sizeBytes": 170727162 + }, + { + "names": [ + "k8s.gcr.io/cloud-controller-manager-amd64@sha256:c5b5c835a4a435da69f226fc08e27d2cc52e5911acc8215bb2c6372932bee7c0", + "k8s.gcr.io/cloud-controller-manager-amd64:v1.12.7" + ], + "sizeBytes": 144153702 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:a4e5a8e6d4dc011e6e7a104d6abdfda56274b90357ee9f6e42cc22b70482420b", + "k8s.gcr.io/cluster-autoscaler:v1.14.0" + ], + "sizeBytes": 142102721 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:9dcbd91e79f33c44529de58a0024deb3da23a3a0bc7fd4d028c1255c68f62fb7", + 
"k8s.gcr.io/cluster-autoscaler:v1.13.2" + ], + "sizeBytes": 136684274 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:7ff5a60304b344f2f29c804c7253632bbc818794f6932236a56db107a6a8f5af", + "k8s.gcr.io/cluster-autoscaler:v1.13.1" + ], + "sizeBytes": 136618018 + }, + { + "names": [ + "containernetworking/networkmonitor@sha256:d875511410502c3e37804e1f313cc2b0a03d7a03d3d5e6adaf8994b753a76f8e", + "containernetworking/networkmonitor:v0.0.6" + ], + "sizeBytes": 123663837 + }, + { + "names": [ + "containernetworking/networkmonitor@sha256:944408a497c451b0e79d2596dc2e9fe5036cdbba7fa831bff024e1c9ed44190d", + "containernetworking/networkmonitor:v0.0.5" + ], + "sizeBytes": 122043325 + }, + { + "names": [ + "k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", + "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1" + ], + "sizeBytes": 121711221 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:3da3f17cd4f02fe5696f29a5e6cd4aef7111f20dab9bec54ea35942346cfeb60", + "k8s.gcr.io/kube-addon-manager-amd64:v8.8" + ], + "sizeBytes": 99631084 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:2fd1daf3d3cf0e94a753f2263b60dbb0d42b107b5cde0c75ee3fc5c830e016e4", + "k8s.gcr.io/kube-addon-manager-amd64:v8.9" + ], + "sizeBytes": 99240637 + }, + { + "names": [ + "microsoft/virtual-kubelet@sha256:9d2ac6238bb2b8b7a85a71ae6103c38bd387884519665f6f9d47fdc1fb8edb61", + "microsoft/virtual-kubelet:latest" + ], + "sizeBytes": 83395521 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:672794ee3582521eb8bc4f257d0f70c92893f1989f39a200f9c84bcfe1aea7c9", + "k8s.gcr.io/kube-addon-manager-amd64:v9.0" + ], + "sizeBytes": 83077558 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:3519273916ba45cfc9b318448d4629819cb5fbccbb0822cce054dd8c1f68cb60", + "k8s.gcr.io/kube-addon-manager-amd64:v8.6" + ], + "sizeBytes": 78384272 + }, + { + "names": [ + 
"k8s.gcr.io/heapster-amd64@sha256:d4d10455d921802bdb004e7edfe423a2b2f88911319b48abf47e0af909f27f15", + "k8s.gcr.io/heapster-amd64:v1.5.1" + ], + "sizeBytes": 75318380 + }, + { + "names": [ + "k8s.gcr.io/heapster-amd64@sha256:dccaabb0c20cf05c29baefa1e9bf0358b083ccc0fab492b9b3b47fb7e4db5472", + "k8s.gcr.io/heapster-amd64:v1.5.4" + ], + "sizeBytes": 75318342 + }, + { + "names": [ + "k8s.gcr.io/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", + "k8s.gcr.io/heapster-amd64:v1.5.3" + ], + "sizeBytes": 75318342 + }, + { + "names": [ + "k8s.gcr.io/rescheduler@sha256:66a900b01c70d695e112d8fa7779255640aab77ccc31f2bb661e6c674fe0d162", + "k8s.gcr.io/rescheduler:v0.3.1" + ], + "sizeBytes": 74659350 + }, + { + "names": [ + "gcr.io/kubernetes-helm/tiller@sha256:f6d8f4ab9ba993b5f5b60a6edafe86352eabe474ffeb84cb6c79b8866dce45d1", + "gcr.io/kubernetes-helm/tiller:v2.11.0" + ], + "sizeBytes": 71821984 + }, + { + "names": [ + "gcr.io/kubernetes-helm/tiller@sha256:394fb7d5f2fbaca54f6a0dec387cef926f6ae359786c89f7da67db173b97a322", + "gcr.io/kubernetes-helm/tiller:v2.8.1" + ], + "sizeBytes": 71509364 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:d53486c3a0b49ebee019932878dc44232735d5622a51dbbdcec7124199020d09", + "k8s.gcr.io/kube-addon-manager-amd64:v8.7" + ], + "sizeBytes": 63322109 + }, + { + "names": [ + "nvidia/k8s-device-plugin@sha256:41b3531d338477d26eb1151c15d0bea130d31e690752315a5205d8094439b0a6", + "nvidia/k8s-device-plugin:1.11" + ], + "sizeBytes": 63138633 + }, + { + "names": [ + "nvidia/k8s-device-plugin@sha256:327487db623cc75bdff86e56942f4af280e5f3de907339d0141fdffaeef342b8", + "nvidia/k8s-device-plugin:1.10" + ], + "sizeBytes": 63130377 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13" + ], + "sizeBytes": 51157394 + }, + { + "names": [ + 
"quay.io/coreos/flannel@sha256:5fa9435c1e95be2ec4daa53a35c39d5e3cc99fce33ed4983f4bb707bc9fc175f", + "quay.io/coreos/flannel:v0.8.0" + ], + "sizeBytes": 50732259 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:1a3fc069de481ae690188f6f1ba4664b5cc7760af37120f70c86505c79eea61d", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.5" + ], + "sizeBytes": 49387411 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:e7f673b2c5ccd047c48b4eecd5452b2db1b9454daf07b23068ad239f98afaa29", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.15.0" + ], + "sizeBytes": 49052023 + }, + { + "names": [ + "k8s.gcr.io/rescheduler@sha256:156cfbfd05a5a815206fd2eeb6cbdaf1596d71ea4b415d3a6c43071dd7b99450", + "k8s.gcr.io/rescheduler:v0.4.0" + ], + "sizeBytes": 48973149 + }, + { + "names": [ + "gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", + "k8s.gcr.io/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", + "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0", + "k8s.gcr.io/ip-masq-agent-amd64:v2.0.0" + ], + "sizeBytes": 48645472 + }, + { + "names": [ + "quay.io/coreos/flannel@sha256:6ecef07be35e5e861016ee557f986f89ad8244df47198de379a1bf4e580185df", + "quay.io/coreos/flannel:v0.10.0" + ], + "sizeBytes": 44598861 + }, + { + "names": [ + "k8s.gcr.io/metrics-server-amd64@sha256:49a9f12f7067d11f42c803dbe61ed2c1299959ad85cb315b25ff7eef8e6b8892", + "k8s.gcr.io/metrics-server-amd64:v0.2.1" + ], + "sizeBytes": 42541759 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:23df717980b4aa08d2da6c4cfa327f1b730d92ec9cf740959d2d5911830d82fb", + "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.8" + ], + "sizeBytes": 42210862 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:f80f5f9328107dc516d67f7b70054354b9367d31d4946a3bffd3383d83d7efe8", + "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.7" + ], + "sizeBytes": 42033070 + }, + { + "names": [ + 
"k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:4f1ab957f87b94a5ec1edc26fae50da2175461f00afecf68940c4aa079bd08a4", + "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10" + ], + "sizeBytes": 41635309 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:46b933bb70270c8a02fa6b6f87d440f6f1fce1a5a2a719e164f83f7b109f7544", + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.5" + ], + "sizeBytes": 41423617 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:93c827f018cf3322f1ff2aa80324a0306048b0a69bc274e423071fb0d2d29d8b", + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.8" + ], + "sizeBytes": 40951779 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:bbb2a290a568125b3b996028958eb773f33b5b87a6b37bf38a28f8b62dddb3c8", + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10" + ], + "sizeBytes": 40372149 + }, + { + "names": [ + "k8s.gcr.io/coredns@sha256:02382353821b12c21b062c59184e227e001079bb13ebd01f9d3270ba0fcbf1e4", + "k8s.gcr.io/coredns:1.3.1" + ], + "sizeBytes": 40303560 + }, + { + "names": [ + "k8s.gcr.io/coredns@sha256:81936728011c0df9404cb70b95c17bbc8af922ec9a70d0561a5d01fefa6ffa51", + "k8s.gcr.io/coredns:1.2.6" + ], + "sizeBytes": 40017418 + } + ] + }, + "apiVersion": "v1", + "kind": "Node" + }, + { + "metadata": { + "name": "k8s-agentpool1-15159885-vmss000001", + "selfLink": "/api/v1/nodes/k8s-agentpool1-15159885-vmss000001", + "uid": "2f589cda-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "8059374", + "creationTimestamp": "2019-04-17T04:28:01Z", + "labels": { + "agentpool": "agentpool1", + "beta.kubernetes.io/arch": "amd64", + "beta.kubernetes.io/instance-type": "Standard_D2_v2", + "beta.kubernetes.io/os": "linux", + "failure-domain.beta.kubernetes.io/region": "eastus", + "failure-domain.beta.kubernetes.io/zone": "1", + "kubernetes.azure.com/cluster": "aks-engine-health", + "kubernetes.io/hostname": "k8s-agentpool1-15159885-vmss000001", + "kubernetes.io/role": "agent", + "node-role.kubernetes.io/agent": "", + "storageprofile": 
"managed", + "storagetier": "Standard_LRS" + }, + "annotations": { + "node.alpha.kubernetes.io/ttl": "0", + "volumes.kubernetes.io/controller-managed-attach-detach": "true" + } + }, + "spec": { + "providerID": "azure:///subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/aks-engine-health/providers/Microsoft.Compute/virtualMachineScaleSets/k8s-agentpool1-15159885-vmss/virtualMachines/1" + }, + "status": { + "capacity": { + "attachable-volumes-azure-disk": "8", + "cpu": "2", + "ephemeral-storage": "30428648Ki", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "7113156Ki", + "pods": "30" + }, + "allocatable": { + "attachable-volumes-azure-disk": "8", + "cpu": "2", + "ephemeral-storage": "28043041951", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "6345156Ki", + "pods": "30" + }, + "conditions": [ + { + "type": "OutOfDisk", + "status": "False", + "lastHeartbeatTime": "2019-06-19T20:09:53Z", + "lastTransitionTime": "2019-06-14T07:59:09Z", + "reason": "KubeletHasSufficientDisk", + "message": "kubelet has sufficient disk space available" + }, + { + "type": "MemoryPressure", + "status": "False", + "lastHeartbeatTime": "2019-06-19T20:09:53Z", + "lastTransitionTime": "2019-06-14T07:59:09Z", + "reason": "KubeletHasSufficientMemory", + "message": "kubelet has sufficient memory available" + }, + { + "type": "DiskPressure", + "status": "False", + "lastHeartbeatTime": "2019-06-19T20:09:53Z", + "lastTransitionTime": "2019-06-14T07:59:09Z", + "reason": "KubeletHasNoDiskPressure", + "message": "kubelet has no disk pressure" + }, + { + "type": "PIDPressure", + "status": "False", + "lastHeartbeatTime": "2019-06-19T20:09:53Z", + "lastTransitionTime": "2019-04-17T04:27:57Z", + "reason": "KubeletHasSufficientPID", + "message": "kubelet has sufficient PID available" + }, + { + "type": "Ready", + "status": "True", + "lastHeartbeatTime": "2019-06-19T20:09:53Z", + "lastTransitionTime": "2019-06-14T07:59:09Z", + "reason": "KubeletReady", + "message": 
"kubelet is posting ready status. AppArmor enabled" + } + ], + "addresses": [ + { + "type": "Hostname", + "address": "k8s-agentpool1-15159885-vmss000001" + }, + { + "type": "InternalIP", + "address": "10.240.0.65" + } + ], + "daemonEndpoints": { + "kubeletEndpoint": { + "Port": 10250 + } + }, + "nodeInfo": { + "machineID": "b961d62fbbc543c0a421e9c1aa2499fd", + "systemUUID": "858668CE-5637-0148-A5DD-811E6E452DA9", + "bootID": "812a23fb-95b8-463c-81c5-63c1b289e8e9", + "kernelVersion": "4.15.0-1046-azure", + "osImage": "Ubuntu 16.04.6 LTS", + "containerRuntimeVersion": "docker://3.0.4", + "kubeletVersion": "v1.12.7", + "kubeProxyVersion": "v1.12.7", + "operatingSystem": "linux", + "architecture": "amd64" + }, + "images": [ + { + "names": [ + "rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", + "rdilip83/jsonlogger:v12" + ], + "sizeBytes": 676594134 + }, + { + "names": [ + "k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", + "k8s.gcr.io/hyperkube-amd64:v1.12.7" + ], + "sizeBytes": 635452178 + }, + { + "names": [ + "rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", + "rdilip83/healthpreview06192019:latest" + ], + "sizeBytes": 493799437 + }, + { + "names": [ + "microsoft/oms@sha256:7164890d6c2ec47a3588ee801e08a2e90e7ae650f5b4da3a1baf17de95e745b0", + "rdilip83/hp@sha256:7164890d6c2ec47a3588ee801e08a2e90e7ae650f5b4da3a1baf17de95e745b0", + "microsoft/oms:healthpreview04172019", + "rdilip83/hp:0417" + ], + "sizeBytes": 458385124 + }, + { + "names": [ + "microsoft/oms@sha256:042f0216394fd0709e384dceffc8ab0f8c983474313b63e9e27f98cf50825cee", + "microsoft/oms:healthpreview04152019" + ], + "sizeBytes": 458383728 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:b6834bb69e8fad88110b1dc57097a45bc79e6f2c5f2c2773c871d07389794771", + "k8s.gcr.io/cluster-autoscaler:v1.12.3" + ], + "sizeBytes": 232229241 + }, + { + "names": [ 
+ "k8s.gcr.io/cluster-autoscaler@sha256:dc5744fd8c22aebfe40d6b62ab97d18d7bfbfc7ab1782509d69a5a9ec514df2c", + "k8s.gcr.io/cluster-autoscaler:v1.12.2" + ], + "sizeBytes": 232167833 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:e71851267764a068fbb091a4ef3bb874b5ce34db48cb757fcf77779f30ef0207", + "k8s.gcr.io/cluster-autoscaler:v1.3.7" + ], + "sizeBytes": 217353965 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:156b7b9bcba24ed474f67d0feaf27f2506013f15b030341bbd41c630283161b8", + "k8s.gcr.io/cluster-autoscaler:v1.3.4" + ], + "sizeBytes": 217264129 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:97896235bf66bde573d6f2ee150e212ea7010d314eb5d2cfb2ff1af93335db30", + "k8s.gcr.io/cluster-autoscaler:v1.3.3" + ], + "sizeBytes": 217259793 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:36a369ca4643542d501bce0addf8b903f2141ae9e2608662b77a3d24f01d7780", + "k8s.gcr.io/cluster-autoscaler:v1.2.2" + ], + "sizeBytes": 208688449 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:9a71e04fdb0be61f2087847b6c5d223db3de4768e0cf8941b550fe9d4a971f58", + "k8s.gcr.io/cluster-autoscaler:v1.1.2" + ], + "sizeBytes": 198265853 + }, + { + "names": [ + "containernetworking/azure-npm@sha256:4735da6dc0d5393d68be72498f5ce563cb930fa21b26faec8fdc844001057a56", + "containernetworking/azure-npm:v1.0.18" + ], + "sizeBytes": 170727162 + }, + { + "names": [ + "k8s.gcr.io/cloud-controller-manager-amd64@sha256:c5b5c835a4a435da69f226fc08e27d2cc52e5911acc8215bb2c6372932bee7c0", + "k8s.gcr.io/cloud-controller-manager-amd64:v1.12.7" + ], + "sizeBytes": 144153702 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:a4e5a8e6d4dc011e6e7a104d6abdfda56274b90357ee9f6e42cc22b70482420b", + "k8s.gcr.io/cluster-autoscaler:v1.14.0" + ], + "sizeBytes": 142102721 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:9dcbd91e79f33c44529de58a0024deb3da23a3a0bc7fd4d028c1255c68f62fb7", + "k8s.gcr.io/cluster-autoscaler:v1.13.2" + ], + "sizeBytes": 
136684274 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:7ff5a60304b344f2f29c804c7253632bbc818794f6932236a56db107a6a8f5af", + "k8s.gcr.io/cluster-autoscaler:v1.13.1" + ], + "sizeBytes": 136618018 + }, + { + "names": [ + "containernetworking/networkmonitor@sha256:d875511410502c3e37804e1f313cc2b0a03d7a03d3d5e6adaf8994b753a76f8e", + "containernetworking/networkmonitor:v0.0.6" + ], + "sizeBytes": 123663837 + }, + { + "names": [ + "containernetworking/networkmonitor@sha256:944408a497c451b0e79d2596dc2e9fe5036cdbba7fa831bff024e1c9ed44190d", + "containernetworking/networkmonitor:v0.0.5" + ], + "sizeBytes": 122043325 + }, + { + "names": [ + "k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", + "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1" + ], + "sizeBytes": 121711221 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:3da3f17cd4f02fe5696f29a5e6cd4aef7111f20dab9bec54ea35942346cfeb60", + "k8s.gcr.io/kube-addon-manager-amd64:v8.8" + ], + "sizeBytes": 99631084 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:2fd1daf3d3cf0e94a753f2263b60dbb0d42b107b5cde0c75ee3fc5c830e016e4", + "k8s.gcr.io/kube-addon-manager-amd64:v8.9" + ], + "sizeBytes": 99240637 + }, + { + "names": [ + "microsoft/virtual-kubelet@sha256:9d2ac6238bb2b8b7a85a71ae6103c38bd387884519665f6f9d47fdc1fb8edb61", + "microsoft/virtual-kubelet:latest" + ], + "sizeBytes": 83395521 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:672794ee3582521eb8bc4f257d0f70c92893f1989f39a200f9c84bcfe1aea7c9", + "k8s.gcr.io/kube-addon-manager-amd64:v9.0" + ], + "sizeBytes": 83077558 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:3519273916ba45cfc9b318448d4629819cb5fbccbb0822cce054dd8c1f68cb60", + "k8s.gcr.io/kube-addon-manager-amd64:v8.6" + ], + "sizeBytes": 78384272 + }, + { + "names": [ + "k8s.gcr.io/heapster-amd64@sha256:d4d10455d921802bdb004e7edfe423a2b2f88911319b48abf47e0af909f27f15", 
+ "k8s.gcr.io/heapster-amd64:v1.5.1" + ], + "sizeBytes": 75318380 + }, + { + "names": [ + "k8s.gcr.io/heapster-amd64@sha256:dccaabb0c20cf05c29baefa1e9bf0358b083ccc0fab492b9b3b47fb7e4db5472", + "k8s.gcr.io/heapster-amd64:v1.5.4" + ], + "sizeBytes": 75318342 + }, + { + "names": [ + "k8s.gcr.io/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", + "k8s.gcr.io/heapster-amd64:v1.5.3" + ], + "sizeBytes": 75318342 + }, + { + "names": [ + "k8s.gcr.io/rescheduler@sha256:66a900b01c70d695e112d8fa7779255640aab77ccc31f2bb661e6c674fe0d162", + "k8s.gcr.io/rescheduler:v0.3.1" + ], + "sizeBytes": 74659350 + }, + { + "names": [ + "gcr.io/kubernetes-helm/tiller@sha256:f6d8f4ab9ba993b5f5b60a6edafe86352eabe474ffeb84cb6c79b8866dce45d1", + "gcr.io/kubernetes-helm/tiller:v2.11.0" + ], + "sizeBytes": 71821984 + }, + { + "names": [ + "gcr.io/kubernetes-helm/tiller@sha256:394fb7d5f2fbaca54f6a0dec387cef926f6ae359786c89f7da67db173b97a322", + "gcr.io/kubernetes-helm/tiller:v2.8.1" + ], + "sizeBytes": 71509364 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:d53486c3a0b49ebee019932878dc44232735d5622a51dbbdcec7124199020d09", + "k8s.gcr.io/kube-addon-manager-amd64:v8.7" + ], + "sizeBytes": 63322109 + }, + { + "names": [ + "nvidia/k8s-device-plugin@sha256:41b3531d338477d26eb1151c15d0bea130d31e690752315a5205d8094439b0a6", + "nvidia/k8s-device-plugin:1.11" + ], + "sizeBytes": 63138633 + }, + { + "names": [ + "nvidia/k8s-device-plugin@sha256:327487db623cc75bdff86e56942f4af280e5f3de907339d0141fdffaeef342b8", + "nvidia/k8s-device-plugin:1.10" + ], + "sizeBytes": 63130377 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13" + ], + "sizeBytes": 51157394 + }, + { + "names": [ + "quay.io/coreos/flannel@sha256:5fa9435c1e95be2ec4daa53a35c39d5e3cc99fce33ed4983f4bb707bc9fc175f", + "quay.io/coreos/flannel:v0.8.0" + ], + "sizeBytes": 
50732259 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:1a3fc069de481ae690188f6f1ba4664b5cc7760af37120f70c86505c79eea61d", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.5" + ], + "sizeBytes": 49387411 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:e7f673b2c5ccd047c48b4eecd5452b2db1b9454daf07b23068ad239f98afaa29", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.15.0" + ], + "sizeBytes": 49052023 + }, + { + "names": [ + "k8s.gcr.io/rescheduler@sha256:156cfbfd05a5a815206fd2eeb6cbdaf1596d71ea4b415d3a6c43071dd7b99450", + "k8s.gcr.io/rescheduler:v0.4.0" + ], + "sizeBytes": 48973149 + }, + { + "names": [ + "gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", + "k8s.gcr.io/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", + "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0", + "k8s.gcr.io/ip-masq-agent-amd64:v2.0.0" + ], + "sizeBytes": 48645472 + }, + { + "names": [ + "quay.io/coreos/flannel@sha256:6ecef07be35e5e861016ee557f986f89ad8244df47198de379a1bf4e580185df", + "quay.io/coreos/flannel:v0.10.0" + ], + "sizeBytes": 44598861 + }, + { + "names": [ + "k8s.gcr.io/metrics-server-amd64@sha256:49a9f12f7067d11f42c803dbe61ed2c1299959ad85cb315b25ff7eef8e6b8892", + "k8s.gcr.io/metrics-server-amd64:v0.2.1" + ], + "sizeBytes": 42541759 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:23df717980b4aa08d2da6c4cfa327f1b730d92ec9cf740959d2d5911830d82fb", + "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.8" + ], + "sizeBytes": 42210862 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:f80f5f9328107dc516d67f7b70054354b9367d31d4946a3bffd3383d83d7efe8", + "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.7" + ], + "sizeBytes": 42033070 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:4f1ab957f87b94a5ec1edc26fae50da2175461f00afecf68940c4aa079bd08a4", + "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10" + ], + "sizeBytes": 41635309 + }, + 
{ + "names": [ + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:46b933bb70270c8a02fa6b6f87d440f6f1fce1a5a2a719e164f83f7b109f7544", + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.5" + ], + "sizeBytes": 41423617 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:93c827f018cf3322f1ff2aa80324a0306048b0a69bc274e423071fb0d2d29d8b", + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.8" + ], + "sizeBytes": 40951779 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:bbb2a290a568125b3b996028958eb773f33b5b87a6b37bf38a28f8b62dddb3c8", + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10" + ], + "sizeBytes": 40372149 + }, + { + "names": [ + "k8s.gcr.io/coredns@sha256:02382353821b12c21b062c59184e227e001079bb13ebd01f9d3270ba0fcbf1e4", + "k8s.gcr.io/coredns:1.3.1" + ], + "sizeBytes": 40303560 + }, + { + "names": [ + "k8s.gcr.io/coredns@sha256:81936728011c0df9404cb70b95c17bbc8af922ec9a70d0561a5d01fefa6ffa51", + "k8s.gcr.io/coredns:1.2.6" + ], + "sizeBytes": 40017418 + } + ] + }, + "apiVersion": "v1", + "kind": "Node" + }, + { + "metadata": { + "name": "k8s-master-15159885-0", + "selfLink": "/api/v1/nodes/k8s-master-15159885-0", + "uid": "32b3af81-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "8059368", + "creationTimestamp": "2019-04-17T04:28:06Z", + "labels": { + "beta.kubernetes.io/arch": "amd64", + "beta.kubernetes.io/instance-type": "Standard_D2_v2", + "beta.kubernetes.io/os": "linux", + "failure-domain.beta.kubernetes.io/region": "eastus", + "failure-domain.beta.kubernetes.io/zone": "0", + "kubernetes.azure.com/cluster": "aks-engine-health", + "kubernetes.io/hostname": "k8s-master-15159885-0", + "kubernetes.io/role": "master", + "node-role.kubernetes.io/master": "" + }, + "annotations": { + "node.alpha.kubernetes.io/ttl": "0", + "volumes.kubernetes.io/controller-managed-attach-detach": "true" + } + }, + "spec": { + "providerID": 
"azure:///subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/aks-engine-health/providers/Microsoft.Compute/virtualMachines/k8s-master-15159885-0", + "taints": [ + { + "key": "node-role.kubernetes.io/master", + "value": "true", + "effect": "NoSchedule" + } + ] + }, + "status": { + "capacity": { + "attachable-volumes-azure-disk": "8", + "cpu": "2", + "ephemeral-storage": "30428648Ki", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "7137036Ki", + "pods": "30" + }, + "allocatable": { + "attachable-volumes-azure-disk": "8", + "cpu": "2", + "ephemeral-storage": "28043041951", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "6369036Ki", + "pods": "30" + }, + "conditions": [ + { + "type": "OutOfDisk", + "status": "False", + "lastHeartbeatTime": "2019-06-19T20:09:49Z", + "lastTransitionTime": "2019-04-17T04:28:06Z", + "reason": "KubeletHasSufficientDisk", + "message": "kubelet has sufficient disk space available" + }, + { + "type": "MemoryPressure", + "status": "False", + "lastHeartbeatTime": "2019-06-19T20:09:49Z", + "lastTransitionTime": "2019-04-17T04:28:06Z", + "reason": "KubeletHasSufficientMemory", + "message": "kubelet has sufficient memory available" + }, + { + "type": "DiskPressure", + "status": "False", + "lastHeartbeatTime": "2019-06-19T20:09:49Z", + "lastTransitionTime": "2019-04-17T04:28:06Z", + "reason": "KubeletHasNoDiskPressure", + "message": "kubelet has no disk pressure" + }, + { + "type": "PIDPressure", + "status": "False", + "lastHeartbeatTime": "2019-06-19T20:09:49Z", + "lastTransitionTime": "2019-04-17T04:28:06Z", + "reason": "KubeletHasSufficientPID", + "message": "kubelet has sufficient PID available" + }, + { + "type": "Ready", + "status": "True", + "lastHeartbeatTime": "2019-06-19T20:09:49Z", + "lastTransitionTime": "2019-04-17T04:28:06Z", + "reason": "KubeletReady", + "message": "kubelet is posting ready status. 
AppArmor enabled" + } + ], + "addresses": [ + { + "type": "Hostname", + "address": "k8s-master-15159885-0" + }, + { + "type": "InternalIP", + "address": "10.255.255.5" + } + ], + "daemonEndpoints": { + "kubeletEndpoint": { + "Port": 10250 + } + }, + "nodeInfo": { + "machineID": "93481a94c1844562bd1e450604d6281f", + "systemUUID": "98EB8773-7ACD-F94F-9D56-C530369EF42E", + "bootID": "68915d7c-2ecd-4992-b69c-d63751683c5e", + "kernelVersion": "4.15.0-1041-azure", + "osImage": "Ubuntu 16.04.6 LTS", + "containerRuntimeVersion": "docker://3.0.4", + "kubeletVersion": "v1.12.7", + "kubeProxyVersion": "v1.12.7", + "operatingSystem": "linux", + "architecture": "amd64" + }, + "images": [ + { + "names": [ + "k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", + "k8s.gcr.io/hyperkube-amd64:v1.12.7" + ], + "sizeBytes": 635452178 + }, + { + "names": [ + "rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", + "rdilip83/healthpreview06192019:latest" + ], + "sizeBytes": 493799437 + }, + { + "names": [ + "microsoft/oms@sha256:7164890d6c2ec47a3588ee801e08a2e90e7ae650f5b4da3a1baf17de95e745b0", + "rdilip83/hp@sha256:7164890d6c2ec47a3588ee801e08a2e90e7ae650f5b4da3a1baf17de95e745b0", + "microsoft/oms:healthpreview04172019", + "rdilip83/hp:0417" + ], + "sizeBytes": 458385124 + }, + { + "names": [ + "microsoft/oms@sha256:042f0216394fd0709e384dceffc8ab0f8c983474313b63e9e27f98cf50825cee", + "microsoft/oms:healthpreview04152019" + ], + "sizeBytes": 458383728 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:b6834bb69e8fad88110b1dc57097a45bc79e6f2c5f2c2773c871d07389794771", + "k8s.gcr.io/cluster-autoscaler:v1.12.3" + ], + "sizeBytes": 232229241 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:dc5744fd8c22aebfe40d6b62ab97d18d7bfbfc7ab1782509d69a5a9ec514df2c", + "k8s.gcr.io/cluster-autoscaler:v1.12.2" + ], + "sizeBytes": 232167833 + }, + { + "names": [ + 
"k8s.gcr.io/cluster-autoscaler@sha256:e71851267764a068fbb091a4ef3bb874b5ce34db48cb757fcf77779f30ef0207", + "k8s.gcr.io/cluster-autoscaler:v1.3.7" + ], + "sizeBytes": 217353965 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:156b7b9bcba24ed474f67d0feaf27f2506013f15b030341bbd41c630283161b8", + "k8s.gcr.io/cluster-autoscaler:v1.3.4" + ], + "sizeBytes": 217264129 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:97896235bf66bde573d6f2ee150e212ea7010d314eb5d2cfb2ff1af93335db30", + "k8s.gcr.io/cluster-autoscaler:v1.3.3" + ], + "sizeBytes": 217259793 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:36a369ca4643542d501bce0addf8b903f2141ae9e2608662b77a3d24f01d7780", + "k8s.gcr.io/cluster-autoscaler:v1.2.2" + ], + "sizeBytes": 208688449 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:9a71e04fdb0be61f2087847b6c5d223db3de4768e0cf8941b550fe9d4a971f58", + "k8s.gcr.io/cluster-autoscaler:v1.1.2" + ], + "sizeBytes": 198265853 + }, + { + "names": [ + "containernetworking/azure-npm@sha256:4735da6dc0d5393d68be72498f5ce563cb930fa21b26faec8fdc844001057a56", + "containernetworking/azure-npm:v1.0.18" + ], + "sizeBytes": 170727162 + }, + { + "names": [ + "k8s.gcr.io/cloud-controller-manager-amd64@sha256:c5b5c835a4a435da69f226fc08e27d2cc52e5911acc8215bb2c6372932bee7c0", + "k8s.gcr.io/cloud-controller-manager-amd64:v1.12.7" + ], + "sizeBytes": 144153702 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:a4e5a8e6d4dc011e6e7a104d6abdfda56274b90357ee9f6e42cc22b70482420b", + "k8s.gcr.io/cluster-autoscaler:v1.14.0" + ], + "sizeBytes": 142102721 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:9dcbd91e79f33c44529de58a0024deb3da23a3a0bc7fd4d028c1255c68f62fb7", + "k8s.gcr.io/cluster-autoscaler:v1.13.2" + ], + "sizeBytes": 136684274 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:7ff5a60304b344f2f29c804c7253632bbc818794f6932236a56db107a6a8f5af", + "k8s.gcr.io/cluster-autoscaler:v1.13.1" + ], + "sizeBytes": 
136618018 + }, + { + "names": [ + "containernetworking/networkmonitor@sha256:d875511410502c3e37804e1f313cc2b0a03d7a03d3d5e6adaf8994b753a76f8e", + "containernetworking/networkmonitor:v0.0.6" + ], + "sizeBytes": 123663837 + }, + { + "names": [ + "containernetworking/networkmonitor@sha256:944408a497c451b0e79d2596dc2e9fe5036cdbba7fa831bff024e1c9ed44190d", + "containernetworking/networkmonitor:v0.0.5" + ], + "sizeBytes": 122043325 + }, + { + "names": [ + "k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", + "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1" + ], + "sizeBytes": 121711221 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:3da3f17cd4f02fe5696f29a5e6cd4aef7111f20dab9bec54ea35942346cfeb60", + "k8s.gcr.io/kube-addon-manager-amd64:v8.8" + ], + "sizeBytes": 99631084 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:2fd1daf3d3cf0e94a753f2263b60dbb0d42b107b5cde0c75ee3fc5c830e016e4", + "k8s.gcr.io/kube-addon-manager-amd64:v8.9" + ], + "sizeBytes": 99240637 + }, + { + "names": [ + "microsoft/virtual-kubelet@sha256:9d2ac6238bb2b8b7a85a71ae6103c38bd387884519665f6f9d47fdc1fb8edb61", + "microsoft/virtual-kubelet:latest" + ], + "sizeBytes": 83395521 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:672794ee3582521eb8bc4f257d0f70c92893f1989f39a200f9c84bcfe1aea7c9", + "k8s.gcr.io/kube-addon-manager-amd64:v9.0" + ], + "sizeBytes": 83077558 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:3519273916ba45cfc9b318448d4629819cb5fbccbb0822cce054dd8c1f68cb60", + "k8s.gcr.io/kube-addon-manager-amd64:v8.6" + ], + "sizeBytes": 78384272 + }, + { + "names": [ + "k8s.gcr.io/heapster-amd64@sha256:d4d10455d921802bdb004e7edfe423a2b2f88911319b48abf47e0af909f27f15", + "k8s.gcr.io/heapster-amd64:v1.5.1" + ], + "sizeBytes": 75318380 + }, + { + "names": [ + "k8s.gcr.io/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", + 
"k8s.gcr.io/heapster-amd64:v1.5.3" + ], + "sizeBytes": 75318342 + }, + { + "names": [ + "k8s.gcr.io/heapster-amd64@sha256:dccaabb0c20cf05c29baefa1e9bf0358b083ccc0fab492b9b3b47fb7e4db5472", + "k8s.gcr.io/heapster-amd64:v1.5.4" + ], + "sizeBytes": 75318342 + }, + { + "names": [ + "k8s.gcr.io/rescheduler@sha256:66a900b01c70d695e112d8fa7779255640aab77ccc31f2bb661e6c674fe0d162", + "k8s.gcr.io/rescheduler:v0.3.1" + ], + "sizeBytes": 74659350 + }, + { + "names": [ + "gcr.io/kubernetes-helm/tiller@sha256:f6d8f4ab9ba993b5f5b60a6edafe86352eabe474ffeb84cb6c79b8866dce45d1", + "gcr.io/kubernetes-helm/tiller:v2.11.0" + ], + "sizeBytes": 71821984 + }, + { + "names": [ + "gcr.io/kubernetes-helm/tiller@sha256:394fb7d5f2fbaca54f6a0dec387cef926f6ae359786c89f7da67db173b97a322", + "gcr.io/kubernetes-helm/tiller:v2.8.1" + ], + "sizeBytes": 71509364 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:d53486c3a0b49ebee019932878dc44232735d5622a51dbbdcec7124199020d09", + "k8s.gcr.io/kube-addon-manager-amd64:v8.7" + ], + "sizeBytes": 63322109 + }, + { + "names": [ + "nvidia/k8s-device-plugin@sha256:41b3531d338477d26eb1151c15d0bea130d31e690752315a5205d8094439b0a6", + "nvidia/k8s-device-plugin:1.11" + ], + "sizeBytes": 63138633 + }, + { + "names": [ + "nvidia/k8s-device-plugin@sha256:327487db623cc75bdff86e56942f4af280e5f3de907339d0141fdffaeef342b8", + "nvidia/k8s-device-plugin:1.10" + ], + "sizeBytes": 63130377 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13" + ], + "sizeBytes": 51157394 + }, + { + "names": [ + "quay.io/coreos/flannel@sha256:5fa9435c1e95be2ec4daa53a35c39d5e3cc99fce33ed4983f4bb707bc9fc175f", + "quay.io/coreos/flannel:v0.8.0" + ], + "sizeBytes": 50732259 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:1a3fc069de481ae690188f6f1ba4664b5cc7760af37120f70c86505c79eea61d", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.5" + ], + 
"sizeBytes": 49387411 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:e7f673b2c5ccd047c48b4eecd5452b2db1b9454daf07b23068ad239f98afaa29", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.15.0" + ], + "sizeBytes": 49052023 + }, + { + "names": [ + "k8s.gcr.io/rescheduler@sha256:156cfbfd05a5a815206fd2eeb6cbdaf1596d71ea4b415d3a6c43071dd7b99450", + "k8s.gcr.io/rescheduler:v0.4.0" + ], + "sizeBytes": 48973149 + }, + { + "names": [ + "gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", + "k8s.gcr.io/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", + "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0", + "k8s.gcr.io/ip-masq-agent-amd64:v2.0.0" + ], + "sizeBytes": 48645472 + }, + { + "names": [ + "quay.io/coreos/flannel@sha256:6ecef07be35e5e861016ee557f986f89ad8244df47198de379a1bf4e580185df", + "quay.io/coreos/flannel:v0.10.0" + ], + "sizeBytes": 44598861 + }, + { + "names": [ + "k8s.gcr.io/metrics-server-amd64@sha256:49a9f12f7067d11f42c803dbe61ed2c1299959ad85cb315b25ff7eef8e6b8892", + "k8s.gcr.io/metrics-server-amd64:v0.2.1" + ], + "sizeBytes": 42541759 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:23df717980b4aa08d2da6c4cfa327f1b730d92ec9cf740959d2d5911830d82fb", + "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.8" + ], + "sizeBytes": 42210862 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:f80f5f9328107dc516d67f7b70054354b9367d31d4946a3bffd3383d83d7efe8", + "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.7" + ], + "sizeBytes": 42033070 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:4f1ab957f87b94a5ec1edc26fae50da2175461f00afecf68940c4aa079bd08a4", + "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10" + ], + "sizeBytes": 41635309 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:46b933bb70270c8a02fa6b6f87d440f6f1fce1a5a2a719e164f83f7b109f7544", + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.5" + ], + 
"sizeBytes": 41423617 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:93c827f018cf3322f1ff2aa80324a0306048b0a69bc274e423071fb0d2d29d8b", + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.8" + ], + "sizeBytes": 40951779 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:bbb2a290a568125b3b996028958eb773f33b5b87a6b37bf38a28f8b62dddb3c8", + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10" + ], + "sizeBytes": 40372149 + }, + { + "names": [ + "k8s.gcr.io/coredns@sha256:02382353821b12c21b062c59184e227e001079bb13ebd01f9d3270ba0fcbf1e4", + "k8s.gcr.io/coredns:1.3.1" + ], + "sizeBytes": 40303560 + }, + { + "names": [ + "k8s.gcr.io/coredns@sha256:81936728011c0df9404cb70b95c17bbc8af922ec9a70d0561a5d01fefa6ffa51", + "k8s.gcr.io/coredns:1.2.6" + ], + "sizeBytes": 40017418 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:ed10a8d06cde253474a7e9cfa65bbdf5f8f0c6a377f66c033e623ff63e9d5edf", + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.15.0" + ], + "sizeBytes": 39814576 + } + ] + }, + "apiVersion": "v1", + "kind": "Node" + } + ] +} \ No newline at end of file diff --git a/inventory/aks-engine/pods.json b/inventory/aks-engine/pods.json new file mode 100644 index 000000000..16cc66b78 --- /dev/null +++ b/inventory/aks-engine/pods.json @@ -0,0 +1,6622 @@ +{ + "items": [ + { + "metadata": { + "name": "diliprnodejsonlog-5959b88bf7-8qk62", + "generateName": "diliprnodejsonlog-5959b88bf7-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/diliprnodejsonlog-5959b88bf7-8qk62", + "uid": "eccca9a1-66ec-11e9-a358-000d3a53d49f", + "resourceVersion": "5110373", + "creationTimestamp": "2019-04-24T23:58:58Z", + "labels": { + "VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "diliprnodejsonlog", + "pod-template-hash": "5959b88bf7" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "diliprnodejsonlog-5959b88bf7", + "uid": "ecc3eca0-66ec-11e9-a358-000d3a53d49f", + 
"controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-c5cvg", + "secret": { + "secretName": "default-token-c5cvg", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "diliprnodejsonlogpodapp", + "image": "rdilip83/jsonlogger:v12", + "resources": { + "limits": { + "cpu": "20m", + "memory": "60Mi" + }, + "requests": { + "cpu": "10m", + "memory": "30Mi" + } + }, + "volumeMounts": [ + { + "name": "default-token-c5cvg", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-agentpool1-15159885-vmss000000", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-24T23:58:58Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-24T23:59:28Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-24T23:59:28Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-24T23:58:58Z" + } + ], + "hostIP": "10.240.0.34", + "podIP": "10.240.0.44", + "startTime": "2019-04-24T23:58:58Z", + "containerStatuses": [ + { + "name": 
"diliprnodejsonlogpodapp", + "state": { + "running": { + "startedAt": "2019-04-24T23:59:28Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "rdilip83/jsonlogger:v12", + "imageID": "docker-pullable://rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", + "containerID": "docker://e01c161647be4ba9c88056ae101f6cb71923182c7c523ec56257c3f8125ad825" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "diliprnodejsonlog-5959b88bf7-8vttp", + "generateName": "diliprnodejsonlog-5959b88bf7-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/diliprnodejsonlog-5959b88bf7-8vttp", + "uid": "eccf9171-66ec-11e9-a358-000d3a53d49f", + "resourceVersion": "7362277", + "creationTimestamp": "2019-04-24T23:58:58Z", + "labels": { + "VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "diliprnodejsonlog", + "pod-template-hash": "5959b88bf7" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "diliprnodejsonlog-5959b88bf7", + "uid": "ecc3eca0-66ec-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-c5cvg", + "secret": { + "secretName": "default-token-c5cvg", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "diliprnodejsonlogpodapp", + "image": "rdilip83/jsonlogger:v12", + "resources": { + "limits": { + "cpu": "20m", + "memory": "60Mi" + }, + "requests": { + "cpu": "10m", + "memory": "30Mi" + } + }, + "volumeMounts": [ + { + "name": "default-token-c5cvg", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + 
"serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-agentpool1-15159885-vmss000001", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-24T23:58:59Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:52Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:52Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-24T23:58:58Z" + } + ], + "hostIP": "10.240.0.65", + "podIP": "10.240.0.66", + "startTime": "2019-04-24T23:58:59Z", + "containerStatuses": [ + { + "name": "diliprnodejsonlogpodapp", + "state": { + "running": { + "startedAt": "2019-06-14T07:59:50Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 255, + "reason": "Error", + "startedAt": "2019-04-24T23:59:35Z", + "finishedAt": "2019-06-14T07:58:49Z", + "containerID": "docker://9669e88adb7e0431fb51223ec0ae76013f7f8809cb1f5125eb9426d707004e64" + } + }, + "ready": true, + "restartCount": 1, + "image": "rdilip83/jsonlogger:v12", + "imageID": "docker-pullable://rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", + "containerID": "docker://efdfd688957948aa6797a72a7b2256517a6867b690e9bc4ecf33723d5a190f4e" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "diliprnodejsonlog-5959b88bf7-k4dvh", + "generateName": 
"diliprnodejsonlog-5959b88bf7-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/diliprnodejsonlog-5959b88bf7-k4dvh", + "uid": "ecca1b71-66ec-11e9-a358-000d3a53d49f", + "resourceVersion": "7362259", + "creationTimestamp": "2019-04-24T23:58:58Z", + "labels": { + "VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "diliprnodejsonlog", + "pod-template-hash": "5959b88bf7" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "diliprnodejsonlog-5959b88bf7", + "uid": "ecc3eca0-66ec-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-c5cvg", + "secret": { + "secretName": "default-token-c5cvg", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "diliprnodejsonlogpodapp", + "image": "rdilip83/jsonlogger:v12", + "resources": { + "limits": { + "cpu": "20m", + "memory": "60Mi" + }, + "requests": { + "cpu": "10m", + "memory": "30Mi" + } + }, + "volumeMounts": [ + { + "name": "default-token-c5cvg", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-agentpool1-15159885-vmss000001", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + 
"lastProbeTime": null, + "lastTransitionTime": "2019-04-24T23:58:59Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:50Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:50Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-24T23:58:58Z" + } + ], + "hostIP": "10.240.0.65", + "podIP": "10.240.0.69", + "startTime": "2019-04-24T23:58:59Z", + "containerStatuses": [ + { + "name": "diliprnodejsonlogpodapp", + "state": { + "running": { + "startedAt": "2019-06-14T07:59:47Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 255, + "reason": "Error", + "startedAt": "2019-04-24T23:59:35Z", + "finishedAt": "2019-06-14T07:58:50Z", + "containerID": "docker://2d9d3776aea273be3b7c4f2f40065f30b4dc0dca669db8f9f83ca2930667f0ff" + } + }, + "ready": true, + "restartCount": 1, + "image": "rdilip83/jsonlogger:v12", + "imageID": "docker-pullable://rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", + "containerID": "docker://fa8dd2896fc2d8fc86a665e1e0c89ebc87ed1b976c61960ec01611534a8360f7" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "diliprnodejsonlog-5959b88bf7-lc9jh", + "generateName": "diliprnodejsonlog-5959b88bf7-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/diliprnodejsonlog-5959b88bf7-lc9jh", + "uid": "eccf6462-66ec-11e9-a358-000d3a53d49f", + "resourceVersion": "5110364", + "creationTimestamp": "2019-04-24T23:58:58Z", + "labels": { + "VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "diliprnodejsonlog", + "pod-template-hash": "5959b88bf7" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "diliprnodejsonlog-5959b88bf7", + "uid": "ecc3eca0-66ec-11e9-a358-000d3a53d49f", + 
"controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-c5cvg", + "secret": { + "secretName": "default-token-c5cvg", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "diliprnodejsonlogpodapp", + "image": "rdilip83/jsonlogger:v12", + "resources": { + "limits": { + "cpu": "20m", + "memory": "60Mi" + }, + "requests": { + "cpu": "10m", + "memory": "30Mi" + } + }, + "volumeMounts": [ + { + "name": "default-token-c5cvg", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-agentpool1-15159885-vmss000000", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-24T23:58:58Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-24T23:59:29Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-24T23:59:29Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-24T23:58:58Z" + } + ], + "hostIP": "10.240.0.34", + "podIP": "10.240.0.54", + "startTime": "2019-04-24T23:58:58Z", + "containerStatuses": [ + { + "name": 
"diliprnodejsonlogpodapp", + "state": { + "running": { + "startedAt": "2019-04-24T23:59:28Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "rdilip83/jsonlogger:v12", + "imageID": "docker-pullable://rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", + "containerID": "docker://daecdde84dc4f84318cd3502ce83f43ef9c864b1f8f2dcad88163b6eb8bc9d11" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "diliprnodejsonlog-5959b88bf7-rrvkx", + "generateName": "diliprnodejsonlog-5959b88bf7-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/diliprnodejsonlog-5959b88bf7-rrvkx", + "uid": "ecccf8cb-66ec-11e9-a358-000d3a53d49f", + "resourceVersion": "7362253", + "creationTimestamp": "2019-04-24T23:58:58Z", + "labels": { + "VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "diliprnodejsonlog", + "pod-template-hash": "5959b88bf7" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "diliprnodejsonlog-5959b88bf7", + "uid": "ecc3eca0-66ec-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-c5cvg", + "secret": { + "secretName": "default-token-c5cvg", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "diliprnodejsonlogpodapp", + "image": "rdilip83/jsonlogger:v12", + "resources": { + "limits": { + "cpu": "20m", + "memory": "60Mi" + }, + "requests": { + "cpu": "10m", + "memory": "30Mi" + } + }, + "volumeMounts": [ + { + "name": "default-token-c5cvg", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + 
"serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-agentpool1-15159885-vmss000001", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-24T23:58:59Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:49Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:49Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-24T23:58:58Z" + } + ], + "hostIP": "10.240.0.65", + "podIP": "10.240.0.82", + "startTime": "2019-04-24T23:58:59Z", + "containerStatuses": [ + { + "name": "diliprnodejsonlogpodapp", + "state": { + "running": { + "startedAt": "2019-06-14T07:59:45Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 255, + "reason": "Error", + "startedAt": "2019-04-24T23:59:35Z", + "finishedAt": "2019-06-14T07:58:50Z", + "containerID": "docker://184537356d8008b41e4abc884ca6c2b8bae85982d9be8b3eddcebfb0c03301b2" + } + }, + "ready": true, + "restartCount": 1, + "image": "rdilip83/jsonlogger:v12", + "imageID": "docker-pullable://rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", + "containerID": "docker://71e246dcbaabe0d58aed303879e5d03dd937ba1d8ab97f55bc55bb0b9099b536" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "azure-cni-networkmonitor-nf2sl", + "generateName": 
"azure-cni-networkmonitor-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/azure-cni-networkmonitor-nf2sl", + "uid": "398ac33e-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "5110349", + "creationTimestamp": "2019-04-17T04:28:18Z", + "labels": { + "controller-revision-hash": "57ccd9984c", + "k8s-app": "azure-cnms", + "pod-template-generation": "1" + }, + "annotations": { + "scheduler.alpha.kubernetes.io/critical-pod": "" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "azure-cni-networkmonitor", + "uid": "3968a5f4-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "log", + "hostPath": { + "path": "/var/log", + "type": "Directory" + } + }, + { + "name": "ebtables-rule-repo", + "hostPath": { + "path": "/var/run/", + "type": "Directory" + } + }, + { + "name": "telemetry", + "hostPath": { + "path": "/opt/cni/bin", + "type": "Directory" + } + }, + { + "name": "default-token-297b2", + "secret": { + "secretName": "default-token-297b2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "azure-cnms", + "image": "containernetworking/networkmonitor:v0.0.6", + "env": [ + { + "name": "HOSTNAME", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "spec.nodeName" + } + } + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "ebtables-rule-repo", + "mountPath": "/var/run" + }, + { + "name": "log", + "mountPath": "/var/log" + }, + { + "name": "telemetry", + "mountPath": "/opt/cni/bin" + }, + { + "name": "default-token-297b2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true, + "procMount": "Default" + } + } + ], + "restartPolicy": "Always", + 
"terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-agentpool1-15159885-vmss000000", + "hostNetwork": true, + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-agentpool1-15159885-vmss000000" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true", + "effect": "NoSchedule" + }, + { + "operator": "Exists", + "effect": "NoExecute" + }, + { + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:18Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:25Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": 
null, + "lastTransitionTime": "2019-04-17T04:28:25Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:18Z" + } + ], + "hostIP": "10.240.0.34", + "podIP": "10.240.0.34", + "startTime": "2019-04-17T04:28:18Z", + "containerStatuses": [ + { + "name": "azure-cnms", + "state": { + "running": { + "startedAt": "2019-04-17T04:28:24Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "containernetworking/networkmonitor:v0.0.6", + "imageID": "docker-pullable://containernetworking/networkmonitor@sha256:d875511410502c3e37804e1f313cc2b0a03d7a03d3d5e6adaf8994b753a76f8e", + "containerID": "docker://05e533e79958c1e28594be54effb8191d22648d1b2b1085a327e84f8eb203222" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "azure-cni-networkmonitor-wnkxs", + "generateName": "azure-cni-networkmonitor-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/azure-cni-networkmonitor-wnkxs", + "uid": "39887ab5-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "7362209", + "creationTimestamp": "2019-04-17T04:28:18Z", + "labels": { + "controller-revision-hash": "57ccd9984c", + "k8s-app": "azure-cnms", + "pod-template-generation": "1" + }, + "annotations": { + "scheduler.alpha.kubernetes.io/critical-pod": "" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "azure-cni-networkmonitor", + "uid": "3968a5f4-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "log", + "hostPath": { + "path": "/var/log", + "type": "Directory" + } + }, + { + "name": "ebtables-rule-repo", + "hostPath": { + "path": "/var/run/", + "type": "Directory" + } + }, + { + "name": "telemetry", + "hostPath": { + "path": "/opt/cni/bin", + "type": "Directory" + } + }, + { + "name": "default-token-297b2", + "secret": { + 
"secretName": "default-token-297b2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "azure-cnms", + "image": "containernetworking/networkmonitor:v0.0.6", + "env": [ + { + "name": "HOSTNAME", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "spec.nodeName" + } + } + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "ebtables-rule-repo", + "mountPath": "/var/run" + }, + { + "name": "log", + "mountPath": "/var/log" + }, + { + "name": "telemetry", + "mountPath": "/opt/cni/bin" + }, + { + "name": "default-token-297b2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true, + "procMount": "Default" + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-agentpool1-15159885-vmss000001", + "hostNetwork": true, + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-agentpool1-15159885-vmss000001" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true", + "effect": "NoSchedule" + }, + { + "operator": "Exists", + "effect": "NoExecute" + }, + { + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + 
"effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:18Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:39Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:39Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:18Z" + } + ], + "hostIP": "10.240.0.65", + "podIP": "10.240.0.65", + "startTime": "2019-04-17T04:28:18Z", + "containerStatuses": [ + { + "name": "azure-cnms", + "state": { + "running": { + "startedAt": "2019-06-14T07:59:19Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 255, + "reason": "Error", + "startedAt": "2019-04-17T04:28:25Z", + "finishedAt": "2019-06-14T07:58:50Z", + "containerID": "docker://7ff49388bac2a94afec799ec20fe4ecfea439eaab51400cb543f0c4933f6812d" + } + }, + "ready": true, + "restartCount": 1, + "image": "containernetworking/networkmonitor:v0.0.6", + "imageID": "docker-pullable://containernetworking/networkmonitor@sha256:d875511410502c3e37804e1f313cc2b0a03d7a03d3d5e6adaf8994b753a76f8e", + "containerID": "docker://bf0154934a9a3080e3df001e9ed855acf8efb17dd18af8e8227a4f58a354c83a" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + 
"name": "azure-cni-networkmonitor-zjztb", + "generateName": "azure-cni-networkmonitor-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/azure-cni-networkmonitor-zjztb", + "uid": "398adddd-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "474", + "creationTimestamp": "2019-04-17T04:28:18Z", + "labels": { + "controller-revision-hash": "57ccd9984c", + "k8s-app": "azure-cnms", + "pod-template-generation": "1" + }, + "annotations": { + "scheduler.alpha.kubernetes.io/critical-pod": "" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "azure-cni-networkmonitor", + "uid": "3968a5f4-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "log", + "hostPath": { + "path": "/var/log", + "type": "Directory" + } + }, + { + "name": "ebtables-rule-repo", + "hostPath": { + "path": "/var/run/", + "type": "Directory" + } + }, + { + "name": "telemetry", + "hostPath": { + "path": "/opt/cni/bin", + "type": "Directory" + } + }, + { + "name": "default-token-297b2", + "secret": { + "secretName": "default-token-297b2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "azure-cnms", + "image": "containernetworking/networkmonitor:v0.0.6", + "env": [ + { + "name": "HOSTNAME", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "spec.nodeName" + } + } + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "ebtables-rule-repo", + "mountPath": "/var/run" + }, + { + "name": "log", + "mountPath": "/var/log" + }, + { + "name": "telemetry", + "mountPath": "/opt/cni/bin" + }, + { + "name": "default-token-297b2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true, + "procMount": "Default" + } + } + 
], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-master-15159885-0", + "hostNetwork": true, + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-master-15159885-0" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true", + "effect": "NoSchedule" + }, + { + "operator": "Exists", + "effect": "NoExecute" + }, + { + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:18Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:22Z" + }, + { + "type": "ContainersReady", + "status": "True", + 
"lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:22Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:18Z" + } + ], + "hostIP": "10.255.255.5", + "podIP": "10.255.255.5", + "startTime": "2019-04-17T04:28:18Z", + "containerStatuses": [ + { + "name": "azure-cnms", + "state": { + "running": { + "startedAt": "2019-04-17T04:28:21Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "containernetworking/networkmonitor:v0.0.6", + "imageID": "docker-pullable://containernetworking/networkmonitor@sha256:d875511410502c3e37804e1f313cc2b0a03d7a03d3d5e6adaf8994b753a76f8e", + "containerID": "docker://4477e44f375bb4402c6a47794e8ba257f99548502735519c2cb4c4adf4c91fd4" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "azure-ip-masq-agent-5jxwv", + "generateName": "azure-ip-masq-agent-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/azure-ip-masq-agent-5jxwv", + "uid": "3c83177c-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "668", + "creationTimestamp": "2019-04-17T04:28:23Z", + "labels": { + "controller-revision-hash": "85c7cb54c8", + "k8s-app": "azure-ip-masq-agent", + "pod-template-generation": "1", + "tier": "node" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "azure-ip-masq-agent", + "uid": "3c7be430-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "azure-ip-masq-agent-config-volume", + "configMap": { + "name": "azure-ip-masq-agent-config", + "defaultMode": 420 + } + }, + { + "name": "default-token-297b2", + "secret": { + "secretName": "default-token-297b2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "azure-ip-masq-agent", + "image": "k8s.gcr.io/ip-masq-agent-amd64:v2.0.0", + "resources": { + "limits": { + 
"cpu": "50m", + "memory": "250Mi" + }, + "requests": { + "cpu": "50m", + "memory": "50Mi" + } + }, + "volumeMounts": [ + { + "name": "azure-ip-masq-agent-config-volume", + "mountPath": "/etc/config" + }, + { + "name": "default-token-297b2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true, + "procMount": "Default" + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-master-15159885-0", + "hostNetwork": true, + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-master-15159885-0" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true", + "effect": "NoSchedule" + }, + { + "operator": "Exists", + "effect": "NoExecute" + }, + { + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": 
"node.kubernetes.io/network-unavailable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:23Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:29Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:29Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:23Z" + } + ], + "hostIP": "10.255.255.5", + "podIP": "10.255.255.5", + "startTime": "2019-04-17T04:28:23Z", + "containerStatuses": [ + { + "name": "azure-ip-masq-agent", + "state": { + "running": { + "startedAt": "2019-04-17T04:28:28Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0", + "imageID": "docker-pullable://gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", + "containerID": "docker://8df696bf9c1380bb36bbea1214f493c2c3020a360f95635d20b1ff11bd09122f" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "azure-ip-masq-agent-7l7br", + "generateName": "azure-ip-masq-agent-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/azure-ip-masq-agent-7l7br", + "uid": "3c83cb0c-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "5110351", + "creationTimestamp": "2019-04-17T04:28:23Z", + "labels": { + "controller-revision-hash": "85c7cb54c8", + "k8s-app": "azure-ip-masq-agent", + "pod-template-generation": "1", + "tier": "node" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": 
"azure-ip-masq-agent", + "uid": "3c7be430-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "azure-ip-masq-agent-config-volume", + "configMap": { + "name": "azure-ip-masq-agent-config", + "defaultMode": 420 + } + }, + { + "name": "default-token-297b2", + "secret": { + "secretName": "default-token-297b2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "azure-ip-masq-agent", + "image": "k8s.gcr.io/ip-masq-agent-amd64:v2.0.0", + "resources": { + "limits": { + "cpu": "50m", + "memory": "250Mi" + }, + "requests": { + "cpu": "50m", + "memory": "50Mi" + } + }, + "volumeMounts": [ + { + "name": "azure-ip-masq-agent-config-volume", + "mountPath": "/etc/config" + }, + { + "name": "default-token-297b2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true, + "procMount": "Default" + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-agentpool1-15159885-vmss000000", + "hostNetwork": true, + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-agentpool1-15159885-vmss000000" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true", + "effect": "NoSchedule" + }, + { + "operator": "Exists", + "effect": "NoExecute" 
+ }, + { + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:23Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:29Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:29Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:23Z" + } + ], + "hostIP": "10.240.0.34", + "podIP": "10.240.0.34", + "startTime": "2019-04-17T04:28:23Z", + "containerStatuses": [ + { + "name": "azure-ip-masq-agent", + "state": { + "running": { + "startedAt": "2019-04-17T04:28:28Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0", + "imageID": "docker-pullable://gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", + "containerID": "docker://a47cc35959cf657fa5d4f422008ea2600ed11d16fb24ce3caf8c913df9f558b4" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + 
}, + { + "metadata": { + "name": "azure-ip-masq-agent-dvnl8", + "generateName": "azure-ip-masq-agent-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/azure-ip-masq-agent-dvnl8", + "uid": "3c7e9b23-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "7362234", + "creationTimestamp": "2019-04-17T04:28:23Z", + "labels": { + "controller-revision-hash": "85c7cb54c8", + "k8s-app": "azure-ip-masq-agent", + "pod-template-generation": "1", + "tier": "node" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "azure-ip-masq-agent", + "uid": "3c7be430-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "azure-ip-masq-agent-config-volume", + "configMap": { + "name": "azure-ip-masq-agent-config", + "defaultMode": 420 + } + }, + { + "name": "default-token-297b2", + "secret": { + "secretName": "default-token-297b2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "azure-ip-masq-agent", + "image": "k8s.gcr.io/ip-masq-agent-amd64:v2.0.0", + "resources": { + "limits": { + "cpu": "50m", + "memory": "250Mi" + }, + "requests": { + "cpu": "50m", + "memory": "50Mi" + } + }, + "volumeMounts": [ + { + "name": "azure-ip-masq-agent-config-volume", + "mountPath": "/etc/config" + }, + { + "name": "default-token-297b2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true, + "procMount": "Default" + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-agentpool1-15159885-vmss000001", + "hostNetwork": true, + 
"securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-agentpool1-15159885-vmss000001" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true", + "effect": "NoSchedule" + }, + { + "operator": "Exists", + "effect": "NoExecute" + }, + { + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:24Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:47Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:47Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:23Z" + } + ], + "hostIP": "10.240.0.65", + "podIP": "10.240.0.65", + "startTime": 
"2019-04-17T04:28:24Z", + "containerStatuses": [ + { + "name": "azure-ip-masq-agent", + "state": { + "running": { + "startedAt": "2019-06-14T07:59:39Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 255, + "reason": "Error", + "startedAt": "2019-04-17T04:28:29Z", + "finishedAt": "2019-06-14T07:58:50Z", + "containerID": "docker://17afffc794c5b786053a388ffa9b2d7a3bdfffe5423ed884c8c1c49ff6437da1" + } + }, + "ready": true, + "restartCount": 1, + "image": "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0", + "imageID": "docker-pullable://gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", + "containerID": "docker://30b9b4ce91cc9c290f51bf3411d91369d7b5689d6927f89f939c12b827685ed0" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "blobfuse-flexvol-installer-pn29n", + "generateName": "blobfuse-flexvol-installer-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/blobfuse-flexvol-installer-pn29n", + "uid": "3c55a79a-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "7362256", + "creationTimestamp": "2019-04-17T04:28:22Z", + "labels": { + "controller-revision-hash": "b7d447cd7", + "kubernetes.io/cluster-service": "true", + "name": "blobfuse", + "pod-template-generation": "1" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "blobfuse-flexvol-installer", + "uid": "3c4a7413-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "varlog", + "hostPath": { + "path": "/var/log/", + "type": "" + } + }, + { + "name": "volplugins", + "hostPath": { + "path": "/etc/kubernetes/volumeplugins/", + "type": "" + } + }, + { + "name": "default-token-297b2", + "secret": { + "secretName": "default-token-297b2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": 
"blobfuse-flexvol-installer", + "image": "mcr.microsoft.com/k8s/flexvolume/blobfuse-flexvolume:1.0.8", + "resources": { + "limits": { + "cpu": "50m", + "memory": "100Mi" + }, + "requests": { + "cpu": "50m", + "memory": "100Mi" + } + }, + "volumeMounts": [ + { + "name": "volplugins", + "mountPath": "/etc/kubernetes/volumeplugins/" + }, + { + "name": "varlog", + "mountPath": "/var/log/" + }, + { + "name": "default-token-297b2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-agentpool1-15159885-vmss000001", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-agentpool1-15159885-vmss000001" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": 
"2019-04-17T04:28:24Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:49Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:49Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:22Z" + } + ], + "hostIP": "10.240.0.65", + "podIP": "10.240.0.80", + "startTime": "2019-04-17T04:28:24Z", + "containerStatuses": [ + { + "name": "blobfuse-flexvol-installer", + "state": { + "running": { + "startedAt": "2019-06-14T07:59:46Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 255, + "reason": "Error", + "startedAt": "2019-04-17T04:28:29Z", + "finishedAt": "2019-06-14T07:58:49Z", + "containerID": "docker://a3a6f00c2d127e8edcf32942f827a9c80c1d19119a22758f1d7663fb28e2b899" + } + }, + "ready": true, + "restartCount": 1, + "image": "mcr.microsoft.com/k8s/flexvolume/blobfuse-flexvolume:1.0.8", + "imageID": "docker-pullable://mcr.microsoft.com/k8s/flexvolume/blobfuse-flexvolume@sha256:23d8c6033f02a1ecad05127ebdc931bb871264228661bc122704b0974e4d9fdd", + "containerID": "docker://fe2e2fde1c62fd8cfc28688c6168920e664578bb97eeb99609f2d0277c46812c" + } + ], + "qosClass": "Guaranteed" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "blobfuse-flexvol-installer-sh7vs", + "generateName": "blobfuse-flexvol-installer-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/blobfuse-flexvol-installer-sh7vs", + "uid": "3c4e8b3f-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "5110368", + "creationTimestamp": "2019-04-17T04:28:22Z", + "labels": { + "controller-revision-hash": "b7d447cd7", + "kubernetes.io/cluster-service": "true", + "name": "blobfuse", + "pod-template-generation": "1" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "blobfuse-flexvol-installer", + "uid": 
"3c4a7413-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "varlog", + "hostPath": { + "path": "/var/log/", + "type": "" + } + }, + { + "name": "volplugins", + "hostPath": { + "path": "/etc/kubernetes/volumeplugins/", + "type": "" + } + }, + { + "name": "default-token-297b2", + "secret": { + "secretName": "default-token-297b2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "blobfuse-flexvol-installer", + "image": "mcr.microsoft.com/k8s/flexvolume/blobfuse-flexvolume:1.0.8", + "resources": { + "limits": { + "cpu": "50m", + "memory": "100Mi" + }, + "requests": { + "cpu": "50m", + "memory": "100Mi" + } + }, + "volumeMounts": [ + { + "name": "volplugins", + "mountPath": "/etc/kubernetes/volumeplugins/" + }, + { + "name": "varlog", + "mountPath": "/var/log/" + }, + { + "name": "default-token-297b2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-agentpool1-15159885-vmss000000", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-agentpool1-15159885-vmss000000" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": 
"node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:22Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:29Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:29Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:22Z" + } + ], + "hostIP": "10.240.0.34", + "podIP": "10.240.0.42", + "startTime": "2019-04-17T04:28:22Z", + "containerStatuses": [ + { + "name": "blobfuse-flexvol-installer", + "state": { + "running": { + "startedAt": "2019-04-17T04:28:28Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "mcr.microsoft.com/k8s/flexvolume/blobfuse-flexvolume:1.0.8", + "imageID": "docker-pullable://mcr.microsoft.com/k8s/flexvolume/blobfuse-flexvolume@sha256:23d8c6033f02a1ecad05127ebdc931bb871264228661bc122704b0974e4d9fdd", + "containerID": "docker://11c00901b3445daca28933112c94d9921c9e8daec48052d2188a54908337a1fe" + } + ], + "qosClass": "Guaranteed" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "coredns-69c4fccc6c-vqjd9", + "generateName": "coredns-69c4fccc6c-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/coredns-69c4fccc6c-vqjd9", + "uid": "3d100836-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "672", + "creationTimestamp": "2019-04-17T04:28:24Z", + "labels": { + "k8s-app": "kube-dns", + "pod-template-hash": "69c4fccc6c" + }, + 
"annotations": { + "seccomp.security.alpha.kubernetes.io/pod": "docker/default" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "coredns-69c4fccc6c", + "uid": "3c6392eb-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "tmp", + "emptyDir": {} + }, + { + "name": "config-volume", + "configMap": { + "name": "coredns", + "items": [ + { + "key": "Corefile", + "path": "Corefile" + } + ], + "defaultMode": 420 + } + }, + { + "name": "coredns-token-ltgr2", + "secret": { + "secretName": "coredns-token-ltgr2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "coredns", + "image": "k8s.gcr.io/coredns:1.2.2", + "args": [ + "-conf", + "/etc/coredns/Corefile" + ], + "ports": [ + { + "name": "dns", + "containerPort": 53, + "protocol": "UDP" + }, + { + "name": "dns-tcp", + "containerPort": 53, + "protocol": "TCP" + }, + { + "name": "metrics", + "containerPort": 9153, + "protocol": "TCP" + } + ], + "resources": { + "limits": { + "memory": "170Mi" + }, + "requests": { + "cpu": "100m", + "memory": "70Mi" + } + }, + "volumeMounts": [ + { + "name": "config-volume", + "readOnly": true, + "mountPath": "/etc/coredns" + }, + { + "name": "tmp", + "mountPath": "/tmp" + }, + { + "name": "coredns-token-ltgr2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "httpGet": { + "path": "/health", + "port": 8080, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 5 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "capabilities": { + "add": [ + "NET_BIND_SERVICE" + ], + "drop": [ + "all" + ] + }, + "readOnlyRootFilesystem": true, + "allowPrivilegeEscalation": false, + "procMount": "Default" + 
} + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "Default", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "coredns", + "serviceAccount": "coredns", + "nodeName": "k8s-master-15159885-0", + "securityContext": {}, + "affinity": { + "podAntiAffinity": { + "preferredDuringSchedulingIgnoredDuringExecution": [ + { + "weight": 10, + "podAffinityTerm": { + "labelSelector": { + "matchExpressions": [ + { + "key": "k8s-app", + "operator": "In", + "values": [ + "kube-dns" + ] + } + ] + }, + "topologyKey": "failure-domain.beta.kubernetes.io/zone" + } + }, + { + "weight": 5, + "podAffinityTerm": { + "labelSelector": { + "matchExpressions": [ + { + "key": "k8s-app", + "operator": "In", + "values": [ + "kube-dns" + ] + } + ] + }, + "topologyKey": "kubernetes.io/hostname" + } + } + ] + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node-role.kubernetes.io/master", + "effect": "NoSchedule" + }, + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "operator": "Exists", + "effect": "NoExecute" + }, + { + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:24Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:29Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:29Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:24Z" + } + ], + "hostIP": "10.255.255.5", + "podIP": "10.240.0.11", + "startTime": "2019-04-17T04:28:24Z", + "containerStatuses": [ + { + "name": "coredns", + "state": { + "running": { + 
"startedAt": "2019-04-17T04:28:29Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/coredns:1.2.2", + "imageID": "docker-pullable://k8s.gcr.io/coredns@sha256:3e2be1cec87aca0b74b7668bbe8c02964a95a402e45ceb51b2252629d608d03a", + "containerID": "docker://27eafce23c91980dd9dcab79f9afd23ba4cfd934ff9a4d1e6c91890adfdbcbdf" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "heapster-6f6cbcfcf6-nq9jg", + "generateName": "heapster-6f6cbcfcf6-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/heapster-6f6cbcfcf6-nq9jg", + "uid": "3cd3a1fa-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "7362273", + "creationTimestamp": "2019-04-17T04:28:23Z", + "labels": { + "k8s-app": "heapster", + "pod-template-hash": "6f6cbcfcf6" + }, + "annotations": { + "scheduler.alpha.kubernetes.io/critical-pod": "" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "heapster-6f6cbcfcf6", + "uid": "398f0ce1-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "heapster-config-volume", + "configMap": { + "name": "heapster-config", + "defaultMode": 420 + } + }, + { + "name": "heapster-token-xhtkx", + "secret": { + "secretName": "heapster-token-xhtkx", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "heapster", + "image": "k8s.gcr.io/heapster-amd64:v1.5.4", + "command": [ + "/heapster", + "--source=kubernetes.summary_api:''" + ], + "resources": { + "limits": { + "cpu": "88m", + "memory": "204Mi" + }, + "requests": { + "cpu": "88m", + "memory": "204Mi" + } + }, + "volumeMounts": [ + { + "name": "heapster-token-xhtkx", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "httpGet": { + "path": "/healthz", + "port": 8082, + "scheme": "HTTP" + }, + "initialDelaySeconds": 
180, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + }, + { + "name": "heapster-nanny", + "image": "k8s.gcr.io/addon-resizer:1.8.4", + "command": [ + "/pod_nanny", + "--config-dir=/etc/config", + "--cpu=80m", + "--extra-cpu=0.5m", + "--memory=140Mi", + "--extra-memory=4Mi", + "--threshold=5", + "--deployment=heapster", + "--container=heapster", + "--poll-period=300000", + "--estimator=exponential" + ], + "env": [ + { + "name": "MY_POD_NAME", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "metadata.name" + } + } + }, + { + "name": "MY_POD_NAMESPACE", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "metadata.namespace" + } + } + } + ], + "resources": { + "limits": { + "cpu": "88m", + "memory": "204Mi" + }, + "requests": { + "cpu": "88m", + "memory": "204Mi" + } + }, + "volumeMounts": [ + { + "name": "heapster-config-volume", + "mountPath": "/etc/config" + }, + { + "name": "heapster-token-xhtkx", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "heapster", + "serviceAccount": "heapster", + "nodeName": "k8s-agentpool1-15159885-vmss000001", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": 
"NoExecute", + "tolerationSeconds": 300 + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:24Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:52Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:52Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:23Z" + } + ], + "hostIP": "10.240.0.65", + "podIP": "10.240.0.67", + "startTime": "2019-04-17T04:28:24Z", + "containerStatuses": [ + { + "name": "heapster", + "state": { + "running": { + "startedAt": "2019-06-14T07:59:46Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 255, + "reason": "Error", + "startedAt": "2019-04-17T04:28:29Z", + "finishedAt": "2019-06-14T07:58:50Z", + "containerID": "docker://4fe82e9788f21c2659397e65b1b17b198fcf9039eaa8d36fade3802f69e4fa6e" + } + }, + "ready": true, + "restartCount": 1, + "image": "k8s.gcr.io/heapster-amd64:v1.5.4", + "imageID": "docker-pullable://k8s.gcr.io/heapster-amd64@sha256:dccaabb0c20cf05c29baefa1e9bf0358b083ccc0fab492b9b3b47fb7e4db5472", + "containerID": "docker://c3cfed70a5e504c1237f919bf74cbc304f343b3daf4fd7259a4030d8d5595afb" + }, + { + "name": "heapster-nanny", + "state": { + "running": { + "startedAt": "2019-06-14T07:59:52Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 255, + "reason": "Error", + "startedAt": "2019-04-17T04:28:31Z", + "finishedAt": "2019-06-14T07:58:50Z", + "containerID": "docker://bf60e749edf895279e23c7476749e54a400d112c13b305ee3c14b0e1566b912e" + } + }, + "ready": true, + "restartCount": 1, + "image": "k8s.gcr.io/addon-resizer:1.8.4", + "imageID": 
"docker-pullable://k8s.gcr.io/addon-resizer@sha256:a31822f30e947885d038812f4a5a5675e72f92c06cef17b1989c80426aa89012", + "containerID": "docker://8097a0688928cb82264ec0b69246eafd5e3bca9fe526d3b997ae2ef2a601aa9b" + } + ], + "qosClass": "Guaranteed" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "keyvault-flexvolume-tcxxk", + "generateName": "keyvault-flexvolume-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/keyvault-flexvolume-tcxxk", + "uid": "398d186b-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "5110367", + "creationTimestamp": "2019-04-17T04:28:18Z", + "labels": { + "addonmanager.kubernetes.io/mode": "EnsureExists", + "app": "keyvault-flexvolume", + "controller-revision-hash": "57fd55fc4c", + "kubernetes.io/cluster-service": "true", + "pod-template-generation": "1" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "keyvault-flexvolume", + "uid": "396bd1ea-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "volplugins", + "hostPath": { + "path": "/etc/kubernetes/volumeplugins", + "type": "" + } + }, + { + "name": "default-token-297b2", + "secret": { + "secretName": "default-token-297b2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "keyvault-flexvolume", + "image": "mcr.microsoft.com/k8s/flexvolume/keyvault-flexvolume:v0.0.7", + "env": [ + { + "name": "TARGET_DIR", + "value": "/etc/kubernetes/volumeplugins" + } + ], + "resources": { + "limits": { + "cpu": "50m", + "memory": "100Mi" + }, + "requests": { + "cpu": "50m", + "memory": "100Mi" + } + }, + "volumeMounts": [ + { + "name": "volplugins", + "mountPath": "/etc/kubernetes/volumeplugins" + }, + { + "name": "default-token-297b2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + 
"terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-agentpool1-15159885-vmss000000", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-agentpool1-15159885-vmss000000" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:18Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:25Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:25Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:18Z" + } + ], + "hostIP": "10.240.0.34", + "podIP": "10.240.0.46", + "startTime": "2019-04-17T04:28:18Z", + "containerStatuses": [ + { + "name": "keyvault-flexvolume", + "state": { + "running": { + 
"startedAt": "2019-04-17T04:28:25Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "mcr.microsoft.com/k8s/flexvolume/keyvault-flexvolume:v0.0.7", + "imageID": "docker-pullable://mcr.microsoft.com/k8s/flexvolume/keyvault-flexvolume@sha256:4fd30d43947d4a54fc89ead7985beecfd3c9b2a93a0655a373b1608ab90bd5af", + "containerID": "docker://32a3b44a0a7e8ed53867743709d84db19a0bf7c6adacc28b43221c39a9b2c028" + } + ], + "qosClass": "Guaranteed" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "keyvault-flexvolume-wxzvc", + "generateName": "keyvault-flexvolume-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/keyvault-flexvolume-wxzvc", + "uid": "398a00ce-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "7362237", + "creationTimestamp": "2019-04-17T04:28:18Z", + "labels": { + "addonmanager.kubernetes.io/mode": "EnsureExists", + "app": "keyvault-flexvolume", + "controller-revision-hash": "57fd55fc4c", + "kubernetes.io/cluster-service": "true", + "pod-template-generation": "1" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "keyvault-flexvolume", + "uid": "396bd1ea-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "volplugins", + "hostPath": { + "path": "/etc/kubernetes/volumeplugins", + "type": "" + } + }, + { + "name": "default-token-297b2", + "secret": { + "secretName": "default-token-297b2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "keyvault-flexvolume", + "image": "mcr.microsoft.com/k8s/flexvolume/keyvault-flexvolume:v0.0.7", + "env": [ + { + "name": "TARGET_DIR", + "value": "/etc/kubernetes/volumeplugins" + } + ], + "resources": { + "limits": { + "cpu": "50m", + "memory": "100Mi" + }, + "requests": { + "cpu": "50m", + "memory": "100Mi" + } + }, + "volumeMounts": [ + { + "name": "volplugins", + "mountPath": 
"/etc/kubernetes/volumeplugins" + }, + { + "name": "default-token-297b2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-agentpool1-15159885-vmss000001", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-agentpool1-15159885-vmss000001" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:18Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:47Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:47Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": 
"2019-04-17T04:28:18Z" + } + ], + "hostIP": "10.240.0.65", + "podIP": "10.240.0.68", + "startTime": "2019-04-17T04:28:18Z", + "containerStatuses": [ + { + "name": "keyvault-flexvolume", + "state": { + "running": { + "startedAt": "2019-06-14T07:59:38Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 255, + "reason": "Error", + "startedAt": "2019-04-17T04:28:26Z", + "finishedAt": "2019-06-14T07:58:50Z", + "containerID": "docker://622bb2e556297f093a67c213a22adde5c0c1202be701fa5685e8801267c2a3e3" + } + }, + "ready": true, + "restartCount": 1, + "image": "mcr.microsoft.com/k8s/flexvolume/keyvault-flexvolume:v0.0.7", + "imageID": "docker-pullable://mcr.microsoft.com/k8s/flexvolume/keyvault-flexvolume@sha256:4fd30d43947d4a54fc89ead7985beecfd3c9b2a93a0655a373b1608ab90bd5af", + "containerID": "docker://ab2ce04c40220650f742f91297c317cbcb71fbd6a6d0d9909bb4c2578a8a2bc7" + } + ], + "qosClass": "Guaranteed" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kube-addon-manager-k8s-master-15159885-0", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-addon-manager-k8s-master-15159885-0", + "uid": "531eee2e-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "752", + "creationTimestamp": "2019-04-17T04:29:01Z", + "annotations": { + "kubernetes.io/config.hash": "9b6d7694dd69e6f24ee489db7850e74c", + "kubernetes.io/config.mirror": "9b6d7694dd69e6f24ee489db7850e74c", + "kubernetes.io/config.seen": "2019-04-17T04:27:21.914647772Z", + "kubernetes.io/config.source": "file" + } + }, + "spec": { + "volumes": [ + { + "name": "addons", + "hostPath": { + "path": "/etc/kubernetes/addons", + "type": "" + } + }, + { + "name": "msi", + "hostPath": { + "path": "/var/lib/waagent/ManagedIdentity-Settings", + "type": "" + } + } + ], + "containers": [ + { + "name": "kube-addon-manager", + "image": "k8s.gcr.io/kube-addon-manager-amd64:v8.7", + "resources": { + "requests": { + "cpu": "5m", + "memory": "50Mi" + } + }, + 
"volumeMounts": [ + { + "name": "addons", + "readOnly": true, + "mountPath": "/etc/kubernetes/addons" + }, + { + "name": "msi", + "readOnly": true, + "mountPath": "/var/lib/waagent/ManagedIdentity-Settings" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeName": "k8s-master-15159885-0", + "hostNetwork": true, + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "operator": "Exists", + "effect": "NoExecute" + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:27:23Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:27:33Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:27:33Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:27:23Z" + } + ], + "hostIP": "10.255.255.5", + "podIP": "10.255.255.5", + "startTime": "2019-04-17T04:27:23Z", + "containerStatuses": [ + { + "name": "kube-addon-manager", + "state": { + "running": { + "startedAt": "2019-04-17T04:27:32Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/kube-addon-manager-amd64:v8.7", + "imageID": "docker-pullable://k8s.gcr.io/kube-addon-manager-amd64@sha256:d53486c3a0b49ebee019932878dc44232735d5622a51dbbdcec7124199020d09", + "containerID": "docker://532518ffaf3a046fc478091d8341f0855a3152a01ef630aa28f5347f9757b3a2" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kube-apiserver-k8s-master-15159885-0", + "namespace": "kube-system", 
+ "selfLink": "/api/v1/namespaces/kube-system/pods/kube-apiserver-k8s-master-15159885-0", + "uid": "4ef304e5-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "753", + "creationTimestamp": "2019-04-17T04:28:54Z", + "labels": { + "component": "kube-apiserver", + "tier": "control-plane" + }, + "annotations": { + "kubernetes.io/config.hash": "ac8862219c876924f19eed3f22c7f33f", + "kubernetes.io/config.mirror": "ac8862219c876924f19eed3f22c7f33f", + "kubernetes.io/config.seen": "2019-04-17T04:27:21.914656172Z", + "kubernetes.io/config.source": "file" + } + }, + "spec": { + "volumes": [ + { + "name": "etc-kubernetes", + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + } + }, + { + "name": "var-lib-kubelet", + "hostPath": { + "path": "/var/lib/kubelet", + "type": "" + } + }, + { + "name": "msi", + "hostPath": { + "path": "/var/lib/waagent/ManagedIdentity-Settings", + "type": "" + } + }, + { + "name": "sock", + "hostPath": { + "path": "/opt", + "type": "" + } + }, + { + "name": "auditlog", + "hostPath": { + "path": "/var/log/kubeaudit", + "type": "" + } + } + ], + "containers": [ + { + "name": "kube-apiserver", + "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", + "command": [ + "/hyperkube", + "apiserver" + ], + "args": [ + "--advertise-address=10.255.255.5", + "--allow-privileged=true", + "--anonymous-auth=false", + "--audit-log-maxage=30", + "--audit-log-maxbackup=10", + "--audit-log-maxsize=100", + "--audit-log-path=/var/log/kubeaudit/audit.log", + "--audit-policy-file=/etc/kubernetes/addons/audit-policy.yaml", + "--authorization-mode=Node,RBAC", + "--bind-address=0.0.0.0", + "--client-ca-file=/etc/kubernetes/certs/ca.crt", + "--cloud-config=/etc/kubernetes/azure.json", + "--cloud-provider=azure", + "--enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,ValidatingAdmissionWebhook,ResourceQuota,ExtendedResourceToleration", + "--enable-bootstrap-token-auth=true", + 
"--etcd-cafile=/etc/kubernetes/certs/ca.crt", + "--etcd-certfile=/etc/kubernetes/certs/etcdclient.crt", + "--etcd-keyfile=/etc/kubernetes/certs/etcdclient.key", + "--etcd-servers=https://127.0.0.1:2379", + "--insecure-port=8080", + "--kubelet-client-certificate=/etc/kubernetes/certs/client.crt", + "--kubelet-client-key=/etc/kubernetes/certs/client.key", + "--profiling=false", + "--proxy-client-cert-file=/etc/kubernetes/certs/proxy.crt", + "--proxy-client-key-file=/etc/kubernetes/certs/proxy.key", + "--repair-malformed-updates=false", + "--requestheader-allowed-names=", + "--requestheader-client-ca-file=/etc/kubernetes/certs/proxy-ca.crt", + "--requestheader-extra-headers-prefix=X-Remote-Extra-", + "--requestheader-group-headers=X-Remote-Group", + "--requestheader-username-headers=X-Remote-User", + "--secure-port=443", + "--service-account-key-file=/etc/kubernetes/certs/apiserver.key", + "--service-account-lookup=true", + "--service-cluster-ip-range=10.0.0.0/16", + "--storage-backend=etcd3", + "--tls-cert-file=/etc/kubernetes/certs/apiserver.crt", + "--tls-cipher-suites=TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA", + "--tls-private-key-file=/etc/kubernetes/certs/apiserver.key", + "--v=4" + ], + "resources": {}, + "volumeMounts": [ + { + "name": "etc-kubernetes", + "mountPath": "/etc/kubernetes" + }, + { + "name": "var-lib-kubelet", + "mountPath": "/var/lib/kubelet" + }, + { + "name": "msi", + "readOnly": true, + "mountPath": "/var/lib/waagent/ManagedIdentity-Settings" + }, + { + "name": "sock", + "mountPath": "/opt" + }, + { + "name": "auditlog", + "mountPath": "/var/log/kubeaudit" + } + ], + "terminationMessagePath": 
"/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeName": "k8s-master-15159885-0", + "hostNetwork": true, + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "operator": "Exists", + "effect": "NoExecute" + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:27:23Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:27:32Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:27:32Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:27:23Z" + } + ], + "hostIP": "10.255.255.5", + "podIP": "10.255.255.5", + "startTime": "2019-04-17T04:27:23Z", + "containerStatuses": [ + { + "name": "kube-apiserver", + "state": { + "running": { + "startedAt": "2019-04-17T04:27:32Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", + "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", + "containerID": "docker://39ddc73db1132efddaaac859c77e2e2dbbe7af97e8de6340f45b56e5fa26d9ca" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kube-controller-manager-k8s-master-15159885-0", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-controller-manager-k8s-master-15159885-0", + "uid": "4e5a8160-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "7367800", + "creationTimestamp": "2019-04-17T04:28:53Z", + "labels": { + 
"component": "kube-controller-manager", + "tier": "control-plane" + }, + "annotations": { + "kubernetes.io/config.hash": "09f954293df880f1d19599a8f2e11701", + "kubernetes.io/config.mirror": "09f954293df880f1d19599a8f2e11701", + "kubernetes.io/config.seen": "2019-04-17T04:27:21.914658072Z", + "kubernetes.io/config.source": "file" + } + }, + "spec": { + "volumes": [ + { + "name": "etc-kubernetes", + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + } + }, + { + "name": "var-lib-kubelet", + "hostPath": { + "path": "/var/lib/kubelet", + "type": "" + } + }, + { + "name": "msi", + "hostPath": { + "path": "/var/lib/waagent/ManagedIdentity-Settings", + "type": "" + } + } + ], + "containers": [ + { + "name": "kube-controller-manager", + "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", + "command": [ + "/hyperkube", + "controller-manager" + ], + "args": [ + "--allocate-node-cidrs=false", + "--cloud-config=/etc/kubernetes/azure.json", + "--cloud-provider=azure", + "--cluster-cidr=10.240.0.0/12", + "--cluster-name=aks-engine-health", + "--cluster-signing-cert-file=/etc/kubernetes/certs/ca.crt", + "--cluster-signing-key-file=/etc/kubernetes/certs/ca.key", + "--configure-cloud-routes=false", + "--controllers=*,bootstrapsigner,tokencleaner", + "--feature-gates=LocalStorageCapacityIsolation=true,ServiceNodeExclusion=true", + "--kubeconfig=/var/lib/kubelet/kubeconfig", + "--leader-elect=true", + "--node-monitor-grace-period=40s", + "--pod-eviction-timeout=5m0s", + "--profiling=false", + "--root-ca-file=/etc/kubernetes/certs/ca.crt", + "--route-reconciliation-period=10s", + "--service-account-private-key-file=/etc/kubernetes/certs/apiserver.key", + "--terminated-pod-gc-threshold=5000", + "--use-service-account-credentials=true", + "--v=2" + ], + "resources": {}, + "volumeMounts": [ + { + "name": "etc-kubernetes", + "mountPath": "/etc/kubernetes" + }, + { + "name": "var-lib-kubelet", + "mountPath": "/var/lib/kubelet" + }, + { + "name": "msi", + "readOnly": true, + "mountPath": 
"/var/lib/waagent/ManagedIdentity-Settings" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeName": "k8s-master-15159885-0", + "hostNetwork": true, + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "operator": "Exists", + "effect": "NoExecute" + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:27:23Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T09:02:46Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T09:02:46Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:27:23Z" + } + ], + "hostIP": "10.255.255.5", + "podIP": "10.255.255.5", + "startTime": "2019-04-17T04:27:23Z", + "containerStatuses": [ + { + "name": "kube-controller-manager", + "state": { + "running": { + "startedAt": "2019-06-14T09:02:46Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 255, + "reason": "Error", + "startedAt": "2019-06-11T17:20:25Z", + "finishedAt": "2019-06-14T09:02:40Z", + "containerID": "docker://c485dbd938dbded8ecc0ce25656c00b3efc702aa3403bbdd45bca5527ed70ac3" + } + }, + "ready": true, + "restartCount": 5, + "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", + "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", + "containerID": "docker://981b3ead0f719297dd6fc7d8583733c10a8cd282e04aef00c94884bb67ff0dc3" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": 
"kube-proxy-8d8kh", + "generateName": "kube-proxy-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-8d8kh", + "uid": "3ccad1a3-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "5110371", + "creationTimestamp": "2019-04-17T04:28:23Z", + "labels": { + "component": "kube-proxy", + "controller-revision-hash": "77f7f9d65b", + "pod-template-generation": "1", + "tier": "node" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "kube-proxy", + "uid": "3cc3b072-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "ssl-certs-host", + "hostPath": { + "path": "/usr/share/ca-certificates", + "type": "" + } + }, + { + "name": "kubeconfig", + "hostPath": { + "path": "/var/lib/kubelet/kubeconfig", + "type": "" + } + }, + { + "name": "etc-kubernetes", + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + } + }, + { + "name": "iptableslock", + "hostPath": { + "path": "/run/xtables.lock", + "type": "" + } + }, + { + "name": "kernelmodules", + "hostPath": { + "path": "/lib/modules/", + "type": "" + } + }, + { + "name": "default-token-297b2", + "secret": { + "secretName": "default-token-297b2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "kube-proxy", + "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", + "command": [ + "/hyperkube", + "proxy", + "--kubeconfig=/var/lib/kubelet/kubeconfig", + "--cluster-cidr=10.240.0.0/12", + "--feature-gates=ExperimentalCriticalPodAnnotation=true", + "--proxy-mode=iptables" + ], + "resources": { + "requests": { + "cpu": "100m" + } + }, + "volumeMounts": [ + { + "name": "ssl-certs-host", + "readOnly": true, + "mountPath": "/etc/ssl/certs" + }, + { + "name": "etc-kubernetes", + "readOnly": true, + "mountPath": "/etc/kubernetes" + }, + { + "name": "kubeconfig", + "readOnly": true, + "mountPath": "/var/lib/kubelet/kubeconfig" + }, + { + "name": "iptableslock", + 
"mountPath": "/run/xtables.lock" + }, + { + "name": "kernelmodules", + "readOnly": true, + "mountPath": "/lib/modules/" + }, + { + "name": "default-token-297b2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true, + "procMount": "Default" + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-agentpool1-15159885-vmss000000", + "hostNetwork": true, + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-agentpool1-15159885-vmss000000" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true", + "effect": "NoSchedule" + }, + { + "operator": "Exists", + "effect": "NoExecute" + }, + { + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + 
"priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:24Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:30Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:30Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:23Z" + } + ], + "hostIP": "10.240.0.34", + "podIP": "10.240.0.34", + "startTime": "2019-04-17T04:28:24Z", + "containerStatuses": [ + { + "name": "kube-proxy", + "state": { + "running": { + "startedAt": "2019-04-17T04:28:28Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", + "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", + "containerID": "docker://00c9202089cbbd3606f7bde9b1217ee2c00e24ab2090ffba36d65e44b2875423" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kube-proxy-gxs7l", + "generateName": "kube-proxy-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-gxs7l", + "uid": "3cc8af8a-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "7362266", + "creationTimestamp": "2019-04-17T04:28:23Z", + "labels": { + "component": "kube-proxy", + "controller-revision-hash": "77f7f9d65b", + "pod-template-generation": "1", + "tier": "node" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "kube-proxy", + "uid": "3cc3b072-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "ssl-certs-host", + 
"hostPath": { + "path": "/usr/share/ca-certificates", + "type": "" + } + }, + { + "name": "kubeconfig", + "hostPath": { + "path": "/var/lib/kubelet/kubeconfig", + "type": "" + } + }, + { + "name": "etc-kubernetes", + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + } + }, + { + "name": "iptableslock", + "hostPath": { + "path": "/run/xtables.lock", + "type": "" + } + }, + { + "name": "kernelmodules", + "hostPath": { + "path": "/lib/modules/", + "type": "" + } + }, + { + "name": "default-token-297b2", + "secret": { + "secretName": "default-token-297b2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "kube-proxy", + "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", + "command": [ + "/hyperkube", + "proxy", + "--kubeconfig=/var/lib/kubelet/kubeconfig", + "--cluster-cidr=10.240.0.0/12", + "--feature-gates=ExperimentalCriticalPodAnnotation=true", + "--proxy-mode=iptables" + ], + "resources": { + "requests": { + "cpu": "100m" + } + }, + "volumeMounts": [ + { + "name": "ssl-certs-host", + "readOnly": true, + "mountPath": "/etc/ssl/certs" + }, + { + "name": "etc-kubernetes", + "readOnly": true, + "mountPath": "/etc/kubernetes" + }, + { + "name": "kubeconfig", + "readOnly": true, + "mountPath": "/var/lib/kubelet/kubeconfig" + }, + { + "name": "iptableslock", + "mountPath": "/run/xtables.lock" + }, + { + "name": "kernelmodules", + "readOnly": true, + "mountPath": "/lib/modules/" + }, + { + "name": "default-token-297b2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true, + "procMount": "Default" + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": 
"k8s-agentpool1-15159885-vmss000001", + "hostNetwork": true, + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-agentpool1-15159885-vmss000001" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true", + "effect": "NoSchedule" + }, + { + "operator": "Exists", + "effect": "NoExecute" + }, + { + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:24Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:50Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:50Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:23Z" + } + ], + "hostIP": "10.240.0.65", + "podIP": "10.240.0.65", + "startTime": 
"2019-04-17T04:28:24Z", + "containerStatuses": [ + { + "name": "kube-proxy", + "state": { + "running": { + "startedAt": "2019-06-14T07:59:49Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 255, + "reason": "Error", + "startedAt": "2019-04-17T04:28:29Z", + "finishedAt": "2019-06-14T07:58:50Z", + "containerID": "docker://07135f639d2e785e4e34ed47b9175df3e9f122fdb8020a7d394eca04ca337d68" + } + }, + "ready": true, + "restartCount": 1, + "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", + "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", + "containerID": "docker://4cf5ab87fc206f4ef92cfed7859392056418e0759c798c1b94d19f885d423ab2" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kube-proxy-vth8z", + "generateName": "kube-proxy-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-vth8z", + "uid": "3cca5c6e-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "645", + "creationTimestamp": "2019-04-17T04:28:23Z", + "labels": { + "component": "kube-proxy", + "controller-revision-hash": "77f7f9d65b", + "pod-template-generation": "1", + "tier": "node" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "kube-proxy", + "uid": "3cc3b072-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "ssl-certs-host", + "hostPath": { + "path": "/usr/share/ca-certificates", + "type": "" + } + }, + { + "name": "kubeconfig", + "hostPath": { + "path": "/var/lib/kubelet/kubeconfig", + "type": "" + } + }, + { + "name": "etc-kubernetes", + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + } + }, + { + "name": "iptableslock", + "hostPath": { + "path": "/run/xtables.lock", + "type": "" + } + }, + { + "name": "kernelmodules", + "hostPath": { + "path": "/lib/modules/", + "type": "" + 
} + }, + { + "name": "default-token-297b2", + "secret": { + "secretName": "default-token-297b2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "kube-proxy", + "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", + "command": [ + "/hyperkube", + "proxy", + "--kubeconfig=/var/lib/kubelet/kubeconfig", + "--cluster-cidr=10.240.0.0/12", + "--feature-gates=ExperimentalCriticalPodAnnotation=true", + "--proxy-mode=iptables" + ], + "resources": { + "requests": { + "cpu": "100m" + } + }, + "volumeMounts": [ + { + "name": "ssl-certs-host", + "readOnly": true, + "mountPath": "/etc/ssl/certs" + }, + { + "name": "etc-kubernetes", + "readOnly": true, + "mountPath": "/etc/kubernetes" + }, + { + "name": "kubeconfig", + "readOnly": true, + "mountPath": "/var/lib/kubelet/kubeconfig" + }, + { + "name": "iptableslock", + "mountPath": "/run/xtables.lock" + }, + { + "name": "kernelmodules", + "readOnly": true, + "mountPath": "/lib/modules/" + }, + { + "name": "default-token-297b2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true, + "procMount": "Default" + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "k8s-master-15159885-0", + "hostNetwork": true, + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-master-15159885-0" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + 
"value": "true", + "effect": "NoSchedule" + }, + { + "operator": "Exists", + "effect": "NoExecute" + }, + { + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:24Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:28Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:28Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:23Z" + } + ], + "hostIP": "10.255.255.5", + "podIP": "10.255.255.5", + "startTime": "2019-04-17T04:28:24Z", + "containerStatuses": [ + { + "name": "kube-proxy", + "state": { + "running": { + "startedAt": "2019-04-17T04:28:26Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", + "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", + "containerID": "docker://b84bffd5f1bca13f1b880363816417fb1b13938ad067530e36ba796ffa43a5a9" + } + ], + "qosClass": 
"Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kube-scheduler-k8s-master-15159885-0", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-scheduler-k8s-master-15159885-0", + "uid": "48fcfcf4-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "7367801", + "creationTimestamp": "2019-04-17T04:28:44Z", + "labels": { + "component": "kube-scheduler", + "tier": "control-plane" + }, + "annotations": { + "kubernetes.io/config.hash": "0ae37fc92f08b39c7641e5282c92b279", + "kubernetes.io/config.mirror": "0ae37fc92f08b39c7641e5282c92b279", + "kubernetes.io/config.seen": "2019-04-17T04:27:21.914659772Z", + "kubernetes.io/config.source": "file" + } + }, + "spec": { + "volumes": [ + { + "name": "etc-kubernetes", + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + } + }, + { + "name": "var-lib-kubelet", + "hostPath": { + "path": "/var/lib/kubelet", + "type": "" + } + }, + { + "name": "msi", + "hostPath": { + "path": "/var/lib/waagent/ManagedIdentity-Settings", + "type": "" + } + } + ], + "containers": [ + { + "name": "kube-scheduler", + "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", + "command": [ + "/hyperkube", + "scheduler" + ], + "args": [ + "--kubeconfig=/var/lib/kubelet/kubeconfig", + "--leader-elect=true", + "--profiling=false", + "--v=2" + ], + "resources": {}, + "volumeMounts": [ + { + "name": "etc-kubernetes", + "mountPath": "/etc/kubernetes" + }, + { + "name": "var-lib-kubelet", + "mountPath": "/var/lib/kubelet" + }, + { + "name": "msi", + "readOnly": true, + "mountPath": "/var/lib/waagent/ManagedIdentity-Settings" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeName": "k8s-master-15159885-0", + "hostNetwork": true, + "securityContext": {}, + "schedulerName": "default-scheduler", + 
"tolerations": [ + { + "operator": "Exists", + "effect": "NoExecute" + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:27:26Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T09:02:46Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T09:02:46Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:27:26Z" + } + ], + "hostIP": "10.255.255.5", + "podIP": "10.255.255.5", + "startTime": "2019-04-17T04:27:26Z", + "containerStatuses": [ + { + "name": "kube-scheduler", + "state": { + "running": { + "startedAt": "2019-06-14T09:02:46Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 1, + "reason": "Error", + "startedAt": "2019-06-11T17:20:25Z", + "finishedAt": "2019-06-14T09:02:41Z", + "containerID": "docker://557db82de83e2fbea6d231e7ee9643501038e3d7bded085c65d8e9a392ec785a" + } + }, + "ready": true, + "restartCount": 6, + "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", + "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", + "containerID": "docker://5445127376fb63c7f3a45781e499e90a7d1f668d8d43f4064cbe8de7f28e9dd8" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kubernetes-dashboard-9bf969764-nswqh", + "generateName": "kubernetes-dashboard-9bf969764-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kubernetes-dashboard-9bf969764-nswqh", + "uid": "3cfa7f6c-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "5110357", + "creationTimestamp": "2019-04-17T04:28:23Z", + "labels": { + "k8s-app": "kubernetes-dashboard", + "pod-template-hash": "9bf969764" + }, 
+ "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "kubernetes-dashboard-9bf969764", + "uid": "3cf41c1e-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "kubernetes-dashboard-certs", + "emptyDir": {} + }, + { + "name": "kubernetes-dashboard-token-kpldg", + "secret": { + "secretName": "kubernetes-dashboard-token-kpldg", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "kubernetes-dashboard", + "image": "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1", + "args": [ + "--auto-generate-certificates", + "--heapster-host=http://heapster.kube-system:80" + ], + "ports": [ + { + "containerPort": 8443, + "protocol": "TCP" + } + ], + "resources": { + "limits": { + "cpu": "300m", + "memory": "150Mi" + }, + "requests": { + "cpu": "300m", + "memory": "150Mi" + } + }, + "volumeMounts": [ + { + "name": "kubernetes-dashboard-certs", + "mountPath": "/certs" + }, + { + "name": "kubernetes-dashboard-token-kpldg", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "httpGet": { + "path": "/", + "port": 8443, + "scheme": "HTTPS" + }, + "initialDelaySeconds": 30, + "timeoutSeconds": 30, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "kubernetes-dashboard", + "serviceAccount": "kubernetes-dashboard", + "nodeName": "k8s-agentpool1-15159885-vmss000000", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 
300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:24Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:29:04Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:29:04Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:24Z" + } + ], + "hostIP": "10.240.0.34", + "podIP": "10.240.0.40", + "startTime": "2019-04-17T04:28:24Z", + "containerStatuses": [ + { + "name": "kubernetes-dashboard", + "state": { + "running": { + "startedAt": "2019-04-17T04:29:03Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 1, + "reason": "Error", + "startedAt": "2019-04-17T04:28:29Z", + "finishedAt": "2019-04-17T04:29:02Z", + "containerID": "docker://28f5aa442fd07db9ceeb73f73f73622e515bba3f3f8d06494046d349c660d6a2" + } + }, + "ready": true, + "restartCount": 1, + "image": "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1", + "imageID": "docker-pullable://k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", + "containerID": "docker://9a5b3b9bbc3b298261a88db12106d0096a542332985fa03043c5dfe0e386dee9" + } + ], + "qosClass": "Guaranteed" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "metrics-server-67b4964794-l6qgt", + "generateName": "metrics-server-67b4964794-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/metrics-server-67b4964794-l6qgt", + "uid": "3cbba31a-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "5110354", + "creationTimestamp": "2019-04-17T04:28:23Z", + "labels": { + 
"k8s-app": "metrics-server", + "pod-template-hash": "67b4964794" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "metrics-server-67b4964794", + "uid": "3cb826ab-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "metrics-server-token-8d2n9", + "secret": { + "secretName": "metrics-server-token-8d2n9", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "metrics-server", + "image": "k8s.gcr.io/metrics-server-amd64:v0.2.1", + "command": [ + "/metrics-server", + "--source=kubernetes.summary_api:''" + ], + "resources": {}, + "volumeMounts": [ + { + "name": "metrics-server-token-8d2n9", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "metrics-server", + "serviceAccount": "metrics-server", + "nodeName": "k8s-agentpool1-15159885-vmss000000", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:23Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:29:04Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + 
"lastTransitionTime": "2019-04-17T04:29:04Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:23Z" + } + ], + "hostIP": "10.240.0.34", + "podIP": "10.240.0.62", + "startTime": "2019-04-17T04:28:23Z", + "containerStatuses": [ + { + "name": "metrics-server", + "state": { + "running": { + "startedAt": "2019-04-17T04:29:03Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 255, + "reason": "Error", + "startedAt": "2019-04-17T04:28:28Z", + "finishedAt": "2019-04-17T04:29:02Z", + "containerID": "docker://4aebee6450b565af51cddd7889d2a1af3d9de84d7cacd1f8d5679280f68c58a8" + } + }, + "ready": true, + "restartCount": 1, + "image": "k8s.gcr.io/metrics-server-amd64:v0.2.1", + "imageID": "docker-pullable://k8s.gcr.io/metrics-server-amd64@sha256:49a9f12f7067d11f42c803dbe61ed2c1299959ad85cb315b25ff7eef8e6b8892", + "containerID": "docker://cd21e1db0924d6ce7f2158268bd2bb7a658b624800cc7bfa6935154628f18593" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "omsagent-62cnn", + "generateName": "omsagent-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-62cnn", + "uid": "838b2248-925b-11e9-a358-000d3a53d49f", + "resourceVersion": "7987224", + "creationTimestamp": "2019-06-19T06:28:55Z", + "labels": { + "controller-revision-hash": "775fd7566d", + "dsName": "omsagent-ds", + "pod-template-generation": "1" + }, + "annotations": { + "agentVersion": "1.10.0.1", + "dockerProviderVersion": "5.0.0-1", + "schema-versions": "v1" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "omsagent", + "uid": "838334ba-925b-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "host-root", + "hostPath": { + "path": "/", + "type": "" + } + }, + { + "name": "docker-sock", + "hostPath": { + "path": "/var/run", 
+ "type": "" + } + }, + { + "name": "container-hostname", + "hostPath": { + "path": "/etc/hostname", + "type": "" + } + }, + { + "name": "host-log", + "hostPath": { + "path": "/var/log", + "type": "" + } + }, + { + "name": "containerlog-path", + "hostPath": { + "path": "/var/lib/docker/containers", + "type": "" + } + }, + { + "name": "azure-json-path", + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + } + }, + { + "name": "omsagent-secret", + "secret": { + "secretName": "omsagent-secret", + "defaultMode": 420 + } + }, + { + "name": "settings-vol-config", + "configMap": { + "name": "container-azm-ms-agentconfig", + "defaultMode": 420, + "optional": true + } + }, + { + "name": "omsagent-token-4d9bp", + "secret": { + "secretName": "omsagent-token-4d9bp", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "omsagent", + "image": "rdilip83/healthpreview06192019", + "ports": [ + { + "containerPort": 25225, + "protocol": "TCP" + }, + { + "containerPort": 25224, + "protocol": "UDP" + } + ], + "env": [ + { + "name": "AKS_RESOURCE_ID", + "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "name": "AKS_REGION", + "value": "eastus" + }, + { + "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", + "value": "true" + }, + { + "name": "CONTROLLER_TYPE", + "value": "DaemonSet" + }, + { + "name": "NODE_IP", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "status.hostIP" + } + } + } + ], + "resources": { + "limits": { + "cpu": "150m", + "memory": "300Mi" + }, + "requests": { + "cpu": "75m", + "memory": "225Mi" + } + }, + "volumeMounts": [ + { + "name": "host-root", + "readOnly": true, + "mountPath": "/hostfs" + }, + { + "name": "docker-sock", + "mountPath": "/var/run/host" + }, + { + "name": "host-log", + "mountPath": "/var/log" + }, + { + "name": "containerlog-path", + "mountPath": "/var/lib/docker/containers" + 
}, + { + "name": "azure-json-path", + "mountPath": "/etc/kubernetes/host" + }, + { + "name": "omsagent-secret", + "mountPath": "/etc/omsagent-secret" + }, + { + "name": "settings-vol-config", + "readOnly": true, + "mountPath": "/etc/config/settings" + }, + { + "name": "omsagent-token-4d9bp", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "exec": { + "command": [ + "/bin/bash", + "-c", + "ps -ef | grep main" + ] + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 1, + "periodSeconds": 60, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true, + "procMount": "Default" + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "omsagent", + "serviceAccount": "omsagent", + "nodeName": "k8s-agentpool1-15159885-vmss000001", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-agentpool1-15159885-vmss000001" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + 
"key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:28:55Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:29:20Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:29:20Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:28:55Z" + } + ], + "hostIP": "10.240.0.65", + "podIP": "10.240.0.73", + "startTime": "2019-06-19T06:28:55Z", + "containerStatuses": [ + { + "name": "omsagent", + "state": { + "running": { + "startedAt": "2019-06-19T06:29:19Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "rdilip83/healthpreview06192019:latest", + "imageID": "docker-pullable://rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", + "containerID": "docker://20e770236356a355504ff6629d456e2c974026fb9e63e92a60570c088b1682e6" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "omsagent-ft74m", + "generateName": "omsagent-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-ft74m", + "uid": "83882687-925b-11e9-a358-000d3a53d49f", + "resourceVersion": "7987185", + "creationTimestamp": "2019-06-19T06:28:55Z", + "labels": { + "controller-revision-hash": "775fd7566d", + "dsName": "omsagent-ds", + "pod-template-generation": "1" + }, + "annotations": { + "agentVersion": "1.10.0.1", + "dockerProviderVersion": "5.0.0-1", + "schema-versions": "v1" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "omsagent", + "uid": 
"838334ba-925b-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "host-root", + "hostPath": { + "path": "/", + "type": "" + } + }, + { + "name": "docker-sock", + "hostPath": { + "path": "/var/run", + "type": "" + } + }, + { + "name": "container-hostname", + "hostPath": { + "path": "/etc/hostname", + "type": "" + } + }, + { + "name": "host-log", + "hostPath": { + "path": "/var/log", + "type": "" + } + }, + { + "name": "containerlog-path", + "hostPath": { + "path": "/var/lib/docker/containers", + "type": "" + } + }, + { + "name": "azure-json-path", + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + } + }, + { + "name": "omsagent-secret", + "secret": { + "secretName": "omsagent-secret", + "defaultMode": 420 + } + }, + { + "name": "settings-vol-config", + "configMap": { + "name": "container-azm-ms-agentconfig", + "defaultMode": 420, + "optional": true + } + }, + { + "name": "omsagent-token-4d9bp", + "secret": { + "secretName": "omsagent-token-4d9bp", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "omsagent", + "image": "rdilip83/healthpreview06192019", + "ports": [ + { + "containerPort": 25225, + "protocol": "TCP" + }, + { + "containerPort": 25224, + "protocol": "UDP" + } + ], + "env": [ + { + "name": "AKS_RESOURCE_ID", + "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "name": "AKS_REGION", + "value": "eastus" + }, + { + "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", + "value": "true" + }, + { + "name": "CONTROLLER_TYPE", + "value": "DaemonSet" + }, + { + "name": "NODE_IP", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "status.hostIP" + } + } + } + ], + "resources": { + "limits": { + "cpu": "150m", + "memory": "300Mi" + }, + "requests": { + "cpu": "75m", + "memory": "225Mi" + } + }, + "volumeMounts": [ + 
{ + "name": "host-root", + "readOnly": true, + "mountPath": "/hostfs" + }, + { + "name": "docker-sock", + "mountPath": "/var/run/host" + }, + { + "name": "host-log", + "mountPath": "/var/log" + }, + { + "name": "containerlog-path", + "mountPath": "/var/lib/docker/containers" + }, + { + "name": "azure-json-path", + "mountPath": "/etc/kubernetes/host" + }, + { + "name": "omsagent-secret", + "mountPath": "/etc/omsagent-secret" + }, + { + "name": "settings-vol-config", + "readOnly": true, + "mountPath": "/etc/config/settings" + }, + { + "name": "omsagent-token-4d9bp", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "exec": { + "command": [ + "/bin/bash", + "-c", + "ps -ef | grep main" + ] + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 1, + "periodSeconds": 60, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true, + "procMount": "Default" + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "omsagent", + "serviceAccount": "omsagent", + "nodeName": "k8s-master-15159885-0", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-master-15159885-0" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": 
"Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:28:55Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:29:08Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:29:08Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:28:55Z" + } + ], + "hostIP": "10.255.255.5", + "podIP": "10.240.0.26", + "startTime": "2019-06-19T06:28:55Z", + "containerStatuses": [ + { + "name": "omsagent", + "state": { + "running": { + "startedAt": "2019-06-19T06:29:08Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "rdilip83/healthpreview06192019:latest", + "imageID": "docker-pullable://rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", + "containerID": "docker://d84656fd8f69b15c9244a4fcb4feec35ecc980e7d9a32a4ebc365bc3647a931b" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "omsagent-pgnrl", + "generateName": "omsagent-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-pgnrl", + "uid": "838ae71f-925b-11e9-a358-000d3a53d49f", + "resourceVersion": "7987213", + "creationTimestamp": "2019-06-19T06:28:55Z", + "labels": { + "controller-revision-hash": "775fd7566d", + "dsName": "omsagent-ds", + "pod-template-generation": "1" 
+ }, + "annotations": { + "agentVersion": "1.10.0.1", + "dockerProviderVersion": "5.0.0-1", + "schema-versions": "v1" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "omsagent", + "uid": "838334ba-925b-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "host-root", + "hostPath": { + "path": "/", + "type": "" + } + }, + { + "name": "docker-sock", + "hostPath": { + "path": "/var/run", + "type": "" + } + }, + { + "name": "container-hostname", + "hostPath": { + "path": "/etc/hostname", + "type": "" + } + }, + { + "name": "host-log", + "hostPath": { + "path": "/var/log", + "type": "" + } + }, + { + "name": "containerlog-path", + "hostPath": { + "path": "/var/lib/docker/containers", + "type": "" + } + }, + { + "name": "azure-json-path", + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + } + }, + { + "name": "omsagent-secret", + "secret": { + "secretName": "omsagent-secret", + "defaultMode": 420 + } + }, + { + "name": "settings-vol-config", + "configMap": { + "name": "container-azm-ms-agentconfig", + "defaultMode": 420, + "optional": true + } + }, + { + "name": "omsagent-token-4d9bp", + "secret": { + "secretName": "omsagent-token-4d9bp", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "omsagent", + "image": "rdilip83/healthpreview06192019", + "ports": [ + { + "containerPort": 25225, + "protocol": "TCP" + }, + { + "containerPort": 25224, + "protocol": "UDP" + } + ], + "env": [ + { + "name": "AKS_RESOURCE_ID", + "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "name": "AKS_REGION", + "value": "eastus" + }, + { + "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", + "value": "true" + }, + { + "name": "CONTROLLER_TYPE", + "value": "DaemonSet" + }, + { + "name": "NODE_IP", + "valueFrom": { + 
"fieldRef": { + "apiVersion": "v1", + "fieldPath": "status.hostIP" + } + } + } + ], + "resources": { + "limits": { + "cpu": "150m", + "memory": "300Mi" + }, + "requests": { + "cpu": "75m", + "memory": "225Mi" + } + }, + "volumeMounts": [ + { + "name": "host-root", + "readOnly": true, + "mountPath": "/hostfs" + }, + { + "name": "docker-sock", + "mountPath": "/var/run/host" + }, + { + "name": "host-log", + "mountPath": "/var/log" + }, + { + "name": "containerlog-path", + "mountPath": "/var/lib/docker/containers" + }, + { + "name": "azure-json-path", + "mountPath": "/etc/kubernetes/host" + }, + { + "name": "omsagent-secret", + "mountPath": "/etc/omsagent-secret" + }, + { + "name": "settings-vol-config", + "readOnly": true, + "mountPath": "/etc/config/settings" + }, + { + "name": "omsagent-token-4d9bp", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "exec": { + "command": [ + "/bin/bash", + "-c", + "ps -ef | grep main" + ] + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 1, + "periodSeconds": 60, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true, + "procMount": "Default" + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "omsagent", + "serviceAccount": "omsagent", + "nodeName": "k8s-agentpool1-15159885-vmss000000", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchFields": [ + { + "key": "metadata.name", + "operator": "In", + "values": [ + "k8s-agentpool1-15159885-vmss000000" + ] + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": 
"node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:28:55Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:29:16Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:29:16Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:28:55Z" + } + ], + "hostIP": "10.240.0.34", + "podIP": "10.240.0.57", + "startTime": "2019-06-19T06:28:55Z", + "containerStatuses": [ + { + "name": "omsagent", + "state": { + "running": { + "startedAt": "2019-06-19T06:29:16Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "rdilip83/healthpreview06192019:latest", + "imageID": "docker-pullable://rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", + "containerID": "docker://1ba0bfe43e9beba393b61908f37fa66b235e8a3b51788a6c1acbe0ccdf8c5063" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "omsagent-rs-bf764d6d5-8cdqt", + "generateName": "omsagent-rs-bf764d6d5-", + "namespace": "kube-system", + "selfLink": 
"/api/v1/namespaces/kube-system/pods/omsagent-rs-bf764d6d5-8cdqt", + "uid": "83b5c241-925b-11e9-a358-000d3a53d49f", + "resourceVersion": "7987238", + "creationTimestamp": "2019-06-19T06:28:56Z", + "labels": { + "pod-template-hash": "bf764d6d5", + "rsName": "omsagent-rs" + }, + "annotations": { + "agentVersion": "1.10.0.1", + "dockerProviderVersion": "5.0.0-1", + "schema-versions": "v1" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "omsagent-rs-bf764d6d5", + "uid": "83b06cb0-925b-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "docker-sock", + "hostPath": { + "path": "/var/run", + "type": "" + } + }, + { + "name": "container-hostname", + "hostPath": { + "path": "/etc/hostname", + "type": "" + } + }, + { + "name": "host-log", + "hostPath": { + "path": "/var/log", + "type": "" + } + }, + { + "name": "containerlog-path", + "hostPath": { + "path": "/var/lib/docker/containers", + "type": "" + } + }, + { + "name": "azure-json-path", + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + } + }, + { + "name": "omsagent-secret", + "secret": { + "secretName": "omsagent-secret", + "defaultMode": 420 + } + }, + { + "name": "omsagent-rs-config", + "configMap": { + "name": "omsagent-rs-config", + "defaultMode": 420 + } + }, + { + "name": "settings-vol-config", + "configMap": { + "name": "container-azm-ms-agentconfig", + "defaultMode": 420, + "optional": true + } + }, + { + "name": "azurefile-pv", + "persistentVolumeClaim": { + "claimName": "azurefile" + } + }, + { + "name": "omsagent-token-4d9bp", + "secret": { + "secretName": "omsagent-token-4d9bp", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "omsagent", + "image": "rdilip83/healthpreview06192019", + "ports": [ + { + "containerPort": 25225, + "protocol": "TCP" + }, + { + "containerPort": 25224, + "protocol": "UDP" + }, + { + "name": "in-rs-tcp", + "containerPort": 25235, + 
"protocol": "TCP" + } + ], + "env": [ + { + "name": "AKS_RESOURCE_ID", + "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" + }, + { + "name": "AKS_REGION", + "value": "eastus" + }, + { + "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", + "value": "true" + }, + { + "name": "CONTROLLER_TYPE", + "value": "ReplicaSet" + }, + { + "name": "NODE_IP", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "status.hostIP" + } + } + } + ], + "resources": { + "limits": { + "cpu": "150m", + "memory": "500Mi" + }, + "requests": { + "cpu": "50m", + "memory": "175Mi" + } + }, + "volumeMounts": [ + { + "name": "docker-sock", + "mountPath": "/var/run/host" + }, + { + "name": "host-log", + "mountPath": "/var/log" + }, + { + "name": "containerlog-path", + "mountPath": "/var/lib/docker/containers" + }, + { + "name": "azure-json-path", + "mountPath": "/etc/kubernetes/host" + }, + { + "name": "omsagent-secret", + "readOnly": true, + "mountPath": "/etc/omsagent-secret" + }, + { + "name": "omsagent-rs-config", + "mountPath": "/etc/config" + }, + { + "name": "settings-vol-config", + "readOnly": true, + "mountPath": "/etc/config/settings" + }, + { + "name": "azurefile-pv", + "mountPath": "/mnt/azure" + }, + { + "name": "omsagent-token-4d9bp", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "exec": { + "command": [ + "/bin/bash", + "-c", + "ps -ef | grep main" + ] + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 1, + "periodSeconds": 60, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true, + "procMount": "Default" + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": 
"ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux", + "kubernetes.io/role": "agent" + }, + "serviceAccountName": "omsagent", + "serviceAccount": "omsagent", + "nodeName": "k8s-agentpool1-15159885-vmss000001", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:29:18Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:29:26Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:29:26Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-19T06:29:14Z" + } + ], + "hostIP": "10.240.0.65", + "podIP": "10.240.0.91", + "startTime": "2019-06-19T06:29:18Z", + "containerStatuses": [ + { + "name": "omsagent", + "state": { + "running": { + "startedAt": "2019-06-19T06:29:25Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "rdilip83/healthpreview06192019:latest", + "imageID": "docker-pullable://rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", + "containerID": "docker://c107f2a9097802e3151aa2556d424621e0c5b8b07e251dcb30dba09a42e39887" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "tiller-deploy-7bfcdc49d6-4w8pz", + "generateName": "tiller-deploy-7bfcdc49d6-", + "namespace": "kube-system", + "selfLink": 
"/api/v1/namespaces/kube-system/pods/tiller-deploy-7bfcdc49d6-4w8pz", + "uid": "39c3e0a4-60c9-11e9-a358-000d3a53d49f", + "resourceVersion": "7362282", + "creationTimestamp": "2019-04-17T04:28:18Z", + "labels": { + "app": "helm", + "name": "tiller", + "pod-template-hash": "7bfcdc49d6" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "tiller-deploy-7bfcdc49d6", + "uid": "39c0b65b-60c9-11e9-a358-000d3a53d49f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "tiller-token-c8tgp", + "secret": { + "secretName": "tiller-token-c8tgp", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "tiller", + "image": "gcr.io/kubernetes-helm/tiller:v2.11.0", + "ports": [ + { + "name": "tiller", + "containerPort": 44134, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "TILLER_NAMESPACE", + "value": "kube-system" + }, + { + "name": "TILLER_HISTORY_MAX", + "value": "0" + } + ], + "resources": { + "limits": { + "cpu": "50m", + "memory": "150Mi" + }, + "requests": { + "cpu": "50m", + "memory": "150Mi" + } + }, + "volumeMounts": [ + { + "name": "tiller-token-c8tgp", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "httpGet": { + "path": "/liveness", + "port": 44135, + "scheme": "HTTP" + }, + "initialDelaySeconds": 1, + "timeoutSeconds": 1, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "readinessProbe": { + "httpGet": { + "path": "/readiness", + "port": 44135, + "scheme": "HTTP" + }, + "initialDelaySeconds": 1, + "timeoutSeconds": 1, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + 
"beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "tiller", + "serviceAccount": "tiller", + "nodeName": "k8s-agentpool1-15159885-vmss000001", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:18Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:53Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-14T07:59:53Z" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-17T04:28:18Z" + } + ], + "hostIP": "10.240.0.65", + "podIP": "10.240.0.70", + "startTime": "2019-04-17T04:28:18Z", + "containerStatuses": [ + { + "name": "tiller", + "state": { + "running": { + "startedAt": "2019-06-14T07:59:38Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 255, + "reason": "Error", + "startedAt": "2019-04-17T04:28:27Z", + "finishedAt": "2019-06-14T07:58:50Z", + "containerID": "docker://65215f91d8557104345b0cb69dd906d574c6f57fcabba5fde3bebd58c275f2d1" + } + }, + "ready": true, + "restartCount": 1, + "image": "gcr.io/kubernetes-helm/tiller:v2.11.0", + "imageID": "docker-pullable://gcr.io/kubernetes-helm/tiller@sha256:f6d8f4ab9ba993b5f5b60a6edafe86352eabe474ffeb84cb6c79b8866dce45d1", + "containerID": "docker://2805575e748637b71e67f7601288d62405fea9b185a3aa852b82e8700320f708" + } + ], + "qosClass": "Guaranteed" + }, + "apiVersion": "v1", + "kind": "Pod" + } + ] +} \ No newline at end of file diff --git 
a/inventory/deployments.json b/inventory/deployments.json new file mode 100644 index 000000000..ab4eb2e48 --- /dev/null +++ b/inventory/deployments.json @@ -0,0 +1,1702 @@ +{ + "items": [ + { + "metadata": { + "name": "diliprdeploymentnodeapps", + "namespace": "default", + "selfLink": "/apis/extensions/v1beta1/namespaces/default/deployments/diliprdeploymentnodeapps", + "uid": "ee1b111d-526e-11e9-a899-6a5520730c61", + "resourceVersion": "4597575", + "generation": 1, + "creationTimestamp": "2019-03-29T22:06:40Z", + "labels": { + "diliprdeploymentLabel1": "d1", + "diliprdeploymentLabel2": "d2" + }, + "annotations": { + "deployment.kubernetes.io/revision": "1", + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"apps/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"diliprdeploymentLabel1\":\"d1\",\"diliprdeploymentLabel2\":\"d2\"},\"name\":\"diliprdeploymentnodeapps\",\"namespace\":\"default\"},\"spec\":{\"replicas\":1,\"selector\":{\"matchLabels\":{\"app\":\"diliprsnodeapppod\"}},\"template\":{\"metadata\":{\"labels\":{\"app\":\"diliprsnodeapppod\",\"diliprPodLabel1\":\"p1\",\"diliprPodLabel2\":\"p2\"}},\"spec\":{\"containers\":[{\"image\":\"rdilip83/logeverysecond:v2\",\"name\":\"diliprcontainerhelloapp\"}]}}}}\n" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "app": "diliprsnodeapppod" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "app": "diliprsnodeapppod", + "diliprPodLabel1": "p1", + "diliprPodLabel2": "p2" + } + }, + "spec": { + "containers": [ + { + "name": "diliprcontainerhelloapp", + "image": "rdilip83/logeverysecond:v2", + "resources": {}, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "securityContext": {}, + "schedulerName": "default-scheduler" + } + 
}, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": "25%", + "maxSurge": "25%" + } + }, + "revisionHistoryLimit": 2, + "progressDeadlineSeconds": 600 + }, + "status": { + "observedGeneration": 1, + "replicas": 1, + "updatedReplicas": 1, + "readyReplicas": 1, + "availableReplicas": 1, + "conditions": [ + { + "type": "Progressing", + "status": "True", + "lastUpdateTime": "2019-03-29T22:06:47Z", + "lastTransitionTime": "2019-03-29T22:06:40Z", + "reason": "NewReplicaSetAvailable", + "message": "ReplicaSet \"diliprdeploymentnodeapps-c4fdfb446\" has successfully progressed." + }, + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-04-22T19:28:18Z", + "lastTransitionTime": "2019-04-22T19:28:18Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum availability." + } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "nginx-deployment", + "namespace": "default", + "selfLink": "/apis/extensions/v1beta1/namespaces/default/deployments/nginx-deployment", + "uid": "6d8b998a-7ce1-11e9-8d23-32c49ee6f300", + "resourceVersion": "7952435", + "generation": 1, + "creationTimestamp": "2019-05-22T22:32:05Z", + "labels": { + "app": "nginx" + }, + "annotations": { + "deployment.kubernetes.io/revision": "1" + } + }, + "spec": { + "replicas": 3, + "selector": { + "matchLabels": { + "app": "nginx" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "app": "nginx" + } + }, + "spec": { + "containers": [ + { + "name": "nginx", + "image": "nginx:1.7.9", + "ports": [ + { + "containerPort": 80, + "protocol": "TCP" + } + ], + "resources": {}, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "securityContext": {}, + "schedulerName": 
"default-scheduler" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 1 + } + }, + "revisionHistoryLimit": 10, + "progressDeadlineSeconds": 2147483647 + }, + "status": { + "observedGeneration": 1, + "replicas": 3, + "updatedReplicas": 3, + "readyReplicas": 3, + "availableReplicas": 3, + "conditions": [ + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-05-22T22:32:14Z", + "lastTransitionTime": "2019-05-22T22:32:14Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum availability." + } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "vishwadeploymentnodeapps", + "namespace": "default", + "selfLink": "/apis/extensions/v1beta1/namespaces/default/deployments/vishwadeploymentnodeapps", + "uid": "cf108fee-5261-11e9-a899-6a5520730c61", + "resourceVersion": "7580681", + "generation": 1, + "creationTimestamp": "2019-03-29T20:32:45Z", + "labels": { + "VishwadeploymentLabel1": "d1", + "VishwadeploymentLabel2": "d2" + }, + "annotations": { + "deployment.kubernetes.io/revision": "1", + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"apps/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"VishwadeploymentLabel1\":\"d1\",\"VishwadeploymentLabel2\":\"d2\"},\"name\":\"vishwadeploymentnodeapps\",\"namespace\":\"default\"},\"spec\":{\"replicas\":10,\"selector\":{\"matchLabels\":{\"app\":\"vishwasnodeapppod\"}},\"template\":{\"metadata\":{\"labels\":{\"VishwaPodLabel1\":\"p1\",\"VishwaPodLabel2\":\"p2\",\"app\":\"vishwasnodeapppod\"}},\"spec\":{\"containers\":[{\"image\":\"vishiy/hello:err100eps\",\"name\":\"vishwacontainerhelloapp\"}]}}}}\n" + } + }, + "spec": { + "replicas": 10, + "selector": { + "matchLabels": { + "app": "vishwasnodeapppod" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "VishwaPodLabel1": "p1", + 
"VishwaPodLabel2": "p2", + "app": "vishwasnodeapppod" + } + }, + "spec": { + "containers": [ + { + "name": "vishwacontainerhelloapp", + "image": "vishiy/hello:err100eps", + "resources": {}, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "securityContext": {}, + "schedulerName": "default-scheduler" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": "25%", + "maxSurge": "25%" + } + }, + "revisionHistoryLimit": 2, + "progressDeadlineSeconds": 600 + }, + "status": { + "observedGeneration": 1, + "replicas": 10, + "updatedReplicas": 10, + "readyReplicas": 10, + "availableReplicas": 10, + "conditions": [ + { + "type": "Progressing", + "status": "True", + "lastUpdateTime": "2019-03-29T20:32:58Z", + "lastTransitionTime": "2019-03-29T20:32:45Z", + "reason": "NewReplicaSetAvailable", + "message": "ReplicaSet \"vishwadeploymentnodeapps-8686cf54db\" has successfully progressed." + }, + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-04-22T19:28:16Z", + "lastTransitionTime": "2019-04-22T19:28:16Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum availability." 
+ } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "heapster", + "namespace": "kube-system", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/heapster", + "uid": "c531be6a-8d02-11e9-8800-bad6d46463ba", + "resourceVersion": "10235925", + "generation": 2, + "creationTimestamp": "2019-06-12T11:11:04Z", + "labels": { + "addonmanager.kubernetes.io/mode": "EnsureExists", + "k8s-app": "heapster", + "kubernetes.io/cluster-service": "true" + }, + "annotations": { + "deployment.kubernetes.io/revision": "2" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "k8s-app": "heapster" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "k8s-app": "heapster" + } + }, + "spec": { + "volumes": [ + { + "name": "heapster-config-volume", + "configMap": { + "name": "heapster-config", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "heapster", + "image": "k8s.gcr.io/heapster-amd64:v1.5.3", + "command": [ + "/heapster", + "--source=kubernetes.summary_api:\"\"" + ], + "resources": { + "limits": { + "cpu": "88m", + "memory": "204Mi" + }, + "requests": { + "cpu": "88m", + "memory": "204Mi" + } + }, + "livenessProbe": { + "httpGet": { + "path": "/healthz", + "port": 8082, + "scheme": "HTTP" + }, + "initialDelaySeconds": 180, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + }, + { + "name": "heapster-nanny", + "image": "k8s.gcr.io/addon-resizer:1.8.1", + "command": [ + "/pod_nanny", + "--config-dir=/etc/config", + "--cpu=80m", + "--extra-cpu=0.5m", + "--memory=140Mi", + "--extra-memory=4Mi", + "--threshold=5", + "--deployment=heapster", + "--container=heapster", + "--poll-period=300000", + "--estimator=exponential" + ], + "env": [ + { + "name": "MY_POD_NAME", + 
"valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "metadata.name" + } + } + }, + { + "name": "MY_POD_NAMESPACE", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "metadata.namespace" + } + } + } + ], + "resources": { + "limits": { + "cpu": "50m", + "memory": "90Mi" + }, + "requests": { + "cpu": "50m", + "memory": "90Mi" + } + }, + "volumeMounts": [ + { + "name": "heapster-config-volume", + "mountPath": "/etc/config" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "heapster", + "serviceAccount": "heapster", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + } + ], + "priorityClassName": "system-node-critical" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 1 + } + }, + "revisionHistoryLimit": 10, + "progressDeadlineSeconds": 2147483647 + }, + "status": { + "observedGeneration": 2, + "replicas": 1, + "updatedReplicas": 1, + "readyReplicas": 1, + "availableReplicas": 1, + "conditions": [ + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-06-12T11:11:05Z", + "lastTransitionTime": "2019-06-12T11:11:05Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum availability." 
+ } + ], + "collisionCount": 1 + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "kube-dns-autoscaler", + "namespace": "kube-system", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/kube-dns-autoscaler", + "uid": "4509acaf-44e5-11e9-9920-423525a6b683", + "resourceVersion": "7854804", + "generation": 2, + "creationTimestamp": "2019-03-12T16:38:30Z", + "labels": { + "addonmanager.kubernetes.io/mode": "Reconcile", + "k8s-app": "kube-dns-autoscaler", + "kubernetes.io/cluster-service": "true" + }, + "annotations": { + "deployment.kubernetes.io/revision": "2", + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"kube-dns-autoscaler\",\"kubernetes.io/cluster-service\":\"true\"},\"name\":\"kube-dns-autoscaler\",\"namespace\":\"kube-system\"},\"spec\":{\"selector\":{\"matchLabels\":{\"k8s-app\":\"kube-dns-autoscaler\"}},\"template\":{\"metadata\":{\"annotations\":{\"scheduler.alpha.kubernetes.io/critical-pod\":\"\",\"seccomp.security.alpha.kubernetes.io/pod\":\"docker/default\"},\"labels\":{\"k8s-app\":\"kube-dns-autoscaler\"}},\"spec\":{\"containers\":[{\"command\":[\"/cluster-proportional-autoscaler\",\"--namespace=kube-system\",\"--configmap=kube-dns-autoscaler\",\"--target=deployment/kube-dns-v20\",\"--default-params={\\\"ladder\\\":{\\\"coresToReplicas\\\":[[1,2],[512,3],[1024,4],[2048,5]],\\\"nodesToReplicas\\\":[[1,2],[8,3],[16,4],[32,5]]}}\",\"--logtostderr=true\",\"--v=2\"],\"image\":\"k8s.gcr.io/cluster-proportional-autoscaler-amd64:1.1.2-r2\",\"name\":\"autoscaler\",\"resources\":{\"requests\":{\"cpu\":\"20m\",\"memory\":\"10Mi\"}}}],\"dnsPolicy\":\"Default\",\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"kube-dns-autoscaler\",\"tolerations\":[{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Ex
ists\"}]}}}}\n" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "k8s-app": "kube-dns-autoscaler" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "k8s-app": "kube-dns-autoscaler" + }, + "annotations": { + "scheduler.alpha.kubernetes.io/critical-pod": "", + "seccomp.security.alpha.kubernetes.io/pod": "docker/default" + } + }, + "spec": { + "containers": [ + { + "name": "autoscaler", + "image": "k8s.gcr.io/cluster-proportional-autoscaler-amd64:1.1.2-r2", + "command": [ + "/cluster-proportional-autoscaler", + "--namespace=kube-system", + "--configmap=kube-dns-autoscaler", + "--target=deployment/kube-dns-v20", + "--default-params={\"ladder\":{\"coresToReplicas\":[[1,2],[512,3],[1024,4],[2048,5]],\"nodesToReplicas\":[[1,2],[8,3],[16,4],[32,5]]}}", + "--logtostderr=true", + "--v=2" + ], + "resources": { + "requests": { + "cpu": "20m", + "memory": "10Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "Default", + "serviceAccountName": "kube-dns-autoscaler", + "serviceAccount": "kube-dns-autoscaler", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + } + ], + "priorityClassName": "system-node-critical" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 1 + } + }, + "revisionHistoryLimit": 10, + "progressDeadlineSeconds": 2147483647 + }, + "status": { + "observedGeneration": 2, + "replicas": 1, + "updatedReplicas": 1, + "readyReplicas": 1, + "availableReplicas": 1, + "conditions": [ + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-03-12T16:38:30Z", + "lastTransitionTime": "2019-03-12T16:38:30Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment 
has minimum availability." + } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "kube-dns-v20", + "namespace": "kube-system", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/kube-dns-v20", + "uid": "4523fcd7-44e5-11e9-9920-423525a6b683", + "resourceVersion": "7854961", + "generation": 4, + "creationTimestamp": "2019-03-12T16:38:30Z", + "labels": { + "addonmanager.kubernetes.io/mode": "Reconcile", + "k8s-app": "kube-dns", + "kubernetes.io/cluster-service": "true", + "version": "v20" + }, + "annotations": { + "deployment.kubernetes.io/revision": "3", + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"kube-dns\",\"kubernetes.io/cluster-service\":\"true\",\"version\":\"v20\"},\"name\":\"kube-dns-v20\",\"namespace\":\"kube-system\"},\"spec\":{\"selector\":{\"matchLabels\":{\"k8s-app\":\"kube-dns\",\"version\":\"v20\"}},\"template\":{\"metadata\":{\"annotations\":{\"prometheus.io/port\":\"10055\",\"prometheus.io/scrape\":\"true\"},\"labels\":{\"k8s-app\":\"kube-dns\",\"kubernetes.io/cluster-service\":\"true\",\"version\":\"v20\"}},\"spec\":{\"affinity\":{\"nodeAffinity\":{\"requiredDuringSchedulingIgnoredDuringExecution\":{\"nodeSelectorTerms\":[{\"labelSelector\":null,\"matchExpressions\":[{\"key\":\"kubernetes.azure.com/cluster\",\"operator\":\"Exists\"}]}]}},\"podAntiAffinity\":{\"preferredDuringSchedulingIgnoredDuringExecution\":[{\"podAffinityTerm\":{\"labelSelector\":{\"matchExpressions\":[{\"key\":\"k8s-app\",\"operator\":\"In\",\"values\":[\"kube-dns\"]}]},\"topologyKey\":\"kubernetes.io/hostname\"},\"weight\":100}]}},\"containers\":[{\"args\":[\"--kubecfg-file=/config/kubeconfig\",\"--config-dir=/kube-dns-config\",\"--domain=cluster.local.\",\"--dns-port=10053\",\"--v=2\"],\"env\":[{\"name\":\"PROMETHEUS_PORT
\",\"value\":\"10055\"}],\"image\":\"k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13\",\"livenessProbe\":{\"failureThreshold\":5,\"httpGet\":{\"path\":\"/healthcheck/kubedns\",\"port\":10054,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":60,\"successThreshold\":1,\"timeoutSeconds\":5},\"name\":\"kubedns\",\"ports\":[{\"containerPort\":10053,\"name\":\"dns-local\",\"protocol\":\"UDP\"},{\"containerPort\":10053,\"name\":\"dns-tcp-local\",\"protocol\":\"TCP\"},{\"containerPort\":10055,\"name\":\"metrics\",\"protocol\":\"TCP\"}],\"readinessProbe\":{\"httpGet\":{\"path\":\"/readiness\",\"port\":8081,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":30,\"timeoutSeconds\":5},\"resources\":{\"limits\":{\"memory\":\"170Mi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"70Mi\"}},\"volumeMounts\":[{\"mountPath\":\"/kube-dns-config\",\"name\":\"kube-dns-config\"},{\"mountPath\":\"/config\",\"name\":\"kubedns-kubecfg\",\"readOnly\":true}]},{\"args\":[\"-v=2\",\"-logtostderr\",\"-configDir=/kube-dns-config\",\"-restartDnsmasq=true\",\"--\",\"-k\",\"--cache-size=1000\",\"--no-negcache\",\"--no-resolv\",\"--server=127.0.0.1#10053\",\"--server=/cluster.local/127.0.0.1#10053\",\"--server=/in-addr.arpa/127.0.0.1#10053\",\"--server=/ip6.arpa/127.0.0.1#10053\",\"--log-facility=-\"],\"image\":\"k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10\",\"name\":\"dnsmasq\",\"ports\":[{\"containerPort\":53,\"name\":\"dns\",\"protocol\":\"UDP\"},{\"containerPort\":53,\"name\":\"dns-tcp\",\"protocol\":\"TCP\"}],\"volumeMounts\":[{\"mountPath\":\"/kube-dns-config\",\"name\":\"kube-dns-config\"}]},{\"args\":[\"--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1 \\u003e/dev/null || exit 1; done\",\"--url=/healthz-dnsmasq\",\"--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1:10053 \\u003e/dev/null || exit 1; done\",\"--url=/healthz-kubedns\",\"--port=8080\",\"--quiet\"],\"env\":[{\"name\":\"PROBE_DOMAINS\",\"value\":\"bing.com 
kubernetes.default.svc.cluster.local\"}],\"image\":\"k8s.gcr.io/exechealthz-amd64:1.2\",\"livenessProbe\":{\"failureThreshold\":5,\"httpGet\":{\"path\":\"/healthz-dnsmasq\",\"port\":8080,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":60,\"successThreshold\":1,\"timeoutSeconds\":5},\"name\":\"healthz\",\"ports\":[{\"containerPort\":8080,\"protocol\":\"TCP\"}],\"resources\":{\"limits\":{\"memory\":\"50Mi\"},\"requests\":{\"cpu\":\"10m\",\"memory\":\"50Mi\"}}},{\"args\":[\"--v=2\",\"--logtostderr\",\"--probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.cluster.local,5,SRV\",\"--probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.cluster.local,5,SRV\"],\"image\":\"k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10\",\"livenessProbe\":{\"httpGet\":{\"path\":\"/metrics\",\"port\":10054,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":60,\"successThreshold\":1,\"timeoutSeconds\":5},\"name\":\"sidecar\",\"ports\":[{\"containerPort\":10054,\"name\":\"metrics\",\"protocol\":\"TCP\"}],\"resources\":{\"requests\":{\"cpu\":\"10m\",\"memory\":\"20Mi\"}}}],\"dnsPolicy\":\"Default\",\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"kube-dns\",\"tolerations\":[{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Exists\"}],\"volumes\":[{\"configMap\":{\"name\":\"kube-dns\",\"optional\":true},\"name\":\"kube-dns-config\"},{\"configMap\":{\"name\":\"kubedns-kubecfg\"},\"name\":\"kubedns-kubecfg\"}]}}}}\n" + } + }, + "spec": { + "replicas": 2, + "selector": { + "matchLabels": { + "k8s-app": "kube-dns", + "version": "v20" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "k8s-app": "kube-dns", + "kubernetes.io/cluster-service": "true", + "version": "v20" + }, + "annotations": { + "prometheus.io/port": "10055", + "prometheus.io/scrape": "true" + } + }, + "spec": { + "volumes": [ + { + "name": "kube-dns-config", + "configMap": { + "name": "kube-dns", + "defaultMode": 420, + "optional": true + } + 
}, + { + "name": "kubedns-kubecfg", + "configMap": { + "name": "kubedns-kubecfg", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "kubedns", + "image": "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13", + "args": [ + "--kubecfg-file=/config/kubeconfig", + "--config-dir=/kube-dns-config", + "--domain=cluster.local.", + "--dns-port=10053", + "--v=2" + ], + "ports": [ + { + "name": "dns-local", + "containerPort": 10053, + "protocol": "UDP" + }, + { + "name": "dns-tcp-local", + "containerPort": 10053, + "protocol": "TCP" + }, + { + "name": "metrics", + "containerPort": 10055, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "PROMETHEUS_PORT", + "value": "10055" + } + ], + "resources": { + "limits": { + "memory": "170Mi" + }, + "requests": { + "cpu": "100m", + "memory": "70Mi" + } + }, + "volumeMounts": [ + { + "name": "kube-dns-config", + "mountPath": "/kube-dns-config" + }, + { + "name": "kubedns-kubecfg", + "readOnly": true, + "mountPath": "/config" + } + ], + "livenessProbe": { + "httpGet": { + "path": "/healthcheck/kubedns", + "port": 10054, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 5 + }, + "readinessProbe": { + "httpGet": { + "path": "/readiness", + "port": 8081, + "scheme": "HTTP" + }, + "initialDelaySeconds": 30, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + }, + { + "name": "dnsmasq", + "image": "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10", + "args": [ + "-v=2", + "-logtostderr", + "-configDir=/kube-dns-config", + "-restartDnsmasq=true", + "--", + "-k", + "--cache-size=1000", + "--no-negcache", + "--no-resolv", + "--server=127.0.0.1#10053", + "--server=/cluster.local/127.0.0.1#10053", + "--server=/in-addr.arpa/127.0.0.1#10053", + 
"--server=/ip6.arpa/127.0.0.1#10053", + "--log-facility=-" + ], + "ports": [ + { + "name": "dns", + "containerPort": 53, + "protocol": "UDP" + }, + { + "name": "dns-tcp", + "containerPort": 53, + "protocol": "TCP" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "kube-dns-config", + "mountPath": "/kube-dns-config" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + }, + { + "name": "healthz", + "image": "k8s.gcr.io/exechealthz-amd64:1.2", + "args": [ + "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1 >/dev/null || exit 1; done", + "--url=/healthz-dnsmasq", + "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1:10053 >/dev/null || exit 1; done", + "--url=/healthz-kubedns", + "--port=8080", + "--quiet" + ], + "ports": [ + { + "containerPort": 8080, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "PROBE_DOMAINS", + "value": "bing.com kubernetes.default.svc.cluster.local" + } + ], + "resources": { + "limits": { + "memory": "50Mi" + }, + "requests": { + "cpu": "10m", + "memory": "50Mi" + } + }, + "livenessProbe": { + "httpGet": { + "path": "/healthz-dnsmasq", + "port": 8080, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 5 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + }, + { + "name": "sidecar", + "image": "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10", + "args": [ + "--v=2", + "--logtostderr", + "--probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.cluster.local,5,SRV", + "--probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.cluster.local,5,SRV" + ], + "ports": [ + { + "name": "metrics", + "containerPort": 10054, + "protocol": "TCP" + } + ], + "resources": { + "requests": { + "cpu": "10m", + "memory": "20Mi" + } + }, + "livenessProbe": { + "httpGet": { + "path": "/metrics", + 
"port": 10054, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "Default", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "kube-dns", + "serviceAccount": "kube-dns", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + }, + "podAntiAffinity": { + "preferredDuringSchedulingIgnoredDuringExecution": [ + { + "weight": 100, + "podAffinityTerm": { + "labelSelector": { + "matchExpressions": [ + { + "key": "k8s-app", + "operator": "In", + "values": [ + "kube-dns" + ] + } + ] + }, + "topologyKey": "kubernetes.io/hostname" + } + } + ] + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + } + ], + "priorityClassName": "system-node-critical" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 1 + } + }, + "revisionHistoryLimit": 10, + "progressDeadlineSeconds": 2147483647 + }, + "status": { + "observedGeneration": 4, + "replicas": 2, + "updatedReplicas": 2, + "readyReplicas": 2, + "availableReplicas": 2, + "conditions": [ + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-03-12T16:41:46Z", + "lastTransitionTime": "2019-03-12T16:41:46Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum availability." 
+ } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "kubernetes-dashboard", + "namespace": "kube-system", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/kubernetes-dashboard", + "uid": "45b9cc8d-44e5-11e9-9920-423525a6b683", + "resourceVersion": "7854801", + "generation": 3, + "creationTimestamp": "2019-03-12T16:38:31Z", + "labels": { + "addonmanager.kubernetes.io/mode": "Reconcile", + "k8s-app": "kubernetes-dashboard", + "kubernetes.io/cluster-service": "true" + }, + "annotations": { + "deployment.kubernetes.io/revision": "3", + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"kubernetes-dashboard\",\"kubernetes.io/cluster-service\":\"true\"},\"name\":\"kubernetes-dashboard\",\"namespace\":\"kube-system\"},\"spec\":{\"replicas\":1,\"strategy\":{\"rollingUpdate\":{\"maxSurge\":0,\"maxUnavailable\":1},\"type\":\"RollingUpdate\"},\"template\":{\"metadata\":{\"labels\":{\"k8s-app\":\"kubernetes-dashboard\",\"kubernetes.io/cluster-service\":\"true\"}},\"spec\":{\"affinity\":{\"nodeAffinity\":{\"requiredDuringSchedulingIgnoredDuringExecution\":{\"nodeSelectorTerms\":[{\"labelSelector\":null,\"matchExpressions\":[{\"key\":\"kubernetes.azure.com/cluster\",\"operator\":\"Exists\"}]}]}}},\"containers\":[{\"image\":\"k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1\",\"livenessProbe\":{\"failureThreshold\":3,\"httpGet\":{\"path\":\"/\",\"port\":9090,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":30,\"periodSeconds\":10,\"successThreshold\":1,\"timeoutSeconds\":30},\"name\":\"main\",\"ports\":[{\"containerPort\":9090,\"name\":\"http\",\"protocol\":\"TCP\"}],\"resources\":{\"limits\":{\"cpu\":\"100m\",\"memory\":\"500Mi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"50Mi\"}}}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linu
x\"},\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"kubernetes-dashboard\",\"tolerations\":[{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Exists\"}]}}}}\n" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "k8s-app": "kubernetes-dashboard", + "kubernetes.io/cluster-service": "true" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "k8s-app": "kubernetes-dashboard", + "kubernetes.io/cluster-service": "true" + } + }, + "spec": { + "containers": [ + { + "name": "main", + "image": "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1", + "ports": [ + { + "name": "http", + "containerPort": 9090, + "protocol": "TCP" + } + ], + "resources": { + "limits": { + "cpu": "100m", + "memory": "500Mi" + }, + "requests": { + "cpu": "100m", + "memory": "50Mi" + } + }, + "livenessProbe": { + "httpGet": { + "path": "/", + "port": 9090, + "scheme": "HTTP" + }, + "initialDelaySeconds": 30, + "timeoutSeconds": 30, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "kubernetes-dashboard", + "serviceAccount": "kubernetes-dashboard", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + } + ], + "priorityClassName": "system-node-critical" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 0 + 
} + }, + "revisionHistoryLimit": 10, + "progressDeadlineSeconds": 2147483647 + }, + "status": { + "observedGeneration": 3, + "replicas": 1, + "updatedReplicas": 1, + "readyReplicas": 1, + "availableReplicas": 1, + "conditions": [ + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-03-12T16:38:32Z", + "lastTransitionTime": "2019-03-12T16:38:32Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum availability." + } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "metrics-server", + "namespace": "kube-system", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/metrics-server", + "uid": "45556857-44e5-11e9-9920-423525a6b683", + "resourceVersion": "7854802", + "generation": 2, + "creationTimestamp": "2019-03-12T16:38:31Z", + "labels": { + "addonmanager.kubernetes.io/mode": "Reconcile", + "k8s-app": "metrics-server", + "kubernetes.io/cluster-service": "true" + }, + "annotations": { + "deployment.kubernetes.io/revision": "2", + "kubectl.kubernetes.io/last-applied-configuration": 
"{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"metrics-server\",\"kubernetes.io/cluster-service\":\"true\"},\"name\":\"metrics-server\",\"namespace\":\"kube-system\"},\"spec\":{\"selector\":{\"matchLabels\":{\"k8s-app\":\"metrics-server\"}},\"template\":{\"metadata\":{\"labels\":{\"k8s-app\":\"metrics-server\"},\"name\":\"metrics-server\"},\"spec\":{\"affinity\":{\"nodeAffinity\":{\"requiredDuringSchedulingIgnoredDuringExecution\":{\"nodeSelectorTerms\":[{\"labelSelector\":null,\"matchExpressions\":[{\"key\":\"kubernetes.azure.com/cluster\",\"operator\":\"Exists\"}]}]}}},\"containers\":[{\"command\":[\"/metrics-server\",\"--source=kubernetes.summary_api:''\"],\"image\":\"k8s.gcr.io/metrics-server-amd64:v0.2.1\",\"imagePullPolicy\":\"IfNotPresent\",\"name\":\"metrics-server\"}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"metrics-server\",\"tolerations\":[{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Exists\"}]}}}}\n" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "k8s-app": "metrics-server" + } + }, + "template": { + "metadata": { + "name": "metrics-server", + "creationTimestamp": null, + "labels": { + "k8s-app": "metrics-server" + } + }, + "spec": { + "containers": [ + { + "name": "metrics-server", + "image": "k8s.gcr.io/metrics-server-amd64:v0.2.1", + "command": [ + "/metrics-server", + "--source=kubernetes.summary_api:''" + ], + "resources": {}, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "metrics-server", + "serviceAccount": "metrics-server", + 
"securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + } + ], + "priorityClassName": "system-node-critical" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 1 + } + }, + "revisionHistoryLimit": 10, + "progressDeadlineSeconds": 2147483647 + }, + "status": { + "observedGeneration": 2, + "replicas": 1, + "updatedReplicas": 1, + "readyReplicas": 1, + "availableReplicas": 1, + "conditions": [ + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-03-12T16:38:31Z", + "lastTransitionTime": "2019-03-12T16:38:31Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum availability." 
+ } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "omsagent-rs", + "namespace": "kube-system", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/omsagent-rs", + "uid": "b819b214-6876-11e9-8b55-963bcaafdced", + "resourceVersion": "5486655", + "generation": 2, + "creationTimestamp": "2019-04-26T22:57:51Z", + "labels": { + "rsName": "omsagent-rs" + }, + "annotations": { + "deployment.kubernetes.io/revision": "2", + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"name\":\"omsagent-rs\",\"namespace\":\"kube-system\"},\"spec\":{\"replicas\":1,\"selector\":{\"matchLabels\":{\"rsName\":\"omsagent-rs\"}},\"strategy\":{\"type\":\"RollingUpdate\"},\"template\":{\"metadata\":{\"annotations\":{\"agentVersion\":\"1.10.0.1\",\"dockerProviderVersion\":\"4.0.0-0\"},\"labels\":{\"rsName\":\"omsagent-rs\"}},\"spec\":{\"containers\":[{\"env\":[{\"name\":\"AKS_RESOURCE_ID\",\"value\":\"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test\"},{\"name\":\"AKS_REGION\",\"value\":\"eastus\"},{\"name\":\"DISABLE_KUBE_SYSTEM_LOG_COLLECTION\",\"value\":\"true\"},{\"name\":\"CONTROLLER_TYPE\",\"value\":\"ReplicaSet\"},{\"name\":\"NODE_IP\",\"valueFrom\":{\"fieldRef\":{\"fieldPath\":\"status.hostIP\"}}}],\"image\":\"microsoft/oms:healthpreview04262019\",\"imagePullPolicy\":\"IfNotPresent\",\"livenessProbe\":{\"exec\":{\"command\":[\"/bin/bash\",\"-c\",\"ps -ef | grep main | grep -v 
\\\"grep\\\"\"]},\"initialDelaySeconds\":60,\"periodSeconds\":60},\"name\":\"omsagent\",\"ports\":[{\"containerPort\":25225,\"protocol\":\"TCP\"},{\"containerPort\":25235,\"name\":\"in-rs-tcp\",\"protocol\":\"TCP\"},{\"containerPort\":25224,\"protocol\":\"UDP\"}],\"resources\":{\"limits\":{\"cpu\":\"150m\",\"memory\":\"500Mi\"},\"requests\":{\"cpu\":\"50m\",\"memory\":\"100Mi\"}},\"securityContext\":{\"privileged\":true},\"volumeMounts\":[{\"mountPath\":\"/var/run/host\",\"name\":\"docker-sock\"},{\"mountPath\":\"/var/log\",\"name\":\"host-log\"},{\"mountPath\":\"/var/lib/docker/containers\",\"name\":\"containerlog-path\"},{\"mountPath\":\"/etc/kubernetes/host\",\"name\":\"azure-json-path\"},{\"mountPath\":\"/etc/omsagent-secret\",\"name\":\"omsagent-secret\",\"readOnly\":true},{\"mountPath\":\"/etc/config\",\"name\":\"omsagent-rs-config\"}]}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\",\"kubernetes.io/role\":\"agent\"},\"serviceAccountName\":\"omsagent\",\"volumes\":[{\"hostPath\":{\"path\":\"/var/run\"},\"name\":\"docker-sock\"},{\"hostPath\":{\"path\":\"/etc/hostname\"},\"name\":\"container-hostname\"},{\"hostPath\":{\"path\":\"/var/log\"},\"name\":\"host-log\"},{\"hostPath\":{\"path\":\"/var/lib/docker/containers\"},\"name\":\"containerlog-path\"},{\"hostPath\":{\"path\":\"/etc/kubernetes\"},\"name\":\"azure-json-path\"},{\"name\":\"omsagent-secret\",\"secret\":{\"secretName\":\"omsagent-secret\"}},{\"configMap\":{\"name\":\"omsagent-rs-config\"},\"name\":\"omsagent-rs-config\"}]}}}}\n" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "rsName": "omsagent-rs" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "rsName": "omsagent-rs" + }, + "annotations": { + "agentVersion": "1.10.0.1", + "dockerProviderVersion": "4.0.0-0" + } + }, + "spec": { + "volumes": [ + { + "name": "docker-sock", + "hostPath": { + "path": "/var/run", + "type": "" + } + }, + { + "name": "container-hostname", + 
"hostPath": { + "path": "/etc/hostname", + "type": "" + } + }, + { + "name": "host-log", + "hostPath": { + "path": "/var/log", + "type": "" + } + }, + { + "name": "containerlog-path", + "hostPath": { + "path": "/var/lib/docker/containers", + "type": "" + } + }, + { + "name": "azure-json-path", + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + } + }, + { + "name": "omsagent-secret", + "secret": { + "secretName": "omsagent-secret", + "defaultMode": 420 + } + }, + { + "name": "omsagent-rs-config", + "configMap": { + "name": "omsagent-rs-config", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "omsagent", + "image": "microsoft/oms:healthpreview04262019", + "ports": [ + { + "containerPort": 25225, + "protocol": "TCP" + }, + { + "name": "in-rs-tcp", + "containerPort": 25235, + "protocol": "TCP" + }, + { + "containerPort": 25224, + "protocol": "UDP" + } + ], + "env": [ + { + "name": "AKS_RESOURCE_ID", + "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "name": "AKS_REGION", + "value": "eastus" + }, + { + "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", + "value": "true" + }, + { + "name": "CONTROLLER_TYPE", + "value": "ReplicaSet" + }, + { + "name": "NODE_IP", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "status.hostIP" + } + } + } + ], + "resources": { + "limits": { + "cpu": "150m", + "memory": "500Mi" + }, + "requests": { + "cpu": "50m", + "memory": "100Mi" + } + }, + "volumeMounts": [ + { + "name": "docker-sock", + "mountPath": "/var/run/host" + }, + { + "name": "host-log", + "mountPath": "/var/log" + }, + { + "name": "containerlog-path", + "mountPath": "/var/lib/docker/containers" + }, + { + "name": "azure-json-path", + "mountPath": "/etc/kubernetes/host" + }, + { + "name": "omsagent-secret", + "readOnly": true, + "mountPath": "/etc/omsagent-secret" + }, + { + "name": "omsagent-rs-config", + 
"mountPath": "/etc/config" + } + ], + "livenessProbe": { + "exec": { + "command": [ + "/bin/bash", + "-c", + "ps -ef | grep main | grep -v \"grep\"" + ] + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 1, + "periodSeconds": 60, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux", + "kubernetes.io/role": "agent" + }, + "serviceAccountName": "omsagent", + "serviceAccount": "omsagent", + "securityContext": {}, + "schedulerName": "default-scheduler" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 1 + } + }, + "revisionHistoryLimit": 10, + "progressDeadlineSeconds": 2147483647 + }, + "status": { + "observedGeneration": 2, + "replicas": 1, + "updatedReplicas": 1, + "readyReplicas": 1, + "availableReplicas": 1, + "conditions": [ + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-04-26T22:57:51Z", + "lastTransitionTime": "2019-04-26T22:57:51Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum availability." 
+ } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "tunnelfront", + "namespace": "kube-system", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/tunnelfront", + "uid": "45e524e6-44e5-11e9-9920-423525a6b683", + "resourceVersion": "9030842", + "generation": 3, + "creationTimestamp": "2019-03-12T16:38:32Z", + "labels": { + "addonmanager.kubernetes.io/mode": "Reconcile", + "component": "tunnel", + "kubernetes.io/cluster-service": "true", + "tier": "node" + }, + "annotations": { + "deployment.kubernetes.io/revision": "3", + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"component\":\"tunnel\",\"kubernetes.io/cluster-service\":\"true\",\"tier\":\"node\"},\"name\":\"tunnelfront\",\"namespace\":\"kube-system\"},\"spec\":{\"replicas\":1,\"selector\":{\"matchLabels\":{\"component\":\"tunnel\"}},\"template\":{\"metadata\":{\"labels\":{\"component\":\"tunnel\"}},\"spec\":{\"affinity\":{\"nodeAffinity\":{\"requiredDuringSchedulingIgnoredDuringExecution\":{\"nodeSelectorTerms\":[{\"labelSelector\":null,\"matchExpressions\":[{\"key\":\"kubernetes.azure.com/cluster\",\"operator\":\"Exists\"}]}]}}},\"containers\":[{\"env\":[{\"name\":\"OVERRIDE_TUNNEL_SERVER_NAME\",\"value\":\"t_dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io\"},{\"name\":\"TUNNEL_CLUSTERUSER_NAME\",\"value\":\"28957308\"},{\"name\":\"TUNNELGATEWAY_SERVER_NAME\",\"value\":\"dilipr-hea-dilipr-health-te-72c8e8-0b16acad.tun.eastus.azmk8s.io\"},{\"name\":\"TUNNELGATEWAY_SSH_PORT\",\"value\":\"22\"},{\"name\":\"TUNNELGATEWAY_TLS_PORT\",\"value\":\"443\"},{\"name\":\"KUBE_CONFIG\",\"value\":\"/etc/kubernetes/kubeconfig/kubeconfig\"}],\"image\":\"docker.io/deis/hcp-tunnel-front:v1.9.2-v4.0.7\",\"imagePullPolicy\":\"IfNotPresent\",\"livenessProbe\":{\"exec\":{\"
command\":[\"/lib/tunnel-front/check-tunnel-connection.sh\"]},\"failureThreshold\":12,\"initialDelaySeconds\":10,\"periodSeconds\":60},\"name\":\"tunnel-front\",\"resources\":{\"requests\":{\"cpu\":\"10m\",\"memory\":\"64Mi\"}},\"securityContext\":{\"privileged\":true},\"volumeMounts\":[{\"mountPath\":\"/etc/kubernetes/kubeconfig\",\"name\":\"kubeconfig\",\"readOnly\":true},{\"mountPath\":\"/etc/kubernetes/certs\",\"name\":\"certificates\",\"readOnly\":true}]}],\"dnsPolicy\":\"Default\",\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"tunnelfront\",\"tolerations\":[{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Exists\"}],\"volumes\":[{\"configMap\":{\"name\":\"tunnelfront-kubecfg\",\"optional\":true},\"name\":\"kubeconfig\"},{\"hostPath\":{\"path\":\"/etc/kubernetes/certs\"},\"name\":\"certificates\"}]}}}}\n" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "component": "tunnel" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "component": "tunnel" + } + }, + "spec": { + "volumes": [ + { + "name": "kubeconfig", + "configMap": { + "name": "tunnelfront-kubecfg", + "defaultMode": 420, + "optional": true + } + }, + { + "name": "certificates", + "hostPath": { + "path": "/etc/kubernetes/certs", + "type": "" + } + } + ], + "containers": [ + { + "name": "tunnel-front", + "image": "docker.io/deis/hcp-tunnel-front:v1.9.2-v4.0.7", + "env": [ + { + "name": "OVERRIDE_TUNNEL_SERVER_NAME", + "value": "t_dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "TUNNEL_CLUSTERUSER_NAME", + "value": "28957308" + }, + { + "name": "TUNNELGATEWAY_SERVER_NAME", + "value": "dilipr-hea-dilipr-health-te-72c8e8-0b16acad.tun.eastus.azmk8s.io" + }, + { + "name": "TUNNELGATEWAY_SSH_PORT", + "value": "22" + }, + { + "name": "TUNNELGATEWAY_TLS_PORT", + "value": "443" + }, + { + "name": "KUBE_CONFIG", + "value": 
"/etc/kubernetes/kubeconfig/kubeconfig" + } + ], + "resources": { + "requests": { + "cpu": "10m", + "memory": "64Mi" + } + }, + "volumeMounts": [ + { + "name": "kubeconfig", + "readOnly": true, + "mountPath": "/etc/kubernetes/kubeconfig" + }, + { + "name": "certificates", + "readOnly": true, + "mountPath": "/etc/kubernetes/certs" + } + ], + "livenessProbe": { + "exec": { + "command": [ + "/lib/tunnel-front/check-tunnel-connection.sh" + ] + }, + "initialDelaySeconds": 10, + "timeoutSeconds": 1, + "periodSeconds": 60, + "successThreshold": 1, + "failureThreshold": 12 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "Default", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "tunnelfront", + "serviceAccount": "tunnelfront", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + } + ], + "priorityClassName": "system-node-critical" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 1 + } + }, + "revisionHistoryLimit": 10, + "progressDeadlineSeconds": 2147483647 + }, + "status": { + "observedGeneration": 3, + "replicas": 1, + "updatedReplicas": 1, + "readyReplicas": 1, + "availableReplicas": 1, + "conditions": [ + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-03-12T16:38:32Z", + "lastTransitionTime": "2019-03-12T16:38:32Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has minimum 
availability." + } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + }, + { + "metadata": { + "name": "nginx-deployment", + "namespace": "test", + "selfLink": "/apis/extensions/v1beta1/namespaces/test/deployments/nginx-deployment", + "uid": "841dd837-7ce1-11e9-8d23-32c49ee6f300", + "resourceVersion": "7952520", + "generation": 1, + "creationTimestamp": "2019-05-22T22:32:43Z", + "labels": { + "app": "nginx" + }, + "annotations": { + "deployment.kubernetes.io/revision": "1" + } + }, + "spec": { + "replicas": 2, + "selector": { + "matchLabels": { + "app": "nginx" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "app": "nginx" + } + }, + "spec": { + "containers": [ + { + "name": "front-end", + "image": "nginx", + "ports": [ + { + "containerPort": 81, + "protocol": "TCP" + } + ], + "resources": {}, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "Always" + }, + { + "name": "rss-reader", + "image": "nickchase/rss-php-nginx:v1", + "ports": [ + { + "containerPort": 88, + "protocol": "TCP" + } + ], + "resources": {}, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "securityContext": {}, + "schedulerName": "default-scheduler" + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 1 + } + }, + "revisionHistoryLimit": 10, + "progressDeadlineSeconds": 2147483647 + }, + "status": { + "observedGeneration": 1, + "replicas": 2, + "updatedReplicas": 2, + "readyReplicas": 2, + "availableReplicas": 2, + "conditions": [ + { + "type": "Available", + "status": "True", + "lastUpdateTime": "2019-05-22T22:32:51Z", + "lastTransitionTime": "2019-05-22T22:32:51Z", + "reason": "MinimumReplicasAvailable", + "message": "Deployment has 
minimum availability." + } + ] + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + } + ] +} \ No newline at end of file diff --git a/inventory/nodes.json b/inventory/nodes.json new file mode 100644 index 000000000..9ccb0501e --- /dev/null +++ b/inventory/nodes.json @@ -0,0 +1,964 @@ +{ + "items": [ + { + "metadata": { + "name": "aks-nodepool1-19574989-0", + "selfLink": "/api/v1/nodes/aks-nodepool1-19574989-0", + "uid": "9012b16c-44e5-11e9-9920-423525a6b683", + "resourceVersion": "9742037", + "creationTimestamp": "2019-03-12T16:40:36Z", + "labels": { + "agentpool": "nodepool1", + "beta.kubernetes.io/arch": "amd64", + "beta.kubernetes.io/instance-type": "Standard_DS1_v2", + "beta.kubernetes.io/os": "linux", + "failure-domain.beta.kubernetes.io/region": "eastus", + "failure-domain.beta.kubernetes.io/zone": "0", + "kubernetes.azure.com/cluster": "MC_dilipr-health-test_dilipr-health-test_eastus", + "kubernetes.io/hostname": "aks-nodepool1-19574989-0", + "kubernetes.io/role": "agent", + "node-role.kubernetes.io/agent": "", + "storageprofile": "managed", + "storagetier": "Premium_LRS" + }, + "annotations": { + "node.alpha.kubernetes.io/ttl": "0", + "volumes.kubernetes.io/controller-managed-attach-detach": "true" + } + }, + "spec": { + "podCIDR": "10.244.1.0/24", + "providerID": "azure:///subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/MC_dilipr-health-test_dilipr-health-test_eastus/providers/Microsoft.Compute/virtualMachines/aks-nodepool1-19574989-0" + }, + "status": { + "capacity": { + "cpu": "1", + "ephemeral-storage": "30428648Ki", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "3524612Ki", + "pods": "110" + }, + "allocatable": { + "cpu": "940m", + "ephemeral-storage": "28043041951", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "2504708Ki", + "pods": "110" + }, + "conditions": [ + { + "type": "NetworkUnavailable", + "status": "False", + "lastHeartbeatTime": "2019-03-12T16:42:18Z", + "lastTransitionTime": 
"2019-03-12T16:42:18Z", + "reason": "RouteCreated", + "message": "RouteController created a route" + }, + { + "type": "OutOfDisk", + "status": "False", + "lastHeartbeatTime": "2019-06-08T00:47:00Z", + "lastTransitionTime": "2019-04-22T19:28:17Z", + "reason": "KubeletHasSufficientDisk", + "message": "kubelet has sufficient disk space available" + }, + { + "type": "MemoryPressure", + "status": "False", + "lastHeartbeatTime": "2019-06-08T00:47:00Z", + "lastTransitionTime": "2019-04-22T19:28:17Z", + "reason": "KubeletHasSufficientMemory", + "message": "kubelet has sufficient memory available" + }, + { + "type": "DiskPressure", + "status": "False", + "lastHeartbeatTime": "2019-06-08T00:47:00Z", + "lastTransitionTime": "2019-04-22T19:28:17Z", + "reason": "KubeletHasNoDiskPressure", + "message": "kubelet has no disk pressure" + }, + { + "type": "PIDPressure", + "status": "False", + "lastHeartbeatTime": "2019-06-08T00:47:00Z", + "lastTransitionTime": "2019-03-12T16:40:36Z", + "reason": "KubeletHasSufficientPID", + "message": "kubelet has sufficient PID available" + }, + { + "type": "Ready", + "status": "True", + "lastHeartbeatTime": "2019-06-08T00:47:00Z", + "lastTransitionTime": "2019-04-22T19:28:17Z", + "reason": "KubeletReady", + "message": "kubelet is posting ready status. 
AppArmor enabled" + } + ], + "addresses": [ + { + "type": "Hostname", + "address": "aks-nodepool1-19574989-0" + }, + { + "type": "InternalIP", + "address": "10.240.0.4" + } + ], + "daemonEndpoints": { + "kubeletEndpoint": { + "Port": 10250 + } + }, + "nodeInfo": { + "machineID": "cc9ed99e383540a4b0379995bb779221", + "systemUUID": "301B3B88-C7BD-3D45-A3CB-3CD66A42EB6F", + "bootID": "d8f6c00f-a085-450e-bf5c-12e651a0fcfc", + "kernelVersion": "4.15.0-1037-azure", + "osImage": "Ubuntu 16.04.5 LTS", + "containerRuntimeVersion": "docker://3.0.4", + "kubeletVersion": "v1.11.8", + "kubeProxyVersion": "v1.11.8", + "operatingSystem": "linux", + "architecture": "amd64" + }, + "images": [ + { + "names": [ + "nickchase/rss-php-nginx@sha256:48da56a77fe4ecff4917121365d8e0ce615ebbdfe31f48a996255f5592894e2b", + "nickchase/rss-php-nginx:v1" + ], + "sizeBytes": 677038498 + }, + { + "names": [ + "rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", + "rdilip83/jsonlogger:v12" + ], + "sizeBytes": 676594134 + }, + { + "names": [ + "rdilip83/logeverysecond@sha256:6fe5624808609c507178a77f94384fb9794a4d6b7d102ed8016a4baf608164a1", + "rdilip83/logeverysecond:v2" + ], + "sizeBytes": 674931590 + }, + { + "names": [ + "deis/hcp-tunnel-front@sha256:a067679f0ab376197a344cd410821cf07d69fc322dcd9af4a9229250da725ce2", + "deis/hcp-tunnel-front:v1.9.2-v4.0.4" + ], + "sizeBytes": 640504769 + }, + { + "names": [ + "k8s.gcr.io/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "k8s.gcr.io/hyperkube-amd64:v1.11.8" + ], + "sizeBytes": 615263658 + }, + { + "names": [ + "rdilip83/ha@sha256:40208587069b52338eefc24627bab5d10aa6ddc4f3a2a15dee74ad442327765f", + "rdilip83/ha:0426" + ], + "sizeBytes": 514907213 + }, + { + "names": [ + "microsoft/oms@sha256:de83d1df24cb86a3a3110bd03abbd5704d7a7345565b1996f49ff001a3665385", + "microsoft/oms:healthpreview04262019" + ], + "sizeBytes": 514907213 + }, + { + "names": [ + 
"microsoft/oms@sha256:63efbe8fc36635e37aee8c2c631a9d251ab1c736f0c86007c2607987a4bdd8a5", + "microsoft/oms:ciprod04232019" + ], + "sizeBytes": 487282051 + }, + { + "names": [ + "rdilip83/kubehealth@sha256:2050713f627ac24dfd7d3c2594accc49491320711027668d96d3227b8f7ad721", + "rdilip83/kubehealth:2.0" + ], + "sizeBytes": 458285595 + }, + { + "names": [ + "rdilip83/kubehealth@sha256:be84b745efda62fd0d4960d385714737a9b87d02adbc4b841c4a0b5db0495d52", + "rdilip83/kubehealth:1.0" + ], + "sizeBytes": 458243187 + }, + { + "names": [ + "rdilip83/kubehealth@sha256:eb1c97ad840e4b8c84e9e15b7d148960af6a436f497834eda439fe2f9530435c", + "rdilip83/kubehealth:v3" + ], + "sizeBytes": 458243111 + }, + { + "names": [ + "rdilip83/kubehealth@sha256:c43697cca29a63b442ff1414cfa5e72ee1779c4314fac9431760e1973c649a97", + "rdilip83/kubehealth:v2" + ], + "sizeBytes": 458243081 + }, + { + "names": [ + "rdilip83/kubehealth@sha256:899ee16fed942a999a887b7f46702803a1a354517ea04e7191031cbdbc67e3c5", + "rdilip83/kubehealth:v1" + ], + "sizeBytes": 458242872 + }, + { + "names": [ + "deis/hcp-tunnel-front@sha256:ab4e468fe95b18c65dee93543d8d2ca115121728371b2fc467947a8cc9165272", + "deis/hcp-tunnel-front:v1.9.2-v4.0.5" + ], + "sizeBytes": 380477207 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:dc5744fd8c22aebfe40d6b62ab97d18d7bfbfc7ab1782509d69a5a9ec514df2c", + "k8s.gcr.io/cluster-autoscaler:v1.12.2" + ], + "sizeBytes": 232167833 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:156b7b9bcba24ed474f67d0feaf27f2506013f15b030341bbd41c630283161b8", + "k8s.gcr.io/cluster-autoscaler:v1.3.4" + ], + "sizeBytes": 217264129 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:97896235bf66bde573d6f2ee150e212ea7010d314eb5d2cfb2ff1af93335db30", + "k8s.gcr.io/cluster-autoscaler:v1.3.3" + ], + "sizeBytes": 217259793 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:36a369ca4643542d501bce0addf8b903f2141ae9e2608662b77a3d24f01d7780", + "k8s.gcr.io/cluster-autoscaler:v1.2.2" + 
], + "sizeBytes": 208688449 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:9a71e04fdb0be61f2087847b6c5d223db3de4768e0cf8941b550fe9d4a971f58", + "k8s.gcr.io/cluster-autoscaler:v1.1.2" + ], + "sizeBytes": 198265853 + }, + { + "names": [ + "k8s.gcr.io/cloud-controller-manager-amd64@sha256:bc7dc1bd3891ef77a19b5ecf1df50f0ee75266cd797c9f3ff508b40a86c737a0", + "k8s.gcr.io/cloud-controller-manager-amd64:v1.11.8" + ], + "sizeBytes": 139540150 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:7ff5a60304b344f2f29c804c7253632bbc818794f6932236a56db107a6a8f5af", + "k8s.gcr.io/cluster-autoscaler:v1.13.1" + ], + "sizeBytes": 136618018 + }, + { + "names": [ + "containernetworking/networkmonitor@sha256:944408a497c451b0e79d2596dc2e9fe5036cdbba7fa831bff024e1c9ed44190d", + "containernetworking/networkmonitor:v0.0.5" + ], + "sizeBytes": 122043325 + }, + { + "names": [ + "k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", + "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1" + ], + "sizeBytes": 121711221 + }, + { + "names": [ + "nginx@sha256:23b4dcdf0d34d4a129755fc6f52e1c6e23bb34ea011b315d87e193033bcd1b68", + "nginx:latest" + ], + "sizeBytes": 109331233 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:3da3f17cd4f02fe5696f29a5e6cd4aef7111f20dab9bec54ea35942346cfeb60", + "k8s.gcr.io/kube-addon-manager-amd64:v8.8" + ], + "sizeBytes": 99631084 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:2fd1daf3d3cf0e94a753f2263b60dbb0d42b107b5cde0c75ee3fc5c830e016e4", + "k8s.gcr.io/kube-addon-manager-amd64:v8.9" + ], + "sizeBytes": 99240637 + }, + { + "names": [ + "nginx@sha256:e3456c851a152494c3e4ff5fcc26f240206abac0c9d794affb40e0714846c451", + "nginx:1.7.9" + ], + "sizeBytes": 91664166 + }, + { + "names": [ + "microsoft/virtual-kubelet@sha256:9d2ac6238bb2b8b7a85a71ae6103c38bd387884519665f6f9d47fdc1fb8edb61", + "microsoft/virtual-kubelet:latest" + ], + "sizeBytes": 83395521 + }, + 
{ + "names": [ + "deis/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "deis/kube-svc-redirect:v1.0.2" + ], + "sizeBytes": 82897218 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:3519273916ba45cfc9b318448d4629819cb5fbccbb0822cce054dd8c1f68cb60", + "k8s.gcr.io/kube-addon-manager-amd64:v8.6" + ], + "sizeBytes": 78384272 + }, + { + "names": [ + "k8s.gcr.io/heapster-amd64@sha256:d4d10455d921802bdb004e7edfe423a2b2f88911319b48abf47e0af909f27f15", + "k8s.gcr.io/heapster-amd64:v1.5.1" + ], + "sizeBytes": 75318380 + }, + { + "names": [ + "k8s.gcr.io/heapster-amd64@sha256:dccaabb0c20cf05c29baefa1e9bf0358b083ccc0fab492b9b3b47fb7e4db5472", + "k8s.gcr.io/heapster-amd64:v1.5.4" + ], + "sizeBytes": 75318342 + }, + { + "names": [ + "k8s.gcr.io/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", + "k8s.gcr.io/heapster-amd64:v1.5.3" + ], + "sizeBytes": 75318342 + }, + { + "names": [ + "k8s.gcr.io/rescheduler@sha256:66a900b01c70d695e112d8fa7779255640aab77ccc31f2bb661e6c674fe0d162", + "k8s.gcr.io/rescheduler:v0.3.1" + ], + "sizeBytes": 74659350 + }, + { + "names": [ + "gcr.io/kubernetes-helm/tiller@sha256:f6d8f4ab9ba993b5f5b60a6edafe86352eabe474ffeb84cb6c79b8866dce45d1", + "gcr.io/kubernetes-helm/tiller:v2.11.0" + ], + "sizeBytes": 71821984 + }, + { + "names": [ + "gcr.io/kubernetes-helm/tiller@sha256:394fb7d5f2fbaca54f6a0dec387cef926f6ae359786c89f7da67db173b97a322", + "gcr.io/kubernetes-helm/tiller:v2.8.1" + ], + "sizeBytes": 71509364 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:d53486c3a0b49ebee019932878dc44232735d5622a51dbbdcec7124199020d09", + "k8s.gcr.io/kube-addon-manager-amd64:v8.7" + ], + "sizeBytes": 63322109 + }, + { + "names": [ + "nvidia/k8s-device-plugin@sha256:41b3531d338477d26eb1151c15d0bea130d31e690752315a5205d8094439b0a6", + "nvidia/k8s-device-plugin:1.11" + ], + "sizeBytes": 63138633 + }, + { + "names": [ + 
"nvidia/k8s-device-plugin@sha256:327487db623cc75bdff86e56942f4af280e5f3de907339d0141fdffaeef342b8", + "nvidia/k8s-device-plugin:1.10" + ], + "sizeBytes": 63130377 + }, + { + "names": [ + "vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", + "vishiy/hello:err100eps" + ], + "sizeBytes": 54649865 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13" + ], + "sizeBytes": 51157394 + }, + { + "names": [ + "quay.io/coreos/flannel@sha256:5fa9435c1e95be2ec4daa53a35c39d5e3cc99fce33ed4983f4bb707bc9fc175f", + "quay.io/coreos/flannel:v0.8.0" + ], + "sizeBytes": 50732259 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:1a3fc069de481ae690188f6f1ba4664b5cc7760af37120f70c86505c79eea61d", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.5" + ], + "sizeBytes": 49387411 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:e7f673b2c5ccd047c48b4eecd5452b2db1b9454daf07b23068ad239f98afaa29", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.15.0" + ], + "sizeBytes": 49052023 + }, + { + "names": [ + "k8s.gcr.io/rescheduler@sha256:156cfbfd05a5a815206fd2eeb6cbdaf1596d71ea4b415d3a6c43071dd7b99450", + "k8s.gcr.io/rescheduler:v0.4.0" + ], + "sizeBytes": 48973149 + }, + { + "names": [ + "gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", + "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0" + ], + "sizeBytes": 48645472 + }, + { + "names": [ + "quay.io/coreos/flannel@sha256:6ecef07be35e5e861016ee557f986f89ad8244df47198de379a1bf4e580185df", + "quay.io/coreos/flannel:v0.10.0" + ], + "sizeBytes": 44598861 + }, + { + "names": [ + "k8s.gcr.io/metrics-server-amd64@sha256:49a9f12f7067d11f42c803dbe61ed2c1299959ad85cb315b25ff7eef8e6b8892", + "k8s.gcr.io/metrics-server-amd64:v0.2.1" + ], + "sizeBytes": 42541759 + }, + { + "names": [ + 
"k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:23df717980b4aa08d2da6c4cfa327f1b730d92ec9cf740959d2d5911830d82fb", + "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.8" + ], + "sizeBytes": 42210862 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:f80f5f9328107dc516d67f7b70054354b9367d31d4946a3bffd3383d83d7efe8", + "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.7" + ], + "sizeBytes": 42033070 + } + ] + }, + "apiVersion": "v1", + "kind": "Node" + }, + { + "metadata": { + "name": "aks-nodepool1-19574989-1", + "selfLink": "/api/v1/nodes/aks-nodepool1-19574989-1", + "uid": "8e1b5c77-44e5-11e9-9920-423525a6b683", + "resourceVersion": "9742045", + "creationTimestamp": "2019-03-12T16:40:33Z", + "labels": { + "agentpool": "nodepool1", + "beta.kubernetes.io/arch": "amd64", + "beta.kubernetes.io/instance-type": "Standard_DS1_v2", + "beta.kubernetes.io/os": "linux", + "failure-domain.beta.kubernetes.io/region": "eastus", + "failure-domain.beta.kubernetes.io/zone": "1", + "kubernetes.azure.com/cluster": "MC_dilipr-health-test_dilipr-health-test_eastus", + "kubernetes.io/hostname": "aks-nodepool1-19574989-1", + "kubernetes.io/role": "agent", + "node-role.kubernetes.io/agent": "", + "storageprofile": "managed", + "storagetier": "Premium_LRS" + }, + "annotations": { + "node.alpha.kubernetes.io/ttl": "0", + "volumes.kubernetes.io/controller-managed-attach-detach": "true" + } + }, + "spec": { + "podCIDR": "10.244.0.0/24", + "providerID": "azure:///subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/MC_dilipr-health-test_dilipr-health-test_eastus/providers/Microsoft.Compute/virtualMachines/aks-nodepool1-19574989-1" + }, + "status": { + "capacity": { + "cpu": "1", + "ephemeral-storage": "30428648Ki", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "3524612Ki", + "pods": "110" + }, + "allocatable": { + "cpu": "940m", + "ephemeral-storage": "28043041951", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "2504708Ki", + "pods": "110" + }, + "conditions": [ + { 
+ "type": "NetworkUnavailable", + "status": "False", + "lastHeartbeatTime": "2019-03-12T16:42:30Z", + "lastTransitionTime": "2019-03-12T16:42:30Z", + "reason": "RouteCreated", + "message": "RouteController created a route" + }, + { + "type": "OutOfDisk", + "status": "False", + "lastHeartbeatTime": "2019-06-08T00:47:06Z", + "lastTransitionTime": "2019-05-19T14:13:53Z", + "reason": "KubeletHasSufficientDisk", + "message": "kubelet has sufficient disk space available" + }, + { + "type": "MemoryPressure", + "status": "False", + "lastHeartbeatTime": "2019-06-08T00:47:06Z", + "lastTransitionTime": "2019-05-19T14:13:53Z", + "reason": "KubeletHasSufficientMemory", + "message": "kubelet has sufficient memory available" + }, + { + "type": "DiskPressure", + "status": "False", + "lastHeartbeatTime": "2019-06-08T00:47:06Z", + "lastTransitionTime": "2019-05-19T14:13:53Z", + "reason": "KubeletHasNoDiskPressure", + "message": "kubelet has no disk pressure" + }, + { + "type": "PIDPressure", + "status": "False", + "lastHeartbeatTime": "2019-06-08T00:47:06Z", + "lastTransitionTime": "2019-03-12T16:40:33Z", + "reason": "KubeletHasSufficientPID", + "message": "kubelet has sufficient PID available" + }, + { + "type": "Ready", + "status": "True", + "lastHeartbeatTime": "2019-06-08T00:47:06Z", + "lastTransitionTime": "2019-05-19T14:13:53Z", + "reason": "KubeletReady", + "message": "kubelet is posting ready status. 
AppArmor enabled" + } + ], + "addresses": [ + { + "type": "Hostname", + "address": "aks-nodepool1-19574989-1" + }, + { + "type": "InternalIP", + "address": "10.240.0.5" + } + ], + "daemonEndpoints": { + "kubeletEndpoint": { + "Port": 10250 + } + }, + "nodeInfo": { + "machineID": "1954026de5e6436788f214eb0dfd6a13", + "systemUUID": "17A6A78E-D3E2-2A4F-852B-C91D933C8D5B", + "bootID": "4c822e6d-c2e5-4697-9a01-467e04804fc1", + "kernelVersion": "4.15.0-1037-azure", + "osImage": "Ubuntu 16.04.5 LTS", + "containerRuntimeVersion": "docker://3.0.4", + "kubeletVersion": "v1.11.8", + "kubeProxyVersion": "v1.11.8", + "operatingSystem": "linux", + "architecture": "amd64" + }, + "images": [ + { + "names": [ + "perl@sha256:268e7af9853bcc6d2100e2ad76e928c2ca861518217c269b8a762849a8617c12", + "perl:latest" + ], + "sizeBytes": 890592834 + }, + { + "names": [ + "nickchase/rss-php-nginx@sha256:48da56a77fe4ecff4917121365d8e0ce615ebbdfe31f48a996255f5592894e2b", + "nickchase/rss-php-nginx:v1" + ], + "sizeBytes": 677038498 + }, + { + "names": [ + "rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", + "rdilip83/jsonlogger:v12" + ], + "sizeBytes": 676594134 + }, + { + "names": [ + "rdilip83/logeverysecond@sha256:6fe5624808609c507178a77f94384fb9794a4d6b7d102ed8016a4baf608164a1", + "rdilip83/logeverysecond:v2" + ], + "sizeBytes": 674931590 + }, + { + "names": [ + "deis/hcp-tunnel-front@sha256:a067679f0ab376197a344cd410821cf07d69fc322dcd9af4a9229250da725ce2", + "deis/hcp-tunnel-front:v1.9.2-v4.0.4" + ], + "sizeBytes": 640504769 + }, + { + "names": [ + "k8s.gcr.io/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "k8s.gcr.io/hyperkube-amd64:v1.11.8" + ], + "sizeBytes": 615263658 + }, + { + "names": [ + "microsoft/oms@sha256:de83d1df24cb86a3a3110bd03abbd5704d7a7345565b1996f49ff001a3665385", + "microsoft/oms:healthpreview04262019" + ], + "sizeBytes": 514907213 + }, + { + "names": [ + 
"rdilip83/ha@sha256:40208587069b52338eefc24627bab5d10aa6ddc4f3a2a15dee74ad442327765f", + "rdilip83/ha:0426" + ], + "sizeBytes": 514907213 + }, + { + "names": [ + "microsoft/oms@sha256:63efbe8fc36635e37aee8c2c631a9d251ab1c736f0c86007c2607987a4bdd8a5", + "microsoft/oms:ciprod04232019" + ], + "sizeBytes": 487282051 + }, + { + "names": [ + "rdilip83/kubehealth@sha256:2050713f627ac24dfd7d3c2594accc49491320711027668d96d3227b8f7ad721", + "rdilip83/kubehealth:2.0" + ], + "sizeBytes": 458285595 + }, + { + "names": [ + "rdilip83/kubehealth@sha256:be84b745efda62fd0d4960d385714737a9b87d02adbc4b841c4a0b5db0495d52", + "rdilip83/kubehealth:1.0" + ], + "sizeBytes": 458243187 + }, + { + "names": [ + "rdilip83/kubehealth@sha256:eb1c97ad840e4b8c84e9e15b7d148960af6a436f497834eda439fe2f9530435c", + "rdilip83/kubehealth:v3" + ], + "sizeBytes": 458243111 + }, + { + "names": [ + "rdilip83/kubehealth@sha256:c43697cca29a63b442ff1414cfa5e72ee1779c4314fac9431760e1973c649a97", + "rdilip83/kubehealth:v2" + ], + "sizeBytes": 458243081 + }, + { + "names": [ + "rdilip83/kubehealth@sha256:899ee16fed942a999a887b7f46702803a1a354517ea04e7191031cbdbc67e3c5", + "rdilip83/kubehealth:v1" + ], + "sizeBytes": 458242872 + }, + { + "names": [ + "deis/hcp-tunnel-front@sha256:68878ee3ea1781b322ea3952c3370e31dd89be8bb0864e2bf27bdba6dc904c41", + "deis/hcp-tunnel-front:v1.9.2-v4.0.7" + ], + "sizeBytes": 383483267 + }, + { + "names": [ + "deis/hcp-tunnel-front@sha256:ab4e468fe95b18c65dee93543d8d2ca115121728371b2fc467947a8cc9165272", + "deis/hcp-tunnel-front:v1.9.2-v4.0.5" + ], + "sizeBytes": 380477207 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:dc5744fd8c22aebfe40d6b62ab97d18d7bfbfc7ab1782509d69a5a9ec514df2c", + "k8s.gcr.io/cluster-autoscaler:v1.12.2" + ], + "sizeBytes": 232167833 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:156b7b9bcba24ed474f67d0feaf27f2506013f15b030341bbd41c630283161b8", + "k8s.gcr.io/cluster-autoscaler:v1.3.4" + ], + "sizeBytes": 217264129 + }, + { + "names": 
[ + "k8s.gcr.io/cluster-autoscaler@sha256:97896235bf66bde573d6f2ee150e212ea7010d314eb5d2cfb2ff1af93335db30", + "k8s.gcr.io/cluster-autoscaler:v1.3.3" + ], + "sizeBytes": 217259793 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:36a369ca4643542d501bce0addf8b903f2141ae9e2608662b77a3d24f01d7780", + "k8s.gcr.io/cluster-autoscaler:v1.2.2" + ], + "sizeBytes": 208688449 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:9a71e04fdb0be61f2087847b6c5d223db3de4768e0cf8941b550fe9d4a971f58", + "k8s.gcr.io/cluster-autoscaler:v1.1.2" + ], + "sizeBytes": 198265853 + }, + { + "names": [ + "k8s.gcr.io/cloud-controller-manager-amd64@sha256:bc7dc1bd3891ef77a19b5ecf1df50f0ee75266cd797c9f3ff508b40a86c737a0", + "k8s.gcr.io/cloud-controller-manager-amd64:v1.11.8" + ], + "sizeBytes": 139540150 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:7ff5a60304b344f2f29c804c7253632bbc818794f6932236a56db107a6a8f5af", + "k8s.gcr.io/cluster-autoscaler:v1.13.1" + ], + "sizeBytes": 136618018 + }, + { + "names": [ + "containernetworking/networkmonitor@sha256:944408a497c451b0e79d2596dc2e9fe5036cdbba7fa831bff024e1c9ed44190d", + "containernetworking/networkmonitor:v0.0.5" + ], + "sizeBytes": 122043325 + }, + { + "names": [ + "k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", + "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1" + ], + "sizeBytes": 121711221 + }, + { + "names": [ + "nginx@sha256:23b4dcdf0d34d4a129755fc6f52e1c6e23bb34ea011b315d87e193033bcd1b68", + "nginx:latest" + ], + "sizeBytes": 109331233 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:3da3f17cd4f02fe5696f29a5e6cd4aef7111f20dab9bec54ea35942346cfeb60", + "k8s.gcr.io/kube-addon-manager-amd64:v8.8" + ], + "sizeBytes": 99631084 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:2fd1daf3d3cf0e94a753f2263b60dbb0d42b107b5cde0c75ee3fc5c830e016e4", + "k8s.gcr.io/kube-addon-manager-amd64:v8.9" + ], + "sizeBytes": 
99240637 + }, + { + "names": [ + "nginx@sha256:e3456c851a152494c3e4ff5fcc26f240206abac0c9d794affb40e0714846c451", + "nginx:1.7.9" + ], + "sizeBytes": 91664166 + }, + { + "names": [ + "microsoft/virtual-kubelet@sha256:9d2ac6238bb2b8b7a85a71ae6103c38bd387884519665f6f9d47fdc1fb8edb61", + "microsoft/virtual-kubelet:latest" + ], + "sizeBytes": 83395521 + }, + { + "names": [ + "deis/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "deis/kube-svc-redirect:v1.0.2" + ], + "sizeBytes": 82897218 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:3519273916ba45cfc9b318448d4629819cb5fbccbb0822cce054dd8c1f68cb60", + "k8s.gcr.io/kube-addon-manager-amd64:v8.6" + ], + "sizeBytes": 78384272 + }, + { + "names": [ + "k8s.gcr.io/heapster-amd64@sha256:d4d10455d921802bdb004e7edfe423a2b2f88911319b48abf47e0af909f27f15", + "k8s.gcr.io/heapster-amd64:v1.5.1" + ], + "sizeBytes": 75318380 + }, + { + "names": [ + "k8s.gcr.io/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", + "k8s.gcr.io/heapster-amd64:v1.5.3" + ], + "sizeBytes": 75318342 + }, + { + "names": [ + "k8s.gcr.io/heapster-amd64@sha256:dccaabb0c20cf05c29baefa1e9bf0358b083ccc0fab492b9b3b47fb7e4db5472", + "k8s.gcr.io/heapster-amd64:v1.5.4" + ], + "sizeBytes": 75318342 + }, + { + "names": [ + "k8s.gcr.io/rescheduler@sha256:66a900b01c70d695e112d8fa7779255640aab77ccc31f2bb661e6c674fe0d162", + "k8s.gcr.io/rescheduler:v0.3.1" + ], + "sizeBytes": 74659350 + }, + { + "names": [ + "gcr.io/kubernetes-helm/tiller@sha256:f6d8f4ab9ba993b5f5b60a6edafe86352eabe474ffeb84cb6c79b8866dce45d1", + "gcr.io/kubernetes-helm/tiller:v2.11.0" + ], + "sizeBytes": 71821984 + }, + { + "names": [ + "gcr.io/kubernetes-helm/tiller@sha256:394fb7d5f2fbaca54f6a0dec387cef926f6ae359786c89f7da67db173b97a322", + "gcr.io/kubernetes-helm/tiller:v2.8.1" + ], + "sizeBytes": 71509364 + }, + { + "names": [ + 
"k8s.gcr.io/kube-addon-manager-amd64@sha256:d53486c3a0b49ebee019932878dc44232735d5622a51dbbdcec7124199020d09", + "k8s.gcr.io/kube-addon-manager-amd64:v8.7" + ], + "sizeBytes": 63322109 + }, + { + "names": [ + "nvidia/k8s-device-plugin@sha256:41b3531d338477d26eb1151c15d0bea130d31e690752315a5205d8094439b0a6", + "nvidia/k8s-device-plugin:1.11" + ], + "sizeBytes": 63138633 + }, + { + "names": [ + "nvidia/k8s-device-plugin@sha256:327487db623cc75bdff86e56942f4af280e5f3de907339d0141fdffaeef342b8", + "nvidia/k8s-device-plugin:1.10" + ], + "sizeBytes": 63130377 + }, + { + "names": [ + "vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", + "vishiy/hello:err100eps" + ], + "sizeBytes": 54649865 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13" + ], + "sizeBytes": 51157394 + }, + { + "names": [ + "quay.io/coreos/flannel@sha256:5fa9435c1e95be2ec4daa53a35c39d5e3cc99fce33ed4983f4bb707bc9fc175f", + "quay.io/coreos/flannel:v0.8.0" + ], + "sizeBytes": 50732259 + }, + { + "names": [ + "k8s.gcr.io/cluster-proportional-autoscaler-amd64@sha256:003f98d9f411ddfa6ff6d539196355e03ddd69fa4ed38c7ffb8fec6f729afe2d", + "k8s.gcr.io/cluster-proportional-autoscaler-amd64:1.1.2-r2" + ], + "sizeBytes": 49648481 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:1a3fc069de481ae690188f6f1ba4664b5cc7760af37120f70c86505c79eea61d", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.5" + ], + "sizeBytes": 49387411 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:e7f673b2c5ccd047c48b4eecd5452b2db1b9454daf07b23068ad239f98afaa29", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.15.0" + ], + "sizeBytes": 49052023 + }, + { + "names": [ + "k8s.gcr.io/rescheduler@sha256:156cfbfd05a5a815206fd2eeb6cbdaf1596d71ea4b415d3a6c43071dd7b99450", + "k8s.gcr.io/rescheduler:v0.4.0" + ], + "sizeBytes": 48973149 + }, + { + "names": [ + 
"gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", + "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0" + ], + "sizeBytes": 48645472 + }, + { + "names": [ + "quay.io/coreos/flannel@sha256:6ecef07be35e5e861016ee557f986f89ad8244df47198de379a1bf4e580185df", + "quay.io/coreos/flannel:v0.10.0" + ], + "sizeBytes": 44598861 + } + ] + }, + "apiVersion": "v1", + "kind": "Node" + } + ] +} \ No newline at end of file diff --git a/inventory/pods.json b/inventory/pods.json new file mode 100644 index 000000000..ab7ca36db --- /dev/null +++ b/inventory/pods.json @@ -0,0 +1,6971 @@ +{ + "items": [ + { + "metadata": { + "name": "diliprdeploymentnodeapps-c4fdfb446-mzcsr", + "generateName": "diliprdeploymentnodeapps-c4fdfb446-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/diliprdeploymentnodeapps-c4fdfb446-mzcsr", + "uid": "ee31a9ce-526e-11e9-a899-6a5520730c61", + "resourceVersion": "4597573", + "creationTimestamp": "2019-03-29T22:06:40Z", + "labels": { + "app": "diliprsnodeapppod", + "diliprPodLabel1": "p1", + "diliprPodLabel2": "p2", + "pod-template-hash": "709896002" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "diliprdeploymentnodeapps-c4fdfb446", + "uid": "ee1e78e0-526e-11e9-a899-6a5520730c61", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "diliprcontainerhelloapp", + "image": "rdilip83/logeverysecond:v2", + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-0", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T22:06:40Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T22:06:47Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T22:06:40Z" + } + ], + "hostIP": "10.240.0.4", + "podIP": "10.244.1.98", + "startTime": "2019-03-29T22:06:40Z", + "containerStatuses": [ + { + "name": "diliprcontainerhelloapp", + "state": { + "running": { + "startedAt": "2019-03-29T22:06:47Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "rdilip83/logeverysecond:v2", + "imageID": 
"docker-pullable://rdilip83/logeverysecond@sha256:6fe5624808609c507178a77f94384fb9794a4d6b7d102ed8016a4baf608164a1", + "containerID": "docker://80562bb7c90ccdde41c7ba85a2a8e063a0f6f19a79d54895d82f3d7f1023ad8c" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "nginx-deployment-67594d6bf6-m2vwn", + "generateName": "nginx-deployment-67594d6bf6-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/nginx-deployment-67594d6bf6-m2vwn", + "uid": "6d9749a6-7ce1-11e9-8d23-32c49ee6f300", + "resourceVersion": "7952432", + "creationTimestamp": "2019-05-22T22:32:05Z", + "labels": { + "app": "nginx", + "pod-template-hash": "2315082692" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "nginx-deployment-67594d6bf6", + "uid": "6d8e3428-7ce1-11e9-8d23-32c49ee6f300", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "nginx", + "image": "nginx:1.7.9", + "ports": [ + { + "containerPort": 80, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + 
"terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-0", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:32:06Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:32:14Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:32:06Z" + } + ], + "hostIP": "10.240.0.4", + "podIP": "10.244.1.128", + "startTime": "2019-05-22T22:32:06Z", + "containerStatuses": [ + { + "name": "nginx", + "state": { + "running": { + "startedAt": "2019-05-22T22:32:12Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "nginx:1.7.9", + "imageID": "docker-pullable://nginx@sha256:e3456c851a152494c3e4ff5fcc26f240206abac0c9d794affb40e0714846c451", + "containerID": "docker://b9acafd3f81d61df5c7c51ac8ae081b115759e5aa4f744b23ba4d0641732ae1f" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "nginx-deployment-67594d6bf6-tl5v6", + "generateName": "nginx-deployment-67594d6bf6-", + "namespace": "default", + "selfLink": 
"/api/v1/namespaces/default/pods/nginx-deployment-67594d6bf6-tl5v6", + "uid": "6d940ac3-7ce1-11e9-8d23-32c49ee6f300", + "resourceVersion": "7952430", + "creationTimestamp": "2019-05-22T22:32:05Z", + "labels": { + "app": "nginx", + "pod-template-hash": "2315082692" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "nginx-deployment-67594d6bf6", + "uid": "6d8e3428-7ce1-11e9-8d23-32c49ee6f300", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "nginx", + "image": "nginx:1.7.9", + "ports": [ + { + "containerPort": 80, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-0", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": 
"NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:32:06Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:32:14Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:32:05Z" + } + ], + "hostIP": "10.240.0.4", + "podIP": "10.244.1.127", + "startTime": "2019-05-22T22:32:06Z", + "containerStatuses": [ + { + "name": "nginx", + "state": { + "running": { + "startedAt": "2019-05-22T22:32:12Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "nginx:1.7.9", + "imageID": "docker-pullable://nginx@sha256:e3456c851a152494c3e4ff5fcc26f240206abac0c9d794affb40e0714846c451", + "containerID": "docker://1a6ca08a1005680e0fc2789a5ae3dc0f8790eda724c8033f5271f8d50415cc04" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "nginx-deployment-67594d6bf6-wc6bj", + "generateName": "nginx-deployment-67594d6bf6-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/nginx-deployment-67594d6bf6-wc6bj", + "uid": "6d96e65b-7ce1-11e9-8d23-32c49ee6f300", + "resourceVersion": "7952421", + "creationTimestamp": "2019-05-22T22:32:05Z", + "labels": { + "app": "nginx", + "pod-template-hash": "2315082692" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "nginx-deployment-67594d6bf6", + "uid": "6d8e3428-7ce1-11e9-8d23-32c49ee6f300", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { 
+ "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "nginx", + "image": "nginx:1.7.9", + "ports": [ + { + "containerPort": 80, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-1", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:32:06Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:32:12Z" + }, + { + "type": 
"ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:32:05Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.244.0.102", + "startTime": "2019-05-22T22:32:06Z", + "containerStatuses": [ + { + "name": "nginx", + "state": { + "running": { + "startedAt": "2019-05-22T22:32:11Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "nginx:1.7.9", + "imageID": "docker-pullable://nginx@sha256:e3456c851a152494c3e4ff5fcc26f240206abac0c9d794affb40e0714846c451", + "containerID": "docker://658d95bf8e28d6eda298b621e404f95925c7b0d92034ab149ff439aaeb839601" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "pi-m8ccw", + "generateName": "pi-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/pi-m8ccw", + "uid": "9fb16aaa-7ccc-11e9-8d23-32c49ee6f300", + "resourceVersion": "7940877", + "creationTimestamp": "2019-05-22T20:03:10Z", + "labels": { + "controller-uid": "9fad836f-7ccc-11e9-8d23-32c49ee6f300", + "job-name": "pi" + }, + "ownerReferences": [ + { + "apiVersion": "batch/v1", + "kind": "Job", + "name": "pi", + "uid": "9fad836f-7ccc-11e9-8d23-32c49ee6f300", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "pi", + "image": "perl", + "command": [ + "perl", + "-Mbignum=bpi", + "-wle", + "print bpi(2000)" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "Always" + } + ], + "restartPolicy": "Never", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-1", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Succeeded", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T20:03:10Z", + "reason": "PodCompleted" + }, + { + "type": "Ready", + "status": "False", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T20:04:43Z", + "reason": "PodCompleted" + }, + { + "type": "ContainersReady", + "status": "False", + "lastProbeTime": null, + "lastTransitionTime": null, + "reason": "PodCompleted" + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T20:03:10Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.244.0.100", + "startTime": "2019-05-22T20:03:10Z", + "containerStatuses": [ + { + "name": "pi", + "state": { + "terminated": { + "exitCode": 0, + "reason": "Completed", + "startedAt": "2019-05-22T20:04:36Z", + "finishedAt": "2019-05-22T20:04:41Z", + 
"containerID": "docker://1772f34611835a233cd4b41c69ed0b9b23ed14003085e82e39e53d15ba8dd9d5" + } + }, + "lastState": {}, + "ready": false, + "restartCount": 0, + "image": "perl:latest", + "imageID": "docker-pullable://perl@sha256:268e7af9853bcc6d2100e2ad76e928c2ca861518217c269b8a762849a8617c12", + "containerID": "docker://1772f34611835a233cd4b41c69ed0b9b23ed14003085e82e39e53d15ba8dd9d5" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "rss-site", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/rss-site", + "uid": "68a34ea4-7ce4-11e9-8d23-32c49ee6f300", + "resourceVersion": "7954135", + "creationTimestamp": "2019-05-22T22:53:26Z", + "labels": { + "app": "web" + }, + "annotations": { + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"v1\",\"kind\":\"Pod\",\"metadata\":{\"annotations\":{},\"labels\":{\"app\":\"web\"},\"name\":\"rss-site\",\"namespace\":\"default\"},\"spec\":{\"containers\":[{\"image\":\"nginx\",\"name\":\"front-end\",\"ports\":[{\"containerPort\":80}]},{\"image\":\"nickchase/rss-php-nginx:v1\",\"name\":\"rss-reader\",\"ports\":[{\"containerPort\":88}]}]}}\n" + } + }, + "spec": { + "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "front-end", + "image": "nginx", + "ports": [ + { + "containerPort": 80, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": 
"dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "Always" + }, + { + "name": "rss-reader", + "image": "nickchase/rss-php-nginx:v1", + "ports": [ + { + "containerPort": 88, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-1", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + 
"status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:53:26Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:53:33Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:53:26Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.244.0.104", + "startTime": "2019-05-22T22:53:26Z", + "containerStatuses": [ + { + "name": "front-end", + "state": { + "running": { + "startedAt": "2019-05-22T22:53:31Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "nginx:latest", + "imageID": "docker-pullable://nginx@sha256:23b4dcdf0d34d4a129755fc6f52e1c6e23bb34ea011b315d87e193033bcd1b68", + "containerID": "docker://b27414a5d79e4d94bb84b03944e0eea1b6af4e2f4e31033c541159ec70e08d49" + }, + { + "name": "rss-reader", + "state": { + "running": { + "startedAt": "2019-05-22T22:53:33Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "nickchase/rss-php-nginx:v1", + "imageID": "docker-pullable://nickchase/rss-php-nginx@sha256:48da56a77fe4ecff4917121365d8e0ce615ebbdfe31f48a996255f5592894e2b", + "containerID": "docker://be831ee23d69827386f29625751bae70d56b35b3a8b4ed02f242cbfd08ddd5d7" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "vishwadeploymentnodeapps-8686cf54db-bpcpf", + "generateName": "vishwadeploymentnodeapps-8686cf54db-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-bpcpf", + "uid": "cf23dfc8-5261-11e9-a899-6a5520730c61", + "resourceVersion": "4597543", + "creationTimestamp": "2019-03-29T20:32:45Z", + "labels": { + "VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "vishwasnodeapppod", + "pod-template-hash": "4242791086" + }, + 
"ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "vishwadeploymentnodeapps-8686cf54db", + "uid": "cf14b523-5261-11e9-a899-6a5520730c61", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "vishwacontainerhelloapp", + "image": "vishiy/hello:err100eps", + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-0", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + 
"lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:57Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + } + ], + "hostIP": "10.240.0.4", + "podIP": "10.244.1.92", + "startTime": "2019-03-29T20:32:45Z", + "containerStatuses": [ + { + "name": "vishwacontainerhelloapp", + "state": { + "running": { + "startedAt": "2019-03-29T20:32:55Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "vishiy/hello:err100eps", + "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", + "containerID": "docker://3d87ae8f8543c8bc19ebd2e98cb76cc283e8837e4fab2b2557113c35169ffaa3" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "vishwadeploymentnodeapps-8686cf54db-c6j7k", + "generateName": "vishwadeploymentnodeapps-8686cf54db-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-c6j7k", + "uid": "cf23bf6d-5261-11e9-a899-6a5520730c61", + "resourceVersion": "7580635", + "creationTimestamp": "2019-03-29T20:32:45Z", + "labels": { + "VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "vishwasnodeapppod", + "pod-template-hash": "4242791086" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "vishwadeploymentnodeapps-8686cf54db", + "uid": "cf14b523-5261-11e9-a899-6a5520730c61", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": 
"vishwacontainerhelloapp", + "image": "vishiy/hello:err100eps", + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-1", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:57Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" 
+ } + ], + "hostIP": "10.240.0.5", + "podIP": "10.244.0.62", + "startTime": "2019-03-29T20:32:45Z", + "containerStatuses": [ + { + "name": "vishwacontainerhelloapp", + "state": { + "running": { + "startedAt": "2019-03-29T20:32:54Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "vishiy/hello:err100eps", + "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", + "containerID": "docker://24cbb8235f78a86b434cecd783403ca033c827be8a9c7e6c3948f0c2cb7781ca" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "vishwadeploymentnodeapps-8686cf54db-ft4lb", + "generateName": "vishwadeploymentnodeapps-8686cf54db-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-ft4lb", + "uid": "cf2b40bb-5261-11e9-a899-6a5520730c61", + "resourceVersion": "4597538", + "creationTimestamp": "2019-03-29T20:32:45Z", + "labels": { + "VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "vishwasnodeapppod", + "pod-template-hash": "4242791086" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "vishwadeploymentnodeapps-8686cf54db", + "uid": "cf14b523-5261-11e9-a899-6a5520730c61", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "vishwacontainerhelloapp", + "image": "vishiy/hello:err100eps", + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-0", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:55Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + } + ], + "hostIP": "10.240.0.4", + "podIP": "10.244.1.91", + "startTime": "2019-03-29T20:32:45Z", + "containerStatuses": [ + { + "name": "vishwacontainerhelloapp", + "state": { + "running": { + "startedAt": "2019-03-29T20:32:54Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "vishiy/hello:err100eps", + "imageID": 
"docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", + "containerID": "docker://d76d3179c2cee3149fb8d642adf232cdb6a7c61fb9e501115992d8e268e9b049" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "vishwadeploymentnodeapps-8686cf54db-gl26j", + "generateName": "vishwadeploymentnodeapps-8686cf54db-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-gl26j", + "uid": "cf2bc896-5261-11e9-a899-6a5520730c61", + "resourceVersion": "7580640", + "creationTimestamp": "2019-03-29T20:32:45Z", + "labels": { + "VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "vishwasnodeapppod", + "pod-template-hash": "4242791086" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "vishwadeploymentnodeapps-8686cf54db", + "uid": "cf14b523-5261-11e9-a899-6a5520730c61", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "vishwacontainerhelloapp", + "image": "vishiy/hello:err100eps", + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + 
"terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-1", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:58Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.244.0.66", + "startTime": "2019-03-29T20:32:45Z", + "containerStatuses": [ + { + "name": "vishwacontainerhelloapp", + "state": { + "running": { + "startedAt": "2019-03-29T20:32:57Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "vishiy/hello:err100eps", + "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", + "containerID": "docker://0e2458ebe42fbc427ccd83827aa96299e76b11c7b0aa1ff7a4622041485fb945" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "vishwadeploymentnodeapps-8686cf54db-j88g7", + "generateName": 
"vishwadeploymentnodeapps-8686cf54db-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-j88g7", + "uid": "cf31de9c-5261-11e9-a899-6a5520730c61", + "resourceVersion": "7580649", + "creationTimestamp": "2019-03-29T20:32:45Z", + "labels": { + "VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "vishwasnodeapppod", + "pod-template-hash": "4242791086" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "vishwadeploymentnodeapps-8686cf54db", + "uid": "cf14b523-5261-11e9-a899-6a5520730c61", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "vishwacontainerhelloapp", + "image": "vishiy/hello:err100eps", + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-1", + "securityContext": {}, + "schedulerName": 
"default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:57Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.244.0.63", + "startTime": "2019-03-29T20:32:45Z", + "containerStatuses": [ + { + "name": "vishwacontainerhelloapp", + "state": { + "running": { + "startedAt": "2019-03-29T20:32:55Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "vishiy/hello:err100eps", + "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", + "containerID": "docker://62afa45f4c898e8a5b67e83fb838dbfda2194f3c9b421046071459592b56ddf6" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "vishwadeploymentnodeapps-8686cf54db-kmk9k", + "generateName": "vishwadeploymentnodeapps-8686cf54db-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-kmk9k", + "uid": "cf317bb0-5261-11e9-a899-6a5520730c61", + "resourceVersion": "4597548", + "creationTimestamp": "2019-03-29T20:32:45Z", + "labels": { + "VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "vishwasnodeapppod", + "pod-template-hash": "4242791086" + 
}, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "vishwadeploymentnodeapps-8686cf54db", + "uid": "cf14b523-5261-11e9-a899-6a5520730c61", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "vishwacontainerhelloapp", + "image": "vishiy/hello:err100eps", + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-0", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + 
"lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:58Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + } + ], + "hostIP": "10.240.0.4", + "podIP": "10.244.1.93", + "startTime": "2019-03-29T20:32:45Z", + "containerStatuses": [ + { + "name": "vishwacontainerhelloapp", + "state": { + "running": { + "startedAt": "2019-03-29T20:32:57Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "vishiy/hello:err100eps", + "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", + "containerID": "docker://4b63f7603a1ab5db7dd23af2e5d76b5ad715b53abb957a9f97a5174ae3ce09e7" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "vishwadeploymentnodeapps-8686cf54db-ntfdd", + "generateName": "vishwadeploymentnodeapps-8686cf54db-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-ntfdd", + "uid": "cf2b6c48-5261-11e9-a899-6a5520730c61", + "resourceVersion": "4597569", + "creationTimestamp": "2019-03-29T20:32:45Z", + "labels": { + "VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "vishwasnodeapppod", + "pod-template-hash": "4242791086" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "vishwadeploymentnodeapps-8686cf54db", + "uid": "cf14b523-5261-11e9-a899-6a5520730c61", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": 
"vishwacontainerhelloapp", + "image": "vishiy/hello:err100eps", + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-0", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:58Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" 
+ } + ], + "hostIP": "10.240.0.4", + "podIP": "10.244.1.94", + "startTime": "2019-03-29T20:32:45Z", + "containerStatuses": [ + { + "name": "vishwacontainerhelloapp", + "state": { + "running": { + "startedAt": "2019-03-29T20:32:57Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "vishiy/hello:err100eps", + "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", + "containerID": "docker://45a30da82055fc9f8e4000124b0a3ca1445809af4183ae4258b7e8edc829a46c" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "vishwadeploymentnodeapps-8686cf54db-rdnb2", + "generateName": "vishwadeploymentnodeapps-8686cf54db-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-rdnb2", + "uid": "cf1bf952-5261-11e9-a899-6a5520730c61", + "resourceVersion": "4597535", + "creationTimestamp": "2019-03-29T20:32:45Z", + "labels": { + "VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "vishwasnodeapppod", + "pod-template-hash": "4242791086" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "vishwadeploymentnodeapps-8686cf54db", + "uid": "cf14b523-5261-11e9-a899-6a5520730c61", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "vishwacontainerhelloapp", + "image": "vishiy/hello:err100eps", + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-0", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:55Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + } + ], + "hostIP": "10.240.0.4", + "podIP": "10.244.1.90", + "startTime": "2019-03-29T20:32:45Z", + "containerStatuses": [ + { + "name": "vishwacontainerhelloapp", + "state": { + "running": { + "startedAt": "2019-03-29T20:32:54Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "vishiy/hello:err100eps", + "imageID": 
"docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", + "containerID": "docker://4d79b7011d2e1916e7e565a99f9f6f72dcdc83597bdbd44ce218142465cd466e" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "vishwadeploymentnodeapps-8686cf54db-tjvff", + "generateName": "vishwadeploymentnodeapps-8686cf54db-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-tjvff", + "uid": "cf317302-5261-11e9-a899-6a5520730c61", + "resourceVersion": "7580633", + "creationTimestamp": "2019-03-29T20:32:45Z", + "labels": { + "VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "vishwasnodeapppod", + "pod-template-hash": "4242791086" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "vishwadeploymentnodeapps-8686cf54db", + "uid": "cf14b523-5261-11e9-a899-6a5520730c61", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "vishwacontainerhelloapp", + "image": "vishiy/hello:err100eps", + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + 
"terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-1", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:58Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.244.0.64", + "startTime": "2019-03-29T20:32:45Z", + "containerStatuses": [ + { + "name": "vishwacontainerhelloapp", + "state": { + "running": { + "startedAt": "2019-03-29T20:32:56Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "vishiy/hello:err100eps", + "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", + "containerID": "docker://7172a29b88e7631ef6ae3cc3361084fe7ef5ddf41d7a3a2c340f48b614b18aa4" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "vishwadeploymentnodeapps-8686cf54db-xjc55", + "generateName": 
"vishwadeploymentnodeapps-8686cf54db-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-xjc55", + "uid": "cf2b7ddd-5261-11e9-a899-6a5520730c61", + "resourceVersion": "7580655", + "creationTimestamp": "2019-03-29T20:32:45Z", + "labels": { + "VishwaPodLabel1": "p1", + "VishwaPodLabel2": "p2", + "app": "vishwasnodeapppod", + "pod-template-hash": "4242791086" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "vishwadeploymentnodeapps-8686cf54db", + "uid": "cf14b523-5261-11e9-a899-6a5520730c61", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-kbbjr", + "secret": { + "secretName": "default-token-kbbjr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "vishwacontainerhelloapp", + "image": "vishiy/hello:err100eps", + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-kbbjr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-1", + "securityContext": {}, + "schedulerName": 
"default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:58Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-03-29T20:32:45Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.244.0.65", + "startTime": "2019-03-29T20:32:45Z", + "containerStatuses": [ + { + "name": "vishwacontainerhelloapp", + "state": { + "running": { + "startedAt": "2019-03-29T20:32:57Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "vishiy/hello:err100eps", + "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", + "containerID": "docker://591d4eb3cbcc39d98b2d7d69f040b373b1fd5238ef618cadf82bd66e053bf847" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "heapster-85796b5fdd-47bwj", + "generateName": "heapster-85796b5fdd-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/heapster-85796b5fdd-47bwj", + "uid": "b13b1438-8787-11e9-9d68-7abe3d755a8f", + "resourceVersion": "9459173", + "creationTimestamp": "2019-06-05T11:47:27Z", + "labels": { + "k8s-app": "heapster", + "pod-template-hash": "4135261988" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": 
"heapster-85796b5fdd", + "uid": "b0e6a4f1-8787-11e9-9d68-7abe3d755a8f", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "heapster-config-volume", + "configMap": { + "name": "heapster-config", + "defaultMode": 420 + } + }, + { + "name": "heapster-token-7z7c5", + "secret": { + "secretName": "heapster-token-7z7c5", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "heapster", + "image": "k8s.gcr.io/heapster-amd64:v1.5.3", + "command": [ + "/heapster", + "--source=kubernetes.summary_api:\"\"" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "limits": { + "cpu": "88m", + "memory": "204Mi" + }, + "requests": { + "cpu": "88m", + "memory": "204Mi" + } + }, + "volumeMounts": [ + { + "name": "heapster-token-7z7c5", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "httpGet": { + "path": "/healthz", + "port": 8082, + "scheme": "HTTP" + }, + "initialDelaySeconds": 180, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + }, + { + "name": "heapster-nanny", + "image": "k8s.gcr.io/addon-resizer:1.8.1", + "command": [ + "/pod_nanny", + "--config-dir=/etc/config", + "--cpu=80m", + "--extra-cpu=0.5m", + "--memory=140Mi", + "--extra-memory=4Mi", + "--threshold=5", + 
"--deployment=heapster", + "--container=heapster", + "--poll-period=300000", + "--estimator=exponential" + ], + "env": [ + { + "name": "MY_POD_NAME", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "metadata.name" + } + } + }, + { + "name": "MY_POD_NAMESPACE", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "metadata.namespace" + } + } + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "limits": { + "cpu": "50m", + "memory": "90Mi" + }, + "requests": { + "cpu": "50m", + "memory": "90Mi" + } + }, + "volumeMounts": [ + { + "name": "heapster-config-volume", + "mountPath": "/etc/config" + }, + { + "name": "heapster-token-7z7c5", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "heapster", + "serviceAccount": "heapster", + "nodeName": "aks-nodepool1-19574989-1", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": 
"CriticalAddonsOnly", + "operator": "Exists" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-05T11:47:28Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-05T11:47:38Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-05T11:47:28Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.244.0.111", + "startTime": "2019-06-05T11:47:28Z", + "containerStatuses": [ + { + "name": "heapster", + "state": { + "running": { + "startedAt": "2019-06-05T11:47:36Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/heapster-amd64:v1.5.3", + "imageID": "docker-pullable://k8s.gcr.io/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", + "containerID": "docker://4e1172f79b32d748b594fcf1d28804ccee06137f717ad23f6d4231cba93b9ea7" + }, + { + "name": "heapster-nanny", + "state": { + "running": { + "startedAt": "2019-06-05T11:47:37Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/addon-resizer:1.8.1", + "imageID": "docker-pullable://k8s.gcr.io/addon-resizer@sha256:507aa9845ecce1fdde4d61f530c802f4dc2974c700ce0db7730866e442db958d", + "containerID": "docker://5f638d06b66eda2c9d371a2af7ea6483ecd618ce3db39667e95c3100cbda3880" + } + ], + "qosClass": "Guaranteed" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + 
{ + "metadata": { + "name": "kube-dns-autoscaler-544c7b6b44-p5k4f", + "generateName": "kube-dns-autoscaler-544c7b6b44-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-dns-autoscaler-544c7b6b44-p5k4f", + "uid": "b248f888-7c31-11e9-a084-2a3503e86e0a", + "resourceVersion": "7854795", + "creationTimestamp": "2019-05-22T01:34:09Z", + "labels": { + "k8s-app": "kube-dns-autoscaler", + "pod-template-hash": "1007362600" + }, + "annotations": { + "scheduler.alpha.kubernetes.io/critical-pod": "", + "seccomp.security.alpha.kubernetes.io/pod": "docker/default" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "kube-dns-autoscaler-544c7b6b44", + "uid": "b2443a3a-7c31-11e9-a084-2a3503e86e0a", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "kube-dns-autoscaler-token-zkxt8", + "secret": { + "secretName": "kube-dns-autoscaler-token-zkxt8", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "autoscaler", + "image": "k8s.gcr.io/cluster-proportional-autoscaler-amd64:1.1.2-r2", + "command": [ + "/cluster-proportional-autoscaler", + "--namespace=kube-system", + "--configmap=kube-dns-autoscaler", + "--target=deployment/kube-dns-v20", + "--default-params={\"ladder\":{\"coresToReplicas\":[[1,2],[512,3],[1024,4],[2048,5]],\"nodesToReplicas\":[[1,2],[8,3],[16,4],[32,5]]}}", + "--logtostderr=true", + "--v=2" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + 
"resources": { + "requests": { + "cpu": "20m", + "memory": "10Mi" + } + }, + "volumeMounts": [ + { + "name": "kube-dns-autoscaler-token-zkxt8", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "Default", + "serviceAccountName": "kube-dns-autoscaler", + "serviceAccount": "kube-dns-autoscaler", + "nodeName": "aks-nodepool1-19574989-1", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:09Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:24Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:09Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.244.0.92", + "startTime": "2019-05-22T01:34:09Z", + "containerStatuses": [ + { + "name": "autoscaler", + "state": { + "running": { + "startedAt": "2019-05-22T01:34:22Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/cluster-proportional-autoscaler-amd64:1.1.2-r2", + "imageID": 
"docker-pullable://k8s.gcr.io/cluster-proportional-autoscaler-amd64@sha256:003f98d9f411ddfa6ff6d539196355e03ddd69fa4ed38c7ffb8fec6f729afe2d", + "containerID": "docker://099452c4e8eb1934199cc973acf0d2cc3045048a46f37ddefbf3a2efc19636fe" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kube-dns-v20-f96699fb4-b2wbl", + "generateName": "kube-dns-v20-f96699fb4-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-dns-v20-f96699fb4-b2wbl", + "uid": "b2878580-7c31-11e9-a084-2a3503e86e0a", + "resourceVersion": "7854952", + "creationTimestamp": "2019-05-22T01:34:10Z", + "labels": { + "k8s-app": "kube-dns", + "kubernetes.io/cluster-service": "true", + "pod-template-hash": "952255960", + "version": "v20" + }, + "annotations": { + "prometheus.io/port": "10055", + "prometheus.io/scrape": "true" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "kube-dns-v20-f96699fb4", + "uid": "b2538f5f-7c31-11e9-a084-2a3503e86e0a", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "kube-dns-config", + "configMap": { + "name": "kube-dns", + "defaultMode": 420, + "optional": true + } + }, + { + "name": "kubedns-kubecfg", + "configMap": { + "name": "kubedns-kubecfg", + "defaultMode": 420 + } + }, + { + "name": "kube-dns-token-ghgtl", + "secret": { + "secretName": "kube-dns-token-ghgtl", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "kubedns", + "image": "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13", + "args": [ + "--kubecfg-file=/config/kubeconfig", + "--config-dir=/kube-dns-config", + "--domain=cluster.local.", + "--dns-port=10053", + "--v=2" + ], + "ports": [ + { + "name": "dns-local", + "containerPort": 10053, + "protocol": "UDP" + }, + { + "name": "dns-tcp-local", + "containerPort": 10053, + "protocol": "TCP" + }, + { + "name": "metrics", + "containerPort": 10055, + "protocol": 
"TCP" + } + ], + "env": [ + { + "name": "PROMETHEUS_PORT", + "value": "10055" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "limits": { + "memory": "170Mi" + }, + "requests": { + "cpu": "100m", + "memory": "70Mi" + } + }, + "volumeMounts": [ + { + "name": "kube-dns-config", + "mountPath": "/kube-dns-config" + }, + { + "name": "kubedns-kubecfg", + "readOnly": true, + "mountPath": "/config" + }, + { + "name": "kube-dns-token-ghgtl", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "httpGet": { + "path": "/healthcheck/kubedns", + "port": 10054, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 5 + }, + "readinessProbe": { + "httpGet": { + "path": "/readiness", + "port": 8081, + "scheme": "HTTP" + }, + "initialDelaySeconds": 30, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + }, + { + "name": "dnsmasq", + "image": "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10", + "args": [ + "-v=2", + "-logtostderr", + "-configDir=/kube-dns-config", + "-restartDnsmasq=true", + "--", + "-k", + "--cache-size=1000", + "--no-negcache", + "--no-resolv", + "--server=127.0.0.1#10053", + "--server=/cluster.local/127.0.0.1#10053", + 
"--server=/in-addr.arpa/127.0.0.1#10053", + "--server=/ip6.arpa/127.0.0.1#10053", + "--log-facility=-" + ], + "ports": [ + { + "name": "dns", + "containerPort": 53, + "protocol": "UDP" + }, + { + "name": "dns-tcp", + "containerPort": 53, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "kube-dns-config", + "mountPath": "/kube-dns-config" + }, + { + "name": "kube-dns-token-ghgtl", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + }, + { + "name": "healthz", + "image": "k8s.gcr.io/exechealthz-amd64:1.2", + "args": [ + "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1 >/dev/null || exit 1; done", + "--url=/healthz-dnsmasq", + "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1:10053 >/dev/null || exit 1; done", + "--url=/healthz-kubedns", + "--port=8080", + "--quiet" + ], + "ports": [ + { + "containerPort": 8080, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "PROBE_DOMAINS", + "value": "bing.com kubernetes.default.svc.cluster.local" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + 
"value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "limits": { + "memory": "50Mi" + }, + "requests": { + "cpu": "10m", + "memory": "50Mi" + } + }, + "volumeMounts": [ + { + "name": "kube-dns-token-ghgtl", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "httpGet": { + "path": "/healthz-dnsmasq", + "port": 8080, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 5 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + }, + { + "name": "sidecar", + "image": "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10", + "args": [ + "--v=2", + "--logtostderr", + "--probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.cluster.local,5,SRV", + "--probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.cluster.local,5,SRV" + ], + "ports": [ + { + "name": "metrics", + "containerPort": 10054, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "requests": { + "cpu": "10m", + "memory": "20Mi" + } + }, + "volumeMounts": [ + { + "name": "kube-dns-token-ghgtl", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": 
{ + "httpGet": { + "path": "/metrics", + "port": 10054, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "Default", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "kube-dns", + "serviceAccount": "kube-dns", + "nodeName": "aks-nodepool1-19574989-1", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + }, + "podAntiAffinity": { + "preferredDuringSchedulingIgnoredDuringExecution": [ + { + "weight": 100, + "podAffinityTerm": { + "labelSelector": { + "matchExpressions": [ + { + "key": "k8s-app", + "operator": "In", + "values": [ + "kube-dns" + ] + } + ] + }, + "topologyKey": "kubernetes.io/hostname" + } + } + ] + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:11Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:35:00Z" + }, + { + "type": "ContainersReady", + "status": "True", + 
"lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:10Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.244.0.93", + "startTime": "2019-05-22T01:34:11Z", + "containerStatuses": [ + { + "name": "dnsmasq", + "state": { + "running": { + "startedAt": "2019-05-22T01:34:28Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10", + "imageID": "docker-pullable://k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:bbb2a290a568125b3b996028958eb773f33b5b87a6b37bf38a28f8b62dddb3c8", + "containerID": "docker://2851cb02128e1042279e414d6d2f6d5e5cbc0bfbb3656188ce1f8210bfbabb3f" + }, + { + "name": "healthz", + "state": { + "running": { + "startedAt": "2019-05-22T01:34:31Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/exechealthz-amd64:1.2", + "imageID": "docker-pullable://k8s.gcr.io/exechealthz-amd64@sha256:503e158c3f65ed7399f54010571c7c977ade7fe59010695f48d9650d83488c0a", + "containerID": "docker://a69227f3a15bf4e154206668ce2ed313630871981c89710916a3c8341bff1f02" + }, + { + "name": "kubedns", + "state": { + "running": { + "startedAt": "2019-05-22T01:34:24Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13", + "imageID": "docker-pullable://k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", + "containerID": "docker://65e83e304744f37ba779cc7313ae981e912d3fb5ae536f0f78e6d287db6c141a" + }, + { + "name": "sidecar", + "state": { + "running": { + "startedAt": "2019-05-22T01:34:33Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10", + "imageID": 
"docker-pullable://k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:4f1ab957f87b94a5ec1edc26fae50da2175461f00afecf68940c4aa079bd08a4", + "containerID": "docker://9a825dc6951f5db61af58fc028bb8ab64b06d0d4ec64f81b3dd0d586a913e34a" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kube-dns-v20-f96699fb4-vltph", + "generateName": "kube-dns-v20-f96699fb4-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-dns-v20-f96699fb4-vltph", + "uid": "b25c1fd9-7c31-11e9-a084-2a3503e86e0a", + "resourceVersion": "7854924", + "creationTimestamp": "2019-05-22T01:34:09Z", + "labels": { + "k8s-app": "kube-dns", + "kubernetes.io/cluster-service": "true", + "pod-template-hash": "952255960", + "version": "v20" + }, + "annotations": { + "prometheus.io/port": "10055", + "prometheus.io/scrape": "true" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "kube-dns-v20-f96699fb4", + "uid": "b2538f5f-7c31-11e9-a084-2a3503e86e0a", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "kube-dns-config", + "configMap": { + "name": "kube-dns", + "defaultMode": 420, + "optional": true + } + }, + { + "name": "kubedns-kubecfg", + "configMap": { + "name": "kubedns-kubecfg", + "defaultMode": 420 + } + }, + { + "name": "kube-dns-token-ghgtl", + "secret": { + "secretName": "kube-dns-token-ghgtl", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "kubedns", + "image": "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13", + "args": [ + "--kubecfg-file=/config/kubeconfig", + "--config-dir=/kube-dns-config", + "--domain=cluster.local.", + "--dns-port=10053", + "--v=2" + ], + "ports": [ + { + "name": "dns-local", + "containerPort": 10053, + "protocol": "UDP" + }, + { + "name": "dns-tcp-local", + "containerPort": 10053, + "protocol": "TCP" + }, + { + "name": "metrics", + "containerPort": 10055, + "protocol": "TCP" + } + ], + 
"env": [ + { + "name": "PROMETHEUS_PORT", + "value": "10055" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "limits": { + "memory": "170Mi" + }, + "requests": { + "cpu": "100m", + "memory": "70Mi" + } + }, + "volumeMounts": [ + { + "name": "kube-dns-config", + "mountPath": "/kube-dns-config" + }, + { + "name": "kubedns-kubecfg", + "readOnly": true, + "mountPath": "/config" + }, + { + "name": "kube-dns-token-ghgtl", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "httpGet": { + "path": "/healthcheck/kubedns", + "port": 10054, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 5 + }, + "readinessProbe": { + "httpGet": { + "path": "/readiness", + "port": 8081, + "scheme": "HTTP" + }, + "initialDelaySeconds": 30, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + }, + { + "name": "dnsmasq", + "image": "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10", + "args": [ + "-v=2", + "-logtostderr", + "-configDir=/kube-dns-config", + "-restartDnsmasq=true", + "--", + "-k", + "--cache-size=1000", + "--no-negcache", + "--no-resolv", + "--server=127.0.0.1#10053", + "--server=/cluster.local/127.0.0.1#10053", + "--server=/in-addr.arpa/127.0.0.1#10053", + 
"--server=/ip6.arpa/127.0.0.1#10053", + "--log-facility=-" + ], + "ports": [ + { + "name": "dns", + "containerPort": 53, + "protocol": "UDP" + }, + { + "name": "dns-tcp", + "containerPort": 53, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "kube-dns-config", + "mountPath": "/kube-dns-config" + }, + { + "name": "kube-dns-token-ghgtl", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + }, + { + "name": "healthz", + "image": "k8s.gcr.io/exechealthz-amd64:1.2", + "args": [ + "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1 >/dev/null || exit 1; done", + "--url=/healthz-dnsmasq", + "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1:10053 >/dev/null || exit 1; done", + "--url=/healthz-kubedns", + "--port=8080", + "--quiet" + ], + "ports": [ + { + "containerPort": 8080, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "PROBE_DOMAINS", + "value": "bing.com kubernetes.default.svc.cluster.local" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "limits": { + "memory": "50Mi" + }, + "requests": { + "cpu": "10m", + "memory": "50Mi" + } + }, + "volumeMounts": [ + { + "name": "kube-dns-token-ghgtl", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "httpGet": { + "path": "/healthz-dnsmasq", + "port": 8080, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 5 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + }, + { + "name": "sidecar", + "image": "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10", + "args": [ + "--v=2", + "--logtostderr", + "--probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.cluster.local,5,SRV", + "--probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.cluster.local,5,SRV" + ], + "ports": [ + { + "name": "metrics", + "containerPort": 10054, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "requests": { + "cpu": "10m", + "memory": "20Mi" + } + }, + "volumeMounts": [ + { + "name": "kube-dns-token-ghgtl", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + 
"httpGet": { + "path": "/metrics", + "port": 10054, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 5, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "Default", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "kube-dns", + "serviceAccount": "kube-dns", + "nodeName": "aks-nodepool1-19574989-0", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + }, + "podAntiAffinity": { + "preferredDuringSchedulingIgnoredDuringExecution": [ + { + "weight": 100, + "podAffinityTerm": { + "labelSelector": { + "matchExpressions": [ + { + "key": "k8s-app", + "operator": "In", + "values": [ + "kube-dns" + ] + } + ] + }, + "topologyKey": "kubernetes.io/hostname" + } + } + ] + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:11Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:56Z" + }, + { + "type": "ContainersReady", + "status": "True", + 
"lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:09Z" + } + ], + "hostIP": "10.240.0.4", + "podIP": "10.244.1.118", + "startTime": "2019-05-22T01:34:11Z", + "containerStatuses": [ + { + "name": "dnsmasq", + "state": { + "running": { + "startedAt": "2019-05-22T01:34:25Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10", + "imageID": "docker-pullable://k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:bbb2a290a568125b3b996028958eb773f33b5b87a6b37bf38a28f8b62dddb3c8", + "containerID": "docker://6212f59ccc732480813f4a7e2fdf8eb84e7cebfab3c178135af8dbd1d0a26aba" + }, + { + "name": "healthz", + "state": { + "running": { + "startedAt": "2019-05-22T01:34:29Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/exechealthz-amd64:1.2", + "imageID": "docker-pullable://k8s.gcr.io/exechealthz-amd64@sha256:503e158c3f65ed7399f54010571c7c977ade7fe59010695f48d9650d83488c0a", + "containerID": "docker://d2ad8cbaa535e6587de5ffbe712194a6d8c9572166f7f6bd8be6b241cc064c83" + }, + { + "name": "kubedns", + "state": { + "running": { + "startedAt": "2019-05-22T01:34:20Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13", + "imageID": "docker-pullable://k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", + "containerID": "docker://9e049dae9ef2667ff5e6af168f9cf6fd1e820dd7c7cc2af02c71e0f4d8382291" + }, + { + "name": "sidecar", + "state": { + "running": { + "startedAt": "2019-05-22T01:34:31Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10", + "imageID": 
"docker-pullable://k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:4f1ab957f87b94a5ec1edc26fae50da2175461f00afecf68940c4aa079bd08a4", + "containerID": "docker://b71282c7c0793242c5b2bf0a8c35f0c4e82c7c6224651cbaa8bdd3f94554b49e" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kube-proxy-4hjws", + "generateName": "kube-proxy-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-4hjws", + "uid": "8cf7c410-88f4-11e9-b1b0-5eb4a3e9de7d", + "resourceVersion": "9661065", + "creationTimestamp": "2019-06-07T07:19:12Z", + "labels": { + "component": "kube-proxy", + "controller-revision-hash": "1271944371", + "pod-template-generation": "16", + "tier": "node" + }, + "annotations": { + "aks.microsoft.com/release-time": "seconds:1559735217 nanos:797729016 ", + "remediator.aks.microsoft.com/kube-proxy-restart": "7" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "kube-proxy", + "uid": "45640bf6-44e5-11e9-9920-423525a6b683", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "kubeconfig", + "hostPath": { + "path": "/var/lib/kubelet", + "type": "" + } + }, + { + "name": "certificates", + "hostPath": { + "path": "/etc/kubernetes/certs", + "type": "" + } + }, + { + "name": "kube-proxy-token-f5vbg", + "secret": { + "secretName": "kube-proxy-token-f5vbg", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "kube-proxy", + "image": "k8s.gcr.io/hyperkube-amd64:v1.11.8", + "command": [ + "/hyperkube", + "proxy", + "--kubeconfig=/var/lib/kubelet/kubeconfig", + "--cluster-cidr=10.244.0.0/16", + "--feature-gates=ExperimentalCriticalPodAnnotation=true" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "requests": { + "cpu": "100m" + } + }, + "volumeMounts": [ + { + "name": "kubeconfig", + "readOnly": true, + "mountPath": "/var/lib/kubelet" + }, + { + "name": "certificates", + "readOnly": true, + "mountPath": "/etc/kubernetes/certs" + }, + { + "name": "kube-proxy-token-f5vbg", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "kube-proxy", + "serviceAccount": "kube-proxy", + "nodeName": "aks-nodepool1-19574989-1", + "hostNetwork": true, + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "operator": "Exists", + "effect": "NoExecute" + }, + { + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": 
"NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-07T07:19:13Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-07T07:19:18Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-07T07:19:13Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.240.0.5", + "startTime": "2019-06-07T07:19:13Z", + "containerStatuses": [ + { + "name": "kube-proxy", + "state": { + "running": { + "startedAt": "2019-06-07T07:19:17Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/hyperkube-amd64:v1.11.8", + "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "containerID": "docker://c1c114cb18056d4f9820d127e53f1c4d3f976d52c8e6522fee07f4727db96c66" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kube-proxy-j847t", + "generateName": "kube-proxy-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-j847t", + "uid": "a490c6e6-88f4-11e9-b1b0-5eb4a3e9de7d", + "resourceVersion": "9661132", + "creationTimestamp": "2019-06-07T07:19:52Z", + "labels": { + "component": "kube-proxy", + "controller-revision-hash": "1271944371", + 
"pod-template-generation": "16", + "tier": "node" + }, + "annotations": { + "aks.microsoft.com/release-time": "seconds:1559735217 nanos:797729016 ", + "remediator.aks.microsoft.com/kube-proxy-restart": "7" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "kube-proxy", + "uid": "45640bf6-44e5-11e9-9920-423525a6b683", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "kubeconfig", + "hostPath": { + "path": "/var/lib/kubelet", + "type": "" + } + }, + { + "name": "certificates", + "hostPath": { + "path": "/etc/kubernetes/certs", + "type": "" + } + }, + { + "name": "kube-proxy-token-f5vbg", + "secret": { + "secretName": "kube-proxy-token-f5vbg", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "kube-proxy", + "image": "k8s.gcr.io/hyperkube-amd64:v1.11.8", + "command": [ + "/hyperkube", + "proxy", + "--kubeconfig=/var/lib/kubelet/kubeconfig", + "--cluster-cidr=10.244.0.0/16", + "--feature-gates=ExperimentalCriticalPodAnnotation=true" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "requests": { + "cpu": "100m" + } + }, + "volumeMounts": [ + { + "name": "kubeconfig", + "readOnly": true, + "mountPath": "/var/lib/kubelet" + }, + { + "name": "certificates", + "readOnly": true, + "mountPath": "/etc/kubernetes/certs" + }, + { + "name": "kube-proxy-token-f5vbg", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + 
"terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "kube-proxy", + "serviceAccount": "kube-proxy", + "nodeName": "aks-nodepool1-19574989-0", + "hostNetwork": true, + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "operator": "Exists", + "effect": "NoExecute" + }, + { + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-07T07:19:52Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-07T07:19:57Z" + }, + { + "type": 
"ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-07T07:19:52Z" + } + ], + "hostIP": "10.240.0.4", + "podIP": "10.240.0.4", + "startTime": "2019-06-07T07:19:52Z", + "containerStatuses": [ + { + "name": "kube-proxy", + "state": { + "running": { + "startedAt": "2019-06-07T07:19:57Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/hyperkube-amd64:v1.11.8", + "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "containerID": "docker://79872f04462bb77d8c9f121e39910af80fdbb73c874bfafdf29f16f8df1f5011" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kube-svc-redirect-bfkqr", + "generateName": "kube-svc-redirect-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-svc-redirect-bfkqr", + "uid": "cbfbc2f5-7c31-11e9-a710-f6837ce82b50", + "resourceVersion": "7854957", + "creationTimestamp": "2019-05-22T01:34:52Z", + "labels": { + "component": "kube-svc-redirect", + "controller-revision-hash": "3858141844", + "pod-template-generation": "3", + "tier": "node" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "kube-svc-redirect", + "uid": "45a5fc62-44e5-11e9-9920-423525a6b683", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "azureproxy-nginx", + "configMap": { + "name": "azureproxy-nginx", + "defaultMode": 420 + } + }, + { + "name": "azureproxy-configs", + "configMap": { + "name": "azureproxy-config", + "defaultMode": 420 + } + }, + { + "name": "azureproxy-certs", + "secret": { + "secretName": "azureproxy-certs", + "defaultMode": 420 + } + }, + { + "name": "kube-svc-redirector-token-ngjg2", + "secret": { + 
"secretName": "kube-svc-redirector-token-ngjg2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "redirector", + "image": "docker.io/deis/kube-svc-redirect:v1.0.2", + "env": [ + { + "name": "KUBERNETES_SVC_IP", + "value": "10.0.0.1" + }, + { + "name": "KUBE_SVC_REDIRECTOR_PROXY_IP", + "value": "127.0.0.1:14612" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "requests": { + "cpu": "5m", + "memory": "2Mi" + } + }, + "volumeMounts": [ + { + "name": "kube-svc-redirector-token-ngjg2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "capabilities": { + "add": [ + "NET_ADMIN" + ] + } + } + }, + { + "name": "azureproxy", + "image": "nginx:1.13.12-alpine", + "ports": [ + { + "hostPort": 14612, + "containerPort": 14612, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": 
{ + "requests": { + "cpu": "5m", + "memory": "32Mi" + } + }, + "volumeMounts": [ + { + "name": "azureproxy-nginx", + "readOnly": true, + "mountPath": "/etc/nginx/nginx.conf", + "subPath": "nginx.conf" + }, + { + "name": "azureproxy-configs", + "readOnly": true, + "mountPath": "/etc/nginx/conf.d" + }, + { + "name": "azureproxy-certs", + "readOnly": true, + "mountPath": "/etc/nginx/certs" + }, + { + "name": "kube-svc-redirector-token-ngjg2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "kube-svc-redirector", + "serviceAccount": "kube-svc-redirector", + "nodeName": "aks-nodepool1-19574989-1", + "hostNetwork": true, + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "operator": "Exists", + "effect": "NoExecute" + }, + { + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + }, + { 
+ "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:53Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:35:01Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:53Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.240.0.5", + "startTime": "2019-05-22T01:34:53Z", + "containerStatuses": [ + { + "name": "azureproxy", + "state": { + "running": { + "startedAt": "2019-05-22T01:35:00Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "nginx:1.13.12-alpine", + "imageID": "docker-pullable://nginx@sha256:9d46fd628d54ebe1633ee3cf0fe2acfcc419cfae541c63056530e39cd5620366", + "containerID": "docker://1e6a9c4bd079d1a8aecfb3f597550b80d1017be90ca14dd4438420320ff7858d" + }, + { + "name": "redirector", + "state": { + "running": { + "startedAt": "2019-05-22T01:34:59Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "deis/kube-svc-redirect:v1.0.2", + "imageID": "docker-pullable://deis/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "containerID": "docker://5fcd87041f2734df58b330a3516d64c4e3a25dc72542b20590ade3fecb9232f7" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kube-svc-redirect-jbzcf", + "generateName": "kube-svc-redirect-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-svc-redirect-jbzcf", + "uid": 
"ebc3015a-7c31-11e9-a710-f6837ce82b50", + "resourceVersion": "7855073", + "creationTimestamp": "2019-05-22T01:35:46Z", + "labels": { + "component": "kube-svc-redirect", + "controller-revision-hash": "3858141844", + "pod-template-generation": "3", + "tier": "node" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "kube-svc-redirect", + "uid": "45a5fc62-44e5-11e9-9920-423525a6b683", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "azureproxy-nginx", + "configMap": { + "name": "azureproxy-nginx", + "defaultMode": 420 + } + }, + { + "name": "azureproxy-configs", + "configMap": { + "name": "azureproxy-config", + "defaultMode": 420 + } + }, + { + "name": "azureproxy-certs", + "secret": { + "secretName": "azureproxy-certs", + "defaultMode": 420 + } + }, + { + "name": "kube-svc-redirector-token-ngjg2", + "secret": { + "secretName": "kube-svc-redirector-token-ngjg2", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "redirector", + "image": "docker.io/deis/kube-svc-redirect:v1.0.2", + "env": [ + { + "name": "KUBERNETES_SVC_IP", + "value": "10.0.0.1" + }, + { + "name": "KUBE_SVC_REDIRECTOR_PROXY_IP", + "value": "127.0.0.1:14612" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "requests": { + "cpu": "5m", + "memory": "2Mi" + } + }, + "volumeMounts": [ + { + "name": "kube-svc-redirector-token-ngjg2", + "readOnly": true, + "mountPath": 
"/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "capabilities": { + "add": [ + "NET_ADMIN" + ] + } + } + }, + { + "name": "azureproxy", + "image": "nginx:1.13.12-alpine", + "ports": [ + { + "hostPort": 14612, + "containerPort": 14612, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "requests": { + "cpu": "5m", + "memory": "32Mi" + } + }, + "volumeMounts": [ + { + "name": "azureproxy-nginx", + "readOnly": true, + "mountPath": "/etc/nginx/nginx.conf", + "subPath": "nginx.conf" + }, + { + "name": "azureproxy-configs", + "readOnly": true, + "mountPath": "/etc/nginx/conf.d" + }, + { + "name": "azureproxy-certs", + "readOnly": true, + "mountPath": "/etc/nginx/certs" + }, + { + "name": "kube-svc-redirector-token-ngjg2", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "kube-svc-redirector", + "serviceAccount": "kube-svc-redirector", + "nodeName": "aks-nodepool1-19574989-0", + "hostNetwork": true, + "securityContext": {}, + "affinity": { + 
"nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "operator": "Exists", + "effect": "NoExecute" + }, + { + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:35:46Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:35:53Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:35:46Z" + } + ], + "hostIP": "10.240.0.4", + "podIP": "10.240.0.4", + "startTime": "2019-05-22T01:35:46Z", + "containerStatuses": [ + { + "name": "azureproxy", + "state": { + "running": { + "startedAt": "2019-05-22T01:35:52Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": 
"nginx:1.13.12-alpine", + "imageID": "docker-pullable://nginx@sha256:9d46fd628d54ebe1633ee3cf0fe2acfcc419cfae541c63056530e39cd5620366", + "containerID": "docker://100b561a08a0163ac8c591af88b4bdbafa107923e3a2732aae73a3808b178f18" + }, + { + "name": "redirector", + "state": { + "running": { + "startedAt": "2019-05-22T01:35:50Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "deis/kube-svc-redirect:v1.0.2", + "imageID": "docker-pullable://deis/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "containerID": "docker://4a76cf31a6cd132443cbb2dc6f66aa806af2f5a73b8cee3ea8286d078485192b" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kubernetes-dashboard-596c5bdf67-f64bg", + "generateName": "kubernetes-dashboard-596c5bdf67-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kubernetes-dashboard-596c5bdf67-f64bg", + "uid": "b372caee-7c31-11e9-a084-2a3503e86e0a", + "resourceVersion": "7854783", + "creationTimestamp": "2019-05-22T01:34:11Z", + "labels": { + "k8s-app": "kubernetes-dashboard", + "kubernetes.io/cluster-service": "true", + "pod-template-hash": "1527168923" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "kubernetes-dashboard-596c5bdf67", + "uid": "b2f0c2ad-7c31-11e9-a084-2a3503e86e0a", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "kubernetes-dashboard-token-w4t8s", + "secret": { + "secretName": "kubernetes-dashboard-token-w4t8s", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "main", + "image": "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1", + "ports": [ + { + "name": "http", + "containerPort": 9090, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + 
"name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "limits": { + "cpu": "100m", + "memory": "500Mi" + }, + "requests": { + "cpu": "100m", + "memory": "50Mi" + } + }, + "volumeMounts": [ + { + "name": "kubernetes-dashboard-token-w4t8s", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "httpGet": { + "path": "/", + "port": 9090, + "scheme": "HTTP" + }, + "initialDelaySeconds": 30, + "timeoutSeconds": 30, + "periodSeconds": 10, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "kubernetes-dashboard", + "serviceAccount": "kubernetes-dashboard", + "nodeName": "aks-nodepool1-19574989-0", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priorityClassName": 
"system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:12Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:24Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:11Z" + } + ], + "hostIP": "10.240.0.4", + "podIP": "10.244.1.120", + "startTime": "2019-05-22T01:34:12Z", + "containerStatuses": [ + { + "name": "main", + "state": { + "running": { + "startedAt": "2019-05-22T01:34:22Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1", + "imageID": "docker-pullable://k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", + "containerID": "docker://165645b78e131f68e533cd5aca6e363b75ea80a188f0bff1b7fb614c84893ef6" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "metrics-server-589d467559-l866n", + "generateName": "metrics-server-589d467559-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/metrics-server-589d467559-l866n", + "uid": "b28e6bc3-7c31-11e9-a084-2a3503e86e0a", + "resourceVersion": "7854782", + "creationTimestamp": "2019-05-22T01:34:10Z", + "labels": { + "k8s-app": "metrics-server", + "pod-template-hash": "1458023115" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "metrics-server-589d467559", + "uid": "b284ce8e-7c31-11e9-a084-2a3503e86e0a", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "metrics-server-token-qtdgm", + 
"secret": { + "secretName": "metrics-server-token-qtdgm", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "metrics-server", + "image": "k8s.gcr.io/metrics-server-amd64:v0.2.1", + "command": [ + "/metrics-server", + "--source=kubernetes.summary_api:''" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "metrics-server-token-qtdgm", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "metrics-server", + "serviceAccount": "metrics-server", + "nodeName": "aks-nodepool1-19574989-0", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": 
"NoExecute", + "tolerationSeconds": 300 + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:11Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:24Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:10Z" + } + ], + "hostIP": "10.240.0.4", + "podIP": "10.244.1.119", + "startTime": "2019-05-22T01:34:11Z", + "containerStatuses": [ + { + "name": "metrics-server", + "state": { + "running": { + "startedAt": "2019-05-22T01:34:21Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "k8s.gcr.io/metrics-server-amd64:v0.2.1", + "imageID": "docker-pullable://k8s.gcr.io/metrics-server-amd64@sha256:49a9f12f7067d11f42c803dbe61ed2c1299959ad85cb315b25ff7eef8e6b8892", + "containerID": "docker://bc98c2f03836b32fcf0e39d6b7b7980ceb67fadb472c6c4a4bfdf4d08ee92677" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "omsagent-9wqbf", + "generateName": "omsagent-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-9wqbf", + "uid": "128ded84-6b78-11e9-8b55-963bcaafdced", + "resourceVersion": "5486687", + "creationTimestamp": "2019-04-30T18:45:06Z", + "labels": { + "controller-revision-hash": "776703171", + "dsName": "omsagent-ds", + "pod-template-generation": "2" + }, + "annotations": { + "agentVersion": "1.10.0.1", + "dockerProviderVersion": "4.0.0-0" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "omsagent", + "uid": "b7f05c92-6876-11e9-8b55-963bcaafdced", + "controller": 
true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "host-root", + "hostPath": { + "path": "/", + "type": "" + } + }, + { + "name": "docker-sock", + "hostPath": { + "path": "/var/run", + "type": "" + } + }, + { + "name": "container-hostname", + "hostPath": { + "path": "/etc/hostname", + "type": "" + } + }, + { + "name": "host-log", + "hostPath": { + "path": "/var/log", + "type": "" + } + }, + { + "name": "containerlog-path", + "hostPath": { + "path": "/var/lib/docker/containers", + "type": "" + } + }, + { + "name": "azure-json-path", + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + } + }, + { + "name": "omsagent-secret", + "secret": { + "secretName": "omsagent-secret", + "defaultMode": 420 + } + }, + { + "name": "omsagent-token-h5tmr", + "secret": { + "secretName": "omsagent-token-h5tmr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "omsagent", + "image": "microsoft/oms:healthpreview04262019", + "ports": [ + { + "containerPort": 25225, + "protocol": "TCP" + }, + { + "containerPort": 25224, + "protocol": "UDP" + } + ], + "env": [ + { + "name": "AKS_RESOURCE_ID", + "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "name": "AKS_REGION", + "value": "eastus" + }, + { + "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", + "value": "true" + }, + { + "name": "CONTROLLER_TYPE", + "value": "DaemonSet" + }, + { + "name": "NODE_IP", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "status.hostIP" + } + } + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "limits": { + "cpu": "150m", + "memory": "300Mi" + }, + "requests": { + "cpu": "50m", + "memory": "225Mi" + } + }, + "volumeMounts": [ + { + "name": "host-root", + "readOnly": true, + "mountPath": "/hostfs" + }, + { + "name": "docker-sock", + "mountPath": "/var/run/host" + }, + { + "name": "host-log", + "mountPath": "/var/log" + }, + { + "name": "containerlog-path", + "mountPath": "/var/lib/docker/containers" + }, + { + "name": "azure-json-path", + "mountPath": "/etc/kubernetes/host" + }, + { + "name": "omsagent-secret", + "readOnly": true, + "mountPath": "/etc/omsagent-secret" + }, + { + "name": "omsagent-token-h5tmr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "exec": { + "command": [ + "/bin/bash", + "-c", + "(ps -ef | grep main | grep -v \"grep\") && (ps -ef | grep main | grep -v \"grep\")" + ] + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 1, + "periodSeconds": 60, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "omsagent", + "serviceAccount": "omsagent", + "nodeName": "aks-nodepool1-19574989-0", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, 
+ { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-30T18:45:06Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-30T18:45:12Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-30T18:45:06Z" + } + ], + "hostIP": "10.240.0.4", + "podIP": "10.244.1.115", + "startTime": "2019-04-30T18:45:06Z", + "containerStatuses": [ + { + "name": "omsagent", + "state": { + "running": { + "startedAt": "2019-04-30T18:45:11Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "microsoft/oms:healthpreview04262019", + "imageID": "docker-pullable://microsoft/oms@sha256:de83d1df24cb86a3a3110bd03abbd5704d7a7345565b1996f49ff001a3665385", + "containerID": "docker://1b1f6cad1cf954612695754847981ce8e632f0233bebf3c89dea1c127b157bc0" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "omsagent-n5kss", + "generateName": "omsagent-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-n5kss", + "uid": "673da111-8270-11e9-8d23-32c49ee6f300", + "resourceVersion": "8740634", + "creationTimestamp": "2019-05-30T00:18:09Z", + "labels": { + "controller-revision-hash": "776703171", + "dsName": "omsagent-ds", + 
"pod-template-generation": "2" + }, + "annotations": { + "agentVersion": "1.10.0.1", + "dockerProviderVersion": "4.0.0-0" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "omsagent", + "uid": "b7f05c92-6876-11e9-8b55-963bcaafdced", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "host-root", + "hostPath": { + "path": "/", + "type": "" + } + }, + { + "name": "docker-sock", + "hostPath": { + "path": "/var/run", + "type": "" + } + }, + { + "name": "container-hostname", + "hostPath": { + "path": "/etc/hostname", + "type": "" + } + }, + { + "name": "host-log", + "hostPath": { + "path": "/var/log", + "type": "" + } + }, + { + "name": "containerlog-path", + "hostPath": { + "path": "/var/lib/docker/containers", + "type": "" + } + }, + { + "name": "azure-json-path", + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + } + }, + { + "name": "omsagent-secret", + "secret": { + "secretName": "omsagent-secret", + "defaultMode": 420 + } + }, + { + "name": "omsagent-token-h5tmr", + "secret": { + "secretName": "omsagent-token-h5tmr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "omsagent", + "image": "microsoft/oms:healthpreview04262019", + "ports": [ + { + "containerPort": 25225, + "protocol": "TCP" + }, + { + "containerPort": 25224, + "protocol": "UDP" + } + ], + "env": [ + { + "name": "AKS_RESOURCE_ID", + "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "name": "AKS_REGION", + "value": "eastus" + }, + { + "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", + "value": "true" + }, + { + "name": "CONTROLLER_TYPE", + "value": "DaemonSet" + }, + { + "name": "NODE_IP", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "status.hostIP" + } + } + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": 
"dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "limits": { + "cpu": "150m", + "memory": "300Mi" + }, + "requests": { + "cpu": "50m", + "memory": "225Mi" + } + }, + "volumeMounts": [ + { + "name": "host-root", + "readOnly": true, + "mountPath": "/hostfs" + }, + { + "name": "docker-sock", + "mountPath": "/var/run/host" + }, + { + "name": "host-log", + "mountPath": "/var/log" + }, + { + "name": "containerlog-path", + "mountPath": "/var/lib/docker/containers" + }, + { + "name": "azure-json-path", + "mountPath": "/etc/kubernetes/host" + }, + { + "name": "omsagent-secret", + "readOnly": true, + "mountPath": "/etc/omsagent-secret" + }, + { + "name": "omsagent-token-h5tmr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "exec": { + "command": [ + "/bin/bash", + "-c", + "(ps -ef | grep main | grep -v \"grep\") && (ps -ef | grep main | grep -v \"grep\")" + ] + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 1, + "periodSeconds": 60, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "omsagent", + "serviceAccount": "omsagent", + "nodeName": "aks-nodepool1-19574989-1", + "securityContext": {}, + "schedulerName": 
"default-scheduler", + "tolerations": [ + { + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute" + }, + { + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists", + "effect": "NoSchedule" + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-30T00:18:09Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-30T00:18:14Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-30T00:18:09Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.244.0.106", + "startTime": "2019-05-30T00:18:09Z", + "containerStatuses": [ + { + "name": "omsagent", + "state": { + "running": { + "startedAt": "2019-05-30T00:18:13Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "microsoft/oms:healthpreview04262019", + "imageID": "docker-pullable://microsoft/oms@sha256:de83d1df24cb86a3a3110bd03abbd5704d7a7345565b1996f49ff001a3665385", + "containerID": "docker://5eae48a3ebee8d930c0940e069a4d8720b432e62eba4a6197b6a6b552a573eb9" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "omsagent-rs-6dc57b8544-jh5vw", + "generateName": "omsagent-rs-6dc57b8544-", + "namespace": "kube-system", + 
"selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-rs-6dc57b8544-jh5vw", + "uid": "0971836e-6b78-11e9-8b55-963bcaafdced", + "resourceVersion": "7580651", + "creationTimestamp": "2019-04-30T18:44:51Z", + "labels": { + "pod-template-hash": "2871364100", + "rsName": "omsagent-rs" + }, + "annotations": { + "agentVersion": "1.10.0.1", + "dockerProviderVersion": "4.0.0-0" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "omsagent-rs-6dc57b8544", + "uid": "096a6655-6b78-11e9-8b55-963bcaafdced", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "docker-sock", + "hostPath": { + "path": "/var/run", + "type": "" + } + }, + { + "name": "container-hostname", + "hostPath": { + "path": "/etc/hostname", + "type": "" + } + }, + { + "name": "host-log", + "hostPath": { + "path": "/var/log", + "type": "" + } + }, + { + "name": "containerlog-path", + "hostPath": { + "path": "/var/lib/docker/containers", + "type": "" + } + }, + { + "name": "azure-json-path", + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + } + }, + { + "name": "omsagent-secret", + "secret": { + "secretName": "omsagent-secret", + "defaultMode": 420 + } + }, + { + "name": "omsagent-rs-config", + "configMap": { + "name": "omsagent-rs-config", + "defaultMode": 420 + } + }, + { + "name": "omsagent-token-h5tmr", + "secret": { + "secretName": "omsagent-token-h5tmr", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "omsagent", + "image": "microsoft/oms:healthpreview04262019", + "ports": [ + { + "containerPort": 25225, + "protocol": "TCP" + }, + { + "name": "in-rs-tcp", + "containerPort": 25235, + "protocol": "TCP" + }, + { + "containerPort": 25224, + "protocol": "UDP" + } + ], + "env": [ + { + "name": "AKS_RESOURCE_ID", + "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { 
+ "name": "AKS_REGION", + "value": "eastus" + }, + { + "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", + "value": "true" + }, + { + "name": "CONTROLLER_TYPE", + "value": "ReplicaSet" + }, + { + "name": "NODE_IP", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "status.hostIP" + } + } + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "limits": { + "cpu": "150m", + "memory": "500Mi" + }, + "requests": { + "cpu": "50m", + "memory": "100Mi" + } + }, + "volumeMounts": [ + { + "name": "docker-sock", + "mountPath": "/var/run/host" + }, + { + "name": "host-log", + "mountPath": "/var/log" + }, + { + "name": "containerlog-path", + "mountPath": "/var/lib/docker/containers" + }, + { + "name": "azure-json-path", + "mountPath": "/etc/kubernetes/host" + }, + { + "name": "omsagent-secret", + "readOnly": true, + "mountPath": "/etc/omsagent-secret" + }, + { + "name": "omsagent-rs-config", + "mountPath": "/etc/config" + }, + { + "name": "omsagent-token-h5tmr", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "exec": { + "command": [ + "/bin/bash", + "-c", + "ps -ef | grep main | grep -v \"grep\"" + ] + }, + "initialDelaySeconds": 60, + "timeoutSeconds": 1, + "periodSeconds": 60, + "successThreshold": 1, + "failureThreshold": 3 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true + } + } + ], + 
"restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux", + "kubernetes.io/role": "agent" + }, + "serviceAccountName": "omsagent", + "serviceAccount": "omsagent", + "nodeName": "aks-nodepool1-19574989-1", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-30T18:44:51Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-30T18:44:57Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-04-30T18:44:51Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.244.0.87", + "startTime": "2019-04-30T18:44:51Z", + "containerStatuses": [ + { + "name": "omsagent", + "state": { + "running": { + "startedAt": "2019-04-30T18:44:56Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "microsoft/oms:healthpreview04262019", + "imageID": "docker-pullable://microsoft/oms@sha256:de83d1df24cb86a3a3110bd03abbd5704d7a7345565b1996f49ff001a3665385", + "containerID": "docker://2ad967c98de3c3956481b419a78c9ffa2cf65e62ee2aa51cf22b48d5d1983d0c" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "tunnelfront-74599866d6-fkhbd", + "generateName": "tunnelfront-74599866d6-", + "namespace": "kube-system", + "selfLink": 
"/api/v1/namespaces/kube-system/pods/tunnelfront-74599866d6-fkhbd", + "uid": "b345ca4e-7c31-11e9-a084-2a3503e86e0a", + "resourceVersion": "9030840", + "creationTimestamp": "2019-05-22T01:34:11Z", + "labels": { + "component": "tunnel", + "pod-template-hash": "3015542282" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "tunnelfront-74599866d6", + "uid": "b31abdc6-7c31-11e9-a084-2a3503e86e0a", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "kubeconfig", + "configMap": { + "name": "tunnelfront-kubecfg", + "defaultMode": 420, + "optional": true + } + }, + { + "name": "certificates", + "hostPath": { + "path": "/etc/kubernetes/certs", + "type": "" + } + }, + { + "name": "tunnelfront-token-njgvg", + "secret": { + "secretName": "tunnelfront-token-njgvg", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "tunnel-front", + "image": "docker.io/deis/hcp-tunnel-front:v1.9.2-v4.0.7", + "env": [ + { + "name": "OVERRIDE_TUNNEL_SERVER_NAME", + "value": "t_dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "TUNNEL_CLUSTERUSER_NAME", + "value": "28957308" + }, + { + "name": "TUNNELGATEWAY_SERVER_NAME", + "value": "dilipr-hea-dilipr-health-te-72c8e8-0b16acad.tun.eastus.azmk8s.io" + }, + { + "name": "TUNNELGATEWAY_SSH_PORT", + "value": "22" + }, + { + "name": "TUNNELGATEWAY_TLS_PORT", + "value": "443" + }, + { + "name": "KUBE_CONFIG", + "value": "/etc/kubernetes/kubeconfig/kubeconfig" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": 
"dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": { + "requests": { + "cpu": "10m", + "memory": "64Mi" + } + }, + "volumeMounts": [ + { + "name": "kubeconfig", + "readOnly": true, + "mountPath": "/etc/kubernetes/kubeconfig" + }, + { + "name": "certificates", + "readOnly": true, + "mountPath": "/etc/kubernetes/certs" + }, + { + "name": "tunnelfront-token-njgvg", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "livenessProbe": { + "exec": { + "command": [ + "/lib/tunnel-front/check-tunnel-connection.sh" + ] + }, + "initialDelaySeconds": 10, + "timeoutSeconds": 1, + "periodSeconds": 60, + "successThreshold": 1, + "failureThreshold": 12 + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent", + "securityContext": { + "privileged": true + } + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "Default", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "serviceAccountName": "tunnelfront", + "serviceAccount": "tunnelfront", + "nodeName": "aks-nodepool1-19574989-1", + "securityContext": {}, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priorityClassName": "system-node-critical", + "priority": 2000001000 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + 
"status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:13Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-06-01T14:56:47Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T01:34:11Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.244.0.94", + "startTime": "2019-05-22T01:34:13Z", + "containerStatuses": [ + { + "name": "tunnel-front", + "state": { + "running": { + "startedAt": "2019-06-01T14:56:46Z" + } + }, + "lastState": { + "terminated": { + "exitCode": 137, + "reason": "Error", + "startedAt": "2019-05-22T01:34:26Z", + "finishedAt": "2019-06-01T14:56:42Z", + "containerID": "docker://077af2518bf2bf16e136c9d71453799398a7a6d8f854a0af106d972922f0588a" + } + }, + "ready": true, + "restartCount": 1, + "image": "deis/hcp-tunnel-front:v1.9.2-v4.0.7", + "imageID": "docker-pullable://deis/hcp-tunnel-front@sha256:68878ee3ea1781b322ea3952c3370e31dd89be8bb0864e2bf27bdba6dc904c41", + "containerID": "docker://113cbbf2ad5a68809b0a1e133e88bc090731544ce230d67f0ae73692d95fd576" + } + ], + "qosClass": "Burstable" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "nginx-deployment-7c47c68ddb-dlkd9", + "generateName": "nginx-deployment-7c47c68ddb-", + "namespace": "test", + "selfLink": "/api/v1/namespaces/test/pods/nginx-deployment-7c47c68ddb-dlkd9", + "uid": "842aba73-7ce1-11e9-8d23-32c49ee6f300", + "resourceVersion": "7952515", + "creationTimestamp": "2019-05-22T22:32:43Z", + "labels": { + "app": "nginx", + "pod-template-hash": "3703724886" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "nginx-deployment-7c47c68ddb", + "uid": "8424475c-7ce1-11e9-8d23-32c49ee6f300", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + 
"volumes": [ + { + "name": "default-token-8slws", + "secret": { + "secretName": "default-token-8slws", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "front-end", + "image": "nginx", + "ports": [ + { + "containerPort": 81, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-8slws", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "Always" + }, + { + "name": "rss-reader", + "image": "nickchase/rss-php-nginx:v1", + "ports": [ + { + "containerPort": 88, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-8slws", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + 
"terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-1", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:32:43Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:32:50Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:32:43Z" + } + ], + "hostIP": "10.240.0.5", + "podIP": "10.244.0.103", + "startTime": "2019-05-22T22:32:43Z", + "containerStatuses": [ + { + "name": "front-end", + "state": { + "running": { + "startedAt": "2019-05-22T22:32:48Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "nginx:latest", + "imageID": "docker-pullable://nginx@sha256:23b4dcdf0d34d4a129755fc6f52e1c6e23bb34ea011b315d87e193033bcd1b68", + "containerID": "docker://5820180607dfee4af5839033758c0dd3d53eb844616b575b1eed3b509f806da9" + }, + { + "name": "rss-reader", + "state": { + "running": { + "startedAt": "2019-05-22T22:32:50Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "nickchase/rss-php-nginx:v1", + "imageID": 
"docker-pullable://nickchase/rss-php-nginx@sha256:48da56a77fe4ecff4917121365d8e0ce615ebbdfe31f48a996255f5592894e2b", + "containerID": "docker://e8fb5dfa7453640278a77eac9148d820d12eeddbe7951b68c988b573ca524c2b" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "nginx-deployment-7c47c68ddb-j2wmf", + "generateName": "nginx-deployment-7c47c68ddb-", + "namespace": "test", + "selfLink": "/api/v1/namespaces/test/pods/nginx-deployment-7c47c68ddb-j2wmf", + "uid": "842dd513-7ce1-11e9-8d23-32c49ee6f300", + "resourceVersion": "7952518", + "creationTimestamp": "2019-05-22T22:32:43Z", + "labels": { + "app": "nginx", + "pod-template-hash": "3703724886" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "nginx-deployment-7c47c68ddb", + "uid": "8424475c-7ce1-11e9-8d23-32c49ee6f300", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "spec": { + "volumes": [ + { + "name": "default-token-8slws", + "secret": { + "secretName": "default-token-8slws", + "defaultMode": 420 + } + } + ], + "containers": [ + { + "name": "front-end", + "image": "nginx", + "ports": [ + { + "containerPort": 81, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-8slws", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + 
"terminationMessagePolicy": "File", + "imagePullPolicy": "Always" + }, + { + "name": "rss-reader", + "image": "nickchase/rss-php-nginx:v1", + "ports": [ + { + "containerPort": 88, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "default-token-8slws", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "nodeName": "aks-nodepool1-19574989-0", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priority": 0 + }, + "status": { + "phase": "Running", + "conditions": [ + { + "type": "Initialized", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:32:43Z" + }, + { + "type": "Ready", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:32:51Z" + }, + { + "type": "ContainersReady", + "status": "True", + "lastProbeTime": null, + 
"lastTransitionTime": null + }, + { + "type": "PodScheduled", + "status": "True", + "lastProbeTime": null, + "lastTransitionTime": "2019-05-22T22:32:43Z" + } + ], + "hostIP": "10.240.0.4", + "podIP": "10.244.1.129", + "startTime": "2019-05-22T22:32:43Z", + "containerStatuses": [ + { + "name": "front-end", + "state": { + "running": { + "startedAt": "2019-05-22T22:32:48Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "nginx:latest", + "imageID": "docker-pullable://nginx@sha256:23b4dcdf0d34d4a129755fc6f52e1c6e23bb34ea011b315d87e193033bcd1b68", + "containerID": "docker://dac553a97432f5c2afd757308d7703a037b35bbc562de1abf6a54e03e100a2ba" + }, + { + "name": "rss-reader", + "state": { + "running": { + "startedAt": "2019-05-22T22:32:50Z" + } + }, + "lastState": {}, + "ready": true, + "restartCount": 0, + "image": "nickchase/rss-php-nginx:v1", + "imageID": "docker-pullable://nickchase/rss-php-nginx@sha256:48da56a77fe4ecff4917121365d8e0ce615ebbdfe31f48a996255f5592894e2b", + "containerID": "docker://a173f4ed2e191702e35d8cfe074272bc0b6c5b1d5874bd1513451eb25308cf8d" + } + ], + "qosClass": "BestEffort" + }, + "apiVersion": "v1", + "kind": "Pod" + } + ] +} \ No newline at end of file diff --git a/test/code/plugin/health/health_model_builder_test.rb b/test/code/plugin/health/health_model_builder_test.rb index ac51893da..df921049c 100644 --- a/test/code/plugin/health/health_model_builder_test.rb +++ b/test/code/plugin/health/health_model_builder_test.rb @@ -7,207 +7,201 @@ class FilterHealthModelBuilderTest < Test::Unit::TestCase include HealthModel - # def test_event_stream - # #setup - # health_definition_path = 'C:\AzureMonitor\ContainerInsights\Docker-Provider\installer\conf\health_model_definition.json' - # health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) - # monitor_factory = MonitorFactory.new - # hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, 
monitor_factory) - # # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side - # state_finalizers = [AggregateMonitorStateFinalizer.new] - # monitor_set = MonitorSet.new - # model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) - - # nodes_file_map = { - # "extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json", - # "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - # "first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - # "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - # "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - # "fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - # "missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - # "kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - # } - - # pods_file_map = { - # "extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json", - # "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - # "first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - # "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - # "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - # "fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - # "missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - # "kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - # } - - # cluster_labels = { - # 'container.azm.ms/cluster-region' 
=> 'eastus', - # 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', - # 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', - # 'container.azm.ms/cluster-name' => 'dilipr-health-test' - # } - - # cluster_id = 'fake_cluster_id' - - # #test - # state = HealthMonitorState.new() - # generator = HealthMissingSignalGenerator.new - - # for scenario in ["first", "second", "third"] - # mock_data_path = "C:/AzureMonitor/ContainerInsights/Docker-Provider/health_records/#{scenario}_daemon_set_signals.json" - # file = File.read(mock_data_path) - # records = JSON.parse(file) - - # node_inventory = JSON.parse(File.read(nodes_file_map[scenario])) - # pod_inventory = JSON.parse(File.read(pods_file_map[scenario])) - # deployment_inventory = JSON.parse(File.read("C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/deployments.json")) - # resources = HealthKubernetesResources.instance - # resources.node_inventory = node_inventory - # resources.pod_inventory = pod_inventory - # resources.deployment_inventory = deployment_inventory - - # workload_names = resources.get_workload_names - # provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, "C:/AzureMonitor/ContainerInsights/Docker-Provider/installer/conf/healthmonitorconfig.json") - - # health_monitor_records = [] - # records.each do |record| - # monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] - # monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] - # health_monitor_record = HealthMonitorRecord.new( - # record[HealthMonitorRecordFields::MONITOR_ID], - # record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], - # record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], - # record[HealthMonitorRecordFields::DETAILS]["state"], - # provider.get_labels(record), - # provider.get_config(monitor_id), - # record[HealthMonitorRecordFields::DETAILS] - # ) - - # state.update_state(health_monitor_record, - # 
provider.get_config(health_monitor_record.monitor_id) - # ) - - # # get the health state based on the monitor's operational state - # # update state calls updates the state of the monitor based on configuration and history of the the monitor records - # health_monitor_record.state = state.get_state(monitor_instance_id).new_state - # health_monitor_records.push(health_monitor_record) - # instance_state = state.get_state(monitor_instance_id) - # #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" - # end - - - # #handle kube api down - # kube_api_down_handler = HealthKubeApiDownHandler.new - # health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records) - - # # Dedupe daemonset signals - # # Remove unit monitor signals for “gone” objects - # reducer = HealthSignalReducer.new() - # reduced_records = reducer.reduce_signals(health_monitor_records, resources) - - # cluster_id = 'fake_cluster_id' - - # #get the list of 'none' and 'unknown' signals - # missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider) - # #update state for missing signals - # missing_signals.each{|signal| - # state.update_state(signal, - # provider.get_config(signal.monitor_id) - # ) - # } - # generator.update_last_received_records(reduced_records) - # reduced_records.push(*missing_signals) - - # # build the health model - # all_records = reduced_records - # model_builder.process_records(all_records) - # all_monitors = model_builder.finalize_model - - # # update the state for aggregate monitors (unit monitors are updated above) - # all_monitors.each{|monitor_instance_id, monitor| - # if monitor.is_aggregate_monitor - # state.update_state(monitor, - # provider.get_config(monitor.monitor_id) - # ) - # end - - # instance_state = state.get_state(monitor_instance_id) - # #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} 
#{instance_state.should_send}" - # should_send = instance_state.should_send - - # # always send cluster monitor as a heartbeat - # if !should_send && monitor_instance_id != MonitorId::CLUSTER - # all_monitors.delete(monitor_instance_id) - # end - # } - - # records_to_send = [] - # all_monitors.keys.each{|key| - # record = provider.get_record(all_monitors[key], state) - # puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" - # } - # # for each key in monitor.keys, - # # get the state from health_monitor_state - # # generate the record to send - # serializer = HealthStateSerializer.new('C:\AzureMonitor\ContainerInsights\Docker-Provider\health_records\health_model_state.json') - # serializer.serialize(state) - - # deserializer = HealthStateDeserializer.new('C:\AzureMonitor\ContainerInsights\Docker-Provider\health_records\health_model_state.json') - # deserialized_state = deserializer.deserialize - - # after_state = HealthMonitorState.new - # # deserialized_state.each{|k,v| - # # attrs = v.gsub(/(\n|# "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json", + #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json", "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - "first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - "fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - "missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - "kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - "aks-engine-1" => 
"C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", - "aks-engine-2" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", - "aks-engine-3" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", + #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", } pods_file_map = { - "extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json", + #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json", "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - "first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - "fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - "missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - "kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + } + + cluster_labels = { + 'container.azm.ms/cluster-region' => 'eastus', + 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + 'container.azm.ms/cluster-resource-group' => 
'dilipr-health-test', + 'container.azm.ms/cluster-name' => 'dilipr-health-test' + } + + cluster_id = 'fake_cluster_id' + + #test + state = HealthMonitorState.new() + generator = HealthMissingSignalGenerator.new + + for scenario in ["first", "second", "third"] + mock_data_path = File.join(__dir__, "../../../../health_records/#{scenario}_daemon_set_signals.json") + file = File.read(mock_data_path) + records = JSON.parse(file) + + node_inventory = JSON.parse(File.read(nodes_file_map[scenario])) + pod_inventory = JSON.parse(File.read(pods_file_map[scenario])) + deployment_inventory = JSON.parse(File.read(File.join(__dir__, "../../../../inventory/deployments.json"))) + resources = HealthKubernetesResources.instance + resources.node_inventory = node_inventory + resources.pod_inventory = pod_inventory + resources.deployment_inventory = deployment_inventory + + workload_names = resources.get_workload_names + provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../..//installer/conf/healthmonitorconfig.json")) + + health_monitor_records = [] + records.each do |record| + monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] + monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] + health_monitor_record = HealthMonitorRecord.new( + record[HealthMonitorRecordFields::MONITOR_ID], + record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], + record[HealthMonitorRecordFields::DETAILS]["state"], + provider.get_labels(record), + provider.get_config(monitor_id), + record[HealthMonitorRecordFields::DETAILS] + ) + + state.update_state(health_monitor_record, + provider.get_config(health_monitor_record.monitor_id) + ) + + # get the health state based on the monitor's operational state + # update state calls updates the state of the monitor based on configuration and history of the the monitor records + health_monitor_record.state = 
state.get_state(monitor_instance_id).new_state + health_monitor_records.push(health_monitor_record) + instance_state = state.get_state(monitor_instance_id) + #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + end + + + #handle kube api down + kube_api_down_handler = HealthKubeApiDownHandler.new + health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records) + + # Dedupe daemonset signals + # Remove unit monitor signals for “gone” objects + reducer = HealthSignalReducer.new() + reduced_records = reducer.reduce_signals(health_monitor_records, resources) + + cluster_id = 'fake_cluster_id' + + #get the list of 'none' and 'unknown' signals + missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider) + #update state for missing signals + missing_signals.each{|signal| + state.update_state(signal, + provider.get_config(signal.monitor_id) + ) + } + generator.update_last_received_records(reduced_records) + reduced_records.push(*missing_signals) + + # build the health model + all_records = reduced_records + model_builder.process_records(all_records) + all_monitors = model_builder.finalize_model + + # update the state for aggregate monitors (unit monitors are updated above) + all_monitors.each{|monitor_instance_id, monitor| + if monitor.is_aggregate_monitor + state.update_state(monitor, + provider.get_config(monitor.monitor_id) + ) + end + + instance_state = state.get_state(monitor_instance_id) + #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + should_send = instance_state.should_send + + # always send cluster monitor as a heartbeat + if !should_send && monitor_instance_id != MonitorId::CLUSTER + all_monitors.delete(monitor_instance_id) + end + } + + records_to_send = [] + all_monitors.keys.each{|key| + record = provider.get_record(all_monitors[key], state) + #puts 
"#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" + } + + if scenario == "first" + assert_equal 50, all_monitors.size + elsif scenario == "second" + assert_equal 34, all_monitors.size + elsif scenario == "third" + assert_equal 5, all_monitors.size + end + # for each key in monitor.keys, + # get the state from health_monitor_state + # generate the record to send + serializer = HealthStateSerializer.new(File.join(__dir__, '../../../../health_records\health_model_state.json')) + serializer.serialize(state) + + deserializer = HealthStateDeserializer.new(File.join(__dir__, '../../../../health_records\health_model_state.json')) + deserialized_state = deserializer.deserialize + + after_state = HealthMonitorState.new + after_state.initialize_state(deserialized_state) + end + end + + def test_event_stream_aks_engine + + #setup + health_definition_path = File.join(__dir__, '../../../../installer\conf\health_model_definition.json') + health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) + monitor_factory = MonitorFactory.new + hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) + state_finalizers = [AggregateMonitorStateFinalizer.new] + monitor_set = MonitorSet.new + model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) + + nodes_file_map = { + #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json", + #"first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"missing" => 
"C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + "aks-engine-1" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", + "aks-engine-2" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", + "aks-engine-3" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", + } + + pods_file_map = { + #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json", + #"first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", "aks-engine-1" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", "aks-engine-2" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", "aks-engine-3" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", @@ -227,20 +221,20 @@ def test_event_stream_aks_engine generator = HealthMissingSignalGenerator.new for scenario in 1..3 - mock_data_path = "C:/AzureMonitor/ContainerInsights/Docker-Provider/health_records/aks-engine/aks-engine-#{scenario}.json" + mock_data_path = File.join(__dir__, "../../../../health_records/aks-engine/aks-engine-#{scenario}.json") file = File.read(mock_data_path) records = JSON.parse(file) node_inventory = 
JSON.parse(File.read(nodes_file_map["aks-engine-#{scenario}"])) pod_inventory = JSON.parse(File.read(pods_file_map["aks-engine-#{scenario}"])) - deployment_inventory = JSON.parse(File.read("C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/deployments.json")) + deployment_inventory = JSON.parse(File.read(File.join(__dir__, "../../../../inventory/aks-engine/deployments.json"))) resources = HealthKubernetesResources.instance resources.node_inventory = node_inventory resources.pod_inventory = pod_inventory resources.deployment_inventory = deployment_inventory workload_names = resources.get_workload_names - provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, "C:/AzureMonitor/ContainerInsights/Docker-Provider/installer/conf/healthmonitorconfig.json") + provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) health_monitor_records = [] records.each do |record| @@ -317,22 +311,27 @@ def test_event_stream_aks_engine records_to_send = [] all_monitors.keys.each{|key| record = provider.get_record(all_monitors[key], state) - puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" + #puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" } + + if scenario == 1 + assert_equal 58, all_monitors.size + elsif scenario == 2 + assert_equal 37, all_monitors.size + elsif scenario == 3 + assert_equal 6, all_monitors.size + end # for each key in monitor.keys, # get the state from health_monitor_state # generate the record to send - serializer = HealthStateSerializer.new('C:\AzureMonitor\ContainerInsights\Docker-Provider\health_records\health_model_state.json') + serializer = HealthStateSerializer.new(File.join(__dir__, '../../../../health_records\health_model_state_aks-engine.json')) serializer.serialize(state) - deserializer = 
HealthStateDeserializer.new('C:\AzureMonitor\ContainerInsights\Docker-Provider\health_records\health_model_state.json') + deserializer = HealthStateDeserializer.new(File.join(__dir__, '../../../../health_records\health_model_state_aks-engine.json')) deserialized_state = deserializer.deserialize after_state = HealthMonitorState.new after_state.initialize_state(deserialized_state) - - puts "#{state.to_h == after_state.to_h}" - puts scenario end end end \ No newline at end of file diff --git a/test/code/plugin/health/health_monitor_state_spec.rb b/test/code/plugin/health/health_monitor_state_spec.rb index b68291cda..1eff19085 100644 --- a/test/code/plugin/health/health_monitor_state_spec.rb +++ b/test/code/plugin/health/health_monitor_state_spec.rb @@ -125,5 +125,52 @@ def mock_monitor.details; {"state" => "fail", "timestamp" => Time.now.utc.iso860 monitor_state.new_state.must_equal "fail" end + it 'updates should_send to false for monitors which need consistent state change and has NO state change' do + #arrange + state = HealthMonitorState.new + mock_monitor = Mock.new + def mock_monitor.state; "pass"; end + def mock_monitor.monitor_id; "monitor_id"; end + def mock_monitor.monitor_instance_id; "monitor_instance_id"; end + def mock_monitor.transition_date_time; Time.now.utc.iso8601; end + def mock_monitor.details; {"state" => "pass", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + + config = JSON.parse('{ + "WarnThresholdPercentage": 80.0, + "FailThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 + }') + #act + state.update_state(mock_monitor, config) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal true + monitor_state.old_state.must_equal "none" + monitor_state.new_state.must_equal "none" + + + #arrange + def mock_monitor.state; "pass"; end + def mock_monitor.details; {"state" => "pass", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + #act + 
state.update_state(mock_monitor, config) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal false + + #act + state.update_state(mock_monitor, config) + monitor_state.should_send.must_equal true + monitor_state.old_state.must_equal "none" + monitor_state.new_state.must_equal "pass" + + #act + state.update_state(mock_monitor, config) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal false + monitor_state.old_state.must_equal "none" + monitor_state.new_state.must_equal "pass" + end end \ No newline at end of file From 8af35545e17c12b4499e861a383614e8ac9ed49b Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 17 Jul 2019 11:31:22 -0700 Subject: [PATCH 70/90] committing missing renamed file --- .../plugin/health/parent_monitor_provider.rb | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 source/code/plugin/health/parent_monitor_provider.rb diff --git a/source/code/plugin/health/parent_monitor_provider.rb b/source/code/plugin/health/parent_monitor_provider.rb new file mode 100644 index 000000000..6a27f11d8 --- /dev/null +++ b/source/code/plugin/health/parent_monitor_provider.rb @@ -0,0 +1,86 @@ +module HealthModel + class ParentMonitorProvider + + attr_reader :health_model_definition, :parent_monitor_mapping, :parent_monitor_instance_mapping + + def initialize(definition) + @health_model_definition = definition + @parent_monitor_mapping = {} #monitorId --> parent_monitor_id mapping + @parent_monitor_instance_mapping = {} #child monitor id -- > parent monitor instance mapping. Used in instances when the node no longer exists and impossible to compute from kube api results + end + + # gets the parent monitor id given the state transition. 
It requires the monitor id and labels to determine the parent id + def get_parent_monitor_id(monitor) + monitor_id = monitor.monitor_id + + # cache the parent monitor id so it is not recomputed every time + if @parent_monitor_mapping.key?(monitor.monitor_instance_id) + return @parent_monitor_mapping[monitor.monitor_instance_id] + end + + if @health_model_definition.key?(monitor_id) + parent_monitor_id = @health_model_definition[monitor_id]['parent_monitor_id'] + # check parent_monitor_id is an array, then evaluate the conditions, else return the parent_monitor_id + if parent_monitor_id.is_a?(String) + @parent_monitor_mapping[monitor.monitor_instance_id] = parent_monitor_id + return parent_monitor_id + end + if parent_monitor_id.nil? + conditions = @health_model_definition[monitor_id]['conditions'] + if !conditions.nil? && conditions.is_a?(Array) + labels = monitor.labels + conditions.each{|condition| + left = "#{labels[condition['key']]}" + op = "#{condition['operator']}" + right = "#{condition['value']}" + cond = left.send(op.to_sym, right) + + if cond + @parent_monitor_mapping[monitor.monitor_instance_id] = condition['parent_id'] + return condition['parent_id'] + end + } + end + raise "Conditions were not met to determine the parent monitor id" if monitor_id != MonitorId::CLUSTER + end + else + raise "Invalid Monitor Id #{monitor_id} in get_parent_monitor_id" + end + end + + def get_parent_monitor_labels(monitor_id, monitor_labels, parent_monitor_id) + labels_to_copy = @health_model_definition[monitor_id]['labels'] + if labels_to_copy.nil? 
+ return {} + end + parent_monitor_labels = {} + labels_to_copy.each{|label| + parent_monitor_labels[label] = monitor_labels[label] + } + return parent_monitor_labels + end + + def get_parent_monitor_config(parent_monitor_id) + return @health_model_definition[parent_monitor_id] + end + + def get_parent_monitor_instance_id(monitor_instance_id, parent_monitor_id, parent_monitor_labels) + if @parent_monitor_instance_mapping.key?(monitor_instance_id) + return @parent_monitor_instance_mapping[monitor_instance_id] + end + + labels = AggregateMonitorInstanceIdLabels.get_labels_for(parent_monitor_id) + if !labels.is_a?(Array) + raise "Expected #{labels} to be an Array for #{parent_monitor_id}" + end + values = labels.map{|label| parent_monitor_labels[label]} + if values.nil? || values.empty? || values.size == 0 + @parent_monitor_instance_mapping[monitor_instance_id] = parent_monitor_id + return parent_monitor_id + end + parent_monitor_instance_id = "#{parent_monitor_id}-#{values.join('-')}" + @parent_monitor_instance_mapping[monitor_instance_id] = parent_monitor_instance_id + return parent_monitor_instance_id + end + end +end \ No newline at end of file From 02bce13d3ca016502e04aa286076d75b8204971c Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 17 Jul 2019 11:36:28 -0700 Subject: [PATCH 71/90] Fix base_Container.data --- installer/datafiles/base_container.data | 51 ++++++++++++------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 7c427fea0..89ac1ad23 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -112,7 +112,7 @@ MAINTAINER: 'Microsoft Corporation' /etc/opt/microsoft/docker-cimprov/telegraf-rs.conf; installer/conf/telegraf-rs.conf; 644; root; root /opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh; installer/scripts/TelegrafTCPErrorTelemetry.sh; 755; root; root /opt/livenessprobe.sh; 
installer/scripts/livenessprobe.sh; 755; root; root -/opt/tomlparser.rb; installer/scripts/tomlparser.rb; 755; root; root +/opt/tomlparser.rb; installer/scripts/tomlparser.rb; 755; root; root @@ -123,31 +123,30 @@ MAINTAINER: 'Microsoft Corporation' /etc/opt/microsoft/docker-cimprov/health/health_model_definition.json; installer/conf/health_model_definition.json; 644; root; root -/opt/microsoft/omsagent/plugin/health/aggregate_monitor.rb source/code/plugin/health/aggregate_monitor.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/aggregate_monitor_instance_id_labels.rb source/code/plugin/health/aggregate_monitor_instance_id_labels.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/aggregate_monitor_state_finalizer.rb source/code/plugin/health/aggregate_monitor_state_finalizer.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_hierarchy_builder.rb source/code/plugin/health/health_hierarchy_builder.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_kubernetes_resources.rb source/code/plugin/health/health_kubernetes_resources.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_kube_api_down_handler.rb source/code/plugin/health/health_kube_api_down_handler.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_missing_signal_generator.rb source/code/plugin/health/health_missing_signal_generator.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_model_buffer.rb source/code/plugin/health/health_model_buffer.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_model_builder.rb source/code/plugin/health/health_model_builder.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_model_constants.rb source/code/plugin/health/health_model_constants.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_model_definition.rb source/code/plugin/health/health_model_definition.rb; 644; root; root 
-/opt/microsoft/omsagent/plugin/health/health_model_definition_parser.rb source/code/plugin/health/health_model_definition_parser.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_monitor_optimizer.rb source/code/plugin/health/health_monitor_optimizer.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_monitor_provider.rb source/code/plugin/health/health_monitor_provider.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_monitor_record.rb source/code/plugin/health/health_monitor_record.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_monitor_state.rb source/code/plugin/health/health_monitor_state.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_monitor_utils.rb source/code/plugin/health/health_monitor_utils.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_signal_reducer.rb source/code/plugin/health/health_signal_reducer.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_state_deserializer.rb source/code/plugin/health/health_state_deserializer.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_state_serializer.rb source/code/plugin/health/health_state_serializer.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/monitor_factory.rb source/code/plugin/health/monitor_factory.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/monitor_set.rb source/code/plugin/health/monitor_set.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/node_missing_signal_state.rb source/code/plugin/health/node_missing_signal_state.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/node_monitor_hierarchy_reducer.rb source/code/plugin/health/node_monitor_hierarchy_reducer.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/unit_monitor.rb source/code/plugin/health/unit_monitor.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/aggregate_monitor.rb; source/code/plugin/health/aggregate_monitor.rb; 644; root; root 
+/opt/microsoft/omsagent/plugin/health/aggregate_monitor_instance_id_labels.rb; source/code/plugin/health/aggregate_monitor_instance_id_labels.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/aggregate_monitor_state_finalizer.rb; source/code/plugin/health/aggregate_monitor_state_finalizer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_hierarchy_builder.rb; source/code/plugin/health/health_hierarchy_builder.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_kubernetes_resources.rb; source/code/plugin/health/health_kubernetes_resources.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_kube_api_down_handler.rb; source/code/plugin/health/health_kube_api_down_handler.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_missing_signal_generator.rb; source/code/plugin/health/health_missing_signal_generator.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_model_buffer.rb; source/code/plugin/health/health_model_buffer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_model_builder.rb; source/code/plugin/health/health_model_builder.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_model_constants.rb; source/code/plugin/health/health_model_constants.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/parent_monitor_provider.rb; source/code/plugin/health/parent_monitor_provider.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_model_definition_parser.rb; source/code/plugin/health/health_model_definition_parser.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_optimizer.rb; source/code/plugin/health/health_monitor_optimizer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_provider.rb; source/code/plugin/health/health_monitor_provider.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_record.rb; source/code/plugin/health/health_monitor_record.rb; 644; root; root 
+/opt/microsoft/omsagent/plugin/health/health_monitor_state.rb; source/code/plugin/health/health_monitor_state.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_utils.rb; source/code/plugin/health/health_monitor_utils.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_signal_reducer.rb; source/code/plugin/health/health_signal_reducer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_state_deserializer.rb; source/code/plugin/health/health_state_deserializer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_state_serializer.rb; source/code/plugin/health/health_state_serializer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/monitor_factory.rb; source/code/plugin/health/monitor_factory.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/monitor_set.rb; source/code/plugin/health/monitor_set.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/node_missing_signal_state.rb; source/code/plugin/health/node_missing_signal_state.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/unit_monitor.rb; source/code/plugin/health/unit_monitor.rb; 644; root; root %Links From 0d4ae84f583eaa4874f23abff49fceff16c1734f Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 17 Jul 2019 11:42:14 -0700 Subject: [PATCH 72/90] Added test_helpers.rb --- test/code/plugin/test_helpers.rb | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 test/code/plugin/test_helpers.rb diff --git a/test/code/plugin/test_helpers.rb b/test/code/plugin/test_helpers.rb new file mode 100644 index 000000000..00c06e30b --- /dev/null +++ b/test/code/plugin/test_helpers.rb @@ -0,0 +1,2 @@ +require "minitest/spec" +require 'minitest/autorun' \ No newline at end of file From 60384df5271f129a6bad310099df31bdda6dc041 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 17 Jul 2019 13:37:48 -0700 Subject: [PATCH 73/90] Fix ruby 1.9 issue where __dir__is not recognized --- test/code/plugin/health/aggregate_monitor_spec.rb | 3 
+-- .../plugin/health/aggregate_monitor_state_finalizer_spec.rb | 2 +- test/code/plugin/health/health_hierarchy_builder_spec.rb | 2 +- test/code/plugin/health/health_kubernetes_resource_spec.rb | 2 +- .../code/plugin/health/health_missing_signal_generator_spec.rb | 2 +- test/code/plugin/health/health_model_builder_spec.rb | 2 +- test/code/plugin/health/health_model_definition_parser_spec.rb | 2 +- test/code/plugin/health/health_monitor_state_spec.rb | 2 +- test/code/plugin/health/health_signal_reducer_spec.rb | 2 +- test/code/plugin/health/kube_api_down_handler_spec.rb | 2 +- test/code/plugin/health/monitor_factory_spec.rb | 2 +- test/code/plugin/health/monitor_set_spec.rb | 2 +- test/code/plugin/health/parent_monitor_provider_spec.rb | 2 +- 13 files changed, 13 insertions(+), 14 deletions(-) diff --git a/test/code/plugin/health/aggregate_monitor_spec.rb b/test/code/plugin/health/aggregate_monitor_spec.rb index 08cfafacd..0e9692df3 100644 --- a/test/code/plugin/health/aggregate_monitor_spec.rb +++ b/test/code/plugin/health/aggregate_monitor_spec.rb @@ -1,7 +1,6 @@ require_relative '../test_helpers' -Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } - +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } include HealthModel describe "AggregateMonitor Spec" do diff --git a/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb b/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb index 2d0873849..f07b07448 100644 --- a/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb +++ b/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } 
+Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/health_hierarchy_builder_spec.rb b/test/code/plugin/health/health_hierarchy_builder_spec.rb index 701fab691..e35723258 100644 --- a/test/code/plugin/health/health_hierarchy_builder_spec.rb +++ b/test/code/plugin/health/health_hierarchy_builder_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/health_kubernetes_resource_spec.rb b/test/code/plugin/health/health_kubernetes_resource_spec.rb index 093c685df..b02b1d036 100644 --- a/test/code/plugin/health/health_kubernetes_resource_spec.rb +++ b/test/code/plugin/health/health_kubernetes_resource_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } include HealthModel describe "HealthKubernetesResources spec" do diff --git a/test/code/plugin/health/health_missing_signal_generator_spec.rb b/test/code/plugin/health/health_missing_signal_generator_spec.rb index 01cba8028..1f65f41f3 100644 --- a/test/code/plugin/health/health_missing_signal_generator_spec.rb +++ 
b/test/code/plugin/health/health_missing_signal_generator_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/health_model_builder_spec.rb b/test/code/plugin/health/health_model_builder_spec.rb index 4a98695c5..383a0c34f 100644 --- a/test/code/plugin/health/health_model_builder_spec.rb +++ b/test/code/plugin/health/health_model_builder_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/health_model_definition_parser_spec.rb b/test/code/plugin/health/health_model_definition_parser_spec.rb index 5b74c29a4..e486b0fd6 100644 --- a/test/code/plugin/health/health_model_definition_parser_spec.rb +++ b/test/code/plugin/health/health_model_definition_parser_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } include 
HealthModel describe "HealthModelDefinitionParser spec " do diff --git a/test/code/plugin/health/health_monitor_state_spec.rb b/test/code/plugin/health/health_monitor_state_spec.rb index 1eff19085..3a9c9a8a8 100644 --- a/test/code/plugin/health/health_monitor_state_spec.rb +++ b/test/code/plugin/health/health_monitor_state_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/health_signal_reducer_spec.rb b/test/code/plugin/health/health_signal_reducer_spec.rb index 667649876..ab55c6a30 100644 --- a/test/code/plugin/health/health_signal_reducer_spec.rb +++ b/test/code/plugin/health/health_signal_reducer_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/kube_api_down_handler_spec.rb b/test/code/plugin/health/kube_api_down_handler_spec.rb index f0be88feb..71f34d62f 100644 --- a/test/code/plugin/health/kube_api_down_handler_spec.rb +++ b/test/code/plugin/health/kube_api_down_handler_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| 
require_relative file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } include HealthModel describe "KubeApiDownHandler spec" do diff --git a/test/code/plugin/health/monitor_factory_spec.rb b/test/code/plugin/health/monitor_factory_spec.rb index 69dcaf914..93c9a4203 100644 --- a/test/code/plugin/health/monitor_factory_spec.rb +++ b/test/code/plugin/health/monitor_factory_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } include HealthModel describe "MonitorFactory Spec" do diff --git a/test/code/plugin/health/monitor_set_spec.rb b/test/code/plugin/health/monitor_set_spec.rb index 5d6efb4eb..2a98272bb 100644 --- a/test/code/plugin/health/monitor_set_spec.rb +++ b/test/code/plugin/health/monitor_set_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } include HealthModel describe "MonitorSet Spec" do diff --git a/test/code/plugin/health/parent_monitor_provider_spec.rb b/test/code/plugin/health/parent_monitor_provider_spec.rb index d8cef316a..cee9026a8 100644 --- a/test/code/plugin/health/parent_monitor_provider_spec.rb +++ 
b/test/code/plugin/health/parent_monitor_provider_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(__dir__, '../../../../source/code/plugin/health', '*.rb')].each { |file| require_relative file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } include HealthModel include Minitest From ce8c74845986de7b4d6011e34de2c24f1bfe291a Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 17 Jul 2019 13:59:19 -0700 Subject: [PATCH 74/90] moving some methods into health_monitor_helpers, so that unit tests can run without loading health_monitor_utils which depends on a whole lot of base agent classes --- .../plugin/health/health_monitor_helpers.rb | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 source/code/plugin/health/health_monitor_helpers.rb diff --git a/source/code/plugin/health/health_monitor_helpers.rb b/source/code/plugin/health/health_monitor_helpers.rb new file mode 100644 index 000000000..9e2977a0e --- /dev/null +++ b/source/code/plugin/health/health_monitor_helpers.rb @@ -0,0 +1,36 @@ +require 'logger' +require 'digest' + +module HealthModel + # static class that provides a bunch of utility methods + class HealthMonitorHelpers + + @log_path = "/var/opt/microsoft/docker-cimprov/log/health_monitors.log" + + if Gem.win_platform? 
#unit testing on windows dev machine + @log_path = "C:\Temp\health_monitors.log" + end + + @log = Logger.new(@log_path, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M + + class << self + def is_node_monitor(monitor_id) + return (monitor_id == HealthMonitorConstants::NODE_CPU_MONITOR_ID || monitor_id == HealthMonitorConstants::NODE_MEMORY_MONITOR_ID || monitor_id == HealthMonitorConstants::NODE_CONDITION_MONITOR_ID) + end + + def is_pods_ready_monitor(monitor_id) + return (monitor_id == HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID || monitor_id == HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) + end + + def get_log_handle + return @log + end + + def get_monitor_instance_id(monitor_id, args = []) + string_to_hash = args.join("/") + return "#{monitor_id}-#{Digest::MD5.hexdigest(string_to_hash)}" + end + end + + end +end From c70cfe7c4ee961e45a8c44677d04680df4403f6f Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 17 Jul 2019 14:02:34 -0700 Subject: [PATCH 75/90] Changed references to health_monitor_helpers --- .../plugin/health/health_missing_signal_generator.rb | 10 +++++----- source/code/plugin/health/health_monitor_state.rb | 2 +- source/code/plugin/health/health_signal_reducer.rb | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/source/code/plugin/health/health_missing_signal_generator.rb b/source/code/plugin/health/health_missing_signal_generator.rb index 589dfa234..67b9f6b1b 100644 --- a/source/code/plugin/health/health_missing_signal_generator.rb +++ b/source/code/plugin/health/health_missing_signal_generator.rb @@ -23,11 +23,11 @@ def get_missing_signals(cluster_id, health_monitor_records, health_k8s_inventory nodes.each{|node| node_signals_hash[node] = [HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID] } - log = HealthMonitorUtils.get_log_handle + log = HealthMonitorHelpers.get_log_handle log.info 
"last_received_records #{@last_received_records.size} nodes #{nodes}" @last_received_records.each{|monitor_instance_id, monitor| if !health_monitor_records_map.key?(monitor_instance_id) - if HealthMonitorUtils.is_node_monitor(monitor.monitor_id) + if HealthMonitorHelpers.is_node_monitor(monitor.monitor_id) node_name = monitor.labels['kubernetes.io/hostname'] new_monitor = HealthMonitorRecord.new( monitor.monitor_id, @@ -50,7 +50,7 @@ def get_missing_signals(cluster_id, health_monitor_records, health_k8s_inventory end missing_signals_map[monitor_instance_id] = new_monitor log.info "Added missing signal #{new_monitor.monitor_instance_id} #{new_monitor.state}" - elsif HealthMonitorUtils.is_pods_ready_monitor(monitor.monitor_id) + elsif HealthMonitorHelpers.is_pods_ready_monitor(monitor.monitor_id) lookup = "#{monitor.labels['container.azm.ms/namespace']}~~#{monitor.labels['container.azm.ms/workload-name']}" new_monitor = HealthMonitorRecord.new( monitor.monitor_id, @@ -79,7 +79,7 @@ def get_missing_signals(cluster_id, health_monitor_records, health_k8s_inventory health_monitor_records.each{|health_monitor_record| # remove signals from the list of expected signals if we see them in the list of current signals - if HealthMonitorUtils.is_node_monitor(health_monitor_record.monitor_id) + if HealthMonitorHelpers.is_node_monitor(health_monitor_record.monitor_id) node_name = health_monitor_record.labels['kubernetes.io/hostname'] if node_signals_hash.key?(node_name) signals = node_signals_hash[node_name] @@ -97,7 +97,7 @@ def get_missing_signals(cluster_id, health_monitor_records, health_k8s_inventory # these signals need to be assigned an unknown state node_signals_hash.each{|node, monitor_ids| monitor_ids.each{|monitor_id| - monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [cluster_id, node]) + monitor_instance_id = HealthMonitorHelpers.get_monitor_instance_id(monitor_id, [cluster_id, node]) new_monitor = HealthMonitorRecord.new( monitor_id, 
monitor_instance_id, diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index c78effe2e..beffb6f4a 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -51,7 +51,7 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor ) samples_to_keep = 1 monitor_instance_id = monitor.monitor_instance_id - log = HealthMonitorUtils.get_log_handle + log = HealthMonitorHelpers.get_log_handle current_time = Time.now.utc.iso8601 health_monitor_instance_state = get_state(monitor_instance_id) if !health_monitor_instance_state.nil? diff --git a/source/code/plugin/health/health_signal_reducer.rb b/source/code/plugin/health/health_signal_reducer.rb index 0b6d51b99..a30755efe 100644 --- a/source/code/plugin/health/health_signal_reducer.rb +++ b/source/code/plugin/health/health_signal_reducer.rb @@ -21,13 +21,13 @@ def reduce_signals(health_monitor_records, health_k8s_inventory) puts 'Duplicate Daemon Set signal' reduced_signals_map[monitor_instance_id] = health_monitor_record end - elsif HealthMonitorUtils.is_node_monitor(monitor_id) + elsif HealthMonitorHelpers.is_node_monitor(monitor_id) node_name = health_monitor_record.labels['kubernetes.io/hostname'] if (node_name.nil? 
|| !nodes.include?(node_name)) # only add daemon set records if node is present in the inventory next end reduced_signals_map[monitor_instance_id] = health_monitor_record - elsif HealthMonitorUtils.is_pods_ready_monitor(monitor_id) + elsif HealthMonitorHelpers.is_pods_ready_monitor(monitor_id) workload_name = health_monitor_record.labels['container.azm.ms/workload-name'] namespace = health_monitor_record.labels['container.azm.ms/namespace'] lookup = "#{namespace}~~#{workload_name}" From 603ab252ce479da4c0b329aeff76189f4dc7b334 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 17 Jul 2019 15:44:55 -0700 Subject: [PATCH 76/90] Fixing ruby incompatibility errors --- .../health/health_kubernetes_resources.rb | 1 + .../plugin/health/aggregate_monitor_spec.rb | 2 +- .../aggregate_monitor_state_finalizer_spec.rb | 2 +- .../health/health_hierarchy_builder_spec.rb | 2 +- .../health/health_kubernetes_resource_spec.rb | 2 +- .../health_missing_signal_generator_spec.rb | 2 +- .../health/health_model_builder_spec.rb | 2 +- .../health_model_definition_parser_spec.rb | 5 +- .../health/health_monitor_state_spec.rb | 2 +- .../health/health_signal_reducer_spec.rb | 2 +- .../health/kube_api_down_handler_spec.rb | 2 +- .../plugin/health/monitor_factory_spec.rb | 2 +- test/code/plugin/health/monitor_set_spec.rb | 2 +- .../health/parent_monitor_provider_spec.rb | 2 +- test/code/plugin/health/tests/.gitignore | 8 -- test/code/plugin/health/tests/.travis.yml | 7 -- .../plugin/health/tests/CODE_OF_CONDUCT.md | 74 ------------------- test/code/plugin/health/tests/Gemfile | 4 - test/code/plugin/health/tests/LICENSE.txt | 21 ------ test/code/plugin/health/tests/README.md | 43 ----------- test/code/plugin/health/tests/Rakefile | 10 --- test/code/plugin/health/tests/bin/console | 14 ---- test/code/plugin/health/tests/bin/setup | 8 -- test/code/plugin/health/tests/lib/tests.rb | 6 -- .../plugin/health/tests/lib/tests/version.rb | 3 - .../plugin/health/tests/test/test_helper.rb | 4 - 
.../plugin/health/tests/test/tests_test.rb | 11 --- test/code/plugin/health/tests/tests.gemspec | 34 --------- test/code/plugin/test_helpers.rb | 1 + 29 files changed, 17 insertions(+), 261 deletions(-) delete mode 100644 test/code/plugin/health/tests/.gitignore delete mode 100644 test/code/plugin/health/tests/.travis.yml delete mode 100644 test/code/plugin/health/tests/CODE_OF_CONDUCT.md delete mode 100644 test/code/plugin/health/tests/Gemfile delete mode 100644 test/code/plugin/health/tests/LICENSE.txt delete mode 100644 test/code/plugin/health/tests/README.md delete mode 100644 test/code/plugin/health/tests/Rakefile delete mode 100644 test/code/plugin/health/tests/bin/console delete mode 100644 test/code/plugin/health/tests/bin/setup delete mode 100644 test/code/plugin/health/tests/lib/tests.rb delete mode 100644 test/code/plugin/health/tests/lib/tests/version.rb delete mode 100644 test/code/plugin/health/tests/test/test_helper.rb delete mode 100644 test/code/plugin/health/tests/test/tests_test.rb delete mode 100644 test/code/plugin/health/tests/tests.gemspec diff --git a/source/code/plugin/health/health_kubernetes_resources.rb b/source/code/plugin/health/health_kubernetes_resources.rb index b11bfafc5..53f879bf5 100644 --- a/source/code/plugin/health/health_kubernetes_resources.rb +++ b/source/code/plugin/health/health_kubernetes_resources.rb @@ -14,6 +14,7 @@ def initialize @nodes = [] @pods = [] @workloads = [] + @log = HealthMonitorHelpers.get_log_handle end def get_node_inventory diff --git a/test/code/plugin/health/aggregate_monitor_spec.rb b/test/code/plugin/health/aggregate_monitor_spec.rb index 0e9692df3..8cdf0a029 100644 --- a/test/code/plugin/health/aggregate_monitor_spec.rb +++ b/test/code/plugin/health/aggregate_monitor_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts 
"Requiring file #{file}"; require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } include HealthModel describe "AggregateMonitor Spec" do diff --git a/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb b/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb index f07b07448..ffdf49baf 100644 --- a/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb +++ b/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/health_hierarchy_builder_spec.rb b/test/code/plugin/health/health_hierarchy_builder_spec.rb index e35723258..d3f8d180f 100644 --- a/test/code/plugin/health/health_hierarchy_builder_spec.rb +++ b/test/code/plugin/health/health_hierarchy_builder_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/health_kubernetes_resource_spec.rb b/test/code/plugin/health/health_kubernetes_resource_spec.rb index b02b1d036..bb2781bdf 100644 --- a/test/code/plugin/health/health_kubernetes_resource_spec.rb +++ 
b/test/code/plugin/health/health_kubernetes_resource_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } include HealthModel describe "HealthKubernetesResources spec" do diff --git a/test/code/plugin/health/health_missing_signal_generator_spec.rb b/test/code/plugin/health/health_missing_signal_generator_spec.rb index 1f65f41f3..610e871f8 100644 --- a/test/code/plugin/health/health_missing_signal_generator_spec.rb +++ b/test/code/plugin/health/health_missing_signal_generator_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/health_model_builder_spec.rb b/test/code/plugin/health/health_model_builder_spec.rb index 383a0c34f..c063148c7 100644 --- a/test/code/plugin/health/health_model_builder_spec.rb +++ b/test/code/plugin/health/health_model_builder_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require 
file } include HealthModel include Minitest diff --git a/test/code/plugin/health/health_model_definition_parser_spec.rb b/test/code/plugin/health/health_model_definition_parser_spec.rb index e486b0fd6..94a757fb4 100644 --- a/test/code/plugin/health/health_model_definition_parser_spec.rb +++ b/test/code/plugin/health/health_model_definition_parser_spec.rb @@ -1,12 +1,13 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } include HealthModel describe "HealthModelDefinitionParser spec " do it "parses the definition file correctly with the right conditions" do #arrange - parser = HealthModelDefinitionParser.new(File.join(__dir__, 'test_health_model_definition.json')) + + parser = HealthModelDefinitionParser.new(File.join(File.expand_path(File.dirname(__FILE__)), 'test_health_model_definition.json')) #act model_definition = parser.parse_file diff --git a/test/code/plugin/health/health_monitor_state_spec.rb b/test/code/plugin/health/health_monitor_state_spec.rb index 3a9c9a8a8..67db69955 100644 --- a/test/code/plugin/health/health_monitor_state_spec.rb +++ b/test/code/plugin/health/health_monitor_state_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } include 
HealthModel include Minitest diff --git a/test/code/plugin/health/health_signal_reducer_spec.rb b/test/code/plugin/health/health_signal_reducer_spec.rb index ab55c6a30..f3ae3d15d 100644 --- a/test/code/plugin/health/health_signal_reducer_spec.rb +++ b/test/code/plugin/health/health_signal_reducer_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/kube_api_down_handler_spec.rb b/test/code/plugin/health/kube_api_down_handler_spec.rb index 71f34d62f..6e0513034 100644 --- a/test/code/plugin/health/kube_api_down_handler_spec.rb +++ b/test/code/plugin/health/kube_api_down_handler_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } include HealthModel describe "KubeApiDownHandler spec" do diff --git a/test/code/plugin/health/monitor_factory_spec.rb b/test/code/plugin/health/monitor_factory_spec.rb index 93c9a4203..e800e58e5 100644 --- a/test/code/plugin/health/monitor_factory_spec.rb +++ b/test/code/plugin/health/monitor_factory_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(File.expand_path(File.dirname(__FILE__)), 
"../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } include HealthModel describe "MonitorFactory Spec" do diff --git a/test/code/plugin/health/monitor_set_spec.rb b/test/code/plugin/health/monitor_set_spec.rb index 2a98272bb..a9ba68989 100644 --- a/test/code/plugin/health/monitor_set_spec.rb +++ b/test/code/plugin/health/monitor_set_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } include HealthModel describe "MonitorSet Spec" do diff --git a/test/code/plugin/health/parent_monitor_provider_spec.rb b/test/code/plugin/health/parent_monitor_provider_spec.rb index cee9026a8..f38998e23 100644 --- a/test/code/plugin/health/parent_monitor_provider_spec.rb +++ b/test/code/plugin/health/parent_monitor_provider_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| puts "Requiring file #{file}"; require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/tests/.gitignore b/test/code/plugin/health/tests/.gitignore deleted file mode 100644 index 9106b2a34..000000000 --- 
a/test/code/plugin/health/tests/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -/.bundle/ -/.yardoc -/_yardoc/ -/coverage/ -/doc/ -/pkg/ -/spec/reports/ -/tmp/ diff --git a/test/code/plugin/health/tests/.travis.yml b/test/code/plugin/health/tests/.travis.yml deleted file mode 100644 index 686d6c428..000000000 --- a/test/code/plugin/health/tests/.travis.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -sudo: false -language: ruby -cache: bundler -rvm: - - 2.5.1 -before_install: gem install bundler -v 2.0.2 diff --git a/test/code/plugin/health/tests/CODE_OF_CONDUCT.md b/test/code/plugin/health/tests/CODE_OF_CONDUCT.md deleted file mode 100644 index 36f2de801..000000000 --- a/test/code/plugin/health/tests/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,74 +0,0 @@ -# Contributor Covenant Code of Conduct - -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as -contributors and maintainers pledge to making participation in our project and -our community a harassment-free experience for everyone, regardless of age, body -size, disability, ethnicity, gender identity and expression, level of experience, -nationality, personal appearance, race, religion, or sexual identity and -orientation. 
- -## Our Standards - -Examples of behavior that contributes to creating a positive environment -include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or -advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic - address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a - professional setting - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable -behavior and are expected to take appropriate and fair corrective action in -response to any instances of unacceptable behavior. - -Project maintainers have the right and responsibility to remove, edit, or -reject comments, commits, code, wiki edits, issues, and other contributions -that are not aligned to this Code of Conduct, or to ban temporarily or -permanently any contributor for other behaviors that they deem inappropriate, -threatening, offensive, or harmful. - -## Scope - -This Code of Conduct applies both within project spaces and in public spaces -when an individual is representing the project or its community. Examples of -representing a project or community include using an official project e-mail -address, posting via an official social media account, or acting as an appointed -representative at an online or offline event. Representation of a project may be -further defined and clarified by project maintainers. 
- -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported by contacting the project team at dilip.rangarajan@gmail.com. All -complaints will be reviewed and investigated and will result in a response that -is deemed necessary and appropriate to the circumstances. The project team is -obligated to maintain confidentiality with regard to the reporter of an incident. -Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good -faith may face temporary or permanent repercussions as determined by other -members of the project's leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, -available at [http://contributor-covenant.org/version/1/4][version] - -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ diff --git a/test/code/plugin/health/tests/Gemfile b/test/code/plugin/health/tests/Gemfile deleted file mode 100644 index b2501d8c1..000000000 --- a/test/code/plugin/health/tests/Gemfile +++ /dev/null @@ -1,4 +0,0 @@ -source "https://rubygems.org" - -# Specify your gem's dependencies in tests.gemspec -gemspec diff --git a/test/code/plugin/health/tests/LICENSE.txt b/test/code/plugin/health/tests/LICENSE.txt deleted file mode 100644 index d45d69768..000000000 --- a/test/code/plugin/health/tests/LICENSE.txt +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2019 r-dilip - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following 
conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/test/code/plugin/health/tests/README.md b/test/code/plugin/health/tests/README.md deleted file mode 100644 index dcd05bc99..000000000 --- a/test/code/plugin/health/tests/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# Tests - -Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/tests`. To experiment with that code, run `bin/console` for an interactive prompt. - -TODO: Delete this and the text above, and describe your gem - -## Installation - -Add this line to your application's Gemfile: - -```ruby -gem 'tests' -``` - -And then execute: - - $ bundle - -Or install it yourself as: - - $ gem install tests - -## Usage - -TODO: Write usage instructions here - -## Development - -After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. - -To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). 
- -## Contributing - -Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/tests. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct. - -## License - -The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT). - -## Code of Conduct - -Everyone interacting in the Tests project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/tests/blob/master/CODE_OF_CONDUCT.md). diff --git a/test/code/plugin/health/tests/Rakefile b/test/code/plugin/health/tests/Rakefile deleted file mode 100644 index d433a1edc..000000000 --- a/test/code/plugin/health/tests/Rakefile +++ /dev/null @@ -1,10 +0,0 @@ -require "bundler/gem_tasks" -require "rake/testtask" - -Rake::TestTask.new(:test) do |t| - t.libs << "test" - t.libs << "lib" - t.test_files = FileList["test/**/*_test.rb"] -end - -task :default => :test diff --git a/test/code/plugin/health/tests/bin/console b/test/code/plugin/health/tests/bin/console deleted file mode 100644 index 59eb02ef2..000000000 --- a/test/code/plugin/health/tests/bin/console +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env ruby - -require "bundler/setup" -require "tests" - -# You can add fixtures and/or initialization code here to make experimenting -# with your gem easier. You can also use a different console, if you like. - -# (If you use this, don't forget to add pry to your Gemfile!) 
-# require "pry" -# Pry.start - -require "irb" -IRB.start(__FILE__) diff --git a/test/code/plugin/health/tests/bin/setup b/test/code/plugin/health/tests/bin/setup deleted file mode 100644 index dce67d860..000000000 --- a/test/code/plugin/health/tests/bin/setup +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail -IFS=$'\n\t' -set -vx - -bundle install - -# Do any other automated setup that you need to do here diff --git a/test/code/plugin/health/tests/lib/tests.rb b/test/code/plugin/health/tests/lib/tests.rb deleted file mode 100644 index ed7795e48..000000000 --- a/test/code/plugin/health/tests/lib/tests.rb +++ /dev/null @@ -1,6 +0,0 @@ -require "tests/version" - -module Tests - class Error < StandardError; end - # Your code goes here... -end diff --git a/test/code/plugin/health/tests/lib/tests/version.rb b/test/code/plugin/health/tests/lib/tests/version.rb deleted file mode 100644 index c1945fbde..000000000 --- a/test/code/plugin/health/tests/lib/tests/version.rb +++ /dev/null @@ -1,3 +0,0 @@ -module Tests - VERSION = "0.1.0" -end diff --git a/test/code/plugin/health/tests/test/test_helper.rb b/test/code/plugin/health/tests/test/test_helper.rb deleted file mode 100644 index ca750dd40..000000000 --- a/test/code/plugin/health/tests/test/test_helper.rb +++ /dev/null @@ -1,4 +0,0 @@ -$LOAD_PATH.unshift File.expand_path("../lib", __dir__) -require "tests" - -require "minitest/autorun" diff --git a/test/code/plugin/health/tests/test/tests_test.rb b/test/code/plugin/health/tests/test/tests_test.rb deleted file mode 100644 index 7bb30bfd8..000000000 --- a/test/code/plugin/health/tests/test/tests_test.rb +++ /dev/null @@ -1,11 +0,0 @@ -require "test_helper" - -class TestsTest < Minitest::Test - def test_that_it_has_a_version_number - refute_nil ::Tests::VERSION - end - - def test_it_does_something_useful - assert false - end -end diff --git a/test/code/plugin/health/tests/tests.gemspec b/test/code/plugin/health/tests/tests.gemspec deleted file mode 100644 
index b551315cc..000000000 --- a/test/code/plugin/health/tests/tests.gemspec +++ /dev/null @@ -1,34 +0,0 @@ -lib = File.expand_path("lib", __dir__) -$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) -require "tests/version" - -Gem::Specification.new do |spec| - spec.name = "tests" - spec.version = Tests::VERSION - spec.authors = ["r-dilip"] - spec.email = ["dilip.rangarajan@gmail.com"] - - spec.summary = %q{TODO: Write a short summary, because RubyGems requires one.} - spec.description = %q{TODO: Write a longer description or delete this line.} - spec.homepage = "TODO: Put your gem's website or public repo URL here." - spec.license = "MIT" - - spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'" - - spec.metadata["homepage_uri"] = spec.homepage - spec.metadata["source_code_uri"] = "TODO: Put your gem's public repo URL here." - spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here." - - # Specify which files should be added to the gem when it is released. - # The `git ls-files -z` loads the files in the RubyGem that have been added into git. 
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do - `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } - end - spec.bindir = "exe" - spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } - spec.require_paths = ["lib"] - - spec.add_development_dependency "bundler", "~> 2.0" - spec.add_development_dependency "rake", "~> 10.0" - spec.add_development_dependency "minitest", "~> 5.0" -end diff --git a/test/code/plugin/test_helpers.rb b/test/code/plugin/test_helpers.rb index 00c06e30b..543f00ac9 100644 --- a/test/code/plugin/test_helpers.rb +++ b/test/code/plugin/test_helpers.rb @@ -1,2 +1,3 @@ +gem "minitest" require "minitest/spec" require 'minitest/autorun' \ No newline at end of file From 338b75262df0599dcc2db4ccdc31fe1e1884a7f3 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 17 Jul 2019 16:11:07 -0700 Subject: [PATCH 77/90] Dont load health_monitor_utils --- test/code/plugin/health/aggregate_monitor_spec.rb | 2 +- .../plugin/health/aggregate_monitor_state_finalizer_spec.rb | 2 +- test/code/plugin/health/health_hierarchy_builder_spec.rb | 2 +- test/code/plugin/health/health_kubernetes_resource_spec.rb | 2 +- test/code/plugin/health/health_model_builder_spec.rb | 2 +- test/code/plugin/health/health_model_definition_parser_spec.rb | 2 +- test/code/plugin/health/health_monitor_state_spec.rb | 2 +- test/code/plugin/health/health_signal_reducer_spec.rb | 2 +- test/code/plugin/health/kube_api_down_handler_spec.rb | 2 +- test/code/plugin/health/monitor_factory_spec.rb | 2 +- test/code/plugin/health/monitor_set_spec.rb | 2 +- test/code/plugin/health/parent_monitor_provider_spec.rb | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/code/plugin/health/aggregate_monitor_spec.rb b/test/code/plugin/health/aggregate_monitor_spec.rb index 8cdf0a029..729965999 100644 --- a/test/code/plugin/health/aggregate_monitor_spec.rb +++ b/test/code/plugin/health/aggregate_monitor_spec.rb @@ -1,6 +1,6 @@ 
require_relative '../test_helpers' -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } include HealthModel describe "AggregateMonitor Spec" do diff --git a/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb b/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb index ffdf49baf..f1ae0564d 100644 --- a/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb +++ b/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/health_hierarchy_builder_spec.rb b/test/code/plugin/health/health_hierarchy_builder_spec.rb index d3f8d180f..daafe0312 100644 --- a/test/code/plugin/health/health_hierarchy_builder_spec.rb +++ b/test/code/plugin/health/health_hierarchy_builder_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/health_kubernetes_resource_spec.rb b/test/code/plugin/health/health_kubernetes_resource_spec.rb index bb2781bdf..c27d969ec 100644 --- 
a/test/code/plugin/health/health_kubernetes_resource_spec.rb +++ b/test/code/plugin/health/health_kubernetes_resource_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } include HealthModel describe "HealthKubernetesResources spec" do diff --git a/test/code/plugin/health/health_model_builder_spec.rb b/test/code/plugin/health/health_model_builder_spec.rb index c063148c7..c49e6c92a 100644 --- a/test/code/plugin/health/health_model_builder_spec.rb +++ b/test/code/plugin/health/health_model_builder_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/health_model_definition_parser_spec.rb b/test/code/plugin/health/health_model_definition_parser_spec.rb index 94a757fb4..56551510b 100644 --- a/test/code/plugin/health/health_model_definition_parser_spec.rb +++ b/test/code/plugin/health/health_model_definition_parser_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| 
f.include?('health_monitor_utils')}.each { |file| require file } include HealthModel describe "HealthModelDefinitionParser spec " do diff --git a/test/code/plugin/health/health_monitor_state_spec.rb b/test/code/plugin/health/health_monitor_state_spec.rb index 67db69955..5fa8a6c6e 100644 --- a/test/code/plugin/health/health_monitor_state_spec.rb +++ b/test/code/plugin/health/health_monitor_state_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/health_signal_reducer_spec.rb b/test/code/plugin/health/health_signal_reducer_spec.rb index f3ae3d15d..d074748b0 100644 --- a/test/code/plugin/health/health_signal_reducer_spec.rb +++ b/test/code/plugin/health/health_signal_reducer_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } include HealthModel include Minitest diff --git a/test/code/plugin/health/kube_api_down_handler_spec.rb b/test/code/plugin/health/kube_api_down_handler_spec.rb index 6e0513034..3f3f9b37f 100644 --- a/test/code/plugin/health/kube_api_down_handler_spec.rb +++ b/test/code/plugin/health/kube_api_down_handler_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' 
-Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } include HealthModel describe "KubeApiDownHandler spec" do diff --git a/test/code/plugin/health/monitor_factory_spec.rb b/test/code/plugin/health/monitor_factory_spec.rb index e800e58e5..2135808bd 100644 --- a/test/code/plugin/health/monitor_factory_spec.rb +++ b/test/code/plugin/health/monitor_factory_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } include HealthModel describe "MonitorFactory Spec" do diff --git a/test/code/plugin/health/monitor_set_spec.rb b/test/code/plugin/health/monitor_set_spec.rb index a9ba68989..1f4e970be 100644 --- a/test/code/plugin/health/monitor_set_spec.rb +++ b/test/code/plugin/health/monitor_set_spec.rb @@ -1,6 +1,6 @@ require_relative '../test_helpers' # consider doing this in test_helpers.rb so that this code is common -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } include HealthModel describe "MonitorSet Spec" do diff --git a/test/code/plugin/health/parent_monitor_provider_spec.rb b/test/code/plugin/health/parent_monitor_provider_spec.rb index f38998e23..4e5c4eb63 100644 --- 
a/test/code/plugin/health/parent_monitor_provider_spec.rb +++ b/test/code/plugin/health/parent_monitor_provider_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } include HealthModel include Minitest From dd8dfeff6e838d8a1d483de12d192e1f91b97c21 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 17 Jul 2019 16:17:39 -0700 Subject: [PATCH 78/90] Dumm commit to force pull --- test/code/plugin/health/health_missing_signal_generator_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/code/plugin/health/health_missing_signal_generator_spec.rb b/test/code/plugin/health/health_missing_signal_generator_spec.rb index 610e871f8..5a27bc1e4 100644 --- a/test/code/plugin/health/health_missing_signal_generator_spec.rb +++ b/test/code/plugin/health/health_missing_signal_generator_spec.rb @@ -1,5 +1,5 @@ require_relative '../test_helpers' -Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].each { |file| require file } +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each {|file| require file} include HealthModel include Minitest From c161fc1debc2491582b9a7aecbb993f9a6a9b143 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 17 Jul 2019 23:32:32 +0000 Subject: [PATCH 79/90] remove non existent file from base_container.data, update Makefile --- build/Makefile | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/build/Makefile b/build/Makefile index b5312cfe3..257980160 100644 --- a/build/Makefile +++ b/build/Makefile @@ -91,9 +91,9 @@ CXXFLAGS = $(COMPILE_FLAGS) # 
Build targets ifeq ($(ULINUX),1) -all : $(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS $(PROVIDER_LIBRARY) KIT_STATUS kit fluentbitplugin +all : $(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS $(PROVIDER_LIBRARY) KIT_STATUS kit fluentbitplugin rubypluginstests else -all : $(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS $(PROVIDER_LIBRARY) fluentbitplugin +all : $(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS $(PROVIDER_LIBRARY) fluentbitplugin rubypluginstests endif clean : @@ -143,6 +143,15 @@ fluentbitplugin : make -C $(GO_SOURCE_DIR) fbplugin $(COPY) $(GO_SOURCE_DIR)/out_oms.so $(INTERMEDIATE_DIR) +rubypluginstests : + @echo "========================= Installing pre-reqs for running tests" + sudo apt-add-repository ppa:brightbox/ruby-ng -y + sudo apt-get update + sudo apt-get install ruby2.4 rake -y + sudo gem install minitest + @echo "========================= Running tests..." + rake test + #-------------------------------------------------------------------------------- # PAL build # From 142a5a580004a4beb1f7abba99367201ac2584bb Mon Sep 17 00:00:00 2001 From: r-dilip Date: Mon, 29 Jul 2019 19:31:52 -0700 Subject: [PATCH 80/90] Updated tomlparser.rb to handle agent_settings for health_model --- installer/scripts/tomlparser.rb | 37 ++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index 3e7f48045..af8da9870 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -1,8 +1,10 @@ #!/usr/local/bin/ruby require_relative "tomlrb" +require 'json' -@configMapMountPath = "/etc/config/settings/log-data-collection-settings" +@log_settings_config_map_mount_path = "/etc/config/settings/log-data-collection-settings" +@agent_settings_config_map_mount_path = "/etc/config/settings/agent-settings" @configVersion = "" @configSchemaVersion = "" # Setting default values which will be 
used in case they are not set in the configmap or if configmap doesnt exist @@ -16,12 +18,12 @@ @excludePath = "*.csv2" #some invalid path # Use parser to parse the configmap toml file to a ruby structure -def parseConfigMap +def parseConfigMap(path) begin # Check to see if config map is created - if (File.file?(@configMapMountPath)) + if (File.file?(path)) puts "config::configmap container-azm-ms-agentconfig for settings mounted, parsing values" - parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true) + parsedConfig = Tomlrb.load_file(path, symbolize_keys: true) puts "config::Successfully parsed mounted config map" return parsedConfig else @@ -117,19 +119,36 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::error::Exception while reading config settings for cluster level environment variable collection - #{errorStr}, using defaults" end end + + begin + if !parsedConfig.nil? && !parsedConfig[:agent_settings][:health_model].nil? && !parsedConfig[:agent_settings][:health_model][:enabled].nil? + @enable_health_model = parsedConfig[:agent_settings][:health_model][:enabled] + puts "enable_health_model = #{@enable_health_model}" + end + rescue => errorStr + puts "config::error:Exception while reading config settings for health_model enabled setting - #{errorStr}, using defaults" + @enable_health_model = false + end end @configSchemaVersion = ENV['AZMON_AGENT_CFG_SCHEMA_VERSION'] puts "****************Start Config Processing********************" if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? 
&& @configSchemaVersion.strip.casecmp('v1') == 0 #note v1 is the only supported schema version , so hardcoding it - configMapSettings = parseConfigMap + configMapSettings = {} + + #iterate over every *settings file and build a hash of settings + Dir["/etc/config/settings/*settings"].each{|file| + puts "Parsing File #{file}" + settings = parseConfigMap(file) + configMapSettings = configMapSettings.merge(settings) + } + + puts "#{JSON.pretty_generate(configMapSettings)}" if !configMapSettings.nil? populateSettingValuesFromConfigMap(configMapSettings) end else - if (File.file?(@configMapMountPath)) - puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults" - end + puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults" @excludePath = "*_kube-system_*.log" end @@ -155,6 +174,8 @@ def populateSettingValuesFromConfigMap(parsedConfig) file.write("export AZMON_STDERR_EXCLUDED_NAMESPACES=#{@stderrExcludeNamespaces}\n") file.write("export AZMON_CLUSTER_COLLECT_ENV_VAR=#{@collectClusterEnvVariables}\n") file.write("export AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH=#{@excludePath}\n") + #health_model settings + file.write("export AZMON_CLUSTER_ENABLE_HEALTH_MODEL=#{@enable_health_model}\n") # Close file after writing all environment variables file.close puts "Both stdout & stderr log collection are turned off for namespaces: '#{@excludePath}' " From d9f2e4eef6f2615659e0973e57f020b13d1bd2df Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 30 Jul 2019 11:29:06 -0700 Subject: [PATCH 81/90] Toggle health plugins based on Feature flag --- installer/scripts/tomlparser.rb | 2 +- source/code/plugin/filter_cadvisor_health_container.rb | 6 +++++- source/code/plugin/filter_cadvisor_health_node.rb | 6 +++++- source/code/plugin/filter_health_model_builder.rb | 5 +++++ source/code/plugin/health/health_monitor_utils.rb | 9 +++++++++ source/code/plugin/in_kube_health.rb | 6 ++++++ 6 files changed, 31 
insertions(+), 3 deletions(-) diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index cf762af69..6a7068eb1 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -175,7 +175,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) file.write("export AZMON_CLUSTER_COLLECT_ENV_VAR=#{@collectClusterEnvVariables}\n") file.write("export AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH=#{@excludePath}\n") #health_model settings - file.write("export AZMON_CLUSTER_ENABLE_HEALTH_MODEL=#{@enable_health_model}\n") + file.write("export AZMON_CLUSTER_ENABLE_HEALTH_MODEL=#{@enable_health_model}\n") # Close file after writing all environment variables file.close puts "Both stdout & stderr log collection are turned off for namespaces: '#{@excludePath}' " diff --git a/source/code/plugin/filter_cadvisor_health_container.rb b/source/code/plugin/filter_cadvisor_health_container.rb index 448a03885..4090092a9 100644 --- a/source/code/plugin/filter_cadvisor_health_container.rb +++ b/source/code/plugin/filter_cadvisor_health_container.rb @@ -29,7 +29,7 @@ class CAdvisor2ContainerHealthFilter < Filter @@clusterName = KubernetesApiClient.getClusterName @@clusterId = KubernetesApiClient.getClusterId @@clusterRegion = KubernetesApiClient.getClusterRegion - + @@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled def initialize super @@ -59,6 +59,10 @@ def start end def filter_stream(tag, es) + if !@@cluster_health_model_enabled + @log.info "Cluster Health Model disabled in filter_cadvisor_health_container" + return [] + end new_es = MultiEventStream.new #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @hostName) records_count = 0 diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb index fcfa1cb1a..3f6a12f4f 100644 --- a/source/code/plugin/filter_cadvisor_health_node.rb +++ b/source/code/plugin/filter_cadvisor_health_node.rb @@ -30,7 +30,7 @@ class 
CAdvisor2NodeHealthFilter < Filter @@clusterName = KubernetesApiClient.getClusterName @@clusterId = KubernetesApiClient.getClusterId @@clusterRegion = KubernetesApiClient.getClusterRegion - + @@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled def initialize super @@ -61,6 +61,10 @@ def start end def filter_stream(tag, es) + if !@@cluster_health_model_enabled + @log.info "Cluster Health Model disabled in filter_cadvisor_health_node" + return [] + end new_es = MultiEventStream.new #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @hostName) records_count = 0 diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 8c6e82520..eed2a49dc 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -21,6 +21,7 @@ class FilterHealthModelBuilder < Filter @@rewrite_tag = 'oms.api.KubeHealth.AgentCollectionTime' @@cluster_id = KubernetesApiClient.getClusterId + @@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled def initialize super @@ -72,6 +73,10 @@ def shutdown end def filter_stream(tag, es) + if !@@cluster_health_model_enabled + @log.info "Cluster Health Model disabled in filter_health_model_builder" + return [] + end new_es = MultiEventStream.new time = Time.now begin diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index b450ee3e0..df47529e6 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -52,6 +52,15 @@ def is_pods_ready_monitor(monitor_id) return (monitor_id == HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID || monitor_id == HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) end + def is_cluster_health_model_enabled + enabled = ENV["AZMON_CLUSTER_ENABLE_HEALTH_MODEL"] + if !enabled.nil? 
&& enabled.casecmp("true") == 0 + return true + else + return false + end + end + def get_pods_ready_hash(pod_inventory, deployment_inventory) pods_ready_percentage_hash = {} deployment_lookup = {} diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 7f8e5d523..46250c4ce 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -25,6 +25,7 @@ def initialize @@cluster_id = KubernetesApiClient.getClusterId @resources = HealthKubernetesResources.instance @provider = HealthMonitorProvider.new(@@cluster_id, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) + @@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled end include HealthModel @@ -64,6 +65,11 @@ def shutdown end def enumerate + if !@@cluster_health_model_enabled + @log.info "Cluster Health Model disabled in in_kube_health" + return + end + begin currentTime = Time.now emitTime = currentTime.to_f From 7b09fcff554925e15be0267fd4b2a651a0338447 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Tue, 30 Jul 2019 13:54:57 -0700 Subject: [PATCH 82/90] Added health_monitor_helpers, and fixed log --- installer/datafiles/base_container.data | 5 +++-- source/code/plugin/in_kube_health.rb | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index e9483e44e..a6de00491 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -113,7 +113,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh; installer/scripts/TelegrafTCPErrorTelemetry.sh; 755; root; root /opt/livenessprobe.sh; installer/scripts/livenessprobe.sh; 755; root; root /opt/tomlparser.rb; installer/scripts/tomlparser.rb; 755; root; root -/opt/tomlparser-prom-customconfig.rb; installer/scripts/tomlparser-prom-customconfig.rb; 755; root; root 
+/opt/tomlparser-prom-customconfig.rb; installer/scripts/tomlparser-prom-customconfig.rb; 755; root; root @@ -136,7 +136,9 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/health/health_model_constants.rb; source/code/plugin/health/health_model_constants.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/parent_monitor_provider.rb; source/code/plugin/health/parent_monitor_provider.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_model_definition_parser.rb; source/code/plugin/health/health_model_definition_parser.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb; source/code/plugin/health/health_monitor_helpers.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_optimizer.rb; source/code/plugin/health/health_monitor_optimizer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb; source/code/plugin/health/health_monitor_helpers.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_provider.rb; source/code/plugin/health/health_monitor_provider.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_record.rb; source/code/plugin/health/health_monitor_record.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_state.rb; source/code/plugin/health/health_monitor_state.rb; 644; root; root @@ -148,7 +150,6 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/health/monitor_set.rb; source/code/plugin/health/monitor_set.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/unit_monitor.rb; source/code/plugin/health/unit_monitor.rb; 644; root; root - %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 46250c4ce..06dfc1814 100644 --- a/source/code/plugin/in_kube_health.rb +++ 
b/source/code/plugin/in_kube_health.rb @@ -66,7 +66,7 @@ def shutdown def enumerate if !@@cluster_health_model_enabled - @log.info "Cluster Health Model disabled in in_kube_health" + @@hmlog.info "Cluster Health Model disabled in in_kube_health" return end From fa5e31d3113e6aa56e82a9952c3756c10443e67b Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 31 Jul 2019 17:26:51 -0700 Subject: [PATCH 83/90] Send start telemetry only if health model is enabled --- source/code/plugin/in_kube_health.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 06dfc1814..103921395 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -50,7 +50,9 @@ def start @@clusterCpuCapacity = cluster_capacity[0] @@clusterMemoryCapacity = cluster_capacity[1] @@hmlog.info "Cluster CPU Capacity: #{@@clusterCpuCapacity} Memory Capacity: #{@@clusterMemoryCapacity}" - ApplicationInsightsUtility.sendCustomEvent("in_kube_health Plugin Start", {}) + if @@cluster_health_model_enabled + ApplicationInsightsUtility.sendCustomEvent("in_kube_health Plugin Start", {}) + end end end From 0cf68705193a3a7bb1baaefd733803629b114b07 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 7 Aug 2019 09:28:35 -0700 Subject: [PATCH 84/90] PRfeedback --- installer/conf/container.conf | 4 +- installer/conf/kube.conf | 4 +- installer/scripts/tomlparser.rb | 3 +- .../plugin/filter_cadvisor_health_node.rb | 21 +++-- .../plugin/filter_health_model_builder.rb | 88 ++++++++++--------- .../plugin/health/health_hierarchy_builder.rb | 4 +- .../health/node_monitor_hierarchy_reducer.rb | 2 - source/code/plugin/in_kube_events.rb | 8 +- source/code/plugin/in_kube_health.rb | 46 +++++----- 9 files changed, 96 insertions(+), 84 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index ce33ebbc6..58a8bfba2 100755 --- a/installer/conf/container.conf +++ 
b/installer/conf/container.conf @@ -73,13 +73,11 @@ send_timeout 60s recover_wait 10s hard_timeout 60s - #phi_failure_detector false - #phi_threshold 32 heartbeat_type tcp host replicaset-service.kube-system - port 25235 + port 25227 diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 11b1ce841..4b4ec09ea 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -1,7 +1,7 @@ # Fluentd config file for OMS Docker - cluster components (kubeAPI) type forward - port 25235 + port 25227 bind 0.0.0.0 @@ -18,7 +18,7 @@ type kubeevents tag oms.containerinsights.KubeEvents run_interval 60s - log_level debug + log_level debug #Kubernetes logs diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index 6a7068eb1..067586629 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -27,7 +27,7 @@ def parseConfigMap(path) puts "config::Successfully parsed mounted config map from #{path}" return parsedConfig else - puts "config::configmap container-azm-ms-agentconfig for settings not mounted, using defaults" + puts "config::configmap container-azm-ms-agentconfig for settings not mounted, using defaults for #{path}" @excludePath = "*_kube-system_*.log" return nil end @@ -143,7 +143,6 @@ def populateSettingValuesFromConfigMap(parsedConfig) configMapSettings = configMapSettings.merge(settings) } - puts "#{JSON.pretty_generate(configMapSettings)}" if !configMapSettings.nil? 
populateSettingValuesFromConfigMap(configMapSettings) end diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb index 3f6a12f4f..627a525e7 100644 --- a/source/code/plugin/filter_cadvisor_health_node.rb +++ b/source/code/plugin/filter_cadvisor_health_node.rb @@ -33,13 +33,17 @@ class CAdvisor2NodeHealthFilter < Filter @@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled def initialize - super - @cpu_capacity = 0.0 - @memory_capacity = 0.0 - @last_resource_refresh = DateTime.now.to_time.to_i - @metrics_to_collect_hash = {} - @resources = HealthKubernetesResources.instance # this doesnt require node and pod inventory. So no need to populate them - @provider = HealthMonitorProvider.new(@@clusterId, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) + begin + super + @cpu_capacity = 0.0 + @memory_capacity = 0.0 + @last_resource_refresh = DateTime.now.to_time.to_i + @metrics_to_collect_hash = {} + @resources = HealthKubernetesResources.instance # this doesnt require node and pod inventory. 
So no need to populate them + @provider = HealthMonitorProvider.new(@@clusterId, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) + rescue => e + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + end end def configure(conf) @@ -76,7 +80,8 @@ def filter_stream(tag, es) records_count += 1 end rescue => e - router.emit_error_event(tag, time, record, e) + @log.info "Error in filter_stream for filter_cadvisor_health_node #{e.message}" + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) end } @log.debug "Filter Records Count #{records_count}" diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index eed2a49dc..7c6d2ba20 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -24,43 +24,50 @@ class FilterHealthModelBuilder < Filter @@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled def initialize - super - @buffer = HealthModel::HealthModelBuffer.new - @health_model_definition = HealthModel::ParentMonitorProvider.new(HealthModel::HealthModelDefinitionParser.new(@model_definition_path).parse_file) - @monitor_factory = HealthModel::MonitorFactory.new - @hierarchy_builder = HealthHierarchyBuilder.new(@health_model_definition, @monitor_factory) - # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. 
For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side - @state_finalizers = [HealthModel::AggregateMonitorStateFinalizer.new] - @monitor_set = HealthModel::MonitorSet.new - @model_builder = HealthModel::HealthModelBuilder.new(@hierarchy_builder, @state_finalizers, @monitor_set) - @kube_api_down_handler = HealthKubeApiDownHandler.new - @resources = HealthKubernetesResources.instance - @reducer = HealthSignalReducer.new - @state = HealthMonitorState.new - @generator = HealthMissingSignalGenerator.new - #TODO: cluster_labels needs to be initialized - @provider = HealthMonitorProvider.new(@@cluster_id, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) - @serializer = HealthStateSerializer.new(@health_state_serialized_path) - @deserializer = HealthStateDeserializer.new(@health_state_serialized_path) - # TODO: in_kube_api_health should set these values - # resources.node_inventory = node_inventory - # resources.pod_inventory = pod_inventory - # resources.deployment_inventory = deployment_inventory - #TODO: check if the path exists - deserialized_state_info = @deserializer.deserialize - @state = HealthMonitorState.new - @state.initialize_state(deserialized_state_info) - @cluster_old_state = 'none' - @cluster_new_state = 'none' + begin + super + @buffer = HealthModel::HealthModelBuffer.new + @health_model_definition = HealthModel::ParentMonitorProvider.new(HealthModel::HealthModelDefinitionParser.new(@model_definition_path).parse_file) + @monitor_factory = HealthModel::MonitorFactory.new + @hierarchy_builder = HealthHierarchyBuilder.new(@health_model_definition, @monitor_factory) + # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. 
For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side + @state_finalizers = [HealthModel::AggregateMonitorStateFinalizer.new] + @monitor_set = HealthModel::MonitorSet.new + @model_builder = HealthModel::HealthModelBuilder.new(@hierarchy_builder, @state_finalizers, @monitor_set) + @kube_api_down_handler = HealthKubeApiDownHandler.new + @resources = HealthKubernetesResources.instance + @reducer = HealthSignalReducer.new + @state = HealthMonitorState.new + @generator = HealthMissingSignalGenerator.new + #TODO: cluster_labels needs to be initialized + @provider = HealthMonitorProvider.new(@@cluster_id, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) + @serializer = HealthStateSerializer.new(@health_state_serialized_path) + @deserializer = HealthStateDeserializer.new(@health_state_serialized_path) + # TODO: in_kube_api_health should set these values + # resources.node_inventory = node_inventory + # resources.pod_inventory = pod_inventory + # resources.deployment_inventory = deployment_inventory + #TODO: check if the path exists + deserialized_state_info = @deserializer.deserialize + @state = HealthMonitorState.new + @state.initialize_state(deserialized_state_info) + @cluster_old_state = 'none' + @cluster_new_state = 'none' + rescue => e + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + end end def configure(conf) - super - @log = nil - - if @enable_log - @log = Logger.new(@log_path, 'weekly') - @log.info 'Starting filter_health_model_builder plugin' + begin + super + @log = nil + if @enable_log + @log = Logger.new(@log_path, 'weekly') + @log.info 'Starting filter_health_model_builder plugin' + end + rescue => e + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) end end @@ -73,13 +80,14 @@ def shutdown end def filter_stream(tag, es) - if !@@cluster_health_model_enabled - @log.info "Cluster Health Model disabled in 
filter_health_model_builder" - return [] - end - new_es = MultiEventStream.new - time = Time.now begin + if !@@cluster_health_model_enabled + @log.info "Cluster Health Model disabled in filter_health_model_builder" + return [] + end + new_es = MultiEventStream.new + time = Time.now + if tag.start_with?("oms.api.KubeHealth.DaemonSet") records = [] if !es.nil? diff --git a/source/code/plugin/health/health_hierarchy_builder.rb b/source/code/plugin/health/health_hierarchy_builder.rb index 8643f34a4..2da0050db 100644 --- a/source/code/plugin/health/health_hierarchy_builder.rb +++ b/source/code/plugin/health/health_hierarchy_builder.rb @@ -7,12 +7,12 @@ class HealthHierarchyBuilder def initialize(health_model_definition, monitor_factory) if !health_model_definition.is_a?(ParentMonitorProvider) - raise 'Invalid Type Expected: ParentMonitorProvider Actual: #{@health_model_definition.class.name}' + raise "Invalid Type Expected: ParentMonitorProvider Actual: #{@health_model_definition.class.name}" end @health_model_definition = health_model_definition if !monitor_factory.is_a?(MonitorFactory) - raise 'Invalid Type Expected: ParentMonitorProvider Actual: #{@monitor_factory.class.name}' + raise "Invalid Type Expected: MonitorFactory Actual: #{@monitor_factory.class.name}" end @monitor_factory = monitor_factory end diff --git a/source/code/plugin/health/node_monitor_hierarchy_reducer.rb b/source/code/plugin/health/node_monitor_hierarchy_reducer.rb index a063e0ae3..aafbd07a8 100644 --- a/source/code/plugin/health/node_monitor_hierarchy_reducer.rb +++ b/source/code/plugin/health/node_monitor_hierarchy_reducer.rb @@ -12,7 +12,6 @@ def finalize(monitor_set) monitors_to_reduce.each do |monitor_to_reduce| monitor = monitor_set.get_monitor(monitor_to_reduce) if !monitor.nil? 
- puts "Before Deleting #{monitor_set.get_size}" if monitor.is_aggregate_monitor && monitor.get_member_monitors.size == 1 #copy the children of member monitor as children of parent member_monitor_instance_id = monitor.get_member_monitors[0] #gets the only member monitor instance id @@ -26,7 +25,6 @@ def finalize(monitor_set) # delete the member monitor from the monitor_set monitor_set.delete(member_monitor_instance_id) end - puts "After Deleting #{monitor_set.get_size}" end end end diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index 11ab2e919..f177b62bf 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -67,7 +67,7 @@ def enumerate(eventList = nil) newEventQueryState.push(eventId) if !eventQueryState.empty? && eventQueryState.include?(eventId) next - end + end record["ObjectKind"] = items["involvedObject"]["kind"] record["Namespace"] = items["involvedObject"]["namespace"] record["Name"] = items["involvedObject"]["name"] @@ -84,7 +84,7 @@ def enumerate(eventList = nil) else record["Computer"] = (OMS::Common.get_hostname) end - record['ClusterName'] = KubernetesApiClient.getClusterName + record["ClusterName"] = KubernetesApiClient.getClusterName record["ClusterId"] = KubernetesApiClient.getClusterId wrapper = { "DataType" => "KUBE_EVENTS_BLOB", @@ -94,12 +94,12 @@ def enumerate(eventList = nil) eventStream.add(emitTime, wrapper) if wrapper end router.emit_stream(@tag, eventStream) if eventStream - end + end writeEventQueryState(newEventQueryState) rescue => errorStr $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end + end end def run_periodic diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 103921395..d9672da3b 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -37,23 +37,27 @@ def configure(conf) end def start - if @run_interval - 
@finished = false - @condition = ConditionVariable.new - @mutex = Mutex.new - @thread = Thread.new(&method(:run_periodic)) - - @@hmlog = HealthMonitorUtils.get_log_handle - @@clusterName = KubernetesApiClient.getClusterName - @@clusterRegion = KubernetesApiClient.getClusterRegion - cluster_capacity = HealthMonitorUtils.get_cluster_cpu_memory_capacity(@@hmlog) - @@clusterCpuCapacity = cluster_capacity[0] - @@clusterMemoryCapacity = cluster_capacity[1] - @@hmlog.info "Cluster CPU Capacity: #{@@clusterCpuCapacity} Memory Capacity: #{@@clusterMemoryCapacity}" - if @@cluster_health_model_enabled - ApplicationInsightsUtility.sendCustomEvent("in_kube_health Plugin Start", {}) + begin + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) + + @@hmlog = HealthMonitorUtils.get_log_handle + @@clusterName = KubernetesApiClient.getClusterName + @@clusterRegion = KubernetesApiClient.getClusterRegion + cluster_capacity = HealthMonitorUtils.get_cluster_cpu_memory_capacity(@@hmlog) + @@clusterCpuCapacity = cluster_capacity[0] + @@clusterMemoryCapacity = cluster_capacity[1] + @@hmlog.info "Cluster CPU Capacity: #{@@clusterCpuCapacity} Memory Capacity: #{@@clusterMemoryCapacity}" + if @@cluster_health_model_enabled + ApplicationInsightsUtility.sendCustomEvent("in_kube_health Plugin Start", {}) + end + end + rescue => e + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) end - end end def shutdown @@ -67,12 +71,12 @@ def shutdown end def enumerate - if !@@cluster_health_model_enabled - @@hmlog.info "Cluster Health Model disabled in in_kube_health" - return - end - begin + if !@@cluster_health_model_enabled + @@hmlog.info "Cluster Health Model disabled in in_kube_health" + return + end + currentTime = Time.now emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 From 5d92eee7eba2a4d48ba948530527a0d77f86f719 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 
7 Aug 2019 19:01:14 +0000 Subject: [PATCH 85/90] Renamed offending file name that was causing ruby to fail loading --- ...ate_monitor_instance_id_labels.rb => agg_monitor_id_labels.rb} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename source/code/plugin/health/{aggregate_monitor_instance_id_labels.rb => agg_monitor_id_labels.rb} (100%) diff --git a/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb b/source/code/plugin/health/agg_monitor_id_labels.rb similarity index 100% rename from source/code/plugin/health/aggregate_monitor_instance_id_labels.rb rename to source/code/plugin/health/agg_monitor_id_labels.rb From 69e9aac5fc0c0c5d22ede389a24380c90a77108b Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 7 Aug 2019 19:03:25 +0000 Subject: [PATCH 86/90] change name in base_container --- installer/datafiles/base_container.data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 89ac1ad23..5c38184bf 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -124,7 +124,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/health/aggregate_monitor.rb; source/code/plugin/health/aggregate_monitor.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/aggregate_monitor_instance_id_labels.rb; source/code/plugin/health/aggregate_monitor_instance_id_labels.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/agg_monitor_id_labels.rb; source/code/plugin/health/agg_monitor_id_labels.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/aggregate_monitor_state_finalizer.rb; source/code/plugin/health/aggregate_monitor_state_finalizer.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_hierarchy_builder.rb; source/code/plugin/health/health_hierarchy_builder.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_kubernetes_resources.rb; 
source/code/plugin/health/health_kubernetes_resources.rb; 644; root; root From 6917ea2b41b2a551f39d75eb180526e4f44e8d58 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 7 Aug 2019 19:05:38 +0000 Subject: [PATCH 87/90] Remove non existent file --- installer/datafiles/base_container.data | 1 - 1 file changed, 1 deletion(-) diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 5c38184bf..131bb5607 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -145,7 +145,6 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/health/health_state_serializer.rb; source/code/plugin/health/health_state_serializer.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/monitor_factory.rb; source/code/plugin/health/monitor_factory.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/monitor_set.rb; source/code/plugin/health/monitor_set.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/node_missing_signal_state.rb; source/code/plugin/health/node_missing_signal_state.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/unit_monitor.rb; source/code/plugin/health/unit_monitor.rb; 644; root; root From ea4664962a2567a4846ee9e842f4baca099959cb Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 7 Aug 2019 20:08:07 +0000 Subject: [PATCH 88/90] Add health_monitor_helpers --- installer/datafiles/base_container.data | 1 + 1 file changed, 1 insertion(+) diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 131bb5607..20adcd242 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -139,6 +139,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/health/health_monitor_provider.rb; source/code/plugin/health/health_monitor_provider.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_record.rb; 
source/code/plugin/health/health_monitor_record.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_state.rb; source/code/plugin/health/health_monitor_state.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb; source/code/plugin/health/health_monitor_helpers.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_utils.rb; source/code/plugin/health/health_monitor_utils.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_signal_reducer.rb; source/code/plugin/health/health_signal_reducer.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_state_deserializer.rb; source/code/plugin/health/health_state_deserializer.rb; 644; root; root From 0cd2b80d4b073418674e73e851eb2a4879169026 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Mon, 12 Aug 2019 18:15:27 -0700 Subject: [PATCH 89/90] Use CRD for state persistence (#248) * Use CRD to maintain state * Update file names in base_container.data * Added unit test for cluster_health_state * Added dummy ca.crt test file * Fix test failing in Linux * Updated service name to healthmodel-replicaset-service * Remove unused code, fix base_container.data * remove references to serializer/deserializer * Flip order of update state, so that even if it fails, data will be sent, remove unnecessary logging * Change the name to cluster-health-state from chs * remove test json file * Fixing hardcoded labels --- health_records/aks-engine/aks-engine-1.json | 549 -- health_records/aks-engine/aks-engine-2.json | 549 -- health_records/aks-engine/aks-engine-3.json | 549 -- health_records/first_daemon_set_signals.json | 456 -- health_records/health_model_state.json | 1272 --- .../health_model_state_aks-engine.json | 1497 ---- health_records/second_daemon_set_signals.json | 456 -- health_records/third_daemon_set_signals.json | 456 -- installer/conf/container.conf | 2 +- installer/datafiles/base_container.data | 6 +- 
inventory/aks-engine/deployments.json | 1139 --- inventory/aks-engine/nodes.json | 1439 ---- inventory/aks-engine/pods.json | 6622 ---------------- inventory/deployments.json | 1702 ---- inventory/nodes.json | 964 --- inventory/pods.json | 6971 ----------------- .../plugin/filter_health_model_builder.rb | 24 +- ..._id_labels.rb => agg_monitor_id_labels.rb} | 10 +- .../plugin/health/cluster_health_state.rb | 116 + .../health/health_missing_signal_generator.rb | 2 +- .../plugin/health/health_model_constants.rb | 9 + .../plugin/health/health_monitor_provider.rb | 6 +- .../plugin/health/health_monitor_state.rb | 12 +- .../plugin/health/health_signal_reducer.rb | 4 +- .../health/health_state_deserializer.rb | 36 - .../plugin/health/health_state_serializer.rb | 20 - test/code/plugin/health/ca.crt | 1 + .../health/cluster_health_state_spec.rb | 37 + .../health_missing_signal_generator_spec.rb | 12 +- .../health/health_signal_reducer_spec.rb | 12 +- .../health/parent_monitor_provider_spec.rb | 4 +- 31 files changed, 216 insertions(+), 24718 deletions(-) delete mode 100644 health_records/aks-engine/aks-engine-1.json delete mode 100644 health_records/aks-engine/aks-engine-2.json delete mode 100644 health_records/aks-engine/aks-engine-3.json delete mode 100644 health_records/first_daemon_set_signals.json delete mode 100644 health_records/health_model_state.json delete mode 100644 health_records/health_model_state_aks-engine.json delete mode 100644 health_records/second_daemon_set_signals.json delete mode 100644 health_records/third_daemon_set_signals.json delete mode 100644 inventory/aks-engine/deployments.json delete mode 100644 inventory/aks-engine/nodes.json delete mode 100644 inventory/aks-engine/pods.json delete mode 100644 inventory/deployments.json delete mode 100644 inventory/nodes.json delete mode 100644 inventory/pods.json rename source/code/plugin/health/{aggregate_monitor_instance_id_labels.rb => agg_monitor_id_labels.rb} (55%) create mode 100644 
source/code/plugin/health/cluster_health_state.rb delete mode 100644 source/code/plugin/health/health_state_deserializer.rb delete mode 100644 source/code/plugin/health/health_state_serializer.rb create mode 100644 test/code/plugin/health/ca.crt create mode 100644 test/code/plugin/health/cluster_health_state_spec.rb diff --git a/health_records/aks-engine/aks-engine-1.json b/health_records/aks-engine/aks-engine-1.json deleted file mode 100644 index 593cba634..000000000 --- a/health_records/aks-engine/aks-engine-1.json +++ /dev/null @@ -1,549 +0,0 @@ -[ - { - "MonitorId": "node_memory_utilization", - "MonitorInstanceId": "node_memory_utilization-c74648e5c2362fcdc295a88737fdb134", - "Details": { - "timestamp": "2019-06-19T19:41:34Z", - "state": "pass", - "details": { - "memoryRssBytes": 693760000.0, - "memoryUtilizationPercentage": 9.52 - } - }, - "AgentCollectionTime": "2019-06-19T19:41:44Z", - "TimeFirstObserved": "2019-06-19T19:41:44Z", - "NodeName": "k8s-agentpool1-15159885-vmss000001" - }, - { - "MonitorId": "node_cpu_utilization", - "MonitorInstanceId": "node_cpu_utilization-c74648e5c2362fcdc295a88737fdb134", - "Details": { - "timestamp": "2019-06-19T19:41:34Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 116.89870505, - "cpuUtilizationPercentage": 5.84 - } - }, - "AgentCollectionTime": "2019-06-19T19:41:44Z", - "TimeFirstObserved": "2019-06-19T19:41:44Z", - "NodeName": "k8s-agentpool1-15159885-vmss000001" - }, - { - "MonitorId": "node_memory_utilization", - "MonitorInstanceId": "node_memory_utilization-fbf8e2b103dce1d6b0adefda04bfc87c", - "Details": { - "timestamp": "2019-06-19T19:41:38Z", - "state": "fail", - "details": { - "memoryRssBytes": 578871296.0, - "memoryUtilizationPercentage": 7.92 - } - }, - "AgentCollectionTime": "2019-06-19T19:41:39Z", - "TimeFirstObserved": "2019-06-19T19:41:39Z", - "NodeName": "k8s-agentpool1-15159885-vmss000000" - }, - { - "MonitorId": "node_cpu_utilization", - "MonitorInstanceId": 
"node_cpu_utilization-fbf8e2b103dce1d6b0adefda04bfc87c", - "Details": { - "timestamp": "2019-06-19T19:41:38Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 68.11712868852459, - "cpuUtilizationPercentage": 3.41 - } - }, - "AgentCollectionTime": "2019-06-19T19:41:39Z", - "TimeFirstObserved": "2019-06-19T19:41:39Z", - "NodeName": "k8s-agentpool1-15159885-vmss000000" - }, - { - "MonitorId": "node_memory_utilization", - "MonitorInstanceId": "node_memory_utilization-d31bde3d37d8ad276db34f17aa1ec71a", - "Details": { - "timestamp": "2019-06-19T19:41:53Z", - "state": "pass", - "details": { - "memoryRssBytes": 1582518272.0, - "memoryUtilizationPercentage": 21.65 - } - }, - "AgentCollectionTime": "2019-06-19T19:42:02Z", - "TimeFirstObserved": "2019-06-19T19:42:02Z", - "NodeName": "k8s-master-15159885-0" - }, - { - "MonitorId": "node_cpu_utilization", - "MonitorInstanceId": "node_cpu_utilization-d31bde3d37d8ad276db34f17aa1ec71a", - "Details": { - "timestamp": "2019-06-19T19:41:53Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 191.90451009836065, - "cpuUtilizationPercentage": 9.6 - } - }, - "AgentCollectionTime": "2019-06-19T19:42:02Z", - "TimeFirstObserved": "2019-06-19T19:42:02Z", - "NodeName": "k8s-master-15159885-0" - }, - { - "MonitorId": "kube_api_status", - "MonitorInstanceId": "kube_api_status", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "audit-id": "2abd071d-9c29-477b-9cb8-8457fc0e2d48", - "content-type": "application/json", - "date": "Wed, 19 Jun 2019 19:43:13 GMT", - "connection": "close", - "transfer-encoding": "chunked", - "ResponseCode": "200" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "subscribed_capacity_cpu", - 
"MonitorInstanceId": "subscribed_capacity_cpu-e07de265a0a132be38e486491b78067c", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "clusterCpuCapacity": 18000.0, - "clusterCpuRequests": 1606.0 - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "subscribed_capacity_memory", - "MonitorInstanceId": "subscribed_capacity_memory-e07de265a0a132be38e486491b78067c", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "clusterMemoryCapacity": 65701564416.0, - "clusterMemoryRequests": 2493513728.0 - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-df01a18f5517897dad95f0b999cc9d7c", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~azure-cni-networkmonitor", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-68739da95e81bcf8ab005699ca388a4f", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - 
"details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~azure-ip-masq-agent", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-391488c4aaa8dcbd64beca1405a617ad", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~blobfuse-flexvol-installer", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-88bb1b25c5f62cca4c5e335b1aa0a006", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~coredns", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-ffb237f52acc4a1cca7b61a080ad0bc7", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": 
{ - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~heapster", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-dbd2d551aaa5aa496c8828e1561fc877", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~keyvault-flexvolume", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-35ec6eb563b285b32803cc13fe31ac62", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-addon-manager-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-047a539a54ed0dba1c4d839202a66e71", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { 
- "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-apiserver-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-27e541d084fab4b79e80381168eead29", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-controller-manager-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-b5563ebf777cfd7eba6b60219cc6290a", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~kube-proxy", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-1f3148b833758c579f539036b695c2f0", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", 
- "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-scheduler-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-7c98478057c9066c415f39d201d13455", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kubernetes-dashboard", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-ed7573dae1701088a1d0cd3a8f492ed0", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~metrics-server", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-d886e78b79832e7337133cafd1a21bcf", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": 
"pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~omsagent", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-d32a394b18c639dd53e936d042fe36a4", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~omsagent-rs", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-76eff81892d597e12372a28b77a66a73", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~tiller-deploy", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-ba138a5fed12485c414a6ce00a1d2626", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - 
"totalPods": 5, - "podsReady": 5, - "workloadName": "default~~diliprnodejsonlog", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-fbf8e2b103dce1d6b0adefda04bfc87c", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", - "NodeName": "k8s-agentpool1-15159885-vmss000000" - }, - { - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-c74648e5c2362fcdc295a88737fdb134", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", - "NodeName": "k8s-agentpool1-15159885-vmss000001" - }, - { - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-d31bde3d37d8ad276db34f17aa1ec71a", - "Details": { - "timestamp": "2019-06-19T19:43:13Z", - "state": "pass", - "details": { - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - }, - "AgentCollectionTime": "2019-06-19T19:43:13Z", - "TimeFirstObserved": "2019-06-19T19:43:13Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", - "NodeName": "k8s-master-15159885-0" - } -] \ No newline at end of file diff --git a/health_records/aks-engine/aks-engine-2.json b/health_records/aks-engine/aks-engine-2.json deleted file mode 100644 index 383594a9e..000000000 --- a/health_records/aks-engine/aks-engine-2.json +++ /dev/null @@ -1,549 +0,0 @@ -[ - { - "MonitorId": "node_memory_utilization", - "MonitorInstanceId": "node_memory_utilization-c74648e5c2362fcdc295a88737fdb134", - "Details": { - "timestamp": "2019-06-19T19:42:34Z", - "state": "pass", - "details": { - "memoryRssBytes": 656310272.0, - "memoryUtilizationPercentage": 9.01 - } - }, - "AgentCollectionTime": "2019-06-19T19:42:45Z", - "TimeFirstObserved": "2019-06-19T19:42:45Z", - "NodeName": "k8s-agentpool1-15159885-vmss000001" - }, - { - "MonitorId": "node_cpu_utilization", - "MonitorInstanceId": "node_cpu_utilization-c74648e5c2362fcdc295a88737fdb134", - "Details": { - "timestamp": "2019-06-19T19:42:34Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 110.81541546666666, - "cpuUtilizationPercentage": 5.54 - } - }, - "AgentCollectionTime": "2019-06-19T19:42:45Z", - "TimeFirstObserved": "2019-06-19T19:42:45Z", - "NodeName": "k8s-agentpool1-15159885-vmss000001" - }, - { - "MonitorId": "node_memory_utilization", - "MonitorInstanceId": "node_memory_utilization-fbf8e2b103dce1d6b0adefda04bfc87c", - "Details": { - "timestamp": "2019-06-19T19:42:38Z", - "state": "pass", - "details": { - "memoryRssBytes": 579223552.0, - "memoryUtilizationPercentage": 7.93 - } - }, - "AgentCollectionTime": "2019-06-19T19:42:39Z", - "TimeFirstObserved": "2019-06-19T19:42:39Z", - "NodeName": "k8s-agentpool1-15159885-vmss000000" - }, - { - "MonitorId": 
"node_cpu_utilization", - "MonitorInstanceId": "node_cpu_utilization-fbf8e2b103dce1d6b0adefda04bfc87c", - "Details": { - "timestamp": "2019-06-19T19:42:38Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 69.03265261666667, - "cpuUtilizationPercentage": 3.45 - } - }, - "AgentCollectionTime": "2019-06-19T19:42:39Z", - "TimeFirstObserved": "2019-06-19T19:42:39Z", - "NodeName": "k8s-agentpool1-15159885-vmss000000" - }, - { - "MonitorId": "node_memory_utilization", - "MonitorInstanceId": "node_memory_utilization-d31bde3d37d8ad276db34f17aa1ec71a", - "Details": { - "timestamp": "2019-06-19T19:42:53Z", - "state": "pass", - "details": { - "memoryRssBytes": 1583378432.0, - "memoryUtilizationPercentage": 21.67 - } - }, - "AgentCollectionTime": "2019-06-19T19:43:02Z", - "TimeFirstObserved": "2019-06-19T19:43:02Z", - "NodeName": "k8s-master-15159885-0" - }, - { - "MonitorId": "node_cpu_utilization", - "MonitorInstanceId": "node_cpu_utilization-d31bde3d37d8ad276db34f17aa1ec71a", - "Details": { - "timestamp": "2019-06-19T19:42:53Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 192.56217935, - "cpuUtilizationPercentage": 9.63 - } - }, - "AgentCollectionTime": "2019-06-19T19:43:02Z", - "TimeFirstObserved": "2019-06-19T19:43:02Z", - "NodeName": "k8s-master-15159885-0" - }, - { - "MonitorId": "kube_api_status", - "MonitorInstanceId": "kube_api_status", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "audit-id": "475b1f63-6d5f-40ba-88bc-161d5ac8de85", - "content-type": "application/json", - "date": "Wed, 19 Jun 2019 19:44:14 GMT", - "connection": "close", - "transfer-encoding": "chunked", - "ResponseCode": "200" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": 
"subscribed_capacity_cpu", - "MonitorInstanceId": "subscribed_capacity_cpu-e07de265a0a132be38e486491b78067c", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "clusterCpuCapacity": 18000.0, - "clusterCpuRequests": 1606.0 - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "subscribed_capacity_memory", - "MonitorInstanceId": "subscribed_capacity_memory-e07de265a0a132be38e486491b78067c", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "clusterMemoryCapacity": 65701564416.0, - "clusterMemoryRequests": 2493513728.0 - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-df01a18f5517897dad95f0b999cc9d7c", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~azure-cni-networkmonitor", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-68739da95e81bcf8ab005699ca388a4f", - "Details": { - "timestamp": 
"2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~azure-ip-masq-agent", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-391488c4aaa8dcbd64beca1405a617ad", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~blobfuse-flexvol-installer", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-88bb1b25c5f62cca4c5e335b1aa0a006", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~coredns", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-ffb237f52acc4a1cca7b61a080ad0bc7", - "Details": { - "timestamp": 
"2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~heapster", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-dbd2d551aaa5aa496c8828e1561fc877", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~keyvault-flexvolume", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-35ec6eb563b285b32803cc13fe31ac62", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-addon-manager-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-047a539a54ed0dba1c4d839202a66e71", - "Details": { - "timestamp": 
"2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-apiserver-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-27e541d084fab4b79e80381168eead29", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-controller-manager-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-b5563ebf777cfd7eba6b60219cc6290a", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~kube-proxy", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-1f3148b833758c579f539036b695c2f0", - "Details": { 
- "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-scheduler-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-7c98478057c9066c415f39d201d13455", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kubernetes-dashboard", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-ed7573dae1701088a1d0cd3a8f492ed0", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~metrics-server", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-d886e78b79832e7337133cafd1a21bcf", - "Details": 
{ - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~omsagent", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-d32a394b18c639dd53e936d042fe36a4", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~omsagent-rs", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-76eff81892d597e12372a28b77a66a73", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~tiller-deploy", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-ba138a5fed12485c414a6ce00a1d2626", - "Details": { - "timestamp": 
"2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 5, - "podsReady": 5, - "workloadName": "default~~diliprnodejsonlog", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-fbf8e2b103dce1d6b0adefda04bfc87c", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", - "NodeName": "k8s-agentpool1-15159885-vmss000000" - }, - { - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-c74648e5c2362fcdc295a88737fdb134", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", - "NodeName": "k8s-agentpool1-15159885-vmss000001" - }, - { - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-d31bde3d37d8ad276db34f17aa1ec71a", - "Details": { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - }, - "AgentCollectionTime": "2019-06-19T19:44:14Z", - "TimeFirstObserved": "2019-06-19T19:44:14Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", - "NodeName": "k8s-master-15159885-0" - } -] \ No newline at end of file diff --git a/health_records/aks-engine/aks-engine-3.json b/health_records/aks-engine/aks-engine-3.json deleted file mode 100644 index 674a87a9c..000000000 --- a/health_records/aks-engine/aks-engine-3.json +++ /dev/null @@ -1,549 +0,0 @@ -[ - { - "MonitorId": "node_memory_utilization", - "MonitorInstanceId": "node_memory_utilization-c74648e5c2362fcdc295a88737fdb134", - "Details": { - "timestamp": "2019-06-19T19:43:44Z", - "state": "pass", - "details": { - "memoryRssBytes": 668811264.0, - "memoryUtilizationPercentage": 9.18 - } - }, - "AgentCollectionTime": "2019-06-19T19:43:45Z", - "TimeFirstObserved": "2019-06-19T19:43:45Z", - "NodeName": "k8s-agentpool1-15159885-vmss000001" - }, - { - "MonitorId": "node_cpu_utilization", - "MonitorInstanceId": "node_cpu_utilization-c74648e5c2362fcdc295a88737fdb134", - "Details": { - "timestamp": "2019-06-19T19:43:44Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 91.65759084285713, - "cpuUtilizationPercentage": 4.58 - } - }, - "AgentCollectionTime": "2019-06-19T19:43:45Z", - "TimeFirstObserved": "2019-06-19T19:43:45Z", - "NodeName": "k8s-agentpool1-15159885-vmss000001" - }, - { - "MonitorId": "node_memory_utilization", - "MonitorInstanceId": "node_memory_utilization-fbf8e2b103dce1d6b0adefda04bfc87c", - "Details": { - "timestamp": "2019-06-19T19:43:38Z", - "state": "pass", - "details": { - "memoryRssBytes": 577175552.0, - "memoryUtilizationPercentage": 7.9 - } - }, - "AgentCollectionTime": "2019-06-19T19:43:39Z", - "TimeFirstObserved": "2019-06-19T19:43:39Z", - "NodeName": "k8s-agentpool1-15159885-vmss000000" - }, - { - "MonitorId": 
"node_cpu_utilization", - "MonitorInstanceId": "node_cpu_utilization-fbf8e2b103dce1d6b0adefda04bfc87c", - "Details": { - "timestamp": "2019-06-19T19:43:38Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 69.51127465, - "cpuUtilizationPercentage": 3.48 - } - }, - "AgentCollectionTime": "2019-06-19T19:43:39Z", - "TimeFirstObserved": "2019-06-19T19:43:39Z", - "NodeName": "k8s-agentpool1-15159885-vmss000000" - }, - { - "MonitorId": "node_memory_utilization", - "MonitorInstanceId": "node_memory_utilization-d31bde3d37d8ad276db34f17aa1ec71a", - "Details": { - "timestamp": "2019-06-19T19:43:53Z", - "state": "pass", - "details": { - "memoryRssBytes": 1582354432.0, - "memoryUtilizationPercentage": 21.65 - } - }, - "AgentCollectionTime": "2019-06-19T19:44:02Z", - "TimeFirstObserved": "2019-06-19T19:44:02Z", - "NodeName": "k8s-master-15159885-0" - }, - { - "MonitorId": "node_cpu_utilization", - "MonitorInstanceId": "node_cpu_utilization-d31bde3d37d8ad276db34f17aa1ec71a", - "Details": { - "timestamp": "2019-06-19T19:43:53Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 192.88827258333333, - "cpuUtilizationPercentage": 9.64 - } - }, - "AgentCollectionTime": "2019-06-19T19:44:02Z", - "TimeFirstObserved": "2019-06-19T19:44:02Z", - "NodeName": "k8s-master-15159885-0" - }, - { - "MonitorId": "kube_api_status", - "MonitorInstanceId": "kube_api_status", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "audit-id": "2116a117-abc9-4a72-a4d9-ea1bd111b127", - "content-type": "application/json", - "date": "Wed, 19 Jun 2019 19:45:15 GMT", - "connection": "close", - "transfer-encoding": "chunked", - "ResponseCode": "200" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": 
"subscribed_capacity_cpu", - "MonitorInstanceId": "subscribed_capacity_cpu-e07de265a0a132be38e486491b78067c", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "clusterCpuCapacity": 18000.0, - "clusterCpuRequests": 1606.0 - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "subscribed_capacity_memory", - "MonitorInstanceId": "subscribed_capacity_memory-e07de265a0a132be38e486491b78067c", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "clusterMemoryCapacity": 65701564416.0, - "clusterMemoryRequests": 2493513728.0 - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-df01a18f5517897dad95f0b999cc9d7c", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~azure-cni-networkmonitor", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-68739da95e81bcf8ab005699ca388a4f", - "Details": { - "timestamp": 
"2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~azure-ip-masq-agent", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-391488c4aaa8dcbd64beca1405a617ad", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~blobfuse-flexvol-installer", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-88bb1b25c5f62cca4c5e335b1aa0a006", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~coredns", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-ffb237f52acc4a1cca7b61a080ad0bc7", - "Details": { - "timestamp": 
"2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~heapster", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-dbd2d551aaa5aa496c8828e1561fc877", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~keyvault-flexvolume", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-35ec6eb563b285b32803cc13fe31ac62", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-addon-manager-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-047a539a54ed0dba1c4d839202a66e71", - "Details": { - "timestamp": 
"2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-apiserver-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-27e541d084fab4b79e80381168eead29", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-controller-manager-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-b5563ebf777cfd7eba6b60219cc6290a", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~kube-proxy", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-1f3148b833758c579f539036b695c2f0", - "Details": { 
- "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-scheduler-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-7c98478057c9066c415f39d201d13455", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kubernetes-dashboard", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-ed7573dae1701088a1d0cd3a8f492ed0", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~metrics-server", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-d886e78b79832e7337133cafd1a21bcf", - "Details": 
{ - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~omsagent", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-d32a394b18c639dd53e936d042fe36a4", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~omsagent-rs", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-76eff81892d597e12372a28b77a66a73", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~tiller-deploy", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-ba138a5fed12485c414a6ce00a1d2626", - "Details": { - "timestamp": 
"2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 5, - "podsReady": 5, - "workloadName": "default~~diliprnodejsonlog", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-fbf8e2b103dce1d6b0adefda04bfc87c", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", - "NodeName": "k8s-agentpool1-15159885-vmss000000" - }, - { - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-c74648e5c2362fcdc295a88737fdb134", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", - "NodeName": "k8s-agentpool1-15159885-vmss000001" - }, - { - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-d31bde3d37d8ad276db34f17aa1ec71a", - "Details": { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - }, - "AgentCollectionTime": "2019-06-19T19:45:15Z", - "TimeFirstObserved": "2019-06-19T19:45:15Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview", - "NodeName": "k8s-master-15159885-0" - } -] \ No newline at end of file diff --git a/health_records/first_daemon_set_signals.json b/health_records/first_daemon_set_signals.json deleted file mode 100644 index b540294f5..000000000 --- a/health_records/first_daemon_set_signals.json +++ /dev/null @@ -1,456 +0,0 @@ -[ - { - "MonitorId": "node_memory_utilization", - "MonitorInstanceId": "node_memory_utilization-af2f3c986ea63b47fc7d59b71abb37b8", - "Details": { - "timestamp": "2019-06-08T00:31:03Z", - "state": "pass", - "details": { - "memoryRssBytes": 953540608.0, - "memoryUtilizationPercentage": 26.42 - } - }, - "AgentCollectionTime": "2019-06-08T00:31:05Z", - "TimeFirstObserved": "2019-06-08T00:31:05Z", - "NodeName": "aks-nodepool1-19574989-1" - }, - { - "MonitorId": "node_cpu_utilization", - "MonitorInstanceId": "node_cpu_utilization-af2f3c986ea63b47fc7d59b71abb37b8", - "Details": { - "timestamp": "2019-06-08T00:31:03Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 253.0181856885246, - "cpuUtilizationPercentage": 25.3 - } - }, - "AgentCollectionTime": "2019-06-08T00:31:05Z", - "TimeFirstObserved": "2019-06-08T00:31:05Z", - "NodeName": "aks-nodepool1-19574989-1" - }, - { - "MonitorId": "node_memory_utilization", - "MonitorInstanceId": "node_memory_utilization-2b129a9a5633c0cf8f621601c6f8bb32", - "Details": { - "timestamp": "2019-06-08T00:31:36Z", - "state": "pass", - "details": { - "memoryRssBytes": 786239488.0, - "memoryUtilizationPercentage": 21.78 - } - }, - "AgentCollectionTime": "2019-06-08T00:31:42Z", - "TimeFirstObserved": "2019-06-08T00:31:42Z", - "NodeName": "aks-nodepool1-19574989-0" - }, - { - "MonitorId": "node_cpu_utilization", - 
"MonitorInstanceId": "node_cpu_utilization-2b129a9a5633c0cf8f621601c6f8bb32", - "Details": { - "timestamp": "2019-06-08T00:31:36Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 216.61802946666666, - "cpuUtilizationPercentage": 21.66 - } - }, - "AgentCollectionTime": "2019-06-08T00:31:42Z", - "TimeFirstObserved": "2019-06-08T00:31:42Z", - "NodeName": "aks-nodepool1-19574989-0" - }, - { - "MonitorId": "kube_api_status", - "MonitorInstanceId": "kube_api_status", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "content-type": "application/json", - "date": "Sat, 08 Jun 2019 00:31:49 GMT", - "connection": "close", - "transfer-encoding": "chunked", - "ResponseCode": "200" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "subscribed_capacity_cpu", - "MonitorInstanceId": "subscribed_capacity_cpu-bef5af9d919a51c49ba49d07f5784471", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "clusterCpuCapacity": 4000.0, - "clusterCpuRequests": 878.0 - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "subscribed_capacity_memory", - "MonitorInstanceId": "subscribed_capacity_memory-bef5af9d919a51c49ba49d07f5784471", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "clusterMemoryCapacity": 14436810752.0, - "clusterMemoryRequests": 1379926016.0 - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - 
"ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-1cab3e34a98cbb58a619846d017333e6", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~heapster", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-a0ed056caebdc40d1c78a49e87306eb8", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-dns-autoscaler", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-2fa184d5953cc675f553f22e673a3112", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~kube-dns-v20", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-3a08ca7a85c5f7b2eb912692aa0fe576", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~kube-proxy", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-b187658b280129d5d55e778e90bc76e7", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~kube-svc-redirect", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-3cd871884683b44eaf3d4bcbac980fa9", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kubernetes-dashboard", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-e22a2867e6f46868e1ff14b04d05400a", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~metrics-server", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-e1a23f69ff6f2d20614f3253f1be61c6", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~omsagent", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-b6bdfa57cc73499c0163debad116ea9e", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~omsagent-rs", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-a69167a8c8a128025d2a1b7adbbbf251", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~tunnelfront", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-18c49e512366cd770ea58fa6f2fbb72e", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "default~~diliprdeploymentnodeapps", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-c36b41dc3ddfd8830e5cef31b5c2738b", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "default~~nginx-deployment", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-8be6589a87324bd8010f0583b6e3c0a1", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "default~~rss-site", - "namespace": "default", - "workloadKind": "Pod" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-d8ce32d942df13b9f4b96ed0fef8efd9", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "totalPods": 10, - "podsReady": 10, - "workloadName": "default~~vishwadeploymentnodeapps", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-ea6e90712222c453f12f326f9a382e8e", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "test~~nginx-deployment", - "namespace": "test", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-2b129a9a5633c0cf8f621601c6f8bb32", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "NetworkUnavailable": { - "Reason": "RouteCreated", - "Message": "RouteController created a route" - }, - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "NodeName": "aks-nodepool1-19574989-0" - }, - { - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-af2f3c986ea63b47fc7d59b71abb37b8", - "Details": { - "timestamp": "2019-06-08T00:31:49Z", - "state": "pass", - "details": { - "NetworkUnavailable": { - "Reason": "RouteCreated", - "Message": "RouteController created a route" - }, - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - }, - "AgentCollectionTime": "2019-06-08T00:31:49Z", - "TimeFirstObserved": "2019-06-08T00:31:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "NodeName": "aks-nodepool1-19574989-1" - } -] \ No newline at end of file diff --git a/health_records/health_model_state.json b/health_records/health_model_state.json deleted file mode 100644 index 8efa173c3..000000000 --- a/health_records/health_model_state.json +++ /dev/null @@ -1,1272 +0,0 @@ -{ - "node_memory_utilization-af2f3c986ea63b47fc7d59b71abb37b8": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:31:03Z", - "state": "pass", - "details": { - "memoryRssBytes": 953540608.0, - "memoryUtilizationPercentage": 26.42 - } - }, - { - "timestamp": "2019-06-08T00:32:03Z", - "state": "pass", - "details": { - "memoryRssBytes": 927559680.0, - "memoryUtilizationPercentage": 25.7 - } - }, - { - "timestamp": "2019-06-08T00:33:03Z", - "state": "pass", - "details": { - "memoryRssBytes": 930779136.0, - "memoryUtilizationPercentage": 25.79 - } - } - ], - "is_state_change_consistent": true, - "should_send": true - }, - "node_cpu_utilization-af2f3c986ea63b47fc7d59b71abb37b8": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:31:03Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 253.0181856885246, - "cpuUtilizationPercentage": 25.3 - } - }, - { - "timestamp": "2019-06-08T00:32:03Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 251.33592345, - "cpuUtilizationPercentage": 25.13 - } - }, - { - "timestamp": "2019-06-08T00:33:03Z", - "state": "pass", - "details": 
{ - "cpuUsageMillicores": 254.57922858333333, - "cpuUtilizationPercentage": 25.46 - } - } - ], - "is_state_change_consistent": true, - "should_send": true - }, - "node_memory_utilization-2b129a9a5633c0cf8f621601c6f8bb32": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:31:36Z", - "state": "pass", - "details": { - "memoryRssBytes": 786239488.0, - "memoryUtilizationPercentage": 21.78 - } - }, - { - "timestamp": "2019-06-08T00:32:37Z", - "state": "pass", - "details": { - "memoryRssBytes": 748142592.0, - "memoryUtilizationPercentage": 20.73 - } - }, - { - "timestamp": "2019-06-08T00:33:37Z", - "state": "pass", - "details": { - "memoryRssBytes": 749514752.0, - "memoryUtilizationPercentage": 20.77 - } - } - ], - "is_state_change_consistent": true, - "should_send": true - }, - "node_cpu_utilization-2b129a9a5633c0cf8f621601c6f8bb32": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:31:36Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 216.61802946666666, - "cpuUtilizationPercentage": 21.66 - } - }, - { - "timestamp": "2019-06-08T00:32:37Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 207.61622447540984, - "cpuUtilizationPercentage": 20.76 - } - }, - { - "timestamp": "2019-06-08T00:33:37Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 214.84893721666666, - "cpuUtilizationPercentage": 21.48 - } - } - ], - "is_state_change_consistent": true, - "should_send": true - }, - "kube_api_status": { - "prev_sent_record_time": "2019-06-08T00:31:49Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-08T00:31:49Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - 
"content-type": "application/json", - "date": "Sat, 08 Jun 2019 00:33:50 GMT", - "connection": "close", - "transfer-encoding": "chunked", - "ResponseCode": "200" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "subscribed_capacity_cpu-bef5af9d919a51c49ba49d07f5784471": { - "prev_sent_record_time": "2019-06-08T00:31:49Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-08T00:31:49Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "clusterCpuCapacity": 4000.0, - "clusterCpuRequests": 878.0 - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "subscribed_capacity_memory-bef5af9d919a51c49ba49d07f5784471": { - "prev_sent_record_time": "2019-06-08T00:31:49Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-08T00:31:49Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "clusterMemoryCapacity": 14436810752.0, - "clusterMemoryRequests": 1379926016.0 - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-1cab3e34a98cbb58a619846d017333e6": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~heapster", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~heapster", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-a0ed056caebdc40d1c78a49e87306eb8": { - 
"prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-dns-autoscaler", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-dns-autoscaler", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-2fa184d5953cc675f553f22e673a3112": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~kube-dns-v20", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~kube-dns-v20", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-3a08ca7a85c5f7b2eb912692aa0fe576": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~kube-proxy", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - 
"details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~kube-proxy", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-b187658b280129d5d55e778e90bc76e7": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~kube-svc-redirect", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~kube-svc-redirect", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-3cd871884683b44eaf3d4bcbac980fa9": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kubernetes-dashboard", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kubernetes-dashboard", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-e22a2867e6f46868e1ff14b04d05400a": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - 
"prev_records": [ - { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~metrics-server", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~metrics-server", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-e1a23f69ff6f2d20614f3253f1be61c6": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~omsagent", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~omsagent", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-b6bdfa57cc73499c0163debad116ea9e": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~omsagent-rs", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~omsagent-rs", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - } - ], - 
"is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-a69167a8c8a128025d2a1b7adbbbf251": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~tunnelfront", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~tunnelfront", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "user_workload_pods_ready-18c49e512366cd770ea58fa6f2fbb72e": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "default~~diliprdeploymentnodeapps", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "default~~diliprdeploymentnodeapps", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "user_workload_pods_ready-c36b41dc3ddfd8830e5cef31b5c2738b": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "default~~nginx-deployment", - "namespace": 
"default", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "default~~nginx-deployment", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "user_workload_pods_ready-8be6589a87324bd8010f0583b6e3c0a1": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "default~~rss-site", - "namespace": "default", - "workloadKind": "Pod" - } - }, - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "default~~rss-site", - "namespace": "default", - "workloadKind": "Pod" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "user_workload_pods_ready-d8ce32d942df13b9f4b96ed0fef8efd9": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 10, - "podsReady": 10, - "workloadName": "default~~vishwadeploymentnodeapps", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 10, - "podsReady": 10, - "workloadName": "default~~vishwadeploymentnodeapps", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "user_workload_pods_ready-ea6e90712222c453f12f326f9a382e8e": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": 
"pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "test~~nginx-deployment", - "namespace": "test", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "test~~nginx-deployment", - "namespace": "test", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "node_condition-2b129a9a5633c0cf8f621601c6f8bb32": { - "prev_sent_record_time": "2019-06-08T00:31:49Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-08T00:31:49Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "NetworkUnavailable": { - "Reason": "RouteCreated", - "Message": "RouteController created a route" - }, - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "node_condition-af2f3c986ea63b47fc7d59b71abb37b8": { - "prev_sent_record_time": "2019-06-08T00:31:49Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-08T00:31:49Z", - "prev_records": [ - { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "NetworkUnavailable": { - "Reason": "RouteCreated", - "Message": "RouteController created a route" - }, - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "node-nodepool1-agent-aks-nodepool1-19574989-1": { - "prev_sent_record_time": "2019-06-08T00:31:05Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-08T00:31:05Z", - "prev_records": [ - { - "details": { - "pass": [ - "node_memory_utilization-af2f3c986ea63b47fc7d59b71abb37b8", - "node_cpu_utilization-af2f3c986ea63b47fc7d59b71abb37b8", - "node_condition-af2f3c986ea63b47fc7d59b71abb37b8" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:33:05Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "agent_node_pool-nodepool1": { - "prev_sent_record_time": "2019-06-08T00:31:05Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-08T00:31:05Z", - "prev_records": [ - { - "details": { - "pass": [ - "node-nodepool1-agent-aks-nodepool1-19574989-1", - "node-nodepool1-agent-aks-nodepool1-19574989-0" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:33:05Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "all_nodes": { - "prev_sent_record_time": "2019-06-08T00:31:05Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-08T00:31:05Z", - "prev_records": [ - { - "details": { - "pass": [ - "agent_node_pool-nodepool1" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:33:05Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "cluster": { - "prev_sent_record_time": "2019-06-08T00:31:05Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-08T00:31:05Z", - "prev_records": [ - { - "details": { - "pass": [ - "all_nodes", - "k8s_infrastructure", - "all_workloads" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:05Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "node-nodepool1-agent-aks-nodepool1-19574989-0": { - 
"prev_sent_record_time": "2019-06-08T00:31:42Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-08T00:31:42Z", - "prev_records": [ - { - "details": { - "pass": [ - "node_memory_utilization-2b129a9a5633c0cf8f621601c6f8bb32", - "node_cpu_utilization-2b129a9a5633c0cf8f621601c6f8bb32", - "node_condition-2b129a9a5633c0cf8f621601c6f8bb32" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:33:42Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "k8s_infrastructure": { - "prev_sent_record_time": "2019-06-08T00:31:49Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-08T00:31:49Z", - "prev_records": [ - { - "details": { - "pass": [ - "kube_api_status", - "system_workload-kube-system-heapster", - "system_workload-kube-system-kube-dns-autoscaler", - "system_workload-kube-system-kube-dns-v20", - "system_workload-kube-system-kube-proxy", - "system_workload-kube-system-kube-svc-redirect", - "system_workload-kube-system-kubernetes-dashboard", - "system_workload-kube-system-metrics-server", - "system_workload-kube-system-omsagent", - "system_workload-kube-system-omsagent-rs", - "system_workload-kube-system-tunnelfront" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "capacity": { - "prev_sent_record_time": "2019-06-08T00:31:49Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-08T00:31:49Z", - "prev_records": [ - { - "details": { - "pass": [ - "subscribed_capacity_cpu-bef5af9d919a51c49ba49d07f5784471", - "subscribed_capacity_memory-bef5af9d919a51c49ba49d07f5784471" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:33:51Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "all_workloads": { - "prev_sent_record_time": "2019-06-08T00:31:49Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": 
"2019-06-08T00:31:49Z", - "prev_records": [ - { - "details": { - "pass": [ - "capacity", - "all_namespaces" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-heapster": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-1cab3e34a98cbb58a619846d017333e6" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-kube-dns-autoscaler": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-a0ed056caebdc40d1c78a49e87306eb8" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-kube-dns-v20": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-2fa184d5953cc675f553f22e673a3112" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-kube-proxy": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-3a08ca7a85c5f7b2eb912692aa0fe576" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - 
"is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-kube-svc-redirect": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-b187658b280129d5d55e778e90bc76e7" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-kubernetes-dashboard": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-3cd871884683b44eaf3d4bcbac980fa9" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-metrics-server": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-e22a2867e6f46868e1ff14b04d05400a" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-omsagent": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-e1a23f69ff6f2d20614f3253f1be61c6" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-omsagent-rs": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - 
"new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-b6bdfa57cc73499c0163debad116ea9e" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-tunnelfront": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-a69167a8c8a128025d2a1b7adbbbf251" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "user_workload-default-diliprdeploymentnodeapps": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "user_workload_pods_ready-18c49e512366cd770ea58fa6f2fbb72e" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "namespace-default": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "user_workload-default-diliprdeploymentnodeapps", - "user_workload-default-nginx-deployment", - "user_workload-default-rss-site", - "user_workload-default-vishwadeploymentnodeapps" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "all_namespaces": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - 
"namespace-default", - "namespace-test" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "user_workload-default-nginx-deployment": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "user_workload_pods_ready-c36b41dc3ddfd8830e5cef31b5c2738b" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "user_workload-default-rss-site": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "user_workload_pods_ready-8be6589a87324bd8010f0583b6e3c0a1" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "user_workload-default-vishwadeploymentnodeapps": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "user_workload_pods_ready-d8ce32d942df13b9f4b96ed0fef8efd9" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "user_workload-test-nginx-deployment": { - "prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "user_workload_pods_ready-ea6e90712222c453f12f326f9a382e8e" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "namespace-test": { - 
"prev_sent_record_time": "2019-07-17T18:02:10Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:10Z", - "prev_records": [ - { - "details": { - "pass": [ - "user_workload-test-nginx-deployment" - ] - }, - "state": "pass", - "timestamp": "2019-06-08T00:31:49Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - } -} \ No newline at end of file diff --git a/health_records/health_model_state_aks-engine.json b/health_records/health_model_state_aks-engine.json deleted file mode 100644 index a72cf67b3..000000000 --- a/health_records/health_model_state_aks-engine.json +++ /dev/null @@ -1,1497 +0,0 @@ -{ - "node_memory_utilization-c74648e5c2362fcdc295a88737fdb134": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:41:34Z", - "state": "pass", - "details": { - "memoryRssBytes": 693760000.0, - "memoryUtilizationPercentage": 9.52 - } - }, - { - "timestamp": "2019-06-19T19:42:34Z", - "state": "pass", - "details": { - "memoryRssBytes": 656310272.0, - "memoryUtilizationPercentage": 9.01 - } - }, - { - "timestamp": "2019-06-19T19:43:44Z", - "state": "pass", - "details": { - "memoryRssBytes": 668811264.0, - "memoryUtilizationPercentage": 9.18 - } - } - ], - "is_state_change_consistent": true, - "should_send": true - }, - "node_cpu_utilization-c74648e5c2362fcdc295a88737fdb134": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:41:34Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 116.89870505, - "cpuUtilizationPercentage": 5.84 - } - }, - { - "timestamp": "2019-06-19T19:42:34Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 110.81541546666666, - "cpuUtilizationPercentage": 5.54 - } - }, - { - "timestamp": 
"2019-06-19T19:43:44Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 91.65759084285713, - "cpuUtilizationPercentage": 4.58 - } - } - ], - "is_state_change_consistent": true, - "should_send": true - }, - "node_memory_utilization-fbf8e2b103dce1d6b0adefda04bfc87c": { - "prev_sent_record_time": "2019-06-19T19:41:39Z", - "old_state": "none", - "new_state": "none", - "state_change_time": "2019-06-19T19:41:39Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:41:38Z", - "state": "fail", - "details": { - "memoryRssBytes": 578871296.0, - "memoryUtilizationPercentage": 7.92 - } - }, - { - "timestamp": "2019-06-19T19:42:38Z", - "state": "pass", - "details": { - "memoryRssBytes": 579223552.0, - "memoryUtilizationPercentage": 7.93 - } - }, - { - "timestamp": "2019-06-19T19:43:38Z", - "state": "pass", - "details": { - "memoryRssBytes": 577175552.0, - "memoryUtilizationPercentage": 7.9 - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "node_cpu_utilization-fbf8e2b103dce1d6b0adefda04bfc87c": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:41:38Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 68.11712868852459, - "cpuUtilizationPercentage": 3.41 - } - }, - { - "timestamp": "2019-06-19T19:42:38Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 69.03265261666667, - "cpuUtilizationPercentage": 3.45 - } - }, - { - "timestamp": "2019-06-19T19:43:38Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 69.51127465, - "cpuUtilizationPercentage": 3.48 - } - } - ], - "is_state_change_consistent": true, - "should_send": true - }, - "node_memory_utilization-d31bde3d37d8ad276db34f17aa1ec71a": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - 
"timestamp": "2019-06-19T19:41:53Z", - "state": "pass", - "details": { - "memoryRssBytes": 1582518272.0, - "memoryUtilizationPercentage": 21.65 - } - }, - { - "timestamp": "2019-06-19T19:42:53Z", - "state": "pass", - "details": { - "memoryRssBytes": 1583378432.0, - "memoryUtilizationPercentage": 21.67 - } - }, - { - "timestamp": "2019-06-19T19:43:53Z", - "state": "pass", - "details": { - "memoryRssBytes": 1582354432.0, - "memoryUtilizationPercentage": 21.65 - } - } - ], - "is_state_change_consistent": true, - "should_send": true - }, - "node_cpu_utilization-d31bde3d37d8ad276db34f17aa1ec71a": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:41:53Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 191.90451009836065, - "cpuUtilizationPercentage": 9.6 - } - }, - { - "timestamp": "2019-06-19T19:42:53Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 192.56217935, - "cpuUtilizationPercentage": 9.63 - } - }, - { - "timestamp": "2019-06-19T19:43:53Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 192.88827258333333, - "cpuUtilizationPercentage": 9.64 - } - } - ], - "is_state_change_consistent": true, - "should_send": true - }, - "kube_api_status": { - "prev_sent_record_time": "2019-06-19T19:43:13Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:43:13Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "audit-id": "2116a117-abc9-4a72-a4d9-ea1bd111b127", - "content-type": "application/json", - "date": "Wed, 19 Jun 2019 19:45:15 GMT", - "connection": "close", - "transfer-encoding": "chunked", - "ResponseCode": "200" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "subscribed_capacity_cpu-e07de265a0a132be38e486491b78067c": { - "prev_sent_record_time": "2019-06-19T19:43:13Z", 
- "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:43:13Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "clusterCpuCapacity": 18000.0, - "clusterCpuRequests": 1606.0 - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "subscribed_capacity_memory-e07de265a0a132be38e486491b78067c": { - "prev_sent_record_time": "2019-06-19T19:43:13Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:43:13Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "clusterMemoryCapacity": 65701564416.0, - "clusterMemoryRequests": 2493513728.0 - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-df01a18f5517897dad95f0b999cc9d7c": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~azure-cni-networkmonitor", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~azure-cni-networkmonitor", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-68739da95e81bcf8ab005699ca388a4f": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~azure-ip-masq-agent", - 
"namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~azure-ip-masq-agent", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-391488c4aaa8dcbd64beca1405a617ad": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~blobfuse-flexvol-installer", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~blobfuse-flexvol-installer", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-88bb1b25c5f62cca4c5e335b1aa0a006": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~coredns", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~coredns", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-ffb237f52acc4a1cca7b61a080ad0bc7": { - 
"prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~heapster", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~heapster", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-dbd2d551aaa5aa496c8828e1561fc877": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~keyvault-flexvolume", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~keyvault-flexvolume", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-35ec6eb563b285b32803cc13fe31ac62": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-addon-manager-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": 
"pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-addon-manager-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-047a539a54ed0dba1c4d839202a66e71": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-apiserver-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-apiserver-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-27e541d084fab4b79e80381168eead29": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-controller-manager-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-controller-manager-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-b5563ebf777cfd7eba6b60219cc6290a": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - 
"old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~kube-proxy", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~kube-proxy", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-1f3148b833758c579f539036b695c2f0": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-scheduler-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-scheduler-k8s-master-15159885-0", - "namespace": "kube-system", - "workloadKind": "Pod" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-7c98478057c9066c415f39d201d13455": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kubernetes-dashboard", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - 
"podsReady": 1, - "workloadName": "kube-system~~kubernetes-dashboard", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-ed7573dae1701088a1d0cd3a8f492ed0": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~metrics-server", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~metrics-server", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-d886e78b79832e7337133cafd1a21bcf": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~omsagent", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "kube-system~~omsagent", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-d32a394b18c639dd53e936d042fe36a4": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": 
"2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~omsagent-rs", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~omsagent-rs", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload_pods_ready-76eff81892d597e12372a28b77a66a73": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~tiller-deploy", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~tiller-deploy", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "user_workload_pods_ready-ba138a5fed12485c414a6ce00a1d2626": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:44:14Z", - "state": "pass", - "details": { - "totalPods": 5, - "podsReady": 5, - "workloadName": "default~~diliprnodejsonlog", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - }, - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "totalPods": 5, - "podsReady": 5, - "workloadName": "default~~diliprnodejsonlog", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - } - ], - "is_state_change_consistent": false, 
- "should_send": false - }, - "node_condition-fbf8e2b103dce1d6b0adefda04bfc87c": { - "prev_sent_record_time": "2019-06-19T19:43:13Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:43:13Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. AppArmor enabled" - } - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "node_condition-c74648e5c2362fcdc295a88737fdb134": { - "prev_sent_record_time": "2019-06-19T19:43:13Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:43:13Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "node_condition-d31bde3d37d8ad276db34f17aa1ec71a": { - "prev_sent_record_time": "2019-06-19T19:43:13Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:43:13Z", - "prev_records": [ - { - "timestamp": "2019-06-19T19:45:15Z", - "state": "pass", - "details": { - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. AppArmor enabled" - } - } - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "node-agentpool1-agent-k8s-agentpool1-15159885-vmss000001": { - "prev_sent_record_time": "2019-06-19T19:41:44Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:41:44Z", - "prev_records": [ - { - "details": { - "pass": [ - "node_memory_utilization-c74648e5c2362fcdc295a88737fdb134", - "node_cpu_utilization-c74648e5c2362fcdc295a88737fdb134", - "node_condition-c74648e5c2362fcdc295a88737fdb134" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:45Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "agent_node_pool-agentpool1": { - "prev_sent_record_time": "2019-06-19T19:41:39Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:41:39Z", - "prev_records": [ - { - "details": { - "pass": [ - "node-agentpool1-agent-k8s-agentpool1-15159885-vmss000001", - "node-agentpool1-agent-k8s-agentpool1-15159885-vmss000000" - ] - }, - 
"state": "pass", - "timestamp": "2019-06-19T19:43:39Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "all_nodes": { - "prev_sent_record_time": "2019-06-19T19:41:39Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:41:39Z", - "prev_records": [ - { - "details": { - "pass": [ - "agent_node_pool-agentpool1", - "master_node_pool" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:39Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "cluster": { - "prev_sent_record_time": "2019-06-19T19:41:39Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:41:39Z", - "prev_records": [ - { - "details": { - "pass": [ - "all_nodes", - "k8s_infrastructure", - "all_workloads" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:41:39Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "node-agentpool1-agent-k8s-agentpool1-15159885-vmss000000": { - "prev_sent_record_time": "2019-06-19T19:41:39Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:41:39Z", - "prev_records": [ - { - "details": { - "none": [ - "node_memory_utilization-fbf8e2b103dce1d6b0adefda04bfc87c" - ], - "pass": [ - "node_cpu_utilization-fbf8e2b103dce1d6b0adefda04bfc87c", - "node_condition-fbf8e2b103dce1d6b0adefda04bfc87c" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:39Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "node--master-k8s-master-15159885-0": { - "prev_sent_record_time": "2019-06-19T19:42:02Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:42:02Z", - "prev_records": [ - { - "details": { - "pass": [ - "node_memory_utilization-d31bde3d37d8ad276db34f17aa1ec71a", - "node_cpu_utilization-d31bde3d37d8ad276db34f17aa1ec71a", - "node_condition-d31bde3d37d8ad276db34f17aa1ec71a" - ] - }, - "state": "pass", - "timestamp": 
"2019-06-19T19:44:02Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "master_node_pool": { - "prev_sent_record_time": "2019-06-19T19:42:02Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:42:02Z", - "prev_records": [ - { - "details": { - "pass": [ - "node--master-k8s-master-15159885-0" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:44:02Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "k8s_infrastructure": { - "prev_sent_record_time": "2019-06-19T19:43:13Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:43:13Z", - "prev_records": [ - { - "details": { - "pass": [ - "kube_api_status", - "system_workload-kube-system-azure-cni-networkmonitor", - "system_workload-kube-system-azure-ip-masq-agent", - "system_workload-kube-system-blobfuse-flexvol-installer", - "system_workload-kube-system-coredns", - "system_workload-kube-system-heapster", - "system_workload-kube-system-keyvault-flexvolume", - "system_workload-kube-system-kube-addon-manager-k8s-master-15159885-0", - "system_workload-kube-system-kube-apiserver-k8s-master-15159885-0", - "system_workload-kube-system-kube-controller-manager-k8s-master-15159885-0", - "system_workload-kube-system-kube-proxy", - "system_workload-kube-system-kube-scheduler-k8s-master-15159885-0", - "system_workload-kube-system-kubernetes-dashboard", - "system_workload-kube-system-metrics-server", - "system_workload-kube-system-omsagent", - "system_workload-kube-system-omsagent-rs", - "system_workload-kube-system-tiller-deploy" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "capacity": { - "prev_sent_record_time": "2019-06-19T19:43:13Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:43:13Z", - "prev_records": [ - { - "details": { - "pass": [ - 
"subscribed_capacity_cpu-e07de265a0a132be38e486491b78067c", - "subscribed_capacity_memory-e07de265a0a132be38e486491b78067c" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:45:15Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "all_workloads": { - "prev_sent_record_time": "2019-06-19T19:43:13Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-06-19T19:43:13Z", - "prev_records": [ - { - "details": { - "pass": [ - "capacity", - "all_namespaces" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-azure-cni-networkmonitor": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-df01a18f5517897dad95f0b999cc9d7c" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-azure-ip-masq-agent": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-68739da95e81bcf8ab005699ca388a4f" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-blobfuse-flexvol-installer": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-391488c4aaa8dcbd64beca1405a617ad" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - 
"is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-coredns": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-88bb1b25c5f62cca4c5e335b1aa0a006" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-heapster": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-ffb237f52acc4a1cca7b61a080ad0bc7" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-keyvault-flexvolume": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-dbd2d551aaa5aa496c8828e1561fc877" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-kube-addon-manager-k8s-master-15159885-0": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-35ec6eb563b285b32803cc13fe31ac62" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-kube-apiserver-k8s-master-15159885-0": { - "prev_sent_record_time": 
"2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-047a539a54ed0dba1c4d839202a66e71" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-kube-controller-manager-k8s-master-15159885-0": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-27e541d084fab4b79e80381168eead29" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-kube-proxy": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-b5563ebf777cfd7eba6b60219cc6290a" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-kube-scheduler-k8s-master-15159885-0": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-1f3148b833758c579f539036b695c2f0" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-kubernetes-dashboard": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { 
- "details": { - "pass": [ - "system_workload_pods_ready-7c98478057c9066c415f39d201d13455" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-metrics-server": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-ed7573dae1701088a1d0cd3a8f492ed0" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-omsagent": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-d886e78b79832e7337133cafd1a21bcf" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-omsagent-rs": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-d32a394b18c639dd53e936d042fe36a4" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "system_workload-kube-system-tiller-deploy": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "system_workload_pods_ready-76eff81892d597e12372a28b77a66a73" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, 
- "should_send": false - }, - "user_workload-default-diliprnodejsonlog": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "user_workload_pods_ready-ba138a5fed12485c414a6ce00a1d2626" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "namespace-default": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "user_workload-default-diliprnodejsonlog" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - }, - "all_namespaces": { - "prev_sent_record_time": "2019-07-17T18:02:15Z", - "old_state": "none", - "new_state": "pass", - "state_change_time": "2019-07-17T18:02:15Z", - "prev_records": [ - { - "details": { - "pass": [ - "namespace-default" - ] - }, - "state": "pass", - "timestamp": "2019-06-19T19:43:13Z" - } - ], - "is_state_change_consistent": false, - "should_send": false - } -} \ No newline at end of file diff --git a/health_records/second_daemon_set_signals.json b/health_records/second_daemon_set_signals.json deleted file mode 100644 index ba97c51f2..000000000 --- a/health_records/second_daemon_set_signals.json +++ /dev/null @@ -1,456 +0,0 @@ -[ - { - "MonitorId": "node_memory_utilization", - "MonitorInstanceId": "node_memory_utilization-af2f3c986ea63b47fc7d59b71abb37b8", - "Details": { - "timestamp": "2019-06-08T00:32:03Z", - "state": "pass", - "details": { - "memoryRssBytes": 927559680.0, - "memoryUtilizationPercentage": 25.7 - } - }, - "AgentCollectionTime": "2019-06-08T00:32:05Z", - "TimeFirstObserved": "2019-06-08T00:32:05Z", - "NodeName": "aks-nodepool1-19574989-1" - }, - { - "MonitorId": 
"node_cpu_utilization", - "MonitorInstanceId": "node_cpu_utilization-af2f3c986ea63b47fc7d59b71abb37b8", - "Details": { - "timestamp": "2019-06-08T00:32:03Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 251.33592345, - "cpuUtilizationPercentage": 25.13 - } - }, - "AgentCollectionTime": "2019-06-08T00:32:05Z", - "TimeFirstObserved": "2019-06-08T00:32:05Z", - "NodeName": "aks-nodepool1-19574989-1" - }, - { - "MonitorId": "node_memory_utilization", - "MonitorInstanceId": "node_memory_utilization-2b129a9a5633c0cf8f621601c6f8bb32", - "Details": { - "timestamp": "2019-06-08T00:32:37Z", - "state": "pass", - "details": { - "memoryRssBytes": 748142592.0, - "memoryUtilizationPercentage": 20.73 - } - }, - "AgentCollectionTime": "2019-06-08T00:32:42Z", - "TimeFirstObserved": "2019-06-08T00:32:42Z", - "NodeName": "aks-nodepool1-19574989-0" - }, - { - "MonitorId": "node_cpu_utilization", - "MonitorInstanceId": "node_cpu_utilization-2b129a9a5633c0cf8f621601c6f8bb32", - "Details": { - "timestamp": "2019-06-08T00:32:37Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 207.61622447540984, - "cpuUtilizationPercentage": 20.76 - } - }, - "AgentCollectionTime": "2019-06-08T00:32:42Z", - "TimeFirstObserved": "2019-06-08T00:32:42Z", - "NodeName": "aks-nodepool1-19574989-0" - }, - { - "MonitorId": "kube_api_status", - "MonitorInstanceId": "kube_api_status", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "content-type": "application/json", - "date": "Sat, 08 Jun 2019 00:32:49 GMT", - "connection": "close", - "transfer-encoding": "chunked", - "ResponseCode": "200" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "subscribed_capacity_cpu", - "MonitorInstanceId": 
"subscribed_capacity_cpu-bef5af9d919a51c49ba49d07f5784471", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "clusterCpuCapacity": 4000.0, - "clusterCpuRequests": 878.0 - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "subscribed_capacity_memory", - "MonitorInstanceId": "subscribed_capacity_memory-bef5af9d919a51c49ba49d07f5784471", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "clusterMemoryCapacity": 14436810752.0, - "clusterMemoryRequests": 1379926016.0 - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-1cab3e34a98cbb58a619846d017333e6", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~heapster", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-a0ed056caebdc40d1c78a49e87306eb8", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - 
"workloadName": "kube-system~~kube-dns-autoscaler", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-2fa184d5953cc675f553f22e673a3112", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~kube-dns-v20", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-3a08ca7a85c5f7b2eb912692aa0fe576", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~kube-proxy", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-b187658b280129d5d55e778e90bc76e7", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": 
"kube-system~~kube-svc-redirect", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-3cd871884683b44eaf3d4bcbac980fa9", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kubernetes-dashboard", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-e22a2867e6f46868e1ff14b04d05400a", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~metrics-server", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-e1a23f69ff6f2d20614f3253f1be61c6", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~omsagent", - 
"namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-b6bdfa57cc73499c0163debad116ea9e", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~omsagent-rs", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-a69167a8c8a128025d2a1b7adbbbf251", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~tunnelfront", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-18c49e512366cd770ea58fa6f2fbb72e", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "default~~diliprdeploymentnodeapps", - "namespace": "default", - "workloadKind": 
"ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-c36b41dc3ddfd8830e5cef31b5c2738b", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "default~~nginx-deployment", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-8be6589a87324bd8010f0583b6e3c0a1", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "default~~rss-site", - "namespace": "default", - "workloadKind": "Pod" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-d8ce32d942df13b9f4b96ed0fef8efd9", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 10, - "podsReady": 10, - "workloadName": "default~~vishwadeploymentnodeapps", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - 
"TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-ea6e90712222c453f12f326f9a382e8e", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "test~~nginx-deployment", - "namespace": "test", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-2b129a9a5633c0cf8f621601c6f8bb32", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "NetworkUnavailable": { - "Reason": "RouteCreated", - "Message": "RouteController created a route" - }, - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "NodeName": "aks-nodepool1-19574989-0" - }, - { - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-af2f3c986ea63b47fc7d59b71abb37b8", - "Details": { - "timestamp": "2019-06-08T00:32:49Z", - "state": "pass", - "details": { - "NetworkUnavailable": { - "Reason": "RouteCreated", - "Message": "RouteController created a route" - }, - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - }, - "AgentCollectionTime": "2019-06-08T00:32:49Z", - "TimeFirstObserved": "2019-06-08T00:32:49Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "NodeName": "aks-nodepool1-19574989-1" - } -] \ No newline at end of file diff --git a/health_records/third_daemon_set_signals.json b/health_records/third_daemon_set_signals.json deleted file mode 100644 index cae617283..000000000 --- a/health_records/third_daemon_set_signals.json +++ /dev/null @@ -1,456 +0,0 @@ -[ - { - "MonitorId": "node_memory_utilization", - "MonitorInstanceId": "node_memory_utilization-af2f3c986ea63b47fc7d59b71abb37b8", - "Details": { - "timestamp": "2019-06-08T00:33:03Z", - "state": "pass", - "details": { - "memoryRssBytes": 930779136.0, - "memoryUtilizationPercentage": 25.79 - } - }, - "AgentCollectionTime": "2019-06-08T00:33:05Z", - "TimeFirstObserved": "2019-06-08T00:33:05Z", - "NodeName": "aks-nodepool1-19574989-1" - }, - { - "MonitorId": "node_cpu_utilization", - "MonitorInstanceId": "node_cpu_utilization-af2f3c986ea63b47fc7d59b71abb37b8", - "Details": { - "timestamp": "2019-06-08T00:33:03Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 254.57922858333333, - "cpuUtilizationPercentage": 25.46 - } - }, - "AgentCollectionTime": "2019-06-08T00:33:05Z", - "TimeFirstObserved": "2019-06-08T00:33:05Z", - "NodeName": "aks-nodepool1-19574989-1" - }, - { - "MonitorId": "node_memory_utilization", - "MonitorInstanceId": "node_memory_utilization-2b129a9a5633c0cf8f621601c6f8bb32", - "Details": { - "timestamp": "2019-06-08T00:33:37Z", - "state": "pass", - "details": { - "memoryRssBytes": 749514752.0, - "memoryUtilizationPercentage": 20.77 - } - }, - "AgentCollectionTime": "2019-06-08T00:33:42Z", - "TimeFirstObserved": "2019-06-08T00:33:42Z", - "NodeName": "aks-nodepool1-19574989-0" - }, - { - "MonitorId": "node_cpu_utilization", - 
"MonitorInstanceId": "node_cpu_utilization-2b129a9a5633c0cf8f621601c6f8bb32", - "Details": { - "timestamp": "2019-06-08T00:33:37Z", - "state": "pass", - "details": { - "cpuUsageMillicores": 214.84893721666666, - "cpuUtilizationPercentage": 21.48 - } - }, - "AgentCollectionTime": "2019-06-08T00:33:42Z", - "TimeFirstObserved": "2019-06-08T00:33:42Z", - "NodeName": "aks-nodepool1-19574989-0" - }, - { - "MonitorId": "kube_api_status", - "MonitorInstanceId": "kube_api_status", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "content-type": "application/json", - "date": "Sat, 08 Jun 2019 00:33:50 GMT", - "connection": "close", - "transfer-encoding": "chunked", - "ResponseCode": "200" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "subscribed_capacity_cpu", - "MonitorInstanceId": "subscribed_capacity_cpu-bef5af9d919a51c49ba49d07f5784471", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "clusterCpuCapacity": 4000.0, - "clusterCpuRequests": 878.0 - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "subscribed_capacity_memory", - "MonitorInstanceId": "subscribed_capacity_memory-bef5af9d919a51c49ba49d07f5784471", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "clusterMemoryCapacity": 14436810752.0, - "clusterMemoryRequests": 1379926016.0 - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - 
"ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-1cab3e34a98cbb58a619846d017333e6", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~heapster", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-a0ed056caebdc40d1c78a49e87306eb8", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kube-dns-autoscaler", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-2fa184d5953cc675f553f22e673a3112", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~kube-dns-v20", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-3a08ca7a85c5f7b2eb912692aa0fe576", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~kube-proxy", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-b187658b280129d5d55e778e90bc76e7", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~kube-svc-redirect", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-3cd871884683b44eaf3d4bcbac980fa9", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~kubernetes-dashboard", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-e22a2867e6f46868e1ff14b04d05400a", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~metrics-server", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-e1a23f69ff6f2d20614f3253f1be61c6", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "kube-system~~omsagent", - "namespace": "kube-system", - "workloadKind": "DaemonSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-b6bdfa57cc73499c0163debad116ea9e", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~omsagent-rs", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "system_workload_pods_ready", - "MonitorInstanceId": "system_workload_pods_ready-a69167a8c8a128025d2a1b7adbbbf251", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "kube-system~~tunnelfront", - "namespace": "kube-system", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-18c49e512366cd770ea58fa6f2fbb72e", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "default~~diliprdeploymentnodeapps", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-c36b41dc3ddfd8830e5cef31b5c2738b", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 3, - "podsReady": 3, - "workloadName": "default~~nginx-deployment", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-8be6589a87324bd8010f0583b6e3c0a1", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 1, - "podsReady": 1, - "workloadName": "default~~rss-site", - "namespace": "default", - "workloadKind": "Pod" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-d8ce32d942df13b9f4b96ed0fef8efd9", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 10, - "podsReady": 10, - "workloadName": "default~~vishwadeploymentnodeapps", - "namespace": "default", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "user_workload_pods_ready", - "MonitorInstanceId": "user_workload_pods_ready-ea6e90712222c453f12f326f9a382e8e", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "totalPods": 2, - "podsReady": 2, - "workloadName": "test~~nginx-deployment", - "namespace": "test", - "workloadKind": "ReplicaSet" - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-2b129a9a5633c0cf8f621601c6f8bb32", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "NetworkUnavailable": { - "Reason": "RouteCreated", - "Message": "RouteController created a route" - }, - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "NodeName": "aks-nodepool1-19574989-0" - }, - { - "MonitorId": "node_condition", - "MonitorInstanceId": "node_condition-af2f3c986ea63b47fc7d59b71abb37b8", - "Details": { - "timestamp": "2019-06-08T00:33:51Z", - "state": "pass", - "details": { - "NetworkUnavailable": { - "Reason": "RouteCreated", - "Message": "RouteController created a route" - }, - "OutOfDisk": { - "Reason": "KubeletHasSufficientDisk", - "Message": "kubelet has sufficient disk space available" - }, - "MemoryPressure": { - "Reason": "KubeletHasSufficientMemory", - "Message": "kubelet has sufficient memory available" - }, - "DiskPressure": { - "Reason": "KubeletHasNoDiskPressure", - "Message": "kubelet has no disk pressure" - }, - "PIDPressure": { - "Reason": "KubeletHasSufficientPID", - "Message": "kubelet has sufficient PID available" - }, - "Ready": { - "Reason": "KubeletReady", - "Message": "kubelet is posting ready status. 
AppArmor enabled" - } - } - }, - "AgentCollectionTime": "2019-06-08T00:33:51Z", - "TimeFirstObserved": "2019-06-08T00:33:51Z", - "ClusterId": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test", - "NodeName": "aks-nodepool1-19574989-1" - } -] \ No newline at end of file diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 58a8bfba2..6d810a0e2 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -76,7 +76,7 @@ heartbeat_type tcp - host replicaset-service.kube-system + host healthmodel-replicaset-service.kube-system port 25227 diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index a6de00491..29311d35c 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -125,8 +125,9 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/health/aggregate_monitor.rb; source/code/plugin/health/aggregate_monitor.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/aggregate_monitor_instance_id_labels.rb; source/code/plugin/health/aggregate_monitor_instance_id_labels.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/agg_monitor_id_labels.rb; source/code/plugin/health/agg_monitor_id_labels.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/aggregate_monitor_state_finalizer.rb; source/code/plugin/health/aggregate_monitor_state_finalizer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/cluster_health_state.rb; source/code/plugin/health/cluster_health_state.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_hierarchy_builder.rb; source/code/plugin/health/health_hierarchy_builder.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_kubernetes_resources.rb; source/code/plugin/health/health_kubernetes_resources.rb; 644; root; root 
/opt/microsoft/omsagent/plugin/health/health_kube_api_down_handler.rb; source/code/plugin/health/health_kube_api_down_handler.rb; 644; root; root @@ -142,10 +143,9 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/health/health_monitor_provider.rb; source/code/plugin/health/health_monitor_provider.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_record.rb; source/code/plugin/health/health_monitor_record.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_state.rb; source/code/plugin/health/health_monitor_state.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb; source/code/plugin/health/health_monitor_helpers.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_utils.rb; source/code/plugin/health/health_monitor_utils.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_signal_reducer.rb; source/code/plugin/health/health_signal_reducer.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_state_deserializer.rb; source/code/plugin/health/health_state_deserializer.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_state_serializer.rb; source/code/plugin/health/health_state_serializer.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/monitor_factory.rb; source/code/plugin/health/monitor_factory.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/monitor_set.rb; source/code/plugin/health/monitor_set.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/unit_monitor.rb; source/code/plugin/health/unit_monitor.rb; 644; root; root diff --git a/inventory/aks-engine/deployments.json b/inventory/aks-engine/deployments.json deleted file mode 100644 index a356331b0..000000000 --- a/inventory/aks-engine/deployments.json +++ /dev/null @@ -1,1139 +0,0 @@ -{ - "items": [ - { - "metadata": { - "name": "diliprnodejsonlog", - "namespace": "default", - "selfLink": 
"/apis/extensions/v1beta1/namespaces/default/deployments/diliprnodejsonlog", - "uid": "ecc14148-66ec-11e9-a358-000d3a53d49f", - "resourceVersion": "7362279", - "generation": 1, - "creationTimestamp": "2019-04-24T23:58:58Z", - "labels": { - "VishwadeploymentLabel1": "d1", - "VishwadeploymentLabel2": "d2" - }, - "annotations": { - "deployment.kubernetes.io/revision": "1", - "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"apps/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"VishwadeploymentLabel1\":\"d1\",\"VishwadeploymentLabel2\":\"d2\"},\"name\":\"diliprnodejsonlog\",\"namespace\":\"default\"},\"spec\":{\"replicas\":5,\"selector\":{\"matchLabels\":{\"app\":\"diliprnodejsonlog\"}},\"template\":{\"metadata\":{\"labels\":{\"VishwaPodLabel1\":\"p1\",\"VishwaPodLabel2\":\"p2\",\"app\":\"diliprnodejsonlog\"}},\"spec\":{\"containers\":[{\"image\":\"rdilip83/jsonlogger:v12\",\"name\":\"diliprnodejsonlogpodapp\",\"resources\":{\"limits\":{\"cpu\":\"20m\",\"memory\":\"60Mi\"},\"requests\":{\"cpu\":\"10m\",\"memory\":\"30Mi\"}}}]}}}}\n" - } - }, - "spec": { - "replicas": 5, - "selector": { - "matchLabels": { - "app": "diliprnodejsonlog" - } - }, - "template": { - "metadata": { - "creationTimestamp": null, - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "diliprnodejsonlog" - } - }, - "spec": { - "containers": [ - { - "name": "diliprnodejsonlogpodapp", - "image": "rdilip83/jsonlogger:v12", - "resources": { - "limits": { - "cpu": "20m", - "memory": "60Mi" - }, - "requests": { - "cpu": "10m", - "memory": "30Mi" - } - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "securityContext": {}, - "schedulerName": "default-scheduler" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - 
"maxUnavailable": "25%", - "maxSurge": "25%" - } - }, - "revisionHistoryLimit": 2, - "progressDeadlineSeconds": 600 - }, - "status": { - "observedGeneration": 1, - "replicas": 5, - "updatedReplicas": 5, - "readyReplicas": 5, - "availableReplicas": 5, - "conditions": [ - { - "type": "Progressing", - "status": "True", - "lastUpdateTime": "2019-04-24T23:59:36Z", - "lastTransitionTime": "2019-04-24T23:58:58Z", - "reason": "NewReplicaSetAvailable", - "message": "ReplicaSet \"diliprnodejsonlog-5959b88bf7\" has successfully progressed." - }, - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-06-14T07:59:50Z", - "lastTransitionTime": "2019-06-14T07:59:50Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum availability." - } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "coredns", - "namespace": "kube-system", - "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/coredns", - "uid": "3c60dd90-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "675", - "generation": 1, - "creationTimestamp": "2019-04-17T04:28:22Z", - "labels": { - "addonmanager.kubernetes.io/mode": "Reconcile", - "k8s-app": "kube-dns", - "kubernetes.io/cluster-service": "true", - "kubernetes.io/name": "CoreDNS" - }, - "annotations": { - "deployment.kubernetes.io/revision": "1", - "kubectl.kubernetes.io/last-applied-configuration": 
"{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"kube-dns\",\"kubernetes.io/cluster-service\":\"true\",\"kubernetes.io/name\":\"CoreDNS\"},\"name\":\"coredns\",\"namespace\":\"kube-system\"},\"spec\":{\"selector\":{\"matchLabels\":{\"k8s-app\":\"kube-dns\"}},\"strategy\":{\"rollingUpdate\":{\"maxUnavailable\":1},\"type\":\"RollingUpdate\"},\"template\":{\"metadata\":{\"annotations\":{\"seccomp.security.alpha.kubernetes.io/pod\":\"docker/default\"},\"labels\":{\"k8s-app\":\"kube-dns\"}},\"spec\":{\"affinity\":{\"podAntiAffinity\":{\"preferredDuringSchedulingIgnoredDuringExecution\":[{\"podAffinityTerm\":{\"labelSelector\":{\"matchExpressions\":[{\"key\":\"k8s-app\",\"operator\":\"In\",\"values\":[\"kube-dns\"]}]},\"topologyKey\":\"failure-domain.beta.kubernetes.io/zone\"},\"weight\":10},{\"podAffinityTerm\":{\"labelSelector\":{\"matchExpressions\":[{\"key\":\"k8s-app\",\"operator\":\"In\",\"values\":[\"kube-dns\"]}]},\"topologyKey\":\"kubernetes.io/hostname\"},\"weight\":5}]}},\"containers\":[{\"args\":[\"-conf\",\"/etc/coredns/Corefile\"],\"image\":\"k8s.gcr.io/coredns:1.2.2\",\"imagePullPolicy\":\"IfNotPresent\",\"livenessProbe\":{\"failureThreshold\":5,\"httpGet\":{\"path\":\"/health\",\"port\":8080,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":60,\"successThreshold\":1,\"timeoutSeconds\":5},\"name\":\"coredns\",\"ports\":[{\"containerPort\":53,\"name\":\"dns\",\"protocol\":\"UDP\"},{\"containerPort\":53,\"name\":\"dns-tcp\",\"protocol\":\"TCP\"},{\"containerPort\":9153,\"name\":\"metrics\",\"protocol\":\"TCP\"}],\"resources\":{\"limits\":{\"memory\":\"170Mi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"70Mi\"}},\"securityContext\":{\"allowPrivilegeEscalation\":false,\"capabilities\":{\"add\":[\"NET_BIND_SERVICE\"],\"drop\":[\"all\"]},\"readOnlyRootFilesystem\":true},\"volumeMounts\":[{\"mountPath\":\"/etc/coredns\",\"name\":\"config-volume\"
,\"readOnly\":true},{\"mountPath\":\"/tmp\",\"name\":\"tmp\"}]}],\"dnsPolicy\":\"Default\",\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"coredns\",\"tolerations\":[{\"effect\":\"NoSchedule\",\"key\":\"node-role.kubernetes.io/master\"},{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Exists\"},{\"effect\":\"NoExecute\",\"operator\":\"Exists\"},{\"effect\":\"NoSchedule\",\"operator\":\"Exists\"}],\"volumes\":[{\"emptyDir\":{},\"name\":\"tmp\"},{\"configMap\":{\"items\":[{\"key\":\"Corefile\",\"path\":\"Corefile\"}],\"name\":\"coredns\"},\"name\":\"config-volume\"}]}}}}\n" - } - }, - "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "k8s-app": "kube-dns" - } - }, - "template": { - "metadata": { - "creationTimestamp": null, - "labels": { - "k8s-app": "kube-dns" - }, - "annotations": { - "seccomp.security.alpha.kubernetes.io/pod": "docker/default" - } - }, - "spec": { - "volumes": [ - { - "name": "tmp", - "emptyDir": {} - }, - { - "name": "config-volume", - "configMap": { - "name": "coredns", - "items": [ - { - "key": "Corefile", - "path": "Corefile" - } - ], - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "coredns", - "image": "k8s.gcr.io/coredns:1.2.2", - "args": [ - "-conf", - "/etc/coredns/Corefile" - ], - "ports": [ - { - "name": "dns", - "containerPort": 53, - "protocol": "UDP" - }, - { - "name": "dns-tcp", - "containerPort": 53, - "protocol": "TCP" - }, - { - "name": "metrics", - "containerPort": 9153, - "protocol": "TCP" - } - ], - "resources": { - "limits": { - "memory": "170Mi" - }, - "requests": { - "cpu": "100m", - "memory": "70Mi" - } - }, - "volumeMounts": [ - { - "name": "config-volume", - "readOnly": true, - "mountPath": "/etc/coredns" - }, - { - "name": "tmp", - "mountPath": "/tmp" - } - ], - "livenessProbe": { - "httpGet": { - "path": "/health", - "port": 8080, - "scheme": "HTTP" - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 5, - 
"periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 5 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "capabilities": { - "add": [ - "NET_BIND_SERVICE" - ], - "drop": [ - "all" - ] - }, - "readOnlyRootFilesystem": true, - "allowPrivilegeEscalation": false, - "procMount": "Default" - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "Default", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "coredns", - "serviceAccount": "coredns", - "securityContext": {}, - "affinity": { - "podAntiAffinity": { - "preferredDuringSchedulingIgnoredDuringExecution": [ - { - "weight": 10, - "podAffinityTerm": { - "labelSelector": { - "matchExpressions": [ - { - "key": "k8s-app", - "operator": "In", - "values": [ - "kube-dns" - ] - } - ] - }, - "topologyKey": "failure-domain.beta.kubernetes.io/zone" - } - }, - { - "weight": 5, - "podAffinityTerm": { - "labelSelector": { - "matchExpressions": [ - { - "key": "k8s-app", - "operator": "In", - "values": [ - "kube-dns" - ] - } - ] - }, - "topologyKey": "kubernetes.io/hostname" - } - } - ] - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node-role.kubernetes.io/master", - "effect": "NoSchedule" - }, - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "operator": "Exists", - "effect": "NoExecute" - }, - { - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priorityClassName": "system-node-critical" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": 1, - "maxSurge": 1 - } - }, - "revisionHistoryLimit": 10, - "progressDeadlineSeconds": 2147483647 - }, - "status": { - "observedGeneration": 1, - "replicas": 1, - "updatedReplicas": 1, - "readyReplicas": 1, - "availableReplicas": 1, - "conditions": [ - { - "type": "Available", - "status": "True", - 
"lastUpdateTime": "2019-04-17T04:28:23Z", - "lastTransitionTime": "2019-04-17T04:28:23Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum availability." - } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "heapster", - "namespace": "kube-system", - "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/heapster", - "uid": "3981201f-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "7362276", - "generation": 1, - "creationTimestamp": "2019-04-17T04:28:18Z", - "labels": { - "addonmanager.kubernetes.io/mode": "EnsureExists", - "k8s-app": "heapster", - "kubernetes.io/cluster-service": "true" - }, - "annotations": { - "deployment.kubernetes.io/revision": "1" - } - }, - "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "k8s-app": "heapster" - } - }, - "template": { - "metadata": { - "creationTimestamp": null, - "labels": { - "k8s-app": "heapster" - }, - "annotations": { - "scheduler.alpha.kubernetes.io/critical-pod": "" - } - }, - "spec": { - "volumes": [ - { - "name": "heapster-config-volume", - "configMap": { - "name": "heapster-config", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "heapster", - "image": "k8s.gcr.io/heapster-amd64:v1.5.4", - "command": [ - "/heapster", - "--source=kubernetes.summary_api:''" - ], - "resources": { - "limits": { - "cpu": "88m", - "memory": "204Mi" - }, - "requests": { - "cpu": "88m", - "memory": "204Mi" - } - }, - "livenessProbe": { - "httpGet": { - "path": "/healthz", - "port": 8082, - "scheme": "HTTP" - }, - "initialDelaySeconds": 180, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - }, - { - "name": "heapster-nanny", - "image": "k8s.gcr.io/addon-resizer:1.8.4", - "command": [ - "/pod_nanny", - "--config-dir=/etc/config", - 
"--cpu=80m", - "--extra-cpu=0.5m", - "--memory=140Mi", - "--extra-memory=4Mi", - "--threshold=5", - "--deployment=heapster", - "--container=heapster", - "--poll-period=300000", - "--estimator=exponential" - ], - "env": [ - { - "name": "MY_POD_NAME", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "metadata.name" - } - } - }, - { - "name": "MY_POD_NAMESPACE", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "metadata.namespace" - } - } - } - ], - "resources": { - "limits": { - "cpu": "88m", - "memory": "204Mi" - }, - "requests": { - "cpu": "88m", - "memory": "204Mi" - } - }, - "volumeMounts": [ - { - "name": "heapster-config-volume", - "mountPath": "/etc/config" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "heapster", - "serviceAccount": "heapster", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - } - ], - "priorityClassName": "system-node-critical" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": 1, - "maxSurge": 1 - } - }, - "revisionHistoryLimit": 10, - "progressDeadlineSeconds": 2147483647 - }, - "status": { - "observedGeneration": 1, - "replicas": 1, - "updatedReplicas": 1, - "readyReplicas": 1, - "availableReplicas": 1, - "conditions": [ - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-04-17T04:28:18Z", - "lastTransitionTime": "2019-04-17T04:28:18Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum availability." 
- } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "kubernetes-dashboard", - "namespace": "kube-system", - "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/kubernetes-dashboard", - "uid": "3cef280a-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "5110361", - "generation": 1, - "creationTimestamp": "2019-04-17T04:28:23Z", - "labels": { - "addonmanager.kubernetes.io/mode": "Reconcile", - "k8s-app": "kubernetes-dashboard", - "kubernetes.io/cluster-service": "true" - }, - "annotations": { - "deployment.kubernetes.io/revision": "1", - "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"kubernetes-dashboard\",\"kubernetes.io/cluster-service\":\"true\"},\"name\":\"kubernetes-dashboard\",\"namespace\":\"kube-system\"},\"spec\":{\"replicas\":1,\"selector\":{\"matchLabels\":{\"k8s-app\":\"kubernetes-dashboard\"}},\"template\":{\"metadata\":{\"labels\":{\"k8s-app\":\"kubernetes-dashboard\"}},\"spec\":{\"containers\":[{\"args\":[\"--auto-generate-certificates\",\"--heapster-host=http://heapster.kube-system:80\"],\"image\":\"k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1\",\"imagePullPolicy\":\"IfNotPresent\",\"livenessProbe\":{\"httpGet\":{\"path\":\"/\",\"port\":8443,\"scheme\":\"HTTPS\"},\"initialDelaySeconds\":30,\"timeoutSeconds\":30},\"name\":\"kubernetes-dashboard\",\"ports\":[{\"containerPort\":8443,\"protocol\":\"TCP\"}],\"resources\":{\"limits\":{\"cpu\":\"300m\",\"memory\":\"150Mi\"},\"requests\":{\"cpu\":\"300m\",\"memory\":\"150Mi\"}},\"volumeMounts\":[{\"mountPath\":\"/certs\",\"name\":\"kubernetes-dashboard-certs\"}]}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"serviceAccountName\":\"kubernetes-dashboard\",\"volumes\":[{\"emptyDir\":{},\"name\":\"kubernetes-dashboard-certs\"}]}}}}\n" - } - }, - 
"spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "k8s-app": "kubernetes-dashboard" - } - }, - "template": { - "metadata": { - "creationTimestamp": null, - "labels": { - "k8s-app": "kubernetes-dashboard" - } - }, - "spec": { - "volumes": [ - { - "name": "kubernetes-dashboard-certs", - "emptyDir": {} - } - ], - "containers": [ - { - "name": "kubernetes-dashboard", - "image": "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1", - "args": [ - "--auto-generate-certificates", - "--heapster-host=http://heapster.kube-system:80" - ], - "ports": [ - { - "containerPort": 8443, - "protocol": "TCP" - } - ], - "resources": { - "limits": { - "cpu": "300m", - "memory": "150Mi" - }, - "requests": { - "cpu": "300m", - "memory": "150Mi" - } - }, - "volumeMounts": [ - { - "name": "kubernetes-dashboard-certs", - "mountPath": "/certs" - } - ], - "livenessProbe": { - "httpGet": { - "path": "/", - "port": 8443, - "scheme": "HTTPS" - }, - "initialDelaySeconds": 30, - "timeoutSeconds": 30, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "kubernetes-dashboard", - "serviceAccount": "kubernetes-dashboard", - "securityContext": {}, - "schedulerName": "default-scheduler" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": 1, - "maxSurge": 1 - } - }, - "revisionHistoryLimit": 10, - "progressDeadlineSeconds": 2147483647 - }, - "status": { - "observedGeneration": 1, - "replicas": 1, - "updatedReplicas": 1, - "readyReplicas": 1, - "availableReplicas": 1, - "conditions": [ - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-04-17T04:28:24Z", - "lastTransitionTime": "2019-04-17T04:28:24Z", 
- "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum availability." - } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "metrics-server", - "namespace": "kube-system", - "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/metrics-server", - "uid": "3caeca05-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "5110358", - "generation": 1, - "creationTimestamp": "2019-04-17T04:28:23Z", - "labels": { - "addonmanager.kubernetes.io/mode": "Reconcile", - "k8s-app": "metrics-server", - "kubernetes.io/cluster-service": "true" - }, - "annotations": { - "deployment.kubernetes.io/revision": "1", - "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"metrics-server\",\"kubernetes.io/cluster-service\":\"true\"},\"name\":\"metrics-server\",\"namespace\":\"kube-system\"},\"spec\":{\"selector\":{\"matchLabels\":{\"k8s-app\":\"metrics-server\"}},\"template\":{\"metadata\":{\"labels\":{\"k8s-app\":\"metrics-server\"},\"name\":\"metrics-server\"},\"spec\":{\"containers\":[{\"command\":[\"/metrics-server\",\"--source=kubernetes.summary_api:''\"],\"image\":\"k8s.gcr.io/metrics-server-amd64:v0.2.1\",\"imagePullPolicy\":\"IfNotPresent\",\"name\":\"metrics-server\"}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"serviceAccountName\":\"metrics-server\"}}}}\n" - } - }, - "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "k8s-app": "metrics-server" - } - }, - "template": { - "metadata": { - "name": "metrics-server", - "creationTimestamp": null, - "labels": { - "k8s-app": "metrics-server" - } - }, - "spec": { - "containers": [ - { - "name": "metrics-server", - "image": "k8s.gcr.io/metrics-server-amd64:v0.2.1", - "command": [ - "/metrics-server", - "--source=kubernetes.summary_api:''" - ], - 
"resources": {}, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "metrics-server", - "serviceAccount": "metrics-server", - "securityContext": {}, - "schedulerName": "default-scheduler" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": 1, - "maxSurge": 1 - } - }, - "revisionHistoryLimit": 10, - "progressDeadlineSeconds": 2147483647 - }, - "status": { - "observedGeneration": 1, - "replicas": 1, - "updatedReplicas": 1, - "readyReplicas": 1, - "availableReplicas": 1, - "conditions": [ - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-04-17T04:28:23Z", - "lastTransitionTime": "2019-04-17T04:28:23Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum availability." 
- } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "omsagent-rs", - "namespace": "kube-system", - "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/omsagent-rs", - "uid": "83ab437f-925b-11e9-a358-000d3a53d49f", - "resourceVersion": "7987241", - "generation": 1, - "creationTimestamp": "2019-06-19T06:28:56Z", - "labels": { - "rsName": "omsagent-rs" - }, - "annotations": { - "deployment.kubernetes.io/revision": "1", - "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"name\":\"omsagent-rs\",\"namespace\":\"kube-system\"},\"spec\":{\"replicas\":1,\"selector\":{\"matchLabels\":{\"rsName\":\"omsagent-rs\"}},\"strategy\":{\"type\":\"RollingUpdate\"},\"template\":{\"metadata\":{\"annotations\":{\"agentVersion\":\"1.10.0.1\",\"dockerProviderVersion\":\"5.0.0-1\",\"schema-versions\":\"v1\"},\"labels\":{\"rsName\":\"omsagent-rs\"}},\"spec\":{\"containers\":[{\"env\":[{\"name\":\"AKS_RESOURCE_ID\",\"value\":\"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview\"},{\"name\":\"AKS_REGION\",\"value\":\"eastus\"},{\"name\":\"DISABLE_KUBE_SYSTEM_LOG_COLLECTION\",\"value\":\"true\"},{\"name\":\"CONTROLLER_TYPE\",\"value\":\"ReplicaSet\"},{\"name\":\"NODE_IP\",\"valueFrom\":{\"fieldRef\":{\"fieldPath\":\"status.hostIP\"}}}],\"image\":\"rdilip83/healthpreview06192019\",\"imagePullPolicy\":\"IfNotPresent\",\"livenessProbe\":{\"exec\":{\"command\":[\"/bin/bash\",\"-c\",\"ps -ef | grep 
main\"]},\"initialDelaySeconds\":60,\"periodSeconds\":60},\"name\":\"omsagent\",\"ports\":[{\"containerPort\":25225,\"protocol\":\"TCP\"},{\"containerPort\":25224,\"protocol\":\"UDP\"},{\"containerPort\":25235,\"name\":\"in-rs-tcp\",\"protocol\":\"TCP\"}],\"resources\":{\"limits\":{\"cpu\":\"150m\",\"memory\":\"500Mi\"},\"requests\":{\"cpu\":\"50m\",\"memory\":\"175Mi\"}},\"securityContext\":{\"privileged\":true},\"volumeMounts\":[{\"mountPath\":\"/var/run/host\",\"name\":\"docker-sock\"},{\"mountPath\":\"/var/log\",\"name\":\"host-log\"},{\"mountPath\":\"/var/lib/docker/containers\",\"name\":\"containerlog-path\"},{\"mountPath\":\"/etc/kubernetes/host\",\"name\":\"azure-json-path\"},{\"mountPath\":\"/etc/omsagent-secret\",\"name\":\"omsagent-secret\",\"readOnly\":true},{\"mountPath\":\"/etc/config\",\"name\":\"omsagent-rs-config\"},{\"mountPath\":\"/etc/config/settings\",\"name\":\"settings-vol-config\",\"readOnly\":true},{\"mountPath\":\"/mnt/azure\",\"name\":\"azurefile-pv\"}]}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\",\"kubernetes.io/role\":\"agent\"},\"serviceAccountName\":\"omsagent\",\"volumes\":[{\"hostPath\":{\"path\":\"/var/run\"},\"name\":\"docker-sock\"},{\"hostPath\":{\"path\":\"/etc/hostname\"},\"name\":\"container-hostname\"},{\"hostPath\":{\"path\":\"/var/log\"},\"name\":\"host-log\"},{\"hostPath\":{\"path\":\"/var/lib/docker/containers\"},\"name\":\"containerlog-path\"},{\"hostPath\":{\"path\":\"/etc/kubernetes\"},\"name\":\"azure-json-path\"},{\"name\":\"omsagent-secret\",\"secret\":{\"secretName\":\"omsagent-secret\"}},{\"configMap\":{\"name\":\"omsagent-rs-config\"},\"name\":\"omsagent-rs-config\"},{\"configMap\":{\"name\":\"container-azm-ms-agentconfig\",\"optional\":true},\"name\":\"settings-vol-config\"},{\"name\":\"azurefile-pv\",\"persistentVolumeClaim\":{\"claimName\":\"azurefile\"}}]}}}}\n" - } - }, - "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "rsName": "omsagent-rs" - } - }, - "template": { - "metadata": 
{ - "creationTimestamp": null, - "labels": { - "rsName": "omsagent-rs" - }, - "annotations": { - "agentVersion": "1.10.0.1", - "dockerProviderVersion": "5.0.0-1", - "schema-versions": "v1" - } - }, - "spec": { - "volumes": [ - { - "name": "docker-sock", - "hostPath": { - "path": "/var/run", - "type": "" - } - }, - { - "name": "container-hostname", - "hostPath": { - "path": "/etc/hostname", - "type": "" - } - }, - { - "name": "host-log", - "hostPath": { - "path": "/var/log", - "type": "" - } - }, - { - "name": "containerlog-path", - "hostPath": { - "path": "/var/lib/docker/containers", - "type": "" - } - }, - { - "name": "azure-json-path", - "hostPath": { - "path": "/etc/kubernetes", - "type": "" - } - }, - { - "name": "omsagent-secret", - "secret": { - "secretName": "omsagent-secret", - "defaultMode": 420 - } - }, - { - "name": "omsagent-rs-config", - "configMap": { - "name": "omsagent-rs-config", - "defaultMode": 420 - } - }, - { - "name": "settings-vol-config", - "configMap": { - "name": "container-azm-ms-agentconfig", - "defaultMode": 420, - "optional": true - } - }, - { - "name": "azurefile-pv", - "persistentVolumeClaim": { - "claimName": "azurefile" - } - } - ], - "containers": [ - { - "name": "omsagent", - "image": "rdilip83/healthpreview06192019", - "ports": [ - { - "containerPort": 25225, - "protocol": "TCP" - }, - { - "containerPort": 25224, - "protocol": "UDP" - }, - { - "name": "in-rs-tcp", - "containerPort": 25235, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "AKS_RESOURCE_ID", - "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "name": "AKS_REGION", - "value": "eastus" - }, - { - "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", - "value": "true" - }, - { - "name": "CONTROLLER_TYPE", - "value": "ReplicaSet" - }, - { - "name": "NODE_IP", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": 
"status.hostIP" - } - } - } - ], - "resources": { - "limits": { - "cpu": "150m", - "memory": "500Mi" - }, - "requests": { - "cpu": "50m", - "memory": "175Mi" - } - }, - "volumeMounts": [ - { - "name": "docker-sock", - "mountPath": "/var/run/host" - }, - { - "name": "host-log", - "mountPath": "/var/log" - }, - { - "name": "containerlog-path", - "mountPath": "/var/lib/docker/containers" - }, - { - "name": "azure-json-path", - "mountPath": "/etc/kubernetes/host" - }, - { - "name": "omsagent-secret", - "readOnly": true, - "mountPath": "/etc/omsagent-secret" - }, - { - "name": "omsagent-rs-config", - "mountPath": "/etc/config" - }, - { - "name": "settings-vol-config", - "readOnly": true, - "mountPath": "/etc/config/settings" - }, - { - "name": "azurefile-pv", - "mountPath": "/mnt/azure" - } - ], - "livenessProbe": { - "exec": { - "command": [ - "/bin/bash", - "-c", - "ps -ef | grep main" - ] - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 1, - "periodSeconds": 60, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true, - "procMount": "Default" - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux", - "kubernetes.io/role": "agent" - }, - "serviceAccountName": "omsagent", - "serviceAccount": "omsagent", - "securityContext": {}, - "schedulerName": "default-scheduler" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": 1, - "maxSurge": 1 - } - }, - "revisionHistoryLimit": 10, - "progressDeadlineSeconds": 2147483647 - }, - "status": { - "observedGeneration": 1, - "replicas": 1, - "updatedReplicas": 1, - "readyReplicas": 1, - "availableReplicas": 1, - "conditions": [ - { - "type": "Available", - "status": "True", - "lastUpdateTime": 
"2019-06-19T06:28:56Z", - "lastTransitionTime": "2019-06-19T06:28:56Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum availability." - } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "tiller-deploy", - "namespace": "kube-system", - "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/tiller-deploy", - "uid": "39b60b02-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "7362285", - "generation": 1, - "creationTimestamp": "2019-04-17T04:28:18Z", - "labels": { - "addonmanager.kubernetes.io/mode": "EnsureExists", - "app": "helm", - "kubernetes.io/cluster-service": "true", - "name": "tiller" - }, - "annotations": { - "deployment.kubernetes.io/revision": "1" - } - }, - "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "app": "helm", - "name": "tiller" - } - }, - "template": { - "metadata": { - "creationTimestamp": null, - "labels": { - "app": "helm", - "name": "tiller" - } - }, - "spec": { - "containers": [ - { - "name": "tiller", - "image": "gcr.io/kubernetes-helm/tiller:v2.11.0", - "ports": [ - { - "name": "tiller", - "containerPort": 44134, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "TILLER_NAMESPACE", - "value": "kube-system" - }, - { - "name": "TILLER_HISTORY_MAX", - "value": "0" - } - ], - "resources": { - "limits": { - "cpu": "50m", - "memory": "150Mi" - }, - "requests": { - "cpu": "50m", - "memory": "150Mi" - } - }, - "livenessProbe": { - "httpGet": { - "path": "/liveness", - "port": 44135, - "scheme": "HTTP" - }, - "initialDelaySeconds": 1, - "timeoutSeconds": 1, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "readinessProbe": { - "httpGet": { - "path": "/readiness", - "port": 44135, - "scheme": "HTTP" - }, - "initialDelaySeconds": 1, - "timeoutSeconds": 1, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - 
"terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "tiller", - "serviceAccount": "tiller", - "securityContext": {}, - "schedulerName": "default-scheduler" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": 1, - "maxSurge": 1 - } - }, - "revisionHistoryLimit": 10, - "progressDeadlineSeconds": 2147483647 - }, - "status": { - "observedGeneration": 1, - "replicas": 1, - "updatedReplicas": 1, - "readyReplicas": 1, - "availableReplicas": 1, - "conditions": [ - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-04-17T04:28:18Z", - "lastTransitionTime": "2019-04-17T04:28:18Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum availability." - } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - } - ] -} \ No newline at end of file diff --git a/inventory/aks-engine/nodes.json b/inventory/aks-engine/nodes.json deleted file mode 100644 index 8e913ec20..000000000 --- a/inventory/aks-engine/nodes.json +++ /dev/null @@ -1,1439 +0,0 @@ -{ - "items": [ - { - "metadata": { - "name": "k8s-agentpool1-15159885-vmss000000", - "selfLink": "/api/v1/nodes/k8s-agentpool1-15159885-vmss000000", - "uid": "2f606f53-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "8059379", - "creationTimestamp": "2019-04-17T04:28:01Z", - "labels": { - "agentpool": "agentpool1", - "beta.kubernetes.io/arch": "amd64", - "beta.kubernetes.io/instance-type": "Standard_D2_v2", - "beta.kubernetes.io/os": "linux", - "failure-domain.beta.kubernetes.io/region": "eastus", - "failure-domain.beta.kubernetes.io/zone": "0", - "kubernetes.azure.com/cluster": "aks-engine-health", - "kubernetes.io/hostname": "k8s-agentpool1-15159885-vmss000000", - "kubernetes.io/role": "agent", - 
"node-role.kubernetes.io/agent": "", - "storageprofile": "managed", - "storagetier": "Standard_LRS" - }, - "annotations": { - "node.alpha.kubernetes.io/ttl": "0", - "volumes.kubernetes.io/controller-managed-attach-detach": "true" - } - }, - "spec": { - "providerID": "azure:///subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/aks-engine-health/providers/Microsoft.Compute/virtualMachineScaleSets/k8s-agentpool1-15159885-vmss/virtualMachines/0" - }, - "status": { - "capacity": { - "attachable-volumes-azure-disk": "8", - "cpu": "2", - "ephemeral-storage": "30428648Ki", - "hugepages-1Gi": "0", - "hugepages-2Mi": "0", - "memory": "7137036Ki", - "pods": "30" - }, - "allocatable": { - "attachable-volumes-azure-disk": "8", - "cpu": "2", - "ephemeral-storage": "28043041951", - "hugepages-1Gi": "0", - "hugepages-2Mi": "0", - "memory": "6369036Ki", - "pods": "30" - }, - "conditions": [ - { - "type": "OutOfDisk", - "status": "False", - "lastHeartbeatTime": "2019-06-19T20:09:57Z", - "lastTransitionTime": "2019-05-27T13:06:02Z", - "reason": "KubeletHasSufficientDisk", - "message": "kubelet has sufficient disk space available" - }, - { - "type": "MemoryPressure", - "status": "False", - "lastHeartbeatTime": "2019-06-19T20:09:57Z", - "lastTransitionTime": "2019-05-27T13:06:02Z", - "reason": "KubeletHasSufficientMemory", - "message": "kubelet has sufficient memory available" - }, - { - "type": "DiskPressure", - "status": "False", - "lastHeartbeatTime": "2019-06-19T20:09:57Z", - "lastTransitionTime": "2019-05-27T13:06:02Z", - "reason": "KubeletHasNoDiskPressure", - "message": "kubelet has no disk pressure" - }, - { - "type": "PIDPressure", - "status": "False", - "lastHeartbeatTime": "2019-06-19T20:09:57Z", - "lastTransitionTime": "2019-04-17T04:27:58Z", - "reason": "KubeletHasSufficientPID", - "message": "kubelet has sufficient PID available" - }, - { - "type": "Ready", - "status": "True", - "lastHeartbeatTime": "2019-06-19T20:09:57Z", - "lastTransitionTime": 
"2019-05-27T13:06:02Z", - "reason": "KubeletReady", - "message": "kubelet is posting ready status. AppArmor enabled" - } - ], - "addresses": [ - { - "type": "Hostname", - "address": "k8s-agentpool1-15159885-vmss000000" - }, - { - "type": "InternalIP", - "address": "10.240.0.34" - } - ], - "daemonEndpoints": { - "kubeletEndpoint": { - "Port": 10250 - } - }, - "nodeInfo": { - "machineID": "01beaa0c1a9c4687b859844363b7c73a", - "systemUUID": "32977732-40C8-5F45-A56F-043EB7178A9A", - "bootID": "08a81d2d-1223-4ce7-82f2-0fe218610052", - "kernelVersion": "4.15.0-1041-azure", - "osImage": "Ubuntu 16.04.6 LTS", - "containerRuntimeVersion": "docker://3.0.4", - "kubeletVersion": "v1.12.7", - "kubeProxyVersion": "v1.12.7", - "operatingSystem": "linux", - "architecture": "amd64" - }, - "images": [ - { - "names": [ - "rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", - "rdilip83/jsonlogger:v12" - ], - "sizeBytes": 676594134 - }, - { - "names": [ - "k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", - "k8s.gcr.io/hyperkube-amd64:v1.12.7" - ], - "sizeBytes": 635452178 - }, - { - "names": [ - "rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", - "rdilip83/healthpreview06192019:latest" - ], - "sizeBytes": 493799437 - }, - { - "names": [ - "microsoft/oms@sha256:7164890d6c2ec47a3588ee801e08a2e90e7ae650f5b4da3a1baf17de95e745b0", - "rdilip83/hp@sha256:7164890d6c2ec47a3588ee801e08a2e90e7ae650f5b4da3a1baf17de95e745b0", - "microsoft/oms:healthpreview04172019", - "rdilip83/hp:0417" - ], - "sizeBytes": 458385124 - }, - { - "names": [ - "microsoft/oms@sha256:042f0216394fd0709e384dceffc8ab0f8c983474313b63e9e27f98cf50825cee", - "microsoft/oms:healthpreview04152019" - ], - "sizeBytes": 458383728 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:b6834bb69e8fad88110b1dc57097a45bc79e6f2c5f2c2773c871d07389794771", - 
"k8s.gcr.io/cluster-autoscaler:v1.12.3" - ], - "sizeBytes": 232229241 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:dc5744fd8c22aebfe40d6b62ab97d18d7bfbfc7ab1782509d69a5a9ec514df2c", - "k8s.gcr.io/cluster-autoscaler:v1.12.2" - ], - "sizeBytes": 232167833 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:e71851267764a068fbb091a4ef3bb874b5ce34db48cb757fcf77779f30ef0207", - "k8s.gcr.io/cluster-autoscaler:v1.3.7" - ], - "sizeBytes": 217353965 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:156b7b9bcba24ed474f67d0feaf27f2506013f15b030341bbd41c630283161b8", - "k8s.gcr.io/cluster-autoscaler:v1.3.4" - ], - "sizeBytes": 217264129 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:97896235bf66bde573d6f2ee150e212ea7010d314eb5d2cfb2ff1af93335db30", - "k8s.gcr.io/cluster-autoscaler:v1.3.3" - ], - "sizeBytes": 217259793 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:36a369ca4643542d501bce0addf8b903f2141ae9e2608662b77a3d24f01d7780", - "k8s.gcr.io/cluster-autoscaler:v1.2.2" - ], - "sizeBytes": 208688449 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:9a71e04fdb0be61f2087847b6c5d223db3de4768e0cf8941b550fe9d4a971f58", - "k8s.gcr.io/cluster-autoscaler:v1.1.2" - ], - "sizeBytes": 198265853 - }, - { - "names": [ - "containernetworking/azure-npm@sha256:4735da6dc0d5393d68be72498f5ce563cb930fa21b26faec8fdc844001057a56", - "containernetworking/azure-npm:v1.0.18" - ], - "sizeBytes": 170727162 - }, - { - "names": [ - "k8s.gcr.io/cloud-controller-manager-amd64@sha256:c5b5c835a4a435da69f226fc08e27d2cc52e5911acc8215bb2c6372932bee7c0", - "k8s.gcr.io/cloud-controller-manager-amd64:v1.12.7" - ], - "sizeBytes": 144153702 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:a4e5a8e6d4dc011e6e7a104d6abdfda56274b90357ee9f6e42cc22b70482420b", - "k8s.gcr.io/cluster-autoscaler:v1.14.0" - ], - "sizeBytes": 142102721 - }, - { - "names": [ - 
"k8s.gcr.io/cluster-autoscaler@sha256:9dcbd91e79f33c44529de58a0024deb3da23a3a0bc7fd4d028c1255c68f62fb7", - "k8s.gcr.io/cluster-autoscaler:v1.13.2" - ], - "sizeBytes": 136684274 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:7ff5a60304b344f2f29c804c7253632bbc818794f6932236a56db107a6a8f5af", - "k8s.gcr.io/cluster-autoscaler:v1.13.1" - ], - "sizeBytes": 136618018 - }, - { - "names": [ - "containernetworking/networkmonitor@sha256:d875511410502c3e37804e1f313cc2b0a03d7a03d3d5e6adaf8994b753a76f8e", - "containernetworking/networkmonitor:v0.0.6" - ], - "sizeBytes": 123663837 - }, - { - "names": [ - "containernetworking/networkmonitor@sha256:944408a497c451b0e79d2596dc2e9fe5036cdbba7fa831bff024e1c9ed44190d", - "containernetworking/networkmonitor:v0.0.5" - ], - "sizeBytes": 122043325 - }, - { - "names": [ - "k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", - "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1" - ], - "sizeBytes": 121711221 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:3da3f17cd4f02fe5696f29a5e6cd4aef7111f20dab9bec54ea35942346cfeb60", - "k8s.gcr.io/kube-addon-manager-amd64:v8.8" - ], - "sizeBytes": 99631084 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:2fd1daf3d3cf0e94a753f2263b60dbb0d42b107b5cde0c75ee3fc5c830e016e4", - "k8s.gcr.io/kube-addon-manager-amd64:v8.9" - ], - "sizeBytes": 99240637 - }, - { - "names": [ - "microsoft/virtual-kubelet@sha256:9d2ac6238bb2b8b7a85a71ae6103c38bd387884519665f6f9d47fdc1fb8edb61", - "microsoft/virtual-kubelet:latest" - ], - "sizeBytes": 83395521 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:672794ee3582521eb8bc4f257d0f70c92893f1989f39a200f9c84bcfe1aea7c9", - "k8s.gcr.io/kube-addon-manager-amd64:v9.0" - ], - "sizeBytes": 83077558 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:3519273916ba45cfc9b318448d4629819cb5fbccbb0822cce054dd8c1f68cb60", - 
"k8s.gcr.io/kube-addon-manager-amd64:v8.6" - ], - "sizeBytes": 78384272 - }, - { - "names": [ - "k8s.gcr.io/heapster-amd64@sha256:d4d10455d921802bdb004e7edfe423a2b2f88911319b48abf47e0af909f27f15", - "k8s.gcr.io/heapster-amd64:v1.5.1" - ], - "sizeBytes": 75318380 - }, - { - "names": [ - "k8s.gcr.io/heapster-amd64@sha256:dccaabb0c20cf05c29baefa1e9bf0358b083ccc0fab492b9b3b47fb7e4db5472", - "k8s.gcr.io/heapster-amd64:v1.5.4" - ], - "sizeBytes": 75318342 - }, - { - "names": [ - "k8s.gcr.io/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", - "k8s.gcr.io/heapster-amd64:v1.5.3" - ], - "sizeBytes": 75318342 - }, - { - "names": [ - "k8s.gcr.io/rescheduler@sha256:66a900b01c70d695e112d8fa7779255640aab77ccc31f2bb661e6c674fe0d162", - "k8s.gcr.io/rescheduler:v0.3.1" - ], - "sizeBytes": 74659350 - }, - { - "names": [ - "gcr.io/kubernetes-helm/tiller@sha256:f6d8f4ab9ba993b5f5b60a6edafe86352eabe474ffeb84cb6c79b8866dce45d1", - "gcr.io/kubernetes-helm/tiller:v2.11.0" - ], - "sizeBytes": 71821984 - }, - { - "names": [ - "gcr.io/kubernetes-helm/tiller@sha256:394fb7d5f2fbaca54f6a0dec387cef926f6ae359786c89f7da67db173b97a322", - "gcr.io/kubernetes-helm/tiller:v2.8.1" - ], - "sizeBytes": 71509364 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:d53486c3a0b49ebee019932878dc44232735d5622a51dbbdcec7124199020d09", - "k8s.gcr.io/kube-addon-manager-amd64:v8.7" - ], - "sizeBytes": 63322109 - }, - { - "names": [ - "nvidia/k8s-device-plugin@sha256:41b3531d338477d26eb1151c15d0bea130d31e690752315a5205d8094439b0a6", - "nvidia/k8s-device-plugin:1.11" - ], - "sizeBytes": 63138633 - }, - { - "names": [ - "nvidia/k8s-device-plugin@sha256:327487db623cc75bdff86e56942f4af280e5f3de907339d0141fdffaeef342b8", - "nvidia/k8s-device-plugin:1.10" - ], - "sizeBytes": 63130377 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", - "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13" - ], - 
"sizeBytes": 51157394 - }, - { - "names": [ - "quay.io/coreos/flannel@sha256:5fa9435c1e95be2ec4daa53a35c39d5e3cc99fce33ed4983f4bb707bc9fc175f", - "quay.io/coreos/flannel:v0.8.0" - ], - "sizeBytes": 50732259 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:1a3fc069de481ae690188f6f1ba4664b5cc7760af37120f70c86505c79eea61d", - "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.5" - ], - "sizeBytes": 49387411 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:e7f673b2c5ccd047c48b4eecd5452b2db1b9454daf07b23068ad239f98afaa29", - "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.15.0" - ], - "sizeBytes": 49052023 - }, - { - "names": [ - "k8s.gcr.io/rescheduler@sha256:156cfbfd05a5a815206fd2eeb6cbdaf1596d71ea4b415d3a6c43071dd7b99450", - "k8s.gcr.io/rescheduler:v0.4.0" - ], - "sizeBytes": 48973149 - }, - { - "names": [ - "gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", - "k8s.gcr.io/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", - "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0", - "k8s.gcr.io/ip-masq-agent-amd64:v2.0.0" - ], - "sizeBytes": 48645472 - }, - { - "names": [ - "quay.io/coreos/flannel@sha256:6ecef07be35e5e861016ee557f986f89ad8244df47198de379a1bf4e580185df", - "quay.io/coreos/flannel:v0.10.0" - ], - "sizeBytes": 44598861 - }, - { - "names": [ - "k8s.gcr.io/metrics-server-amd64@sha256:49a9f12f7067d11f42c803dbe61ed2c1299959ad85cb315b25ff7eef8e6b8892", - "k8s.gcr.io/metrics-server-amd64:v0.2.1" - ], - "sizeBytes": 42541759 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:23df717980b4aa08d2da6c4cfa327f1b730d92ec9cf740959d2d5911830d82fb", - "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.8" - ], - "sizeBytes": 42210862 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:f80f5f9328107dc516d67f7b70054354b9367d31d4946a3bffd3383d83d7efe8", - "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.7" - ], - "sizeBytes": 42033070 - }, - { - 
"names": [ - "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:4f1ab957f87b94a5ec1edc26fae50da2175461f00afecf68940c4aa079bd08a4", - "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10" - ], - "sizeBytes": 41635309 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:46b933bb70270c8a02fa6b6f87d440f6f1fce1a5a2a719e164f83f7b109f7544", - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.5" - ], - "sizeBytes": 41423617 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:93c827f018cf3322f1ff2aa80324a0306048b0a69bc274e423071fb0d2d29d8b", - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.8" - ], - "sizeBytes": 40951779 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:bbb2a290a568125b3b996028958eb773f33b5b87a6b37bf38a28f8b62dddb3c8", - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10" - ], - "sizeBytes": 40372149 - }, - { - "names": [ - "k8s.gcr.io/coredns@sha256:02382353821b12c21b062c59184e227e001079bb13ebd01f9d3270ba0fcbf1e4", - "k8s.gcr.io/coredns:1.3.1" - ], - "sizeBytes": 40303560 - }, - { - "names": [ - "k8s.gcr.io/coredns@sha256:81936728011c0df9404cb70b95c17bbc8af922ec9a70d0561a5d01fefa6ffa51", - "k8s.gcr.io/coredns:1.2.6" - ], - "sizeBytes": 40017418 - } - ] - }, - "apiVersion": "v1", - "kind": "Node" - }, - { - "metadata": { - "name": "k8s-agentpool1-15159885-vmss000001", - "selfLink": "/api/v1/nodes/k8s-agentpool1-15159885-vmss000001", - "uid": "2f589cda-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "8059374", - "creationTimestamp": "2019-04-17T04:28:01Z", - "labels": { - "agentpool": "agentpool1", - "beta.kubernetes.io/arch": "amd64", - "beta.kubernetes.io/instance-type": "Standard_D2_v2", - "beta.kubernetes.io/os": "linux", - "failure-domain.beta.kubernetes.io/region": "eastus", - "failure-domain.beta.kubernetes.io/zone": "1", - "kubernetes.azure.com/cluster": "aks-engine-health", - "kubernetes.io/hostname": "k8s-agentpool1-15159885-vmss000001", - "kubernetes.io/role": "agent", - "node-role.kubernetes.io/agent": "", - 
"storageprofile": "managed", - "storagetier": "Standard_LRS" - }, - "annotations": { - "node.alpha.kubernetes.io/ttl": "0", - "volumes.kubernetes.io/controller-managed-attach-detach": "true" - } - }, - "spec": { - "providerID": "azure:///subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/aks-engine-health/providers/Microsoft.Compute/virtualMachineScaleSets/k8s-agentpool1-15159885-vmss/virtualMachines/1" - }, - "status": { - "capacity": { - "attachable-volumes-azure-disk": "8", - "cpu": "2", - "ephemeral-storage": "30428648Ki", - "hugepages-1Gi": "0", - "hugepages-2Mi": "0", - "memory": "7113156Ki", - "pods": "30" - }, - "allocatable": { - "attachable-volumes-azure-disk": "8", - "cpu": "2", - "ephemeral-storage": "28043041951", - "hugepages-1Gi": "0", - "hugepages-2Mi": "0", - "memory": "6345156Ki", - "pods": "30" - }, - "conditions": [ - { - "type": "OutOfDisk", - "status": "False", - "lastHeartbeatTime": "2019-06-19T20:09:53Z", - "lastTransitionTime": "2019-06-14T07:59:09Z", - "reason": "KubeletHasSufficientDisk", - "message": "kubelet has sufficient disk space available" - }, - { - "type": "MemoryPressure", - "status": "False", - "lastHeartbeatTime": "2019-06-19T20:09:53Z", - "lastTransitionTime": "2019-06-14T07:59:09Z", - "reason": "KubeletHasSufficientMemory", - "message": "kubelet has sufficient memory available" - }, - { - "type": "DiskPressure", - "status": "False", - "lastHeartbeatTime": "2019-06-19T20:09:53Z", - "lastTransitionTime": "2019-06-14T07:59:09Z", - "reason": "KubeletHasNoDiskPressure", - "message": "kubelet has no disk pressure" - }, - { - "type": "PIDPressure", - "status": "False", - "lastHeartbeatTime": "2019-06-19T20:09:53Z", - "lastTransitionTime": "2019-04-17T04:27:57Z", - "reason": "KubeletHasSufficientPID", - "message": "kubelet has sufficient PID available" - }, - { - "type": "Ready", - "status": "True", - "lastHeartbeatTime": "2019-06-19T20:09:53Z", - "lastTransitionTime": "2019-06-14T07:59:09Z", - "reason": 
"KubeletReady", - "message": "kubelet is posting ready status. AppArmor enabled" - } - ], - "addresses": [ - { - "type": "Hostname", - "address": "k8s-agentpool1-15159885-vmss000001" - }, - { - "type": "InternalIP", - "address": "10.240.0.65" - } - ], - "daemonEndpoints": { - "kubeletEndpoint": { - "Port": 10250 - } - }, - "nodeInfo": { - "machineID": "b961d62fbbc543c0a421e9c1aa2499fd", - "systemUUID": "858668CE-5637-0148-A5DD-811E6E452DA9", - "bootID": "812a23fb-95b8-463c-81c5-63c1b289e8e9", - "kernelVersion": "4.15.0-1046-azure", - "osImage": "Ubuntu 16.04.6 LTS", - "containerRuntimeVersion": "docker://3.0.4", - "kubeletVersion": "v1.12.7", - "kubeProxyVersion": "v1.12.7", - "operatingSystem": "linux", - "architecture": "amd64" - }, - "images": [ - { - "names": [ - "rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", - "rdilip83/jsonlogger:v12" - ], - "sizeBytes": 676594134 - }, - { - "names": [ - "k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", - "k8s.gcr.io/hyperkube-amd64:v1.12.7" - ], - "sizeBytes": 635452178 - }, - { - "names": [ - "rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", - "rdilip83/healthpreview06192019:latest" - ], - "sizeBytes": 493799437 - }, - { - "names": [ - "microsoft/oms@sha256:7164890d6c2ec47a3588ee801e08a2e90e7ae650f5b4da3a1baf17de95e745b0", - "rdilip83/hp@sha256:7164890d6c2ec47a3588ee801e08a2e90e7ae650f5b4da3a1baf17de95e745b0", - "microsoft/oms:healthpreview04172019", - "rdilip83/hp:0417" - ], - "sizeBytes": 458385124 - }, - { - "names": [ - "microsoft/oms@sha256:042f0216394fd0709e384dceffc8ab0f8c983474313b63e9e27f98cf50825cee", - "microsoft/oms:healthpreview04152019" - ], - "sizeBytes": 458383728 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:b6834bb69e8fad88110b1dc57097a45bc79e6f2c5f2c2773c871d07389794771", - "k8s.gcr.io/cluster-autoscaler:v1.12.3" - ], - "sizeBytes": 
232229241 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:dc5744fd8c22aebfe40d6b62ab97d18d7bfbfc7ab1782509d69a5a9ec514df2c", - "k8s.gcr.io/cluster-autoscaler:v1.12.2" - ], - "sizeBytes": 232167833 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:e71851267764a068fbb091a4ef3bb874b5ce34db48cb757fcf77779f30ef0207", - "k8s.gcr.io/cluster-autoscaler:v1.3.7" - ], - "sizeBytes": 217353965 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:156b7b9bcba24ed474f67d0feaf27f2506013f15b030341bbd41c630283161b8", - "k8s.gcr.io/cluster-autoscaler:v1.3.4" - ], - "sizeBytes": 217264129 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:97896235bf66bde573d6f2ee150e212ea7010d314eb5d2cfb2ff1af93335db30", - "k8s.gcr.io/cluster-autoscaler:v1.3.3" - ], - "sizeBytes": 217259793 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:36a369ca4643542d501bce0addf8b903f2141ae9e2608662b77a3d24f01d7780", - "k8s.gcr.io/cluster-autoscaler:v1.2.2" - ], - "sizeBytes": 208688449 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:9a71e04fdb0be61f2087847b6c5d223db3de4768e0cf8941b550fe9d4a971f58", - "k8s.gcr.io/cluster-autoscaler:v1.1.2" - ], - "sizeBytes": 198265853 - }, - { - "names": [ - "containernetworking/azure-npm@sha256:4735da6dc0d5393d68be72498f5ce563cb930fa21b26faec8fdc844001057a56", - "containernetworking/azure-npm:v1.0.18" - ], - "sizeBytes": 170727162 - }, - { - "names": [ - "k8s.gcr.io/cloud-controller-manager-amd64@sha256:c5b5c835a4a435da69f226fc08e27d2cc52e5911acc8215bb2c6372932bee7c0", - "k8s.gcr.io/cloud-controller-manager-amd64:v1.12.7" - ], - "sizeBytes": 144153702 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:a4e5a8e6d4dc011e6e7a104d6abdfda56274b90357ee9f6e42cc22b70482420b", - "k8s.gcr.io/cluster-autoscaler:v1.14.0" - ], - "sizeBytes": 142102721 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:9dcbd91e79f33c44529de58a0024deb3da23a3a0bc7fd4d028c1255c68f62fb7", - 
"k8s.gcr.io/cluster-autoscaler:v1.13.2" - ], - "sizeBytes": 136684274 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:7ff5a60304b344f2f29c804c7253632bbc818794f6932236a56db107a6a8f5af", - "k8s.gcr.io/cluster-autoscaler:v1.13.1" - ], - "sizeBytes": 136618018 - }, - { - "names": [ - "containernetworking/networkmonitor@sha256:d875511410502c3e37804e1f313cc2b0a03d7a03d3d5e6adaf8994b753a76f8e", - "containernetworking/networkmonitor:v0.0.6" - ], - "sizeBytes": 123663837 - }, - { - "names": [ - "containernetworking/networkmonitor@sha256:944408a497c451b0e79d2596dc2e9fe5036cdbba7fa831bff024e1c9ed44190d", - "containernetworking/networkmonitor:v0.0.5" - ], - "sizeBytes": 122043325 - }, - { - "names": [ - "k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", - "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1" - ], - "sizeBytes": 121711221 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:3da3f17cd4f02fe5696f29a5e6cd4aef7111f20dab9bec54ea35942346cfeb60", - "k8s.gcr.io/kube-addon-manager-amd64:v8.8" - ], - "sizeBytes": 99631084 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:2fd1daf3d3cf0e94a753f2263b60dbb0d42b107b5cde0c75ee3fc5c830e016e4", - "k8s.gcr.io/kube-addon-manager-amd64:v8.9" - ], - "sizeBytes": 99240637 - }, - { - "names": [ - "microsoft/virtual-kubelet@sha256:9d2ac6238bb2b8b7a85a71ae6103c38bd387884519665f6f9d47fdc1fb8edb61", - "microsoft/virtual-kubelet:latest" - ], - "sizeBytes": 83395521 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:672794ee3582521eb8bc4f257d0f70c92893f1989f39a200f9c84bcfe1aea7c9", - "k8s.gcr.io/kube-addon-manager-amd64:v9.0" - ], - "sizeBytes": 83077558 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:3519273916ba45cfc9b318448d4629819cb5fbccbb0822cce054dd8c1f68cb60", - "k8s.gcr.io/kube-addon-manager-amd64:v8.6" - ], - "sizeBytes": 78384272 - }, - { - "names": [ - 
"k8s.gcr.io/heapster-amd64@sha256:d4d10455d921802bdb004e7edfe423a2b2f88911319b48abf47e0af909f27f15", - "k8s.gcr.io/heapster-amd64:v1.5.1" - ], - "sizeBytes": 75318380 - }, - { - "names": [ - "k8s.gcr.io/heapster-amd64@sha256:dccaabb0c20cf05c29baefa1e9bf0358b083ccc0fab492b9b3b47fb7e4db5472", - "k8s.gcr.io/heapster-amd64:v1.5.4" - ], - "sizeBytes": 75318342 - }, - { - "names": [ - "k8s.gcr.io/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", - "k8s.gcr.io/heapster-amd64:v1.5.3" - ], - "sizeBytes": 75318342 - }, - { - "names": [ - "k8s.gcr.io/rescheduler@sha256:66a900b01c70d695e112d8fa7779255640aab77ccc31f2bb661e6c674fe0d162", - "k8s.gcr.io/rescheduler:v0.3.1" - ], - "sizeBytes": 74659350 - }, - { - "names": [ - "gcr.io/kubernetes-helm/tiller@sha256:f6d8f4ab9ba993b5f5b60a6edafe86352eabe474ffeb84cb6c79b8866dce45d1", - "gcr.io/kubernetes-helm/tiller:v2.11.0" - ], - "sizeBytes": 71821984 - }, - { - "names": [ - "gcr.io/kubernetes-helm/tiller@sha256:394fb7d5f2fbaca54f6a0dec387cef926f6ae359786c89f7da67db173b97a322", - "gcr.io/kubernetes-helm/tiller:v2.8.1" - ], - "sizeBytes": 71509364 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:d53486c3a0b49ebee019932878dc44232735d5622a51dbbdcec7124199020d09", - "k8s.gcr.io/kube-addon-manager-amd64:v8.7" - ], - "sizeBytes": 63322109 - }, - { - "names": [ - "nvidia/k8s-device-plugin@sha256:41b3531d338477d26eb1151c15d0bea130d31e690752315a5205d8094439b0a6", - "nvidia/k8s-device-plugin:1.11" - ], - "sizeBytes": 63138633 - }, - { - "names": [ - "nvidia/k8s-device-plugin@sha256:327487db623cc75bdff86e56942f4af280e5f3de907339d0141fdffaeef342b8", - "nvidia/k8s-device-plugin:1.10" - ], - "sizeBytes": 63130377 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", - "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13" - ], - "sizeBytes": 51157394 - }, - { - "names": [ - 
"quay.io/coreos/flannel@sha256:5fa9435c1e95be2ec4daa53a35c39d5e3cc99fce33ed4983f4bb707bc9fc175f", - "quay.io/coreos/flannel:v0.8.0" - ], - "sizeBytes": 50732259 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:1a3fc069de481ae690188f6f1ba4664b5cc7760af37120f70c86505c79eea61d", - "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.5" - ], - "sizeBytes": 49387411 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:e7f673b2c5ccd047c48b4eecd5452b2db1b9454daf07b23068ad239f98afaa29", - "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.15.0" - ], - "sizeBytes": 49052023 - }, - { - "names": [ - "k8s.gcr.io/rescheduler@sha256:156cfbfd05a5a815206fd2eeb6cbdaf1596d71ea4b415d3a6c43071dd7b99450", - "k8s.gcr.io/rescheduler:v0.4.0" - ], - "sizeBytes": 48973149 - }, - { - "names": [ - "gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", - "k8s.gcr.io/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", - "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0", - "k8s.gcr.io/ip-masq-agent-amd64:v2.0.0" - ], - "sizeBytes": 48645472 - }, - { - "names": [ - "quay.io/coreos/flannel@sha256:6ecef07be35e5e861016ee557f986f89ad8244df47198de379a1bf4e580185df", - "quay.io/coreos/flannel:v0.10.0" - ], - "sizeBytes": 44598861 - }, - { - "names": [ - "k8s.gcr.io/metrics-server-amd64@sha256:49a9f12f7067d11f42c803dbe61ed2c1299959ad85cb315b25ff7eef8e6b8892", - "k8s.gcr.io/metrics-server-amd64:v0.2.1" - ], - "sizeBytes": 42541759 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:23df717980b4aa08d2da6c4cfa327f1b730d92ec9cf740959d2d5911830d82fb", - "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.8" - ], - "sizeBytes": 42210862 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:f80f5f9328107dc516d67f7b70054354b9367d31d4946a3bffd3383d83d7efe8", - "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.7" - ], - "sizeBytes": 42033070 - }, - { - "names": [ - 
"k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:4f1ab957f87b94a5ec1edc26fae50da2175461f00afecf68940c4aa079bd08a4", - "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10" - ], - "sizeBytes": 41635309 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:46b933bb70270c8a02fa6b6f87d440f6f1fce1a5a2a719e164f83f7b109f7544", - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.5" - ], - "sizeBytes": 41423617 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:93c827f018cf3322f1ff2aa80324a0306048b0a69bc274e423071fb0d2d29d8b", - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.8" - ], - "sizeBytes": 40951779 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:bbb2a290a568125b3b996028958eb773f33b5b87a6b37bf38a28f8b62dddb3c8", - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10" - ], - "sizeBytes": 40372149 - }, - { - "names": [ - "k8s.gcr.io/coredns@sha256:02382353821b12c21b062c59184e227e001079bb13ebd01f9d3270ba0fcbf1e4", - "k8s.gcr.io/coredns:1.3.1" - ], - "sizeBytes": 40303560 - }, - { - "names": [ - "k8s.gcr.io/coredns@sha256:81936728011c0df9404cb70b95c17bbc8af922ec9a70d0561a5d01fefa6ffa51", - "k8s.gcr.io/coredns:1.2.6" - ], - "sizeBytes": 40017418 - } - ] - }, - "apiVersion": "v1", - "kind": "Node" - }, - { - "metadata": { - "name": "k8s-master-15159885-0", - "selfLink": "/api/v1/nodes/k8s-master-15159885-0", - "uid": "32b3af81-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "8059368", - "creationTimestamp": "2019-04-17T04:28:06Z", - "labels": { - "beta.kubernetes.io/arch": "amd64", - "beta.kubernetes.io/instance-type": "Standard_D2_v2", - "beta.kubernetes.io/os": "linux", - "failure-domain.beta.kubernetes.io/region": "eastus", - "failure-domain.beta.kubernetes.io/zone": "0", - "kubernetes.azure.com/cluster": "aks-engine-health", - "kubernetes.io/hostname": "k8s-master-15159885-0", - "kubernetes.io/role": "master", - "node-role.kubernetes.io/master": "" - }, - "annotations": { - "node.alpha.kubernetes.io/ttl": "0", - 
"volumes.kubernetes.io/controller-managed-attach-detach": "true" - } - }, - "spec": { - "providerID": "azure:///subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/aks-engine-health/providers/Microsoft.Compute/virtualMachines/k8s-master-15159885-0", - "taints": [ - { - "key": "node-role.kubernetes.io/master", - "value": "true", - "effect": "NoSchedule" - } - ] - }, - "status": { - "capacity": { - "attachable-volumes-azure-disk": "8", - "cpu": "2", - "ephemeral-storage": "30428648Ki", - "hugepages-1Gi": "0", - "hugepages-2Mi": "0", - "memory": "7137036Ki", - "pods": "30" - }, - "allocatable": { - "attachable-volumes-azure-disk": "8", - "cpu": "2", - "ephemeral-storage": "28043041951", - "hugepages-1Gi": "0", - "hugepages-2Mi": "0", - "memory": "6369036Ki", - "pods": "30" - }, - "conditions": [ - { - "type": "OutOfDisk", - "status": "False", - "lastHeartbeatTime": "2019-06-19T20:09:49Z", - "lastTransitionTime": "2019-04-17T04:28:06Z", - "reason": "KubeletHasSufficientDisk", - "message": "kubelet has sufficient disk space available" - }, - { - "type": "MemoryPressure", - "status": "False", - "lastHeartbeatTime": "2019-06-19T20:09:49Z", - "lastTransitionTime": "2019-04-17T04:28:06Z", - "reason": "KubeletHasSufficientMemory", - "message": "kubelet has sufficient memory available" - }, - { - "type": "DiskPressure", - "status": "False", - "lastHeartbeatTime": "2019-06-19T20:09:49Z", - "lastTransitionTime": "2019-04-17T04:28:06Z", - "reason": "KubeletHasNoDiskPressure", - "message": "kubelet has no disk pressure" - }, - { - "type": "PIDPressure", - "status": "False", - "lastHeartbeatTime": "2019-06-19T20:09:49Z", - "lastTransitionTime": "2019-04-17T04:28:06Z", - "reason": "KubeletHasSufficientPID", - "message": "kubelet has sufficient PID available" - }, - { - "type": "Ready", - "status": "True", - "lastHeartbeatTime": "2019-06-19T20:09:49Z", - "lastTransitionTime": "2019-04-17T04:28:06Z", - "reason": "KubeletReady", - "message": "kubelet is posting ready 
status. AppArmor enabled" - } - ], - "addresses": [ - { - "type": "Hostname", - "address": "k8s-master-15159885-0" - }, - { - "type": "InternalIP", - "address": "10.255.255.5" - } - ], - "daemonEndpoints": { - "kubeletEndpoint": { - "Port": 10250 - } - }, - "nodeInfo": { - "machineID": "93481a94c1844562bd1e450604d6281f", - "systemUUID": "98EB8773-7ACD-F94F-9D56-C530369EF42E", - "bootID": "68915d7c-2ecd-4992-b69c-d63751683c5e", - "kernelVersion": "4.15.0-1041-azure", - "osImage": "Ubuntu 16.04.6 LTS", - "containerRuntimeVersion": "docker://3.0.4", - "kubeletVersion": "v1.12.7", - "kubeProxyVersion": "v1.12.7", - "operatingSystem": "linux", - "architecture": "amd64" - }, - "images": [ - { - "names": [ - "k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", - "k8s.gcr.io/hyperkube-amd64:v1.12.7" - ], - "sizeBytes": 635452178 - }, - { - "names": [ - "rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", - "rdilip83/healthpreview06192019:latest" - ], - "sizeBytes": 493799437 - }, - { - "names": [ - "microsoft/oms@sha256:7164890d6c2ec47a3588ee801e08a2e90e7ae650f5b4da3a1baf17de95e745b0", - "rdilip83/hp@sha256:7164890d6c2ec47a3588ee801e08a2e90e7ae650f5b4da3a1baf17de95e745b0", - "microsoft/oms:healthpreview04172019", - "rdilip83/hp:0417" - ], - "sizeBytes": 458385124 - }, - { - "names": [ - "microsoft/oms@sha256:042f0216394fd0709e384dceffc8ab0f8c983474313b63e9e27f98cf50825cee", - "microsoft/oms:healthpreview04152019" - ], - "sizeBytes": 458383728 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:b6834bb69e8fad88110b1dc57097a45bc79e6f2c5f2c2773c871d07389794771", - "k8s.gcr.io/cluster-autoscaler:v1.12.3" - ], - "sizeBytes": 232229241 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:dc5744fd8c22aebfe40d6b62ab97d18d7bfbfc7ab1782509d69a5a9ec514df2c", - "k8s.gcr.io/cluster-autoscaler:v1.12.2" - ], - "sizeBytes": 232167833 - }, - { - "names": [ - 
"k8s.gcr.io/cluster-autoscaler@sha256:e71851267764a068fbb091a4ef3bb874b5ce34db48cb757fcf77779f30ef0207", - "k8s.gcr.io/cluster-autoscaler:v1.3.7" - ], - "sizeBytes": 217353965 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:156b7b9bcba24ed474f67d0feaf27f2506013f15b030341bbd41c630283161b8", - "k8s.gcr.io/cluster-autoscaler:v1.3.4" - ], - "sizeBytes": 217264129 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:97896235bf66bde573d6f2ee150e212ea7010d314eb5d2cfb2ff1af93335db30", - "k8s.gcr.io/cluster-autoscaler:v1.3.3" - ], - "sizeBytes": 217259793 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:36a369ca4643542d501bce0addf8b903f2141ae9e2608662b77a3d24f01d7780", - "k8s.gcr.io/cluster-autoscaler:v1.2.2" - ], - "sizeBytes": 208688449 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:9a71e04fdb0be61f2087847b6c5d223db3de4768e0cf8941b550fe9d4a971f58", - "k8s.gcr.io/cluster-autoscaler:v1.1.2" - ], - "sizeBytes": 198265853 - }, - { - "names": [ - "containernetworking/azure-npm@sha256:4735da6dc0d5393d68be72498f5ce563cb930fa21b26faec8fdc844001057a56", - "containernetworking/azure-npm:v1.0.18" - ], - "sizeBytes": 170727162 - }, - { - "names": [ - "k8s.gcr.io/cloud-controller-manager-amd64@sha256:c5b5c835a4a435da69f226fc08e27d2cc52e5911acc8215bb2c6372932bee7c0", - "k8s.gcr.io/cloud-controller-manager-amd64:v1.12.7" - ], - "sizeBytes": 144153702 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:a4e5a8e6d4dc011e6e7a104d6abdfda56274b90357ee9f6e42cc22b70482420b", - "k8s.gcr.io/cluster-autoscaler:v1.14.0" - ], - "sizeBytes": 142102721 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:9dcbd91e79f33c44529de58a0024deb3da23a3a0bc7fd4d028c1255c68f62fb7", - "k8s.gcr.io/cluster-autoscaler:v1.13.2" - ], - "sizeBytes": 136684274 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:7ff5a60304b344f2f29c804c7253632bbc818794f6932236a56db107a6a8f5af", - "k8s.gcr.io/cluster-autoscaler:v1.13.1" - ], - "sizeBytes": 
136618018 - }, - { - "names": [ - "containernetworking/networkmonitor@sha256:d875511410502c3e37804e1f313cc2b0a03d7a03d3d5e6adaf8994b753a76f8e", - "containernetworking/networkmonitor:v0.0.6" - ], - "sizeBytes": 123663837 - }, - { - "names": [ - "containernetworking/networkmonitor@sha256:944408a497c451b0e79d2596dc2e9fe5036cdbba7fa831bff024e1c9ed44190d", - "containernetworking/networkmonitor:v0.0.5" - ], - "sizeBytes": 122043325 - }, - { - "names": [ - "k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", - "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1" - ], - "sizeBytes": 121711221 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:3da3f17cd4f02fe5696f29a5e6cd4aef7111f20dab9bec54ea35942346cfeb60", - "k8s.gcr.io/kube-addon-manager-amd64:v8.8" - ], - "sizeBytes": 99631084 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:2fd1daf3d3cf0e94a753f2263b60dbb0d42b107b5cde0c75ee3fc5c830e016e4", - "k8s.gcr.io/kube-addon-manager-amd64:v8.9" - ], - "sizeBytes": 99240637 - }, - { - "names": [ - "microsoft/virtual-kubelet@sha256:9d2ac6238bb2b8b7a85a71ae6103c38bd387884519665f6f9d47fdc1fb8edb61", - "microsoft/virtual-kubelet:latest" - ], - "sizeBytes": 83395521 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:672794ee3582521eb8bc4f257d0f70c92893f1989f39a200f9c84bcfe1aea7c9", - "k8s.gcr.io/kube-addon-manager-amd64:v9.0" - ], - "sizeBytes": 83077558 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:3519273916ba45cfc9b318448d4629819cb5fbccbb0822cce054dd8c1f68cb60", - "k8s.gcr.io/kube-addon-manager-amd64:v8.6" - ], - "sizeBytes": 78384272 - }, - { - "names": [ - "k8s.gcr.io/heapster-amd64@sha256:d4d10455d921802bdb004e7edfe423a2b2f88911319b48abf47e0af909f27f15", - "k8s.gcr.io/heapster-amd64:v1.5.1" - ], - "sizeBytes": 75318380 - }, - { - "names": [ - "k8s.gcr.io/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", - 
"k8s.gcr.io/heapster-amd64:v1.5.3" - ], - "sizeBytes": 75318342 - }, - { - "names": [ - "k8s.gcr.io/heapster-amd64@sha256:dccaabb0c20cf05c29baefa1e9bf0358b083ccc0fab492b9b3b47fb7e4db5472", - "k8s.gcr.io/heapster-amd64:v1.5.4" - ], - "sizeBytes": 75318342 - }, - { - "names": [ - "k8s.gcr.io/rescheduler@sha256:66a900b01c70d695e112d8fa7779255640aab77ccc31f2bb661e6c674fe0d162", - "k8s.gcr.io/rescheduler:v0.3.1" - ], - "sizeBytes": 74659350 - }, - { - "names": [ - "gcr.io/kubernetes-helm/tiller@sha256:f6d8f4ab9ba993b5f5b60a6edafe86352eabe474ffeb84cb6c79b8866dce45d1", - "gcr.io/kubernetes-helm/tiller:v2.11.0" - ], - "sizeBytes": 71821984 - }, - { - "names": [ - "gcr.io/kubernetes-helm/tiller@sha256:394fb7d5f2fbaca54f6a0dec387cef926f6ae359786c89f7da67db173b97a322", - "gcr.io/kubernetes-helm/tiller:v2.8.1" - ], - "sizeBytes": 71509364 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:d53486c3a0b49ebee019932878dc44232735d5622a51dbbdcec7124199020d09", - "k8s.gcr.io/kube-addon-manager-amd64:v8.7" - ], - "sizeBytes": 63322109 - }, - { - "names": [ - "nvidia/k8s-device-plugin@sha256:41b3531d338477d26eb1151c15d0bea130d31e690752315a5205d8094439b0a6", - "nvidia/k8s-device-plugin:1.11" - ], - "sizeBytes": 63138633 - }, - { - "names": [ - "nvidia/k8s-device-plugin@sha256:327487db623cc75bdff86e56942f4af280e5f3de907339d0141fdffaeef342b8", - "nvidia/k8s-device-plugin:1.10" - ], - "sizeBytes": 63130377 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", - "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13" - ], - "sizeBytes": 51157394 - }, - { - "names": [ - "quay.io/coreos/flannel@sha256:5fa9435c1e95be2ec4daa53a35c39d5e3cc99fce33ed4983f4bb707bc9fc175f", - "quay.io/coreos/flannel:v0.8.0" - ], - "sizeBytes": 50732259 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:1a3fc069de481ae690188f6f1ba4664b5cc7760af37120f70c86505c79eea61d", - "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.5" - ], - 
"sizeBytes": 49387411 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:e7f673b2c5ccd047c48b4eecd5452b2db1b9454daf07b23068ad239f98afaa29", - "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.15.0" - ], - "sizeBytes": 49052023 - }, - { - "names": [ - "k8s.gcr.io/rescheduler@sha256:156cfbfd05a5a815206fd2eeb6cbdaf1596d71ea4b415d3a6c43071dd7b99450", - "k8s.gcr.io/rescheduler:v0.4.0" - ], - "sizeBytes": 48973149 - }, - { - "names": [ - "gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", - "k8s.gcr.io/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", - "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0", - "k8s.gcr.io/ip-masq-agent-amd64:v2.0.0" - ], - "sizeBytes": 48645472 - }, - { - "names": [ - "quay.io/coreos/flannel@sha256:6ecef07be35e5e861016ee557f986f89ad8244df47198de379a1bf4e580185df", - "quay.io/coreos/flannel:v0.10.0" - ], - "sizeBytes": 44598861 - }, - { - "names": [ - "k8s.gcr.io/metrics-server-amd64@sha256:49a9f12f7067d11f42c803dbe61ed2c1299959ad85cb315b25ff7eef8e6b8892", - "k8s.gcr.io/metrics-server-amd64:v0.2.1" - ], - "sizeBytes": 42541759 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:23df717980b4aa08d2da6c4cfa327f1b730d92ec9cf740959d2d5911830d82fb", - "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.8" - ], - "sizeBytes": 42210862 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:f80f5f9328107dc516d67f7b70054354b9367d31d4946a3bffd3383d83d7efe8", - "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.7" - ], - "sizeBytes": 42033070 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:4f1ab957f87b94a5ec1edc26fae50da2175461f00afecf68940c4aa079bd08a4", - "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10" - ], - "sizeBytes": 41635309 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:46b933bb70270c8a02fa6b6f87d440f6f1fce1a5a2a719e164f83f7b109f7544", - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.5" - ], - 
"sizeBytes": 41423617 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:93c827f018cf3322f1ff2aa80324a0306048b0a69bc274e423071fb0d2d29d8b", - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.8" - ], - "sizeBytes": 40951779 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:bbb2a290a568125b3b996028958eb773f33b5b87a6b37bf38a28f8b62dddb3c8", - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10" - ], - "sizeBytes": 40372149 - }, - { - "names": [ - "k8s.gcr.io/coredns@sha256:02382353821b12c21b062c59184e227e001079bb13ebd01f9d3270ba0fcbf1e4", - "k8s.gcr.io/coredns:1.3.1" - ], - "sizeBytes": 40303560 - }, - { - "names": [ - "k8s.gcr.io/coredns@sha256:81936728011c0df9404cb70b95c17bbc8af922ec9a70d0561a5d01fefa6ffa51", - "k8s.gcr.io/coredns:1.2.6" - ], - "sizeBytes": 40017418 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:ed10a8d06cde253474a7e9cfa65bbdf5f8f0c6a377f66c033e623ff63e9d5edf", - "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.15.0" - ], - "sizeBytes": 39814576 - } - ] - }, - "apiVersion": "v1", - "kind": "Node" - } - ] -} \ No newline at end of file diff --git a/inventory/aks-engine/pods.json b/inventory/aks-engine/pods.json deleted file mode 100644 index 16cc66b78..000000000 --- a/inventory/aks-engine/pods.json +++ /dev/null @@ -1,6622 +0,0 @@ -{ - "items": [ - { - "metadata": { - "name": "diliprnodejsonlog-5959b88bf7-8qk62", - "generateName": "diliprnodejsonlog-5959b88bf7-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/diliprnodejsonlog-5959b88bf7-8qk62", - "uid": "eccca9a1-66ec-11e9-a358-000d3a53d49f", - "resourceVersion": "5110373", - "creationTimestamp": "2019-04-24T23:58:58Z", - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "diliprnodejsonlog", - "pod-template-hash": "5959b88bf7" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "diliprnodejsonlog-5959b88bf7", - "uid": "ecc3eca0-66ec-11e9-a358-000d3a53d49f", - 
"controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-c5cvg", - "secret": { - "secretName": "default-token-c5cvg", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "diliprnodejsonlogpodapp", - "image": "rdilip83/jsonlogger:v12", - "resources": { - "limits": { - "cpu": "20m", - "memory": "60Mi" - }, - "requests": { - "cpu": "10m", - "memory": "30Mi" - } - }, - "volumeMounts": [ - { - "name": "default-token-c5cvg", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-agentpool1-15159885-vmss000000", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-24T23:58:58Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-24T23:59:28Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-24T23:59:28Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-24T23:58:58Z" - } - ], - "hostIP": "10.240.0.34", - "podIP": "10.240.0.44", - "startTime": "2019-04-24T23:58:58Z", - "containerStatuses": [ - { - "name": 
"diliprnodejsonlogpodapp", - "state": { - "running": { - "startedAt": "2019-04-24T23:59:28Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "rdilip83/jsonlogger:v12", - "imageID": "docker-pullable://rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", - "containerID": "docker://e01c161647be4ba9c88056ae101f6cb71923182c7c523ec56257c3f8125ad825" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "diliprnodejsonlog-5959b88bf7-8vttp", - "generateName": "diliprnodejsonlog-5959b88bf7-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/diliprnodejsonlog-5959b88bf7-8vttp", - "uid": "eccf9171-66ec-11e9-a358-000d3a53d49f", - "resourceVersion": "7362277", - "creationTimestamp": "2019-04-24T23:58:58Z", - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "diliprnodejsonlog", - "pod-template-hash": "5959b88bf7" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "diliprnodejsonlog-5959b88bf7", - "uid": "ecc3eca0-66ec-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-c5cvg", - "secret": { - "secretName": "default-token-c5cvg", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "diliprnodejsonlogpodapp", - "image": "rdilip83/jsonlogger:v12", - "resources": { - "limits": { - "cpu": "20m", - "memory": "60Mi" - }, - "requests": { - "cpu": "10m", - "memory": "30Mi" - } - }, - "volumeMounts": [ - { - "name": "default-token-c5cvg", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - 
"serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-agentpool1-15159885-vmss000001", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-24T23:58:59Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:52Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:52Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-24T23:58:58Z" - } - ], - "hostIP": "10.240.0.65", - "podIP": "10.240.0.66", - "startTime": "2019-04-24T23:58:59Z", - "containerStatuses": [ - { - "name": "diliprnodejsonlogpodapp", - "state": { - "running": { - "startedAt": "2019-06-14T07:59:50Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 255, - "reason": "Error", - "startedAt": "2019-04-24T23:59:35Z", - "finishedAt": "2019-06-14T07:58:49Z", - "containerID": "docker://9669e88adb7e0431fb51223ec0ae76013f7f8809cb1f5125eb9426d707004e64" - } - }, - "ready": true, - "restartCount": 1, - "image": "rdilip83/jsonlogger:v12", - "imageID": "docker-pullable://rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", - "containerID": "docker://efdfd688957948aa6797a72a7b2256517a6867b690e9bc4ecf33723d5a190f4e" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "diliprnodejsonlog-5959b88bf7-k4dvh", - "generateName": 
"diliprnodejsonlog-5959b88bf7-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/diliprnodejsonlog-5959b88bf7-k4dvh", - "uid": "ecca1b71-66ec-11e9-a358-000d3a53d49f", - "resourceVersion": "7362259", - "creationTimestamp": "2019-04-24T23:58:58Z", - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "diliprnodejsonlog", - "pod-template-hash": "5959b88bf7" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "diliprnodejsonlog-5959b88bf7", - "uid": "ecc3eca0-66ec-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-c5cvg", - "secret": { - "secretName": "default-token-c5cvg", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "diliprnodejsonlogpodapp", - "image": "rdilip83/jsonlogger:v12", - "resources": { - "limits": { - "cpu": "20m", - "memory": "60Mi" - }, - "requests": { - "cpu": "10m", - "memory": "30Mi" - } - }, - "volumeMounts": [ - { - "name": "default-token-c5cvg", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-agentpool1-15159885-vmss000001", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - 
"lastProbeTime": null, - "lastTransitionTime": "2019-04-24T23:58:59Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:50Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:50Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-24T23:58:58Z" - } - ], - "hostIP": "10.240.0.65", - "podIP": "10.240.0.69", - "startTime": "2019-04-24T23:58:59Z", - "containerStatuses": [ - { - "name": "diliprnodejsonlogpodapp", - "state": { - "running": { - "startedAt": "2019-06-14T07:59:47Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 255, - "reason": "Error", - "startedAt": "2019-04-24T23:59:35Z", - "finishedAt": "2019-06-14T07:58:50Z", - "containerID": "docker://2d9d3776aea273be3b7c4f2f40065f30b4dc0dca669db8f9f83ca2930667f0ff" - } - }, - "ready": true, - "restartCount": 1, - "image": "rdilip83/jsonlogger:v12", - "imageID": "docker-pullable://rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", - "containerID": "docker://fa8dd2896fc2d8fc86a665e1e0c89ebc87ed1b976c61960ec01611534a8360f7" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "diliprnodejsonlog-5959b88bf7-lc9jh", - "generateName": "diliprnodejsonlog-5959b88bf7-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/diliprnodejsonlog-5959b88bf7-lc9jh", - "uid": "eccf6462-66ec-11e9-a358-000d3a53d49f", - "resourceVersion": "5110364", - "creationTimestamp": "2019-04-24T23:58:58Z", - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "diliprnodejsonlog", - "pod-template-hash": "5959b88bf7" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "diliprnodejsonlog-5959b88bf7", - "uid": "ecc3eca0-66ec-11e9-a358-000d3a53d49f", - 
"controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-c5cvg", - "secret": { - "secretName": "default-token-c5cvg", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "diliprnodejsonlogpodapp", - "image": "rdilip83/jsonlogger:v12", - "resources": { - "limits": { - "cpu": "20m", - "memory": "60Mi" - }, - "requests": { - "cpu": "10m", - "memory": "30Mi" - } - }, - "volumeMounts": [ - { - "name": "default-token-c5cvg", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-agentpool1-15159885-vmss000000", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-24T23:58:58Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-24T23:59:29Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-24T23:59:29Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-24T23:58:58Z" - } - ], - "hostIP": "10.240.0.34", - "podIP": "10.240.0.54", - "startTime": "2019-04-24T23:58:58Z", - "containerStatuses": [ - { - "name": 
"diliprnodejsonlogpodapp", - "state": { - "running": { - "startedAt": "2019-04-24T23:59:28Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "rdilip83/jsonlogger:v12", - "imageID": "docker-pullable://rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", - "containerID": "docker://daecdde84dc4f84318cd3502ce83f43ef9c864b1f8f2dcad88163b6eb8bc9d11" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "diliprnodejsonlog-5959b88bf7-rrvkx", - "generateName": "diliprnodejsonlog-5959b88bf7-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/diliprnodejsonlog-5959b88bf7-rrvkx", - "uid": "ecccf8cb-66ec-11e9-a358-000d3a53d49f", - "resourceVersion": "7362253", - "creationTimestamp": "2019-04-24T23:58:58Z", - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "diliprnodejsonlog", - "pod-template-hash": "5959b88bf7" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "diliprnodejsonlog-5959b88bf7", - "uid": "ecc3eca0-66ec-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-c5cvg", - "secret": { - "secretName": "default-token-c5cvg", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "diliprnodejsonlogpodapp", - "image": "rdilip83/jsonlogger:v12", - "resources": { - "limits": { - "cpu": "20m", - "memory": "60Mi" - }, - "requests": { - "cpu": "10m", - "memory": "30Mi" - } - }, - "volumeMounts": [ - { - "name": "default-token-c5cvg", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - 
"serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-agentpool1-15159885-vmss000001", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-24T23:58:59Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:49Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:49Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-24T23:58:58Z" - } - ], - "hostIP": "10.240.0.65", - "podIP": "10.240.0.82", - "startTime": "2019-04-24T23:58:59Z", - "containerStatuses": [ - { - "name": "diliprnodejsonlogpodapp", - "state": { - "running": { - "startedAt": "2019-06-14T07:59:45Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 255, - "reason": "Error", - "startedAt": "2019-04-24T23:59:35Z", - "finishedAt": "2019-06-14T07:58:50Z", - "containerID": "docker://184537356d8008b41e4abc884ca6c2b8bae85982d9be8b3eddcebfb0c03301b2" - } - }, - "ready": true, - "restartCount": 1, - "image": "rdilip83/jsonlogger:v12", - "imageID": "docker-pullable://rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", - "containerID": "docker://71e246dcbaabe0d58aed303879e5d03dd937ba1d8ab97f55bc55bb0b9099b536" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "azure-cni-networkmonitor-nf2sl", - "generateName": 
"azure-cni-networkmonitor-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/azure-cni-networkmonitor-nf2sl", - "uid": "398ac33e-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "5110349", - "creationTimestamp": "2019-04-17T04:28:18Z", - "labels": { - "controller-revision-hash": "57ccd9984c", - "k8s-app": "azure-cnms", - "pod-template-generation": "1" - }, - "annotations": { - "scheduler.alpha.kubernetes.io/critical-pod": "" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "azure-cni-networkmonitor", - "uid": "3968a5f4-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "log", - "hostPath": { - "path": "/var/log", - "type": "Directory" - } - }, - { - "name": "ebtables-rule-repo", - "hostPath": { - "path": "/var/run/", - "type": "Directory" - } - }, - { - "name": "telemetry", - "hostPath": { - "path": "/opt/cni/bin", - "type": "Directory" - } - }, - { - "name": "default-token-297b2", - "secret": { - "secretName": "default-token-297b2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "azure-cnms", - "image": "containernetworking/networkmonitor:v0.0.6", - "env": [ - { - "name": "HOSTNAME", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "spec.nodeName" - } - } - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "ebtables-rule-repo", - "mountPath": "/var/run" - }, - { - "name": "log", - "mountPath": "/var/log" - }, - { - "name": "telemetry", - "mountPath": "/opt/cni/bin" - }, - { - "name": "default-token-297b2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true, - "procMount": "Default" - } - } - ], - "restartPolicy": "Always", - 
"terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-agentpool1-15159885-vmss000000", - "hostNetwork": true, - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-agentpool1-15159885-vmss000000" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "key": "node-role.kubernetes.io/master", - "operator": "Equal", - "value": "true", - "effect": "NoSchedule" - }, - { - "operator": "Exists", - "effect": "NoExecute" - }, - { - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/network-unavailable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:18Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:25Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": 
null, - "lastTransitionTime": "2019-04-17T04:28:25Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:18Z" - } - ], - "hostIP": "10.240.0.34", - "podIP": "10.240.0.34", - "startTime": "2019-04-17T04:28:18Z", - "containerStatuses": [ - { - "name": "azure-cnms", - "state": { - "running": { - "startedAt": "2019-04-17T04:28:24Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "containernetworking/networkmonitor:v0.0.6", - "imageID": "docker-pullable://containernetworking/networkmonitor@sha256:d875511410502c3e37804e1f313cc2b0a03d7a03d3d5e6adaf8994b753a76f8e", - "containerID": "docker://05e533e79958c1e28594be54effb8191d22648d1b2b1085a327e84f8eb203222" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "azure-cni-networkmonitor-wnkxs", - "generateName": "azure-cni-networkmonitor-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/azure-cni-networkmonitor-wnkxs", - "uid": "39887ab5-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "7362209", - "creationTimestamp": "2019-04-17T04:28:18Z", - "labels": { - "controller-revision-hash": "57ccd9984c", - "k8s-app": "azure-cnms", - "pod-template-generation": "1" - }, - "annotations": { - "scheduler.alpha.kubernetes.io/critical-pod": "" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "azure-cni-networkmonitor", - "uid": "3968a5f4-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "log", - "hostPath": { - "path": "/var/log", - "type": "Directory" - } - }, - { - "name": "ebtables-rule-repo", - "hostPath": { - "path": "/var/run/", - "type": "Directory" - } - }, - { - "name": "telemetry", - "hostPath": { - "path": "/opt/cni/bin", - "type": "Directory" - } - }, - { - "name": "default-token-297b2", - "secret": { - 
"secretName": "default-token-297b2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "azure-cnms", - "image": "containernetworking/networkmonitor:v0.0.6", - "env": [ - { - "name": "HOSTNAME", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "spec.nodeName" - } - } - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "ebtables-rule-repo", - "mountPath": "/var/run" - }, - { - "name": "log", - "mountPath": "/var/log" - }, - { - "name": "telemetry", - "mountPath": "/opt/cni/bin" - }, - { - "name": "default-token-297b2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true, - "procMount": "Default" - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-agentpool1-15159885-vmss000001", - "hostNetwork": true, - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-agentpool1-15159885-vmss000001" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "key": "node-role.kubernetes.io/master", - "operator": "Equal", - "value": "true", - "effect": "NoSchedule" - }, - { - "operator": "Exists", - "effect": "NoExecute" - }, - { - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - 
"effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/network-unavailable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:18Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:39Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:39Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:18Z" - } - ], - "hostIP": "10.240.0.65", - "podIP": "10.240.0.65", - "startTime": "2019-04-17T04:28:18Z", - "containerStatuses": [ - { - "name": "azure-cnms", - "state": { - "running": { - "startedAt": "2019-06-14T07:59:19Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 255, - "reason": "Error", - "startedAt": "2019-04-17T04:28:25Z", - "finishedAt": "2019-06-14T07:58:50Z", - "containerID": "docker://7ff49388bac2a94afec799ec20fe4ecfea439eaab51400cb543f0c4933f6812d" - } - }, - "ready": true, - "restartCount": 1, - "image": "containernetworking/networkmonitor:v0.0.6", - "imageID": "docker-pullable://containernetworking/networkmonitor@sha256:d875511410502c3e37804e1f313cc2b0a03d7a03d3d5e6adaf8994b753a76f8e", - "containerID": "docker://bf0154934a9a3080e3df001e9ed855acf8efb17dd18af8e8227a4f58a354c83a" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - 
"name": "azure-cni-networkmonitor-zjztb", - "generateName": "azure-cni-networkmonitor-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/azure-cni-networkmonitor-zjztb", - "uid": "398adddd-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "474", - "creationTimestamp": "2019-04-17T04:28:18Z", - "labels": { - "controller-revision-hash": "57ccd9984c", - "k8s-app": "azure-cnms", - "pod-template-generation": "1" - }, - "annotations": { - "scheduler.alpha.kubernetes.io/critical-pod": "" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "azure-cni-networkmonitor", - "uid": "3968a5f4-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "log", - "hostPath": { - "path": "/var/log", - "type": "Directory" - } - }, - { - "name": "ebtables-rule-repo", - "hostPath": { - "path": "/var/run/", - "type": "Directory" - } - }, - { - "name": "telemetry", - "hostPath": { - "path": "/opt/cni/bin", - "type": "Directory" - } - }, - { - "name": "default-token-297b2", - "secret": { - "secretName": "default-token-297b2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "azure-cnms", - "image": "containernetworking/networkmonitor:v0.0.6", - "env": [ - { - "name": "HOSTNAME", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "spec.nodeName" - } - } - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "ebtables-rule-repo", - "mountPath": "/var/run" - }, - { - "name": "log", - "mountPath": "/var/log" - }, - { - "name": "telemetry", - "mountPath": "/opt/cni/bin" - }, - { - "name": "default-token-297b2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true, - "procMount": "Default" - } - } - 
], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-master-15159885-0", - "hostNetwork": true, - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-master-15159885-0" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "key": "node-role.kubernetes.io/master", - "operator": "Equal", - "value": "true", - "effect": "NoSchedule" - }, - { - "operator": "Exists", - "effect": "NoExecute" - }, - { - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/network-unavailable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:18Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:22Z" - }, - { - "type": "ContainersReady", - "status": "True", - 
"lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:22Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:18Z" - } - ], - "hostIP": "10.255.255.5", - "podIP": "10.255.255.5", - "startTime": "2019-04-17T04:28:18Z", - "containerStatuses": [ - { - "name": "azure-cnms", - "state": { - "running": { - "startedAt": "2019-04-17T04:28:21Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "containernetworking/networkmonitor:v0.0.6", - "imageID": "docker-pullable://containernetworking/networkmonitor@sha256:d875511410502c3e37804e1f313cc2b0a03d7a03d3d5e6adaf8994b753a76f8e", - "containerID": "docker://4477e44f375bb4402c6a47794e8ba257f99548502735519c2cb4c4adf4c91fd4" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "azure-ip-masq-agent-5jxwv", - "generateName": "azure-ip-masq-agent-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/azure-ip-masq-agent-5jxwv", - "uid": "3c83177c-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "668", - "creationTimestamp": "2019-04-17T04:28:23Z", - "labels": { - "controller-revision-hash": "85c7cb54c8", - "k8s-app": "azure-ip-masq-agent", - "pod-template-generation": "1", - "tier": "node" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "azure-ip-masq-agent", - "uid": "3c7be430-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "azure-ip-masq-agent-config-volume", - "configMap": { - "name": "azure-ip-masq-agent-config", - "defaultMode": 420 - } - }, - { - "name": "default-token-297b2", - "secret": { - "secretName": "default-token-297b2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "azure-ip-masq-agent", - "image": "k8s.gcr.io/ip-masq-agent-amd64:v2.0.0", - "resources": { - "limits": { - 
"cpu": "50m", - "memory": "250Mi" - }, - "requests": { - "cpu": "50m", - "memory": "50Mi" - } - }, - "volumeMounts": [ - { - "name": "azure-ip-masq-agent-config-volume", - "mountPath": "/etc/config" - }, - { - "name": "default-token-297b2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true, - "procMount": "Default" - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-master-15159885-0", - "hostNetwork": true, - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-master-15159885-0" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "key": "node-role.kubernetes.io/master", - "operator": "Equal", - "value": "true", - "effect": "NoSchedule" - }, - { - "operator": "Exists", - "effect": "NoExecute" - }, - { - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": 
"node.kubernetes.io/network-unavailable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:23Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:29Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:29Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:23Z" - } - ], - "hostIP": "10.255.255.5", - "podIP": "10.255.255.5", - "startTime": "2019-04-17T04:28:23Z", - "containerStatuses": [ - { - "name": "azure-ip-masq-agent", - "state": { - "running": { - "startedAt": "2019-04-17T04:28:28Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0", - "imageID": "docker-pullable://gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", - "containerID": "docker://8df696bf9c1380bb36bbea1214f493c2c3020a360f95635d20b1ff11bd09122f" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "azure-ip-masq-agent-7l7br", - "generateName": "azure-ip-masq-agent-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/azure-ip-masq-agent-7l7br", - "uid": "3c83cb0c-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "5110351", - "creationTimestamp": "2019-04-17T04:28:23Z", - "labels": { - "controller-revision-hash": "85c7cb54c8", - "k8s-app": "azure-ip-masq-agent", - "pod-template-generation": "1", - "tier": "node" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": 
"azure-ip-masq-agent", - "uid": "3c7be430-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "azure-ip-masq-agent-config-volume", - "configMap": { - "name": "azure-ip-masq-agent-config", - "defaultMode": 420 - } - }, - { - "name": "default-token-297b2", - "secret": { - "secretName": "default-token-297b2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "azure-ip-masq-agent", - "image": "k8s.gcr.io/ip-masq-agent-amd64:v2.0.0", - "resources": { - "limits": { - "cpu": "50m", - "memory": "250Mi" - }, - "requests": { - "cpu": "50m", - "memory": "50Mi" - } - }, - "volumeMounts": [ - { - "name": "azure-ip-masq-agent-config-volume", - "mountPath": "/etc/config" - }, - { - "name": "default-token-297b2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true, - "procMount": "Default" - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-agentpool1-15159885-vmss000000", - "hostNetwork": true, - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-agentpool1-15159885-vmss000000" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "key": "node-role.kubernetes.io/master", - "operator": "Equal", - "value": "true", - "effect": "NoSchedule" - }, - { - "operator": "Exists", - "effect": "NoExecute" 
- }, - { - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/network-unavailable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:23Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:29Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:29Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:23Z" - } - ], - "hostIP": "10.240.0.34", - "podIP": "10.240.0.34", - "startTime": "2019-04-17T04:28:23Z", - "containerStatuses": [ - { - "name": "azure-ip-masq-agent", - "state": { - "running": { - "startedAt": "2019-04-17T04:28:28Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0", - "imageID": "docker-pullable://gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", - "containerID": "docker://a47cc35959cf657fa5d4f422008ea2600ed11d16fb24ce3caf8c913df9f558b4" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - 
}, - { - "metadata": { - "name": "azure-ip-masq-agent-dvnl8", - "generateName": "azure-ip-masq-agent-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/azure-ip-masq-agent-dvnl8", - "uid": "3c7e9b23-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "7362234", - "creationTimestamp": "2019-04-17T04:28:23Z", - "labels": { - "controller-revision-hash": "85c7cb54c8", - "k8s-app": "azure-ip-masq-agent", - "pod-template-generation": "1", - "tier": "node" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "azure-ip-masq-agent", - "uid": "3c7be430-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "azure-ip-masq-agent-config-volume", - "configMap": { - "name": "azure-ip-masq-agent-config", - "defaultMode": 420 - } - }, - { - "name": "default-token-297b2", - "secret": { - "secretName": "default-token-297b2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "azure-ip-masq-agent", - "image": "k8s.gcr.io/ip-masq-agent-amd64:v2.0.0", - "resources": { - "limits": { - "cpu": "50m", - "memory": "250Mi" - }, - "requests": { - "cpu": "50m", - "memory": "50Mi" - } - }, - "volumeMounts": [ - { - "name": "azure-ip-masq-agent-config-volume", - "mountPath": "/etc/config" - }, - { - "name": "default-token-297b2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true, - "procMount": "Default" - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-agentpool1-15159885-vmss000001", - "hostNetwork": true, - 
"securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-agentpool1-15159885-vmss000001" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "key": "node-role.kubernetes.io/master", - "operator": "Equal", - "value": "true", - "effect": "NoSchedule" - }, - { - "operator": "Exists", - "effect": "NoExecute" - }, - { - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/network-unavailable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:24Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:47Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:47Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:23Z" - } - ], - "hostIP": "10.240.0.65", - "podIP": "10.240.0.65", - "startTime": 
"2019-04-17T04:28:24Z", - "containerStatuses": [ - { - "name": "azure-ip-masq-agent", - "state": { - "running": { - "startedAt": "2019-06-14T07:59:39Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 255, - "reason": "Error", - "startedAt": "2019-04-17T04:28:29Z", - "finishedAt": "2019-06-14T07:58:50Z", - "containerID": "docker://17afffc794c5b786053a388ffa9b2d7a3bdfffe5423ed884c8c1c49ff6437da1" - } - }, - "ready": true, - "restartCount": 1, - "image": "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0", - "imageID": "docker-pullable://gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", - "containerID": "docker://30b9b4ce91cc9c290f51bf3411d91369d7b5689d6927f89f939c12b827685ed0" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "blobfuse-flexvol-installer-pn29n", - "generateName": "blobfuse-flexvol-installer-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/blobfuse-flexvol-installer-pn29n", - "uid": "3c55a79a-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "7362256", - "creationTimestamp": "2019-04-17T04:28:22Z", - "labels": { - "controller-revision-hash": "b7d447cd7", - "kubernetes.io/cluster-service": "true", - "name": "blobfuse", - "pod-template-generation": "1" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "blobfuse-flexvol-installer", - "uid": "3c4a7413-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "varlog", - "hostPath": { - "path": "/var/log/", - "type": "" - } - }, - { - "name": "volplugins", - "hostPath": { - "path": "/etc/kubernetes/volumeplugins/", - "type": "" - } - }, - { - "name": "default-token-297b2", - "secret": { - "secretName": "default-token-297b2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": 
"blobfuse-flexvol-installer", - "image": "mcr.microsoft.com/k8s/flexvolume/blobfuse-flexvolume:1.0.8", - "resources": { - "limits": { - "cpu": "50m", - "memory": "100Mi" - }, - "requests": { - "cpu": "50m", - "memory": "100Mi" - } - }, - "volumeMounts": [ - { - "name": "volplugins", - "mountPath": "/etc/kubernetes/volumeplugins/" - }, - { - "name": "varlog", - "mountPath": "/var/log/" - }, - { - "name": "default-token-297b2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-agentpool1-15159885-vmss000001", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-agentpool1-15159885-vmss000001" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": 
"2019-04-17T04:28:24Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:49Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:49Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:22Z" - } - ], - "hostIP": "10.240.0.65", - "podIP": "10.240.0.80", - "startTime": "2019-04-17T04:28:24Z", - "containerStatuses": [ - { - "name": "blobfuse-flexvol-installer", - "state": { - "running": { - "startedAt": "2019-06-14T07:59:46Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 255, - "reason": "Error", - "startedAt": "2019-04-17T04:28:29Z", - "finishedAt": "2019-06-14T07:58:49Z", - "containerID": "docker://a3a6f00c2d127e8edcf32942f827a9c80c1d19119a22758f1d7663fb28e2b899" - } - }, - "ready": true, - "restartCount": 1, - "image": "mcr.microsoft.com/k8s/flexvolume/blobfuse-flexvolume:1.0.8", - "imageID": "docker-pullable://mcr.microsoft.com/k8s/flexvolume/blobfuse-flexvolume@sha256:23d8c6033f02a1ecad05127ebdc931bb871264228661bc122704b0974e4d9fdd", - "containerID": "docker://fe2e2fde1c62fd8cfc28688c6168920e664578bb97eeb99609f2d0277c46812c" - } - ], - "qosClass": "Guaranteed" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "blobfuse-flexvol-installer-sh7vs", - "generateName": "blobfuse-flexvol-installer-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/blobfuse-flexvol-installer-sh7vs", - "uid": "3c4e8b3f-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "5110368", - "creationTimestamp": "2019-04-17T04:28:22Z", - "labels": { - "controller-revision-hash": "b7d447cd7", - "kubernetes.io/cluster-service": "true", - "name": "blobfuse", - "pod-template-generation": "1" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "blobfuse-flexvol-installer", - "uid": 
"3c4a7413-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "varlog", - "hostPath": { - "path": "/var/log/", - "type": "" - } - }, - { - "name": "volplugins", - "hostPath": { - "path": "/etc/kubernetes/volumeplugins/", - "type": "" - } - }, - { - "name": "default-token-297b2", - "secret": { - "secretName": "default-token-297b2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "blobfuse-flexvol-installer", - "image": "mcr.microsoft.com/k8s/flexvolume/blobfuse-flexvolume:1.0.8", - "resources": { - "limits": { - "cpu": "50m", - "memory": "100Mi" - }, - "requests": { - "cpu": "50m", - "memory": "100Mi" - } - }, - "volumeMounts": [ - { - "name": "volplugins", - "mountPath": "/etc/kubernetes/volumeplugins/" - }, - { - "name": "varlog", - "mountPath": "/var/log/" - }, - { - "name": "default-token-297b2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-agentpool1-15159885-vmss000000", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-agentpool1-15159885-vmss000000" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": 
"node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:22Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:29Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:29Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:22Z" - } - ], - "hostIP": "10.240.0.34", - "podIP": "10.240.0.42", - "startTime": "2019-04-17T04:28:22Z", - "containerStatuses": [ - { - "name": "blobfuse-flexvol-installer", - "state": { - "running": { - "startedAt": "2019-04-17T04:28:28Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "mcr.microsoft.com/k8s/flexvolume/blobfuse-flexvolume:1.0.8", - "imageID": "docker-pullable://mcr.microsoft.com/k8s/flexvolume/blobfuse-flexvolume@sha256:23d8c6033f02a1ecad05127ebdc931bb871264228661bc122704b0974e4d9fdd", - "containerID": "docker://11c00901b3445daca28933112c94d9921c9e8daec48052d2188a54908337a1fe" - } - ], - "qosClass": "Guaranteed" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "coredns-69c4fccc6c-vqjd9", - "generateName": "coredns-69c4fccc6c-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/coredns-69c4fccc6c-vqjd9", - "uid": "3d100836-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "672", - "creationTimestamp": "2019-04-17T04:28:24Z", - "labels": { - "k8s-app": "kube-dns", - "pod-template-hash": "69c4fccc6c" - }, - 
"annotations": { - "seccomp.security.alpha.kubernetes.io/pod": "docker/default" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "coredns-69c4fccc6c", - "uid": "3c6392eb-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "tmp", - "emptyDir": {} - }, - { - "name": "config-volume", - "configMap": { - "name": "coredns", - "items": [ - { - "key": "Corefile", - "path": "Corefile" - } - ], - "defaultMode": 420 - } - }, - { - "name": "coredns-token-ltgr2", - "secret": { - "secretName": "coredns-token-ltgr2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "coredns", - "image": "k8s.gcr.io/coredns:1.2.2", - "args": [ - "-conf", - "/etc/coredns/Corefile" - ], - "ports": [ - { - "name": "dns", - "containerPort": 53, - "protocol": "UDP" - }, - { - "name": "dns-tcp", - "containerPort": 53, - "protocol": "TCP" - }, - { - "name": "metrics", - "containerPort": 9153, - "protocol": "TCP" - } - ], - "resources": { - "limits": { - "memory": "170Mi" - }, - "requests": { - "cpu": "100m", - "memory": "70Mi" - } - }, - "volumeMounts": [ - { - "name": "config-volume", - "readOnly": true, - "mountPath": "/etc/coredns" - }, - { - "name": "tmp", - "mountPath": "/tmp" - }, - { - "name": "coredns-token-ltgr2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "httpGet": { - "path": "/health", - "port": 8080, - "scheme": "HTTP" - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 5 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "capabilities": { - "add": [ - "NET_BIND_SERVICE" - ], - "drop": [ - "all" - ] - }, - "readOnlyRootFilesystem": true, - "allowPrivilegeEscalation": false, - "procMount": "Default" - 
} - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "Default", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "coredns", - "serviceAccount": "coredns", - "nodeName": "k8s-master-15159885-0", - "securityContext": {}, - "affinity": { - "podAntiAffinity": { - "preferredDuringSchedulingIgnoredDuringExecution": [ - { - "weight": 10, - "podAffinityTerm": { - "labelSelector": { - "matchExpressions": [ - { - "key": "k8s-app", - "operator": "In", - "values": [ - "kube-dns" - ] - } - ] - }, - "topologyKey": "failure-domain.beta.kubernetes.io/zone" - } - }, - { - "weight": 5, - "podAffinityTerm": { - "labelSelector": { - "matchExpressions": [ - { - "key": "k8s-app", - "operator": "In", - "values": [ - "kube-dns" - ] - } - ] - }, - "topologyKey": "kubernetes.io/hostname" - } - } - ] - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node-role.kubernetes.io/master", - "effect": "NoSchedule" - }, - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "operator": "Exists", - "effect": "NoExecute" - }, - { - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:24Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:29Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:29Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:24Z" - } - ], - "hostIP": "10.255.255.5", - "podIP": "10.240.0.11", - "startTime": "2019-04-17T04:28:24Z", - "containerStatuses": [ - { - "name": "coredns", - "state": { - "running": { - 
"startedAt": "2019-04-17T04:28:29Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/coredns:1.2.2", - "imageID": "docker-pullable://k8s.gcr.io/coredns@sha256:3e2be1cec87aca0b74b7668bbe8c02964a95a402e45ceb51b2252629d608d03a", - "containerID": "docker://27eafce23c91980dd9dcab79f9afd23ba4cfd934ff9a4d1e6c91890adfdbcbdf" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "heapster-6f6cbcfcf6-nq9jg", - "generateName": "heapster-6f6cbcfcf6-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/heapster-6f6cbcfcf6-nq9jg", - "uid": "3cd3a1fa-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "7362273", - "creationTimestamp": "2019-04-17T04:28:23Z", - "labels": { - "k8s-app": "heapster", - "pod-template-hash": "6f6cbcfcf6" - }, - "annotations": { - "scheduler.alpha.kubernetes.io/critical-pod": "" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "heapster-6f6cbcfcf6", - "uid": "398f0ce1-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "heapster-config-volume", - "configMap": { - "name": "heapster-config", - "defaultMode": 420 - } - }, - { - "name": "heapster-token-xhtkx", - "secret": { - "secretName": "heapster-token-xhtkx", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "heapster", - "image": "k8s.gcr.io/heapster-amd64:v1.5.4", - "command": [ - "/heapster", - "--source=kubernetes.summary_api:''" - ], - "resources": { - "limits": { - "cpu": "88m", - "memory": "204Mi" - }, - "requests": { - "cpu": "88m", - "memory": "204Mi" - } - }, - "volumeMounts": [ - { - "name": "heapster-token-xhtkx", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "httpGet": { - "path": "/healthz", - "port": 8082, - "scheme": "HTTP" - }, - "initialDelaySeconds": 
180, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - }, - { - "name": "heapster-nanny", - "image": "k8s.gcr.io/addon-resizer:1.8.4", - "command": [ - "/pod_nanny", - "--config-dir=/etc/config", - "--cpu=80m", - "--extra-cpu=0.5m", - "--memory=140Mi", - "--extra-memory=4Mi", - "--threshold=5", - "--deployment=heapster", - "--container=heapster", - "--poll-period=300000", - "--estimator=exponential" - ], - "env": [ - { - "name": "MY_POD_NAME", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "metadata.name" - } - } - }, - { - "name": "MY_POD_NAMESPACE", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "metadata.namespace" - } - } - } - ], - "resources": { - "limits": { - "cpu": "88m", - "memory": "204Mi" - }, - "requests": { - "cpu": "88m", - "memory": "204Mi" - } - }, - "volumeMounts": [ - { - "name": "heapster-config-volume", - "mountPath": "/etc/config" - }, - { - "name": "heapster-token-xhtkx", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "heapster", - "serviceAccount": "heapster", - "nodeName": "k8s-agentpool1-15159885-vmss000001", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": 
"NoExecute", - "tolerationSeconds": 300 - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:24Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:52Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:52Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:23Z" - } - ], - "hostIP": "10.240.0.65", - "podIP": "10.240.0.67", - "startTime": "2019-04-17T04:28:24Z", - "containerStatuses": [ - { - "name": "heapster", - "state": { - "running": { - "startedAt": "2019-06-14T07:59:46Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 255, - "reason": "Error", - "startedAt": "2019-04-17T04:28:29Z", - "finishedAt": "2019-06-14T07:58:50Z", - "containerID": "docker://4fe82e9788f21c2659397e65b1b17b198fcf9039eaa8d36fade3802f69e4fa6e" - } - }, - "ready": true, - "restartCount": 1, - "image": "k8s.gcr.io/heapster-amd64:v1.5.4", - "imageID": "docker-pullable://k8s.gcr.io/heapster-amd64@sha256:dccaabb0c20cf05c29baefa1e9bf0358b083ccc0fab492b9b3b47fb7e4db5472", - "containerID": "docker://c3cfed70a5e504c1237f919bf74cbc304f343b3daf4fd7259a4030d8d5595afb" - }, - { - "name": "heapster-nanny", - "state": { - "running": { - "startedAt": "2019-06-14T07:59:52Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 255, - "reason": "Error", - "startedAt": "2019-04-17T04:28:31Z", - "finishedAt": "2019-06-14T07:58:50Z", - "containerID": "docker://bf60e749edf895279e23c7476749e54a400d112c13b305ee3c14b0e1566b912e" - } - }, - "ready": true, - "restartCount": 1, - "image": "k8s.gcr.io/addon-resizer:1.8.4", - "imageID": 
"docker-pullable://k8s.gcr.io/addon-resizer@sha256:a31822f30e947885d038812f4a5a5675e72f92c06cef17b1989c80426aa89012", - "containerID": "docker://8097a0688928cb82264ec0b69246eafd5e3bca9fe526d3b997ae2ef2a601aa9b" - } - ], - "qosClass": "Guaranteed" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "keyvault-flexvolume-tcxxk", - "generateName": "keyvault-flexvolume-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/keyvault-flexvolume-tcxxk", - "uid": "398d186b-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "5110367", - "creationTimestamp": "2019-04-17T04:28:18Z", - "labels": { - "addonmanager.kubernetes.io/mode": "EnsureExists", - "app": "keyvault-flexvolume", - "controller-revision-hash": "57fd55fc4c", - "kubernetes.io/cluster-service": "true", - "pod-template-generation": "1" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "keyvault-flexvolume", - "uid": "396bd1ea-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "volplugins", - "hostPath": { - "path": "/etc/kubernetes/volumeplugins", - "type": "" - } - }, - { - "name": "default-token-297b2", - "secret": { - "secretName": "default-token-297b2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "keyvault-flexvolume", - "image": "mcr.microsoft.com/k8s/flexvolume/keyvault-flexvolume:v0.0.7", - "env": [ - { - "name": "TARGET_DIR", - "value": "/etc/kubernetes/volumeplugins" - } - ], - "resources": { - "limits": { - "cpu": "50m", - "memory": "100Mi" - }, - "requests": { - "cpu": "50m", - "memory": "100Mi" - } - }, - "volumeMounts": [ - { - "name": "volplugins", - "mountPath": "/etc/kubernetes/volumeplugins" - }, - { - "name": "default-token-297b2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - 
"terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-agentpool1-15159885-vmss000000", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-agentpool1-15159885-vmss000000" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:18Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:25Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:25Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:18Z" - } - ], - "hostIP": "10.240.0.34", - "podIP": "10.240.0.46", - "startTime": "2019-04-17T04:28:18Z", - "containerStatuses": [ - { - "name": "keyvault-flexvolume", - "state": { - "running": { - 
"startedAt": "2019-04-17T04:28:25Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "mcr.microsoft.com/k8s/flexvolume/keyvault-flexvolume:v0.0.7", - "imageID": "docker-pullable://mcr.microsoft.com/k8s/flexvolume/keyvault-flexvolume@sha256:4fd30d43947d4a54fc89ead7985beecfd3c9b2a93a0655a373b1608ab90bd5af", - "containerID": "docker://32a3b44a0a7e8ed53867743709d84db19a0bf7c6adacc28b43221c39a9b2c028" - } - ], - "qosClass": "Guaranteed" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "keyvault-flexvolume-wxzvc", - "generateName": "keyvault-flexvolume-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/keyvault-flexvolume-wxzvc", - "uid": "398a00ce-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "7362237", - "creationTimestamp": "2019-04-17T04:28:18Z", - "labels": { - "addonmanager.kubernetes.io/mode": "EnsureExists", - "app": "keyvault-flexvolume", - "controller-revision-hash": "57fd55fc4c", - "kubernetes.io/cluster-service": "true", - "pod-template-generation": "1" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "keyvault-flexvolume", - "uid": "396bd1ea-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "volplugins", - "hostPath": { - "path": "/etc/kubernetes/volumeplugins", - "type": "" - } - }, - { - "name": "default-token-297b2", - "secret": { - "secretName": "default-token-297b2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "keyvault-flexvolume", - "image": "mcr.microsoft.com/k8s/flexvolume/keyvault-flexvolume:v0.0.7", - "env": [ - { - "name": "TARGET_DIR", - "value": "/etc/kubernetes/volumeplugins" - } - ], - "resources": { - "limits": { - "cpu": "50m", - "memory": "100Mi" - }, - "requests": { - "cpu": "50m", - "memory": "100Mi" - } - }, - "volumeMounts": [ - { - "name": "volplugins", - "mountPath": 
"/etc/kubernetes/volumeplugins" - }, - { - "name": "default-token-297b2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-agentpool1-15159885-vmss000001", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-agentpool1-15159885-vmss000001" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:18Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:47Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:47Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": 
"2019-04-17T04:28:18Z" - } - ], - "hostIP": "10.240.0.65", - "podIP": "10.240.0.68", - "startTime": "2019-04-17T04:28:18Z", - "containerStatuses": [ - { - "name": "keyvault-flexvolume", - "state": { - "running": { - "startedAt": "2019-06-14T07:59:38Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 255, - "reason": "Error", - "startedAt": "2019-04-17T04:28:26Z", - "finishedAt": "2019-06-14T07:58:50Z", - "containerID": "docker://622bb2e556297f093a67c213a22adde5c0c1202be701fa5685e8801267c2a3e3" - } - }, - "ready": true, - "restartCount": 1, - "image": "mcr.microsoft.com/k8s/flexvolume/keyvault-flexvolume:v0.0.7", - "imageID": "docker-pullable://mcr.microsoft.com/k8s/flexvolume/keyvault-flexvolume@sha256:4fd30d43947d4a54fc89ead7985beecfd3c9b2a93a0655a373b1608ab90bd5af", - "containerID": "docker://ab2ce04c40220650f742f91297c317cbcb71fbd6a6d0d9909bb4c2578a8a2bc7" - } - ], - "qosClass": "Guaranteed" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "kube-addon-manager-k8s-master-15159885-0", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/kube-addon-manager-k8s-master-15159885-0", - "uid": "531eee2e-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "752", - "creationTimestamp": "2019-04-17T04:29:01Z", - "annotations": { - "kubernetes.io/config.hash": "9b6d7694dd69e6f24ee489db7850e74c", - "kubernetes.io/config.mirror": "9b6d7694dd69e6f24ee489db7850e74c", - "kubernetes.io/config.seen": "2019-04-17T04:27:21.914647772Z", - "kubernetes.io/config.source": "file" - } - }, - "spec": { - "volumes": [ - { - "name": "addons", - "hostPath": { - "path": "/etc/kubernetes/addons", - "type": "" - } - }, - { - "name": "msi", - "hostPath": { - "path": "/var/lib/waagent/ManagedIdentity-Settings", - "type": "" - } - } - ], - "containers": [ - { - "name": "kube-addon-manager", - "image": "k8s.gcr.io/kube-addon-manager-amd64:v8.7", - "resources": { - "requests": { - "cpu": "5m", - "memory": "50Mi" - } - }, - 
"volumeMounts": [ - { - "name": "addons", - "readOnly": true, - "mountPath": "/etc/kubernetes/addons" - }, - { - "name": "msi", - "readOnly": true, - "mountPath": "/var/lib/waagent/ManagedIdentity-Settings" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeName": "k8s-master-15159885-0", - "hostNetwork": true, - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "operator": "Exists", - "effect": "NoExecute" - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:27:23Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:27:33Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:27:33Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:27:23Z" - } - ], - "hostIP": "10.255.255.5", - "podIP": "10.255.255.5", - "startTime": "2019-04-17T04:27:23Z", - "containerStatuses": [ - { - "name": "kube-addon-manager", - "state": { - "running": { - "startedAt": "2019-04-17T04:27:32Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/kube-addon-manager-amd64:v8.7", - "imageID": "docker-pullable://k8s.gcr.io/kube-addon-manager-amd64@sha256:d53486c3a0b49ebee019932878dc44232735d5622a51dbbdcec7124199020d09", - "containerID": "docker://532518ffaf3a046fc478091d8341f0855a3152a01ef630aa28f5347f9757b3a2" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "kube-apiserver-k8s-master-15159885-0", - "namespace": "kube-system", 
- "selfLink": "/api/v1/namespaces/kube-system/pods/kube-apiserver-k8s-master-15159885-0", - "uid": "4ef304e5-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "753", - "creationTimestamp": "2019-04-17T04:28:54Z", - "labels": { - "component": "kube-apiserver", - "tier": "control-plane" - }, - "annotations": { - "kubernetes.io/config.hash": "ac8862219c876924f19eed3f22c7f33f", - "kubernetes.io/config.mirror": "ac8862219c876924f19eed3f22c7f33f", - "kubernetes.io/config.seen": "2019-04-17T04:27:21.914656172Z", - "kubernetes.io/config.source": "file" - } - }, - "spec": { - "volumes": [ - { - "name": "etc-kubernetes", - "hostPath": { - "path": "/etc/kubernetes", - "type": "" - } - }, - { - "name": "var-lib-kubelet", - "hostPath": { - "path": "/var/lib/kubelet", - "type": "" - } - }, - { - "name": "msi", - "hostPath": { - "path": "/var/lib/waagent/ManagedIdentity-Settings", - "type": "" - } - }, - { - "name": "sock", - "hostPath": { - "path": "/opt", - "type": "" - } - }, - { - "name": "auditlog", - "hostPath": { - "path": "/var/log/kubeaudit", - "type": "" - } - } - ], - "containers": [ - { - "name": "kube-apiserver", - "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", - "command": [ - "/hyperkube", - "apiserver" - ], - "args": [ - "--advertise-address=10.255.255.5", - "--allow-privileged=true", - "--anonymous-auth=false", - "--audit-log-maxage=30", - "--audit-log-maxbackup=10", - "--audit-log-maxsize=100", - "--audit-log-path=/var/log/kubeaudit/audit.log", - "--audit-policy-file=/etc/kubernetes/addons/audit-policy.yaml", - "--authorization-mode=Node,RBAC", - "--bind-address=0.0.0.0", - "--client-ca-file=/etc/kubernetes/certs/ca.crt", - "--cloud-config=/etc/kubernetes/azure.json", - "--cloud-provider=azure", - "--enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,ValidatingAdmissionWebhook,ResourceQuota,ExtendedResourceToleration", - "--enable-bootstrap-token-auth=true", - 
"--etcd-cafile=/etc/kubernetes/certs/ca.crt", - "--etcd-certfile=/etc/kubernetes/certs/etcdclient.crt", - "--etcd-keyfile=/etc/kubernetes/certs/etcdclient.key", - "--etcd-servers=https://127.0.0.1:2379", - "--insecure-port=8080", - "--kubelet-client-certificate=/etc/kubernetes/certs/client.crt", - "--kubelet-client-key=/etc/kubernetes/certs/client.key", - "--profiling=false", - "--proxy-client-cert-file=/etc/kubernetes/certs/proxy.crt", - "--proxy-client-key-file=/etc/kubernetes/certs/proxy.key", - "--repair-malformed-updates=false", - "--requestheader-allowed-names=", - "--requestheader-client-ca-file=/etc/kubernetes/certs/proxy-ca.crt", - "--requestheader-extra-headers-prefix=X-Remote-Extra-", - "--requestheader-group-headers=X-Remote-Group", - "--requestheader-username-headers=X-Remote-User", - "--secure-port=443", - "--service-account-key-file=/etc/kubernetes/certs/apiserver.key", - "--service-account-lookup=true", - "--service-cluster-ip-range=10.0.0.0/16", - "--storage-backend=etcd3", - "--tls-cert-file=/etc/kubernetes/certs/apiserver.crt", - "--tls-cipher-suites=TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA", - "--tls-private-key-file=/etc/kubernetes/certs/apiserver.key", - "--v=4" - ], - "resources": {}, - "volumeMounts": [ - { - "name": "etc-kubernetes", - "mountPath": "/etc/kubernetes" - }, - { - "name": "var-lib-kubelet", - "mountPath": "/var/lib/kubelet" - }, - { - "name": "msi", - "readOnly": true, - "mountPath": "/var/lib/waagent/ManagedIdentity-Settings" - }, - { - "name": "sock", - "mountPath": "/opt" - }, - { - "name": "auditlog", - "mountPath": "/var/log/kubeaudit" - } - ], - "terminationMessagePath": 
"/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeName": "k8s-master-15159885-0", - "hostNetwork": true, - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "operator": "Exists", - "effect": "NoExecute" - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:27:23Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:27:32Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:27:32Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:27:23Z" - } - ], - "hostIP": "10.255.255.5", - "podIP": "10.255.255.5", - "startTime": "2019-04-17T04:27:23Z", - "containerStatuses": [ - { - "name": "kube-apiserver", - "state": { - "running": { - "startedAt": "2019-04-17T04:27:32Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", - "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", - "containerID": "docker://39ddc73db1132efddaaac859c77e2e2dbbe7af97e8de6340f45b56e5fa26d9ca" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "kube-controller-manager-k8s-master-15159885-0", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/kube-controller-manager-k8s-master-15159885-0", - "uid": "4e5a8160-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "7367800", - "creationTimestamp": "2019-04-17T04:28:53Z", - "labels": { - 
"component": "kube-controller-manager", - "tier": "control-plane" - }, - "annotations": { - "kubernetes.io/config.hash": "09f954293df880f1d19599a8f2e11701", - "kubernetes.io/config.mirror": "09f954293df880f1d19599a8f2e11701", - "kubernetes.io/config.seen": "2019-04-17T04:27:21.914658072Z", - "kubernetes.io/config.source": "file" - } - }, - "spec": { - "volumes": [ - { - "name": "etc-kubernetes", - "hostPath": { - "path": "/etc/kubernetes", - "type": "" - } - }, - { - "name": "var-lib-kubelet", - "hostPath": { - "path": "/var/lib/kubelet", - "type": "" - } - }, - { - "name": "msi", - "hostPath": { - "path": "/var/lib/waagent/ManagedIdentity-Settings", - "type": "" - } - } - ], - "containers": [ - { - "name": "kube-controller-manager", - "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", - "command": [ - "/hyperkube", - "controller-manager" - ], - "args": [ - "--allocate-node-cidrs=false", - "--cloud-config=/etc/kubernetes/azure.json", - "--cloud-provider=azure", - "--cluster-cidr=10.240.0.0/12", - "--cluster-name=aks-engine-health", - "--cluster-signing-cert-file=/etc/kubernetes/certs/ca.crt", - "--cluster-signing-key-file=/etc/kubernetes/certs/ca.key", - "--configure-cloud-routes=false", - "--controllers=*,bootstrapsigner,tokencleaner", - "--feature-gates=LocalStorageCapacityIsolation=true,ServiceNodeExclusion=true", - "--kubeconfig=/var/lib/kubelet/kubeconfig", - "--leader-elect=true", - "--node-monitor-grace-period=40s", - "--pod-eviction-timeout=5m0s", - "--profiling=false", - "--root-ca-file=/etc/kubernetes/certs/ca.crt", - "--route-reconciliation-period=10s", - "--service-account-private-key-file=/etc/kubernetes/certs/apiserver.key", - "--terminated-pod-gc-threshold=5000", - "--use-service-account-credentials=true", - "--v=2" - ], - "resources": {}, - "volumeMounts": [ - { - "name": "etc-kubernetes", - "mountPath": "/etc/kubernetes" - }, - { - "name": "var-lib-kubelet", - "mountPath": "/var/lib/kubelet" - }, - { - "name": "msi", - "readOnly": true, - "mountPath": 
"/var/lib/waagent/ManagedIdentity-Settings" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeName": "k8s-master-15159885-0", - "hostNetwork": true, - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "operator": "Exists", - "effect": "NoExecute" - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:27:23Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T09:02:46Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T09:02:46Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:27:23Z" - } - ], - "hostIP": "10.255.255.5", - "podIP": "10.255.255.5", - "startTime": "2019-04-17T04:27:23Z", - "containerStatuses": [ - { - "name": "kube-controller-manager", - "state": { - "running": { - "startedAt": "2019-06-14T09:02:46Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 255, - "reason": "Error", - "startedAt": "2019-06-11T17:20:25Z", - "finishedAt": "2019-06-14T09:02:40Z", - "containerID": "docker://c485dbd938dbded8ecc0ce25656c00b3efc702aa3403bbdd45bca5527ed70ac3" - } - }, - "ready": true, - "restartCount": 5, - "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", - "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", - "containerID": "docker://981b3ead0f719297dd6fc7d8583733c10a8cd282e04aef00c94884bb67ff0dc3" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": 
"kube-proxy-8d8kh", - "generateName": "kube-proxy-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-8d8kh", - "uid": "3ccad1a3-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "5110371", - "creationTimestamp": "2019-04-17T04:28:23Z", - "labels": { - "component": "kube-proxy", - "controller-revision-hash": "77f7f9d65b", - "pod-template-generation": "1", - "tier": "node" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "kube-proxy", - "uid": "3cc3b072-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "ssl-certs-host", - "hostPath": { - "path": "/usr/share/ca-certificates", - "type": "" - } - }, - { - "name": "kubeconfig", - "hostPath": { - "path": "/var/lib/kubelet/kubeconfig", - "type": "" - } - }, - { - "name": "etc-kubernetes", - "hostPath": { - "path": "/etc/kubernetes", - "type": "" - } - }, - { - "name": "iptableslock", - "hostPath": { - "path": "/run/xtables.lock", - "type": "" - } - }, - { - "name": "kernelmodules", - "hostPath": { - "path": "/lib/modules/", - "type": "" - } - }, - { - "name": "default-token-297b2", - "secret": { - "secretName": "default-token-297b2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "kube-proxy", - "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", - "command": [ - "/hyperkube", - "proxy", - "--kubeconfig=/var/lib/kubelet/kubeconfig", - "--cluster-cidr=10.240.0.0/12", - "--feature-gates=ExperimentalCriticalPodAnnotation=true", - "--proxy-mode=iptables" - ], - "resources": { - "requests": { - "cpu": "100m" - } - }, - "volumeMounts": [ - { - "name": "ssl-certs-host", - "readOnly": true, - "mountPath": "/etc/ssl/certs" - }, - { - "name": "etc-kubernetes", - "readOnly": true, - "mountPath": "/etc/kubernetes" - }, - { - "name": "kubeconfig", - "readOnly": true, - "mountPath": "/var/lib/kubelet/kubeconfig" - }, - { - "name": "iptableslock", - 
"mountPath": "/run/xtables.lock" - }, - { - "name": "kernelmodules", - "readOnly": true, - "mountPath": "/lib/modules/" - }, - { - "name": "default-token-297b2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true, - "procMount": "Default" - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-agentpool1-15159885-vmss000000", - "hostNetwork": true, - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-agentpool1-15159885-vmss000000" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node-role.kubernetes.io/master", - "operator": "Equal", - "value": "true", - "effect": "NoSchedule" - }, - { - "operator": "Exists", - "effect": "NoExecute" - }, - { - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/network-unavailable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - 
"priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:24Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:30Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:30Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:23Z" - } - ], - "hostIP": "10.240.0.34", - "podIP": "10.240.0.34", - "startTime": "2019-04-17T04:28:24Z", - "containerStatuses": [ - { - "name": "kube-proxy", - "state": { - "running": { - "startedAt": "2019-04-17T04:28:28Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", - "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", - "containerID": "docker://00c9202089cbbd3606f7bde9b1217ee2c00e24ab2090ffba36d65e44b2875423" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "kube-proxy-gxs7l", - "generateName": "kube-proxy-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-gxs7l", - "uid": "3cc8af8a-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "7362266", - "creationTimestamp": "2019-04-17T04:28:23Z", - "labels": { - "component": "kube-proxy", - "controller-revision-hash": "77f7f9d65b", - "pod-template-generation": "1", - "tier": "node" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "kube-proxy", - "uid": "3cc3b072-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "ssl-certs-host", - 
"hostPath": { - "path": "/usr/share/ca-certificates", - "type": "" - } - }, - { - "name": "kubeconfig", - "hostPath": { - "path": "/var/lib/kubelet/kubeconfig", - "type": "" - } - }, - { - "name": "etc-kubernetes", - "hostPath": { - "path": "/etc/kubernetes", - "type": "" - } - }, - { - "name": "iptableslock", - "hostPath": { - "path": "/run/xtables.lock", - "type": "" - } - }, - { - "name": "kernelmodules", - "hostPath": { - "path": "/lib/modules/", - "type": "" - } - }, - { - "name": "default-token-297b2", - "secret": { - "secretName": "default-token-297b2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "kube-proxy", - "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", - "command": [ - "/hyperkube", - "proxy", - "--kubeconfig=/var/lib/kubelet/kubeconfig", - "--cluster-cidr=10.240.0.0/12", - "--feature-gates=ExperimentalCriticalPodAnnotation=true", - "--proxy-mode=iptables" - ], - "resources": { - "requests": { - "cpu": "100m" - } - }, - "volumeMounts": [ - { - "name": "ssl-certs-host", - "readOnly": true, - "mountPath": "/etc/ssl/certs" - }, - { - "name": "etc-kubernetes", - "readOnly": true, - "mountPath": "/etc/kubernetes" - }, - { - "name": "kubeconfig", - "readOnly": true, - "mountPath": "/var/lib/kubelet/kubeconfig" - }, - { - "name": "iptableslock", - "mountPath": "/run/xtables.lock" - }, - { - "name": "kernelmodules", - "readOnly": true, - "mountPath": "/lib/modules/" - }, - { - "name": "default-token-297b2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true, - "procMount": "Default" - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": 
"k8s-agentpool1-15159885-vmss000001", - "hostNetwork": true, - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-agentpool1-15159885-vmss000001" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node-role.kubernetes.io/master", - "operator": "Equal", - "value": "true", - "effect": "NoSchedule" - }, - { - "operator": "Exists", - "effect": "NoExecute" - }, - { - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/network-unavailable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:24Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:50Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:50Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:23Z" - } - ], - "hostIP": "10.240.0.65", - "podIP": "10.240.0.65", - "startTime": 
"2019-04-17T04:28:24Z", - "containerStatuses": [ - { - "name": "kube-proxy", - "state": { - "running": { - "startedAt": "2019-06-14T07:59:49Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 255, - "reason": "Error", - "startedAt": "2019-04-17T04:28:29Z", - "finishedAt": "2019-06-14T07:58:50Z", - "containerID": "docker://07135f639d2e785e4e34ed47b9175df3e9f122fdb8020a7d394eca04ca337d68" - } - }, - "ready": true, - "restartCount": 1, - "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", - "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", - "containerID": "docker://4cf5ab87fc206f4ef92cfed7859392056418e0759c798c1b94d19f885d423ab2" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "kube-proxy-vth8z", - "generateName": "kube-proxy-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-vth8z", - "uid": "3cca5c6e-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "645", - "creationTimestamp": "2019-04-17T04:28:23Z", - "labels": { - "component": "kube-proxy", - "controller-revision-hash": "77f7f9d65b", - "pod-template-generation": "1", - "tier": "node" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "kube-proxy", - "uid": "3cc3b072-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "ssl-certs-host", - "hostPath": { - "path": "/usr/share/ca-certificates", - "type": "" - } - }, - { - "name": "kubeconfig", - "hostPath": { - "path": "/var/lib/kubelet/kubeconfig", - "type": "" - } - }, - { - "name": "etc-kubernetes", - "hostPath": { - "path": "/etc/kubernetes", - "type": "" - } - }, - { - "name": "iptableslock", - "hostPath": { - "path": "/run/xtables.lock", - "type": "" - } - }, - { - "name": "kernelmodules", - "hostPath": { - "path": "/lib/modules/", - "type": "" - 
} - }, - { - "name": "default-token-297b2", - "secret": { - "secretName": "default-token-297b2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "kube-proxy", - "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", - "command": [ - "/hyperkube", - "proxy", - "--kubeconfig=/var/lib/kubelet/kubeconfig", - "--cluster-cidr=10.240.0.0/12", - "--feature-gates=ExperimentalCriticalPodAnnotation=true", - "--proxy-mode=iptables" - ], - "resources": { - "requests": { - "cpu": "100m" - } - }, - "volumeMounts": [ - { - "name": "ssl-certs-host", - "readOnly": true, - "mountPath": "/etc/ssl/certs" - }, - { - "name": "etc-kubernetes", - "readOnly": true, - "mountPath": "/etc/kubernetes" - }, - { - "name": "kubeconfig", - "readOnly": true, - "mountPath": "/var/lib/kubelet/kubeconfig" - }, - { - "name": "iptableslock", - "mountPath": "/run/xtables.lock" - }, - { - "name": "kernelmodules", - "readOnly": true, - "mountPath": "/lib/modules/" - }, - { - "name": "default-token-297b2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true, - "procMount": "Default" - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "k8s-master-15159885-0", - "hostNetwork": true, - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-master-15159885-0" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node-role.kubernetes.io/master", - "operator": "Equal", - 
"value": "true", - "effect": "NoSchedule" - }, - { - "operator": "Exists", - "effect": "NoExecute" - }, - { - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/network-unavailable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:24Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:28Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:28Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:23Z" - } - ], - "hostIP": "10.255.255.5", - "podIP": "10.255.255.5", - "startTime": "2019-04-17T04:28:24Z", - "containerStatuses": [ - { - "name": "kube-proxy", - "state": { - "running": { - "startedAt": "2019-04-17T04:28:26Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", - "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", - "containerID": "docker://b84bffd5f1bca13f1b880363816417fb1b13938ad067530e36ba796ffa43a5a9" - } - ], - "qosClass": 
"Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "kube-scheduler-k8s-master-15159885-0", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/kube-scheduler-k8s-master-15159885-0", - "uid": "48fcfcf4-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "7367801", - "creationTimestamp": "2019-04-17T04:28:44Z", - "labels": { - "component": "kube-scheduler", - "tier": "control-plane" - }, - "annotations": { - "kubernetes.io/config.hash": "0ae37fc92f08b39c7641e5282c92b279", - "kubernetes.io/config.mirror": "0ae37fc92f08b39c7641e5282c92b279", - "kubernetes.io/config.seen": "2019-04-17T04:27:21.914659772Z", - "kubernetes.io/config.source": "file" - } - }, - "spec": { - "volumes": [ - { - "name": "etc-kubernetes", - "hostPath": { - "path": "/etc/kubernetes", - "type": "" - } - }, - { - "name": "var-lib-kubelet", - "hostPath": { - "path": "/var/lib/kubelet", - "type": "" - } - }, - { - "name": "msi", - "hostPath": { - "path": "/var/lib/waagent/ManagedIdentity-Settings", - "type": "" - } - } - ], - "containers": [ - { - "name": "kube-scheduler", - "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", - "command": [ - "/hyperkube", - "scheduler" - ], - "args": [ - "--kubeconfig=/var/lib/kubelet/kubeconfig", - "--leader-elect=true", - "--profiling=false", - "--v=2" - ], - "resources": {}, - "volumeMounts": [ - { - "name": "etc-kubernetes", - "mountPath": "/etc/kubernetes" - }, - { - "name": "var-lib-kubelet", - "mountPath": "/var/lib/kubelet" - }, - { - "name": "msi", - "readOnly": true, - "mountPath": "/var/lib/waagent/ManagedIdentity-Settings" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeName": "k8s-master-15159885-0", - "hostNetwork": true, - "securityContext": {}, - "schedulerName": "default-scheduler", - 
"tolerations": [ - { - "operator": "Exists", - "effect": "NoExecute" - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:27:26Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T09:02:46Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T09:02:46Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:27:26Z" - } - ], - "hostIP": "10.255.255.5", - "podIP": "10.255.255.5", - "startTime": "2019-04-17T04:27:26Z", - "containerStatuses": [ - { - "name": "kube-scheduler", - "state": { - "running": { - "startedAt": "2019-06-14T09:02:46Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 1, - "reason": "Error", - "startedAt": "2019-06-11T17:20:25Z", - "finishedAt": "2019-06-14T09:02:41Z", - "containerID": "docker://557db82de83e2fbea6d231e7ee9643501038e3d7bded085c65d8e9a392ec785a" - } - }, - "ready": true, - "restartCount": 6, - "image": "k8s.gcr.io/hyperkube-amd64:v1.12.7", - "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:db736d836f8d954178d121c00cfcf7c61ef0d433ca865c57ca5ddc905241fb9f", - "containerID": "docker://5445127376fb63c7f3a45781e499e90a7d1f668d8d43f4064cbe8de7f28e9dd8" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "kubernetes-dashboard-9bf969764-nswqh", - "generateName": "kubernetes-dashboard-9bf969764-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/kubernetes-dashboard-9bf969764-nswqh", - "uid": "3cfa7f6c-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "5110357", - "creationTimestamp": "2019-04-17T04:28:23Z", - "labels": { - "k8s-app": "kubernetes-dashboard", - "pod-template-hash": "9bf969764" - }, 
- "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "kubernetes-dashboard-9bf969764", - "uid": "3cf41c1e-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "kubernetes-dashboard-certs", - "emptyDir": {} - }, - { - "name": "kubernetes-dashboard-token-kpldg", - "secret": { - "secretName": "kubernetes-dashboard-token-kpldg", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "kubernetes-dashboard", - "image": "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1", - "args": [ - "--auto-generate-certificates", - "--heapster-host=http://heapster.kube-system:80" - ], - "ports": [ - { - "containerPort": 8443, - "protocol": "TCP" - } - ], - "resources": { - "limits": { - "cpu": "300m", - "memory": "150Mi" - }, - "requests": { - "cpu": "300m", - "memory": "150Mi" - } - }, - "volumeMounts": [ - { - "name": "kubernetes-dashboard-certs", - "mountPath": "/certs" - }, - { - "name": "kubernetes-dashboard-token-kpldg", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "httpGet": { - "path": "/", - "port": 8443, - "scheme": "HTTPS" - }, - "initialDelaySeconds": 30, - "timeoutSeconds": 30, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "kubernetes-dashboard", - "serviceAccount": "kubernetes-dashboard", - "nodeName": "k8s-agentpool1-15159885-vmss000000", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 
300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:24Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:29:04Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:29:04Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:24Z" - } - ], - "hostIP": "10.240.0.34", - "podIP": "10.240.0.40", - "startTime": "2019-04-17T04:28:24Z", - "containerStatuses": [ - { - "name": "kubernetes-dashboard", - "state": { - "running": { - "startedAt": "2019-04-17T04:29:03Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 1, - "reason": "Error", - "startedAt": "2019-04-17T04:28:29Z", - "finishedAt": "2019-04-17T04:29:02Z", - "containerID": "docker://28f5aa442fd07db9ceeb73f73f73622e515bba3f3f8d06494046d349c660d6a2" - } - }, - "ready": true, - "restartCount": 1, - "image": "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1", - "imageID": "docker-pullable://k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", - "containerID": "docker://9a5b3b9bbc3b298261a88db12106d0096a542332985fa03043c5dfe0e386dee9" - } - ], - "qosClass": "Guaranteed" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "metrics-server-67b4964794-l6qgt", - "generateName": "metrics-server-67b4964794-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/metrics-server-67b4964794-l6qgt", - "uid": "3cbba31a-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "5110354", - "creationTimestamp": "2019-04-17T04:28:23Z", - "labels": { - 
"k8s-app": "metrics-server", - "pod-template-hash": "67b4964794" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "metrics-server-67b4964794", - "uid": "3cb826ab-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "metrics-server-token-8d2n9", - "secret": { - "secretName": "metrics-server-token-8d2n9", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "metrics-server", - "image": "k8s.gcr.io/metrics-server-amd64:v0.2.1", - "command": [ - "/metrics-server", - "--source=kubernetes.summary_api:''" - ], - "resources": {}, - "volumeMounts": [ - { - "name": "metrics-server-token-8d2n9", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "metrics-server", - "serviceAccount": "metrics-server", - "nodeName": "k8s-agentpool1-15159885-vmss000000", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:23Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:29:04Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - 
"lastTransitionTime": "2019-04-17T04:29:04Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:23Z" - } - ], - "hostIP": "10.240.0.34", - "podIP": "10.240.0.62", - "startTime": "2019-04-17T04:28:23Z", - "containerStatuses": [ - { - "name": "metrics-server", - "state": { - "running": { - "startedAt": "2019-04-17T04:29:03Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 255, - "reason": "Error", - "startedAt": "2019-04-17T04:28:28Z", - "finishedAt": "2019-04-17T04:29:02Z", - "containerID": "docker://4aebee6450b565af51cddd7889d2a1af3d9de84d7cacd1f8d5679280f68c58a8" - } - }, - "ready": true, - "restartCount": 1, - "image": "k8s.gcr.io/metrics-server-amd64:v0.2.1", - "imageID": "docker-pullable://k8s.gcr.io/metrics-server-amd64@sha256:49a9f12f7067d11f42c803dbe61ed2c1299959ad85cb315b25ff7eef8e6b8892", - "containerID": "docker://cd21e1db0924d6ce7f2158268bd2bb7a658b624800cc7bfa6935154628f18593" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "omsagent-62cnn", - "generateName": "omsagent-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-62cnn", - "uid": "838b2248-925b-11e9-a358-000d3a53d49f", - "resourceVersion": "7987224", - "creationTimestamp": "2019-06-19T06:28:55Z", - "labels": { - "controller-revision-hash": "775fd7566d", - "dsName": "omsagent-ds", - "pod-template-generation": "1" - }, - "annotations": { - "agentVersion": "1.10.0.1", - "dockerProviderVersion": "5.0.0-1", - "schema-versions": "v1" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "omsagent", - "uid": "838334ba-925b-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "host-root", - "hostPath": { - "path": "/", - "type": "" - } - }, - { - "name": "docker-sock", - "hostPath": { - "path": "/var/run", 
- "type": "" - } - }, - { - "name": "container-hostname", - "hostPath": { - "path": "/etc/hostname", - "type": "" - } - }, - { - "name": "host-log", - "hostPath": { - "path": "/var/log", - "type": "" - } - }, - { - "name": "containerlog-path", - "hostPath": { - "path": "/var/lib/docker/containers", - "type": "" - } - }, - { - "name": "azure-json-path", - "hostPath": { - "path": "/etc/kubernetes", - "type": "" - } - }, - { - "name": "omsagent-secret", - "secret": { - "secretName": "omsagent-secret", - "defaultMode": 420 - } - }, - { - "name": "settings-vol-config", - "configMap": { - "name": "container-azm-ms-agentconfig", - "defaultMode": 420, - "optional": true - } - }, - { - "name": "omsagent-token-4d9bp", - "secret": { - "secretName": "omsagent-token-4d9bp", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "omsagent", - "image": "rdilip83/healthpreview06192019", - "ports": [ - { - "containerPort": 25225, - "protocol": "TCP" - }, - { - "containerPort": 25224, - "protocol": "UDP" - } - ], - "env": [ - { - "name": "AKS_RESOURCE_ID", - "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "name": "AKS_REGION", - "value": "eastus" - }, - { - "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", - "value": "true" - }, - { - "name": "CONTROLLER_TYPE", - "value": "DaemonSet" - }, - { - "name": "NODE_IP", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "status.hostIP" - } - } - } - ], - "resources": { - "limits": { - "cpu": "150m", - "memory": "300Mi" - }, - "requests": { - "cpu": "75m", - "memory": "225Mi" - } - }, - "volumeMounts": [ - { - "name": "host-root", - "readOnly": true, - "mountPath": "/hostfs" - }, - { - "name": "docker-sock", - "mountPath": "/var/run/host" - }, - { - "name": "host-log", - "mountPath": "/var/log" - }, - { - "name": "containerlog-path", - "mountPath": "/var/lib/docker/containers" - 
}, - { - "name": "azure-json-path", - "mountPath": "/etc/kubernetes/host" - }, - { - "name": "omsagent-secret", - "mountPath": "/etc/omsagent-secret" - }, - { - "name": "settings-vol-config", - "readOnly": true, - "mountPath": "/etc/config/settings" - }, - { - "name": "omsagent-token-4d9bp", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "exec": { - "command": [ - "/bin/bash", - "-c", - "ps -ef | grep main" - ] - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 1, - "periodSeconds": 60, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true, - "procMount": "Default" - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "omsagent", - "serviceAccount": "omsagent", - "nodeName": "k8s-agentpool1-15159885-vmss000001", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-agentpool1-15159885-vmss000001" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node-role.kubernetes.io/master", - "operator": "Equal", - "value": "true", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - 
"key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:28:55Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:29:20Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:29:20Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:28:55Z" - } - ], - "hostIP": "10.240.0.65", - "podIP": "10.240.0.73", - "startTime": "2019-06-19T06:28:55Z", - "containerStatuses": [ - { - "name": "omsagent", - "state": { - "running": { - "startedAt": "2019-06-19T06:29:19Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "rdilip83/healthpreview06192019:latest", - "imageID": "docker-pullable://rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", - "containerID": "docker://20e770236356a355504ff6629d456e2c974026fb9e63e92a60570c088b1682e6" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "omsagent-ft74m", - "generateName": "omsagent-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-ft74m", - "uid": "83882687-925b-11e9-a358-000d3a53d49f", - "resourceVersion": "7987185", - "creationTimestamp": "2019-06-19T06:28:55Z", - "labels": { - "controller-revision-hash": "775fd7566d", - "dsName": "omsagent-ds", - "pod-template-generation": "1" - }, - "annotations": { - "agentVersion": "1.10.0.1", - "dockerProviderVersion": "5.0.0-1", - "schema-versions": "v1" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "omsagent", - "uid": 
"838334ba-925b-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "host-root", - "hostPath": { - "path": "/", - "type": "" - } - }, - { - "name": "docker-sock", - "hostPath": { - "path": "/var/run", - "type": "" - } - }, - { - "name": "container-hostname", - "hostPath": { - "path": "/etc/hostname", - "type": "" - } - }, - { - "name": "host-log", - "hostPath": { - "path": "/var/log", - "type": "" - } - }, - { - "name": "containerlog-path", - "hostPath": { - "path": "/var/lib/docker/containers", - "type": "" - } - }, - { - "name": "azure-json-path", - "hostPath": { - "path": "/etc/kubernetes", - "type": "" - } - }, - { - "name": "omsagent-secret", - "secret": { - "secretName": "omsagent-secret", - "defaultMode": 420 - } - }, - { - "name": "settings-vol-config", - "configMap": { - "name": "container-azm-ms-agentconfig", - "defaultMode": 420, - "optional": true - } - }, - { - "name": "omsagent-token-4d9bp", - "secret": { - "secretName": "omsagent-token-4d9bp", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "omsagent", - "image": "rdilip83/healthpreview06192019", - "ports": [ - { - "containerPort": 25225, - "protocol": "TCP" - }, - { - "containerPort": 25224, - "protocol": "UDP" - } - ], - "env": [ - { - "name": "AKS_RESOURCE_ID", - "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "name": "AKS_REGION", - "value": "eastus" - }, - { - "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", - "value": "true" - }, - { - "name": "CONTROLLER_TYPE", - "value": "DaemonSet" - }, - { - "name": "NODE_IP", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "status.hostIP" - } - } - } - ], - "resources": { - "limits": { - "cpu": "150m", - "memory": "300Mi" - }, - "requests": { - "cpu": "75m", - "memory": "225Mi" - } - }, - "volumeMounts": [ - 
{ - "name": "host-root", - "readOnly": true, - "mountPath": "/hostfs" - }, - { - "name": "docker-sock", - "mountPath": "/var/run/host" - }, - { - "name": "host-log", - "mountPath": "/var/log" - }, - { - "name": "containerlog-path", - "mountPath": "/var/lib/docker/containers" - }, - { - "name": "azure-json-path", - "mountPath": "/etc/kubernetes/host" - }, - { - "name": "omsagent-secret", - "mountPath": "/etc/omsagent-secret" - }, - { - "name": "settings-vol-config", - "readOnly": true, - "mountPath": "/etc/config/settings" - }, - { - "name": "omsagent-token-4d9bp", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "exec": { - "command": [ - "/bin/bash", - "-c", - "ps -ef | grep main" - ] - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 1, - "periodSeconds": 60, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true, - "procMount": "Default" - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "omsagent", - "serviceAccount": "omsagent", - "nodeName": "k8s-master-15159885-0", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-master-15159885-0" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node-role.kubernetes.io/master", - "operator": "Equal", - "value": "true", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": 
"Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:28:55Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:29:08Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:29:08Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:28:55Z" - } - ], - "hostIP": "10.255.255.5", - "podIP": "10.240.0.26", - "startTime": "2019-06-19T06:28:55Z", - "containerStatuses": [ - { - "name": "omsagent", - "state": { - "running": { - "startedAt": "2019-06-19T06:29:08Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "rdilip83/healthpreview06192019:latest", - "imageID": "docker-pullable://rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", - "containerID": "docker://d84656fd8f69b15c9244a4fcb4feec35ecc980e7d9a32a4ebc365bc3647a931b" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "omsagent-pgnrl", - "generateName": "omsagent-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-pgnrl", - "uid": "838ae71f-925b-11e9-a358-000d3a53d49f", - "resourceVersion": "7987213", - "creationTimestamp": "2019-06-19T06:28:55Z", - "labels": { - "controller-revision-hash": "775fd7566d", - "dsName": "omsagent-ds", - "pod-template-generation": "1" 
- }, - "annotations": { - "agentVersion": "1.10.0.1", - "dockerProviderVersion": "5.0.0-1", - "schema-versions": "v1" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "omsagent", - "uid": "838334ba-925b-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "host-root", - "hostPath": { - "path": "/", - "type": "" - } - }, - { - "name": "docker-sock", - "hostPath": { - "path": "/var/run", - "type": "" - } - }, - { - "name": "container-hostname", - "hostPath": { - "path": "/etc/hostname", - "type": "" - } - }, - { - "name": "host-log", - "hostPath": { - "path": "/var/log", - "type": "" - } - }, - { - "name": "containerlog-path", - "hostPath": { - "path": "/var/lib/docker/containers", - "type": "" - } - }, - { - "name": "azure-json-path", - "hostPath": { - "path": "/etc/kubernetes", - "type": "" - } - }, - { - "name": "omsagent-secret", - "secret": { - "secretName": "omsagent-secret", - "defaultMode": 420 - } - }, - { - "name": "settings-vol-config", - "configMap": { - "name": "container-azm-ms-agentconfig", - "defaultMode": 420, - "optional": true - } - }, - { - "name": "omsagent-token-4d9bp", - "secret": { - "secretName": "omsagent-token-4d9bp", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "omsagent", - "image": "rdilip83/healthpreview06192019", - "ports": [ - { - "containerPort": 25225, - "protocol": "TCP" - }, - { - "containerPort": 25224, - "protocol": "UDP" - } - ], - "env": [ - { - "name": "AKS_RESOURCE_ID", - "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "name": "AKS_REGION", - "value": "eastus" - }, - { - "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", - "value": "true" - }, - { - "name": "CONTROLLER_TYPE", - "value": "DaemonSet" - }, - { - "name": "NODE_IP", - "valueFrom": { - 
"fieldRef": { - "apiVersion": "v1", - "fieldPath": "status.hostIP" - } - } - } - ], - "resources": { - "limits": { - "cpu": "150m", - "memory": "300Mi" - }, - "requests": { - "cpu": "75m", - "memory": "225Mi" - } - }, - "volumeMounts": [ - { - "name": "host-root", - "readOnly": true, - "mountPath": "/hostfs" - }, - { - "name": "docker-sock", - "mountPath": "/var/run/host" - }, - { - "name": "host-log", - "mountPath": "/var/log" - }, - { - "name": "containerlog-path", - "mountPath": "/var/lib/docker/containers" - }, - { - "name": "azure-json-path", - "mountPath": "/etc/kubernetes/host" - }, - { - "name": "omsagent-secret", - "mountPath": "/etc/omsagent-secret" - }, - { - "name": "settings-vol-config", - "readOnly": true, - "mountPath": "/etc/config/settings" - }, - { - "name": "omsagent-token-4d9bp", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "exec": { - "command": [ - "/bin/bash", - "-c", - "ps -ef | grep main" - ] - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 1, - "periodSeconds": 60, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true, - "procMount": "Default" - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "omsagent", - "serviceAccount": "omsagent", - "nodeName": "k8s-agentpool1-15159885-vmss000000", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchFields": [ - { - "key": "metadata.name", - "operator": "In", - "values": [ - "k8s-agentpool1-15159885-vmss000000" - ] - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": 
"node-role.kubernetes.io/master", - "operator": "Equal", - "value": "true", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:28:55Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:29:16Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:29:16Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:28:55Z" - } - ], - "hostIP": "10.240.0.34", - "podIP": "10.240.0.57", - "startTime": "2019-06-19T06:28:55Z", - "containerStatuses": [ - { - "name": "omsagent", - "state": { - "running": { - "startedAt": "2019-06-19T06:29:16Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "rdilip83/healthpreview06192019:latest", - "imageID": "docker-pullable://rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", - "containerID": "docker://1ba0bfe43e9beba393b61908f37fa66b235e8a3b51788a6c1acbe0ccdf8c5063" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "omsagent-rs-bf764d6d5-8cdqt", - "generateName": "omsagent-rs-bf764d6d5-", - "namespace": "kube-system", - "selfLink": 
"/api/v1/namespaces/kube-system/pods/omsagent-rs-bf764d6d5-8cdqt", - "uid": "83b5c241-925b-11e9-a358-000d3a53d49f", - "resourceVersion": "7987238", - "creationTimestamp": "2019-06-19T06:28:56Z", - "labels": { - "pod-template-hash": "bf764d6d5", - "rsName": "omsagent-rs" - }, - "annotations": { - "agentVersion": "1.10.0.1", - "dockerProviderVersion": "5.0.0-1", - "schema-versions": "v1" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "omsagent-rs-bf764d6d5", - "uid": "83b06cb0-925b-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "docker-sock", - "hostPath": { - "path": "/var/run", - "type": "" - } - }, - { - "name": "container-hostname", - "hostPath": { - "path": "/etc/hostname", - "type": "" - } - }, - { - "name": "host-log", - "hostPath": { - "path": "/var/log", - "type": "" - } - }, - { - "name": "containerlog-path", - "hostPath": { - "path": "/var/lib/docker/containers", - "type": "" - } - }, - { - "name": "azure-json-path", - "hostPath": { - "path": "/etc/kubernetes", - "type": "" - } - }, - { - "name": "omsagent-secret", - "secret": { - "secretName": "omsagent-secret", - "defaultMode": 420 - } - }, - { - "name": "omsagent-rs-config", - "configMap": { - "name": "omsagent-rs-config", - "defaultMode": 420 - } - }, - { - "name": "settings-vol-config", - "configMap": { - "name": "container-azm-ms-agentconfig", - "defaultMode": 420, - "optional": true - } - }, - { - "name": "azurefile-pv", - "persistentVolumeClaim": { - "claimName": "azurefile" - } - }, - { - "name": "omsagent-token-4d9bp", - "secret": { - "secretName": "omsagent-token-4d9bp", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "omsagent", - "image": "rdilip83/healthpreview06192019", - "ports": [ - { - "containerPort": 25225, - "protocol": "TCP" - }, - { - "containerPort": 25224, - "protocol": "UDP" - }, - { - "name": "in-rs-tcp", - "containerPort": 25235, - 
"protocol": "TCP" - } - ], - "env": [ - { - "name": "AKS_RESOURCE_ID", - "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/vitaly-health-preview/providers/Microsoft.ContainerService/managedClusters/vitaly-health-preview" - }, - { - "name": "AKS_REGION", - "value": "eastus" - }, - { - "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", - "value": "true" - }, - { - "name": "CONTROLLER_TYPE", - "value": "ReplicaSet" - }, - { - "name": "NODE_IP", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "status.hostIP" - } - } - } - ], - "resources": { - "limits": { - "cpu": "150m", - "memory": "500Mi" - }, - "requests": { - "cpu": "50m", - "memory": "175Mi" - } - }, - "volumeMounts": [ - { - "name": "docker-sock", - "mountPath": "/var/run/host" - }, - { - "name": "host-log", - "mountPath": "/var/log" - }, - { - "name": "containerlog-path", - "mountPath": "/var/lib/docker/containers" - }, - { - "name": "azure-json-path", - "mountPath": "/etc/kubernetes/host" - }, - { - "name": "omsagent-secret", - "readOnly": true, - "mountPath": "/etc/omsagent-secret" - }, - { - "name": "omsagent-rs-config", - "mountPath": "/etc/config" - }, - { - "name": "settings-vol-config", - "readOnly": true, - "mountPath": "/etc/config/settings" - }, - { - "name": "azurefile-pv", - "mountPath": "/mnt/azure" - }, - { - "name": "omsagent-token-4d9bp", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "exec": { - "command": [ - "/bin/bash", - "-c", - "ps -ef | grep main" - ] - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 1, - "periodSeconds": 60, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true, - "procMount": "Default" - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": 
"ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux", - "kubernetes.io/role": "agent" - }, - "serviceAccountName": "omsagent", - "serviceAccount": "omsagent", - "nodeName": "k8s-agentpool1-15159885-vmss000001", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:29:18Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:29:26Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:29:26Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-19T06:29:14Z" - } - ], - "hostIP": "10.240.0.65", - "podIP": "10.240.0.91", - "startTime": "2019-06-19T06:29:18Z", - "containerStatuses": [ - { - "name": "omsagent", - "state": { - "running": { - "startedAt": "2019-06-19T06:29:25Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "rdilip83/healthpreview06192019:latest", - "imageID": "docker-pullable://rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", - "containerID": "docker://c107f2a9097802e3151aa2556d424621e0c5b8b07e251dcb30dba09a42e39887" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "tiller-deploy-7bfcdc49d6-4w8pz", - "generateName": "tiller-deploy-7bfcdc49d6-", - "namespace": "kube-system", - "selfLink": 
"/api/v1/namespaces/kube-system/pods/tiller-deploy-7bfcdc49d6-4w8pz", - "uid": "39c3e0a4-60c9-11e9-a358-000d3a53d49f", - "resourceVersion": "7362282", - "creationTimestamp": "2019-04-17T04:28:18Z", - "labels": { - "app": "helm", - "name": "tiller", - "pod-template-hash": "7bfcdc49d6" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "tiller-deploy-7bfcdc49d6", - "uid": "39c0b65b-60c9-11e9-a358-000d3a53d49f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "tiller-token-c8tgp", - "secret": { - "secretName": "tiller-token-c8tgp", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "tiller", - "image": "gcr.io/kubernetes-helm/tiller:v2.11.0", - "ports": [ - { - "name": "tiller", - "containerPort": 44134, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "TILLER_NAMESPACE", - "value": "kube-system" - }, - { - "name": "TILLER_HISTORY_MAX", - "value": "0" - } - ], - "resources": { - "limits": { - "cpu": "50m", - "memory": "150Mi" - }, - "requests": { - "cpu": "50m", - "memory": "150Mi" - } - }, - "volumeMounts": [ - { - "name": "tiller-token-c8tgp", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "httpGet": { - "path": "/liveness", - "port": 44135, - "scheme": "HTTP" - }, - "initialDelaySeconds": 1, - "timeoutSeconds": 1, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "readinessProbe": { - "httpGet": { - "path": "/readiness", - "port": 44135, - "scheme": "HTTP" - }, - "initialDelaySeconds": 1, - "timeoutSeconds": 1, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - 
"beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "tiller", - "serviceAccount": "tiller", - "nodeName": "k8s-agentpool1-15159885-vmss000001", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:18Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:53Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-14T07:59:53Z" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-17T04:28:18Z" - } - ], - "hostIP": "10.240.0.65", - "podIP": "10.240.0.70", - "startTime": "2019-04-17T04:28:18Z", - "containerStatuses": [ - { - "name": "tiller", - "state": { - "running": { - "startedAt": "2019-06-14T07:59:38Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 255, - "reason": "Error", - "startedAt": "2019-04-17T04:28:27Z", - "finishedAt": "2019-06-14T07:58:50Z", - "containerID": "docker://65215f91d8557104345b0cb69dd906d574c6f57fcabba5fde3bebd58c275f2d1" - } - }, - "ready": true, - "restartCount": 1, - "image": "gcr.io/kubernetes-helm/tiller:v2.11.0", - "imageID": "docker-pullable://gcr.io/kubernetes-helm/tiller@sha256:f6d8f4ab9ba993b5f5b60a6edafe86352eabe474ffeb84cb6c79b8866dce45d1", - "containerID": "docker://2805575e748637b71e67f7601288d62405fea9b185a3aa852b82e8700320f708" - } - ], - "qosClass": "Guaranteed" - }, - "apiVersion": "v1", - "kind": "Pod" - } - ] -} \ No newline at end of file diff --git 
a/inventory/deployments.json b/inventory/deployments.json deleted file mode 100644 index ab4eb2e48..000000000 --- a/inventory/deployments.json +++ /dev/null @@ -1,1702 +0,0 @@ -{ - "items": [ - { - "metadata": { - "name": "diliprdeploymentnodeapps", - "namespace": "default", - "selfLink": "/apis/extensions/v1beta1/namespaces/default/deployments/diliprdeploymentnodeapps", - "uid": "ee1b111d-526e-11e9-a899-6a5520730c61", - "resourceVersion": "4597575", - "generation": 1, - "creationTimestamp": "2019-03-29T22:06:40Z", - "labels": { - "diliprdeploymentLabel1": "d1", - "diliprdeploymentLabel2": "d2" - }, - "annotations": { - "deployment.kubernetes.io/revision": "1", - "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"apps/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"diliprdeploymentLabel1\":\"d1\",\"diliprdeploymentLabel2\":\"d2\"},\"name\":\"diliprdeploymentnodeapps\",\"namespace\":\"default\"},\"spec\":{\"replicas\":1,\"selector\":{\"matchLabels\":{\"app\":\"diliprsnodeapppod\"}},\"template\":{\"metadata\":{\"labels\":{\"app\":\"diliprsnodeapppod\",\"diliprPodLabel1\":\"p1\",\"diliprPodLabel2\":\"p2\"}},\"spec\":{\"containers\":[{\"image\":\"rdilip83/logeverysecond:v2\",\"name\":\"diliprcontainerhelloapp\"}]}}}}\n" - } - }, - "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "app": "diliprsnodeapppod" - } - }, - "template": { - "metadata": { - "creationTimestamp": null, - "labels": { - "app": "diliprsnodeapppod", - "diliprPodLabel1": "p1", - "diliprPodLabel2": "p2" - } - }, - "spec": { - "containers": [ - { - "name": "diliprcontainerhelloapp", - "image": "rdilip83/logeverysecond:v2", - "resources": {}, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "securityContext": {}, - "schedulerName": "default-scheduler" - } 
- }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": "25%", - "maxSurge": "25%" - } - }, - "revisionHistoryLimit": 2, - "progressDeadlineSeconds": 600 - }, - "status": { - "observedGeneration": 1, - "replicas": 1, - "updatedReplicas": 1, - "readyReplicas": 1, - "availableReplicas": 1, - "conditions": [ - { - "type": "Progressing", - "status": "True", - "lastUpdateTime": "2019-03-29T22:06:47Z", - "lastTransitionTime": "2019-03-29T22:06:40Z", - "reason": "NewReplicaSetAvailable", - "message": "ReplicaSet \"diliprdeploymentnodeapps-c4fdfb446\" has successfully progressed." - }, - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-04-22T19:28:18Z", - "lastTransitionTime": "2019-04-22T19:28:18Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum availability." - } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "nginx-deployment", - "namespace": "default", - "selfLink": "/apis/extensions/v1beta1/namespaces/default/deployments/nginx-deployment", - "uid": "6d8b998a-7ce1-11e9-8d23-32c49ee6f300", - "resourceVersion": "7952435", - "generation": 1, - "creationTimestamp": "2019-05-22T22:32:05Z", - "labels": { - "app": "nginx" - }, - "annotations": { - "deployment.kubernetes.io/revision": "1" - } - }, - "spec": { - "replicas": 3, - "selector": { - "matchLabels": { - "app": "nginx" - } - }, - "template": { - "metadata": { - "creationTimestamp": null, - "labels": { - "app": "nginx" - } - }, - "spec": { - "containers": [ - { - "name": "nginx", - "image": "nginx:1.7.9", - "ports": [ - { - "containerPort": 80, - "protocol": "TCP" - } - ], - "resources": {}, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "securityContext": {}, - "schedulerName": 
"default-scheduler" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": 1, - "maxSurge": 1 - } - }, - "revisionHistoryLimit": 10, - "progressDeadlineSeconds": 2147483647 - }, - "status": { - "observedGeneration": 1, - "replicas": 3, - "updatedReplicas": 3, - "readyReplicas": 3, - "availableReplicas": 3, - "conditions": [ - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-05-22T22:32:14Z", - "lastTransitionTime": "2019-05-22T22:32:14Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum availability." - } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "vishwadeploymentnodeapps", - "namespace": "default", - "selfLink": "/apis/extensions/v1beta1/namespaces/default/deployments/vishwadeploymentnodeapps", - "uid": "cf108fee-5261-11e9-a899-6a5520730c61", - "resourceVersion": "7580681", - "generation": 1, - "creationTimestamp": "2019-03-29T20:32:45Z", - "labels": { - "VishwadeploymentLabel1": "d1", - "VishwadeploymentLabel2": "d2" - }, - "annotations": { - "deployment.kubernetes.io/revision": "1", - "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"apps/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"VishwadeploymentLabel1\":\"d1\",\"VishwadeploymentLabel2\":\"d2\"},\"name\":\"vishwadeploymentnodeapps\",\"namespace\":\"default\"},\"spec\":{\"replicas\":10,\"selector\":{\"matchLabels\":{\"app\":\"vishwasnodeapppod\"}},\"template\":{\"metadata\":{\"labels\":{\"VishwaPodLabel1\":\"p1\",\"VishwaPodLabel2\":\"p2\",\"app\":\"vishwasnodeapppod\"}},\"spec\":{\"containers\":[{\"image\":\"vishiy/hello:err100eps\",\"name\":\"vishwacontainerhelloapp\"}]}}}}\n" - } - }, - "spec": { - "replicas": 10, - "selector": { - "matchLabels": { - "app": "vishwasnodeapppod" - } - }, - "template": { - "metadata": { - "creationTimestamp": null, - "labels": { - "VishwaPodLabel1": "p1", - 
"VishwaPodLabel2": "p2", - "app": "vishwasnodeapppod" - } - }, - "spec": { - "containers": [ - { - "name": "vishwacontainerhelloapp", - "image": "vishiy/hello:err100eps", - "resources": {}, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "securityContext": {}, - "schedulerName": "default-scheduler" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": "25%", - "maxSurge": "25%" - } - }, - "revisionHistoryLimit": 2, - "progressDeadlineSeconds": 600 - }, - "status": { - "observedGeneration": 1, - "replicas": 10, - "updatedReplicas": 10, - "readyReplicas": 10, - "availableReplicas": 10, - "conditions": [ - { - "type": "Progressing", - "status": "True", - "lastUpdateTime": "2019-03-29T20:32:58Z", - "lastTransitionTime": "2019-03-29T20:32:45Z", - "reason": "NewReplicaSetAvailable", - "message": "ReplicaSet \"vishwadeploymentnodeapps-8686cf54db\" has successfully progressed." - }, - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-04-22T19:28:16Z", - "lastTransitionTime": "2019-04-22T19:28:16Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum availability." 
- } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "heapster", - "namespace": "kube-system", - "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/heapster", - "uid": "c531be6a-8d02-11e9-8800-bad6d46463ba", - "resourceVersion": "10235925", - "generation": 2, - "creationTimestamp": "2019-06-12T11:11:04Z", - "labels": { - "addonmanager.kubernetes.io/mode": "EnsureExists", - "k8s-app": "heapster", - "kubernetes.io/cluster-service": "true" - }, - "annotations": { - "deployment.kubernetes.io/revision": "2" - } - }, - "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "k8s-app": "heapster" - } - }, - "template": { - "metadata": { - "creationTimestamp": null, - "labels": { - "k8s-app": "heapster" - } - }, - "spec": { - "volumes": [ - { - "name": "heapster-config-volume", - "configMap": { - "name": "heapster-config", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "heapster", - "image": "k8s.gcr.io/heapster-amd64:v1.5.3", - "command": [ - "/heapster", - "--source=kubernetes.summary_api:\"\"" - ], - "resources": { - "limits": { - "cpu": "88m", - "memory": "204Mi" - }, - "requests": { - "cpu": "88m", - "memory": "204Mi" - } - }, - "livenessProbe": { - "httpGet": { - "path": "/healthz", - "port": 8082, - "scheme": "HTTP" - }, - "initialDelaySeconds": 180, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - }, - { - "name": "heapster-nanny", - "image": "k8s.gcr.io/addon-resizer:1.8.1", - "command": [ - "/pod_nanny", - "--config-dir=/etc/config", - "--cpu=80m", - "--extra-cpu=0.5m", - "--memory=140Mi", - "--extra-memory=4Mi", - "--threshold=5", - "--deployment=heapster", - "--container=heapster", - "--poll-period=300000", - "--estimator=exponential" - ], - "env": [ - { - "name": "MY_POD_NAME", - 
"valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "metadata.name" - } - } - }, - { - "name": "MY_POD_NAMESPACE", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "metadata.namespace" - } - } - } - ], - "resources": { - "limits": { - "cpu": "50m", - "memory": "90Mi" - }, - "requests": { - "cpu": "50m", - "memory": "90Mi" - } - }, - "volumeMounts": [ - { - "name": "heapster-config-volume", - "mountPath": "/etc/config" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "heapster", - "serviceAccount": "heapster", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "kubernetes.azure.com/cluster", - "operator": "Exists" - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - } - ], - "priorityClassName": "system-node-critical" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": 1, - "maxSurge": 1 - } - }, - "revisionHistoryLimit": 10, - "progressDeadlineSeconds": 2147483647 - }, - "status": { - "observedGeneration": 2, - "replicas": 1, - "updatedReplicas": 1, - "readyReplicas": 1, - "availableReplicas": 1, - "conditions": [ - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-06-12T11:11:05Z", - "lastTransitionTime": "2019-06-12T11:11:05Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum availability." 
- } - ], - "collisionCount": 1 - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "kube-dns-autoscaler", - "namespace": "kube-system", - "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/kube-dns-autoscaler", - "uid": "4509acaf-44e5-11e9-9920-423525a6b683", - "resourceVersion": "7854804", - "generation": 2, - "creationTimestamp": "2019-03-12T16:38:30Z", - "labels": { - "addonmanager.kubernetes.io/mode": "Reconcile", - "k8s-app": "kube-dns-autoscaler", - "kubernetes.io/cluster-service": "true" - }, - "annotations": { - "deployment.kubernetes.io/revision": "2", - "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"kube-dns-autoscaler\",\"kubernetes.io/cluster-service\":\"true\"},\"name\":\"kube-dns-autoscaler\",\"namespace\":\"kube-system\"},\"spec\":{\"selector\":{\"matchLabels\":{\"k8s-app\":\"kube-dns-autoscaler\"}},\"template\":{\"metadata\":{\"annotations\":{\"scheduler.alpha.kubernetes.io/critical-pod\":\"\",\"seccomp.security.alpha.kubernetes.io/pod\":\"docker/default\"},\"labels\":{\"k8s-app\":\"kube-dns-autoscaler\"}},\"spec\":{\"containers\":[{\"command\":[\"/cluster-proportional-autoscaler\",\"--namespace=kube-system\",\"--configmap=kube-dns-autoscaler\",\"--target=deployment/kube-dns-v20\",\"--default-params={\\\"ladder\\\":{\\\"coresToReplicas\\\":[[1,2],[512,3],[1024,4],[2048,5]],\\\"nodesToReplicas\\\":[[1,2],[8,3],[16,4],[32,5]]}}\",\"--logtostderr=true\",\"--v=2\"],\"image\":\"k8s.gcr.io/cluster-proportional-autoscaler-amd64:1.1.2-r2\",\"name\":\"autoscaler\",\"resources\":{\"requests\":{\"cpu\":\"20m\",\"memory\":\"10Mi\"}}}],\"dnsPolicy\":\"Default\",\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"kube-dns-autoscaler\",\"tolerations\":[{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Ex
ists\"}]}}}}\n" - } - }, - "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "k8s-app": "kube-dns-autoscaler" - } - }, - "template": { - "metadata": { - "creationTimestamp": null, - "labels": { - "k8s-app": "kube-dns-autoscaler" - }, - "annotations": { - "scheduler.alpha.kubernetes.io/critical-pod": "", - "seccomp.security.alpha.kubernetes.io/pod": "docker/default" - } - }, - "spec": { - "containers": [ - { - "name": "autoscaler", - "image": "k8s.gcr.io/cluster-proportional-autoscaler-amd64:1.1.2-r2", - "command": [ - "/cluster-proportional-autoscaler", - "--namespace=kube-system", - "--configmap=kube-dns-autoscaler", - "--target=deployment/kube-dns-v20", - "--default-params={\"ladder\":{\"coresToReplicas\":[[1,2],[512,3],[1024,4],[2048,5]],\"nodesToReplicas\":[[1,2],[8,3],[16,4],[32,5]]}}", - "--logtostderr=true", - "--v=2" - ], - "resources": { - "requests": { - "cpu": "20m", - "memory": "10Mi" - } - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "Default", - "serviceAccountName": "kube-dns-autoscaler", - "serviceAccount": "kube-dns-autoscaler", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - } - ], - "priorityClassName": "system-node-critical" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": 1, - "maxSurge": 1 - } - }, - "revisionHistoryLimit": 10, - "progressDeadlineSeconds": 2147483647 - }, - "status": { - "observedGeneration": 2, - "replicas": 1, - "updatedReplicas": 1, - "readyReplicas": 1, - "availableReplicas": 1, - "conditions": [ - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-03-12T16:38:30Z", - "lastTransitionTime": "2019-03-12T16:38:30Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment 
has minimum availability." - } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "kube-dns-v20", - "namespace": "kube-system", - "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/kube-dns-v20", - "uid": "4523fcd7-44e5-11e9-9920-423525a6b683", - "resourceVersion": "7854961", - "generation": 4, - "creationTimestamp": "2019-03-12T16:38:30Z", - "labels": { - "addonmanager.kubernetes.io/mode": "Reconcile", - "k8s-app": "kube-dns", - "kubernetes.io/cluster-service": "true", - "version": "v20" - }, - "annotations": { - "deployment.kubernetes.io/revision": "3", - "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"kube-dns\",\"kubernetes.io/cluster-service\":\"true\",\"version\":\"v20\"},\"name\":\"kube-dns-v20\",\"namespace\":\"kube-system\"},\"spec\":{\"selector\":{\"matchLabels\":{\"k8s-app\":\"kube-dns\",\"version\":\"v20\"}},\"template\":{\"metadata\":{\"annotations\":{\"prometheus.io/port\":\"10055\",\"prometheus.io/scrape\":\"true\"},\"labels\":{\"k8s-app\":\"kube-dns\",\"kubernetes.io/cluster-service\":\"true\",\"version\":\"v20\"}},\"spec\":{\"affinity\":{\"nodeAffinity\":{\"requiredDuringSchedulingIgnoredDuringExecution\":{\"nodeSelectorTerms\":[{\"labelSelector\":null,\"matchExpressions\":[{\"key\":\"kubernetes.azure.com/cluster\",\"operator\":\"Exists\"}]}]}},\"podAntiAffinity\":{\"preferredDuringSchedulingIgnoredDuringExecution\":[{\"podAffinityTerm\":{\"labelSelector\":{\"matchExpressions\":[{\"key\":\"k8s-app\",\"operator\":\"In\",\"values\":[\"kube-dns\"]}]},\"topologyKey\":\"kubernetes.io/hostname\"},\"weight\":100}]}},\"containers\":[{\"args\":[\"--kubecfg-file=/config/kubeconfig\",\"--config-dir=/kube-dns-config\",\"--domain=cluster.local.\",\"--dns-port=10053\",\"--v=2\"],\"env\":[{\"name\":\"PROMETHEUS_PORT
\",\"value\":\"10055\"}],\"image\":\"k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13\",\"livenessProbe\":{\"failureThreshold\":5,\"httpGet\":{\"path\":\"/healthcheck/kubedns\",\"port\":10054,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":60,\"successThreshold\":1,\"timeoutSeconds\":5},\"name\":\"kubedns\",\"ports\":[{\"containerPort\":10053,\"name\":\"dns-local\",\"protocol\":\"UDP\"},{\"containerPort\":10053,\"name\":\"dns-tcp-local\",\"protocol\":\"TCP\"},{\"containerPort\":10055,\"name\":\"metrics\",\"protocol\":\"TCP\"}],\"readinessProbe\":{\"httpGet\":{\"path\":\"/readiness\",\"port\":8081,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":30,\"timeoutSeconds\":5},\"resources\":{\"limits\":{\"memory\":\"170Mi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"70Mi\"}},\"volumeMounts\":[{\"mountPath\":\"/kube-dns-config\",\"name\":\"kube-dns-config\"},{\"mountPath\":\"/config\",\"name\":\"kubedns-kubecfg\",\"readOnly\":true}]},{\"args\":[\"-v=2\",\"-logtostderr\",\"-configDir=/kube-dns-config\",\"-restartDnsmasq=true\",\"--\",\"-k\",\"--cache-size=1000\",\"--no-negcache\",\"--no-resolv\",\"--server=127.0.0.1#10053\",\"--server=/cluster.local/127.0.0.1#10053\",\"--server=/in-addr.arpa/127.0.0.1#10053\",\"--server=/ip6.arpa/127.0.0.1#10053\",\"--log-facility=-\"],\"image\":\"k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10\",\"name\":\"dnsmasq\",\"ports\":[{\"containerPort\":53,\"name\":\"dns\",\"protocol\":\"UDP\"},{\"containerPort\":53,\"name\":\"dns-tcp\",\"protocol\":\"TCP\"}],\"volumeMounts\":[{\"mountPath\":\"/kube-dns-config\",\"name\":\"kube-dns-config\"}]},{\"args\":[\"--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1 \\u003e/dev/null || exit 1; done\",\"--url=/healthz-dnsmasq\",\"--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1:10053 \\u003e/dev/null || exit 1; done\",\"--url=/healthz-kubedns\",\"--port=8080\",\"--quiet\"],\"env\":[{\"name\":\"PROBE_DOMAINS\",\"value\":\"bing.com 
kubernetes.default.svc.cluster.local\"}],\"image\":\"k8s.gcr.io/exechealthz-amd64:1.2\",\"livenessProbe\":{\"failureThreshold\":5,\"httpGet\":{\"path\":\"/healthz-dnsmasq\",\"port\":8080,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":60,\"successThreshold\":1,\"timeoutSeconds\":5},\"name\":\"healthz\",\"ports\":[{\"containerPort\":8080,\"protocol\":\"TCP\"}],\"resources\":{\"limits\":{\"memory\":\"50Mi\"},\"requests\":{\"cpu\":\"10m\",\"memory\":\"50Mi\"}}},{\"args\":[\"--v=2\",\"--logtostderr\",\"--probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.cluster.local,5,SRV\",\"--probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.cluster.local,5,SRV\"],\"image\":\"k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10\",\"livenessProbe\":{\"httpGet\":{\"path\":\"/metrics\",\"port\":10054,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":60,\"successThreshold\":1,\"timeoutSeconds\":5},\"name\":\"sidecar\",\"ports\":[{\"containerPort\":10054,\"name\":\"metrics\",\"protocol\":\"TCP\"}],\"resources\":{\"requests\":{\"cpu\":\"10m\",\"memory\":\"20Mi\"}}}],\"dnsPolicy\":\"Default\",\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"kube-dns\",\"tolerations\":[{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Exists\"}],\"volumes\":[{\"configMap\":{\"name\":\"kube-dns\",\"optional\":true},\"name\":\"kube-dns-config\"},{\"configMap\":{\"name\":\"kubedns-kubecfg\"},\"name\":\"kubedns-kubecfg\"}]}}}}\n" - } - }, - "spec": { - "replicas": 2, - "selector": { - "matchLabels": { - "k8s-app": "kube-dns", - "version": "v20" - } - }, - "template": { - "metadata": { - "creationTimestamp": null, - "labels": { - "k8s-app": "kube-dns", - "kubernetes.io/cluster-service": "true", - "version": "v20" - }, - "annotations": { - "prometheus.io/port": "10055", - "prometheus.io/scrape": "true" - } - }, - "spec": { - "volumes": [ - { - "name": "kube-dns-config", - "configMap": { - "name": "kube-dns", - "defaultMode": 420, - "optional": true - } - 
}, - { - "name": "kubedns-kubecfg", - "configMap": { - "name": "kubedns-kubecfg", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "kubedns", - "image": "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13", - "args": [ - "--kubecfg-file=/config/kubeconfig", - "--config-dir=/kube-dns-config", - "--domain=cluster.local.", - "--dns-port=10053", - "--v=2" - ], - "ports": [ - { - "name": "dns-local", - "containerPort": 10053, - "protocol": "UDP" - }, - { - "name": "dns-tcp-local", - "containerPort": 10053, - "protocol": "TCP" - }, - { - "name": "metrics", - "containerPort": 10055, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "PROMETHEUS_PORT", - "value": "10055" - } - ], - "resources": { - "limits": { - "memory": "170Mi" - }, - "requests": { - "cpu": "100m", - "memory": "70Mi" - } - }, - "volumeMounts": [ - { - "name": "kube-dns-config", - "mountPath": "/kube-dns-config" - }, - { - "name": "kubedns-kubecfg", - "readOnly": true, - "mountPath": "/config" - } - ], - "livenessProbe": { - "httpGet": { - "path": "/healthcheck/kubedns", - "port": 10054, - "scheme": "HTTP" - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 5 - }, - "readinessProbe": { - "httpGet": { - "path": "/readiness", - "port": 8081, - "scheme": "HTTP" - }, - "initialDelaySeconds": 30, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - }, - { - "name": "dnsmasq", - "image": "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10", - "args": [ - "-v=2", - "-logtostderr", - "-configDir=/kube-dns-config", - "-restartDnsmasq=true", - "--", - "-k", - "--cache-size=1000", - "--no-negcache", - "--no-resolv", - "--server=127.0.0.1#10053", - "--server=/cluster.local/127.0.0.1#10053", - "--server=/in-addr.arpa/127.0.0.1#10053", - 
"--server=/ip6.arpa/127.0.0.1#10053", - "--log-facility=-" - ], - "ports": [ - { - "name": "dns", - "containerPort": 53, - "protocol": "UDP" - }, - { - "name": "dns-tcp", - "containerPort": 53, - "protocol": "TCP" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "kube-dns-config", - "mountPath": "/kube-dns-config" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - }, - { - "name": "healthz", - "image": "k8s.gcr.io/exechealthz-amd64:1.2", - "args": [ - "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1 >/dev/null || exit 1; done", - "--url=/healthz-dnsmasq", - "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1:10053 >/dev/null || exit 1; done", - "--url=/healthz-kubedns", - "--port=8080", - "--quiet" - ], - "ports": [ - { - "containerPort": 8080, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "PROBE_DOMAINS", - "value": "bing.com kubernetes.default.svc.cluster.local" - } - ], - "resources": { - "limits": { - "memory": "50Mi" - }, - "requests": { - "cpu": "10m", - "memory": "50Mi" - } - }, - "livenessProbe": { - "httpGet": { - "path": "/healthz-dnsmasq", - "port": 8080, - "scheme": "HTTP" - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 5 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - }, - { - "name": "sidecar", - "image": "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10", - "args": [ - "--v=2", - "--logtostderr", - "--probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.cluster.local,5,SRV", - "--probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.cluster.local,5,SRV" - ], - "ports": [ - { - "name": "metrics", - "containerPort": 10054, - "protocol": "TCP" - } - ], - "resources": { - "requests": { - "cpu": "10m", - "memory": "20Mi" - } - }, - "livenessProbe": { - "httpGet": { - "path": "/metrics", - 
"port": 10054, - "scheme": "HTTP" - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "Default", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "kube-dns", - "serviceAccount": "kube-dns", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "kubernetes.azure.com/cluster", - "operator": "Exists" - } - ] - } - ] - } - }, - "podAntiAffinity": { - "preferredDuringSchedulingIgnoredDuringExecution": [ - { - "weight": 100, - "podAffinityTerm": { - "labelSelector": { - "matchExpressions": [ - { - "key": "k8s-app", - "operator": "In", - "values": [ - "kube-dns" - ] - } - ] - }, - "topologyKey": "kubernetes.io/hostname" - } - } - ] - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - } - ], - "priorityClassName": "system-node-critical" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": 1, - "maxSurge": 1 - } - }, - "revisionHistoryLimit": 10, - "progressDeadlineSeconds": 2147483647 - }, - "status": { - "observedGeneration": 4, - "replicas": 2, - "updatedReplicas": 2, - "readyReplicas": 2, - "availableReplicas": 2, - "conditions": [ - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-03-12T16:41:46Z", - "lastTransitionTime": "2019-03-12T16:41:46Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum availability." 
- } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "kubernetes-dashboard", - "namespace": "kube-system", - "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/kubernetes-dashboard", - "uid": "45b9cc8d-44e5-11e9-9920-423525a6b683", - "resourceVersion": "7854801", - "generation": 3, - "creationTimestamp": "2019-03-12T16:38:31Z", - "labels": { - "addonmanager.kubernetes.io/mode": "Reconcile", - "k8s-app": "kubernetes-dashboard", - "kubernetes.io/cluster-service": "true" - }, - "annotations": { - "deployment.kubernetes.io/revision": "3", - "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"kubernetes-dashboard\",\"kubernetes.io/cluster-service\":\"true\"},\"name\":\"kubernetes-dashboard\",\"namespace\":\"kube-system\"},\"spec\":{\"replicas\":1,\"strategy\":{\"rollingUpdate\":{\"maxSurge\":0,\"maxUnavailable\":1},\"type\":\"RollingUpdate\"},\"template\":{\"metadata\":{\"labels\":{\"k8s-app\":\"kubernetes-dashboard\",\"kubernetes.io/cluster-service\":\"true\"}},\"spec\":{\"affinity\":{\"nodeAffinity\":{\"requiredDuringSchedulingIgnoredDuringExecution\":{\"nodeSelectorTerms\":[{\"labelSelector\":null,\"matchExpressions\":[{\"key\":\"kubernetes.azure.com/cluster\",\"operator\":\"Exists\"}]}]}}},\"containers\":[{\"image\":\"k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1\",\"livenessProbe\":{\"failureThreshold\":3,\"httpGet\":{\"path\":\"/\",\"port\":9090,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":30,\"periodSeconds\":10,\"successThreshold\":1,\"timeoutSeconds\":30},\"name\":\"main\",\"ports\":[{\"containerPort\":9090,\"name\":\"http\",\"protocol\":\"TCP\"}],\"resources\":{\"limits\":{\"cpu\":\"100m\",\"memory\":\"500Mi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"50Mi\"}}}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linu
x\"},\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"kubernetes-dashboard\",\"tolerations\":[{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Exists\"}]}}}}\n" - } - }, - "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "k8s-app": "kubernetes-dashboard", - "kubernetes.io/cluster-service": "true" - } - }, - "template": { - "metadata": { - "creationTimestamp": null, - "labels": { - "k8s-app": "kubernetes-dashboard", - "kubernetes.io/cluster-service": "true" - } - }, - "spec": { - "containers": [ - { - "name": "main", - "image": "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1", - "ports": [ - { - "name": "http", - "containerPort": 9090, - "protocol": "TCP" - } - ], - "resources": { - "limits": { - "cpu": "100m", - "memory": "500Mi" - }, - "requests": { - "cpu": "100m", - "memory": "50Mi" - } - }, - "livenessProbe": { - "httpGet": { - "path": "/", - "port": 9090, - "scheme": "HTTP" - }, - "initialDelaySeconds": 30, - "timeoutSeconds": 30, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "kubernetes-dashboard", - "serviceAccount": "kubernetes-dashboard", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "kubernetes.azure.com/cluster", - "operator": "Exists" - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - } - ], - "priorityClassName": "system-node-critical" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": 1, - "maxSurge": 0 - 
} - }, - "revisionHistoryLimit": 10, - "progressDeadlineSeconds": 2147483647 - }, - "status": { - "observedGeneration": 3, - "replicas": 1, - "updatedReplicas": 1, - "readyReplicas": 1, - "availableReplicas": 1, - "conditions": [ - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-03-12T16:38:32Z", - "lastTransitionTime": "2019-03-12T16:38:32Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum availability." - } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "metrics-server", - "namespace": "kube-system", - "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/metrics-server", - "uid": "45556857-44e5-11e9-9920-423525a6b683", - "resourceVersion": "7854802", - "generation": 2, - "creationTimestamp": "2019-03-12T16:38:31Z", - "labels": { - "addonmanager.kubernetes.io/mode": "Reconcile", - "k8s-app": "metrics-server", - "kubernetes.io/cluster-service": "true" - }, - "annotations": { - "deployment.kubernetes.io/revision": "2", - "kubectl.kubernetes.io/last-applied-configuration": 
"{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"metrics-server\",\"kubernetes.io/cluster-service\":\"true\"},\"name\":\"metrics-server\",\"namespace\":\"kube-system\"},\"spec\":{\"selector\":{\"matchLabels\":{\"k8s-app\":\"metrics-server\"}},\"template\":{\"metadata\":{\"labels\":{\"k8s-app\":\"metrics-server\"},\"name\":\"metrics-server\"},\"spec\":{\"affinity\":{\"nodeAffinity\":{\"requiredDuringSchedulingIgnoredDuringExecution\":{\"nodeSelectorTerms\":[{\"labelSelector\":null,\"matchExpressions\":[{\"key\":\"kubernetes.azure.com/cluster\",\"operator\":\"Exists\"}]}]}}},\"containers\":[{\"command\":[\"/metrics-server\",\"--source=kubernetes.summary_api:''\"],\"image\":\"k8s.gcr.io/metrics-server-amd64:v0.2.1\",\"imagePullPolicy\":\"IfNotPresent\",\"name\":\"metrics-server\"}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"metrics-server\",\"tolerations\":[{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Exists\"}]}}}}\n" - } - }, - "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "k8s-app": "metrics-server" - } - }, - "template": { - "metadata": { - "name": "metrics-server", - "creationTimestamp": null, - "labels": { - "k8s-app": "metrics-server" - } - }, - "spec": { - "containers": [ - { - "name": "metrics-server", - "image": "k8s.gcr.io/metrics-server-amd64:v0.2.1", - "command": [ - "/metrics-server", - "--source=kubernetes.summary_api:''" - ], - "resources": {}, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "metrics-server", - "serviceAccount": "metrics-server", - 
"securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "kubernetes.azure.com/cluster", - "operator": "Exists" - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - } - ], - "priorityClassName": "system-node-critical" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": 1, - "maxSurge": 1 - } - }, - "revisionHistoryLimit": 10, - "progressDeadlineSeconds": 2147483647 - }, - "status": { - "observedGeneration": 2, - "replicas": 1, - "updatedReplicas": 1, - "readyReplicas": 1, - "availableReplicas": 1, - "conditions": [ - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-03-12T16:38:31Z", - "lastTransitionTime": "2019-03-12T16:38:31Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum availability." 
- } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "omsagent-rs", - "namespace": "kube-system", - "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/omsagent-rs", - "uid": "b819b214-6876-11e9-8b55-963bcaafdced", - "resourceVersion": "5486655", - "generation": 2, - "creationTimestamp": "2019-04-26T22:57:51Z", - "labels": { - "rsName": "omsagent-rs" - }, - "annotations": { - "deployment.kubernetes.io/revision": "2", - "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"name\":\"omsagent-rs\",\"namespace\":\"kube-system\"},\"spec\":{\"replicas\":1,\"selector\":{\"matchLabels\":{\"rsName\":\"omsagent-rs\"}},\"strategy\":{\"type\":\"RollingUpdate\"},\"template\":{\"metadata\":{\"annotations\":{\"agentVersion\":\"1.10.0.1\",\"dockerProviderVersion\":\"4.0.0-0\"},\"labels\":{\"rsName\":\"omsagent-rs\"}},\"spec\":{\"containers\":[{\"env\":[{\"name\":\"AKS_RESOURCE_ID\",\"value\":\"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test\"},{\"name\":\"AKS_REGION\",\"value\":\"eastus\"},{\"name\":\"DISABLE_KUBE_SYSTEM_LOG_COLLECTION\",\"value\":\"true\"},{\"name\":\"CONTROLLER_TYPE\",\"value\":\"ReplicaSet\"},{\"name\":\"NODE_IP\",\"valueFrom\":{\"fieldRef\":{\"fieldPath\":\"status.hostIP\"}}}],\"image\":\"microsoft/oms:healthpreview04262019\",\"imagePullPolicy\":\"IfNotPresent\",\"livenessProbe\":{\"exec\":{\"command\":[\"/bin/bash\",\"-c\",\"ps -ef | grep main | grep -v 
\\\"grep\\\"\"]},\"initialDelaySeconds\":60,\"periodSeconds\":60},\"name\":\"omsagent\",\"ports\":[{\"containerPort\":25225,\"protocol\":\"TCP\"},{\"containerPort\":25235,\"name\":\"in-rs-tcp\",\"protocol\":\"TCP\"},{\"containerPort\":25224,\"protocol\":\"UDP\"}],\"resources\":{\"limits\":{\"cpu\":\"150m\",\"memory\":\"500Mi\"},\"requests\":{\"cpu\":\"50m\",\"memory\":\"100Mi\"}},\"securityContext\":{\"privileged\":true},\"volumeMounts\":[{\"mountPath\":\"/var/run/host\",\"name\":\"docker-sock\"},{\"mountPath\":\"/var/log\",\"name\":\"host-log\"},{\"mountPath\":\"/var/lib/docker/containers\",\"name\":\"containerlog-path\"},{\"mountPath\":\"/etc/kubernetes/host\",\"name\":\"azure-json-path\"},{\"mountPath\":\"/etc/omsagent-secret\",\"name\":\"omsagent-secret\",\"readOnly\":true},{\"mountPath\":\"/etc/config\",\"name\":\"omsagent-rs-config\"}]}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\",\"kubernetes.io/role\":\"agent\"},\"serviceAccountName\":\"omsagent\",\"volumes\":[{\"hostPath\":{\"path\":\"/var/run\"},\"name\":\"docker-sock\"},{\"hostPath\":{\"path\":\"/etc/hostname\"},\"name\":\"container-hostname\"},{\"hostPath\":{\"path\":\"/var/log\"},\"name\":\"host-log\"},{\"hostPath\":{\"path\":\"/var/lib/docker/containers\"},\"name\":\"containerlog-path\"},{\"hostPath\":{\"path\":\"/etc/kubernetes\"},\"name\":\"azure-json-path\"},{\"name\":\"omsagent-secret\",\"secret\":{\"secretName\":\"omsagent-secret\"}},{\"configMap\":{\"name\":\"omsagent-rs-config\"},\"name\":\"omsagent-rs-config\"}]}}}}\n" - } - }, - "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "rsName": "omsagent-rs" - } - }, - "template": { - "metadata": { - "creationTimestamp": null, - "labels": { - "rsName": "omsagent-rs" - }, - "annotations": { - "agentVersion": "1.10.0.1", - "dockerProviderVersion": "4.0.0-0" - } - }, - "spec": { - "volumes": [ - { - "name": "docker-sock", - "hostPath": { - "path": "/var/run", - "type": "" - } - }, - { - "name": "container-hostname", - 
"hostPath": { - "path": "/etc/hostname", - "type": "" - } - }, - { - "name": "host-log", - "hostPath": { - "path": "/var/log", - "type": "" - } - }, - { - "name": "containerlog-path", - "hostPath": { - "path": "/var/lib/docker/containers", - "type": "" - } - }, - { - "name": "azure-json-path", - "hostPath": { - "path": "/etc/kubernetes", - "type": "" - } - }, - { - "name": "omsagent-secret", - "secret": { - "secretName": "omsagent-secret", - "defaultMode": 420 - } - }, - { - "name": "omsagent-rs-config", - "configMap": { - "name": "omsagent-rs-config", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "omsagent", - "image": "microsoft/oms:healthpreview04262019", - "ports": [ - { - "containerPort": 25225, - "protocol": "TCP" - }, - { - "name": "in-rs-tcp", - "containerPort": 25235, - "protocol": "TCP" - }, - { - "containerPort": 25224, - "protocol": "UDP" - } - ], - "env": [ - { - "name": "AKS_RESOURCE_ID", - "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "name": "AKS_REGION", - "value": "eastus" - }, - { - "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", - "value": "true" - }, - { - "name": "CONTROLLER_TYPE", - "value": "ReplicaSet" - }, - { - "name": "NODE_IP", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "status.hostIP" - } - } - } - ], - "resources": { - "limits": { - "cpu": "150m", - "memory": "500Mi" - }, - "requests": { - "cpu": "50m", - "memory": "100Mi" - } - }, - "volumeMounts": [ - { - "name": "docker-sock", - "mountPath": "/var/run/host" - }, - { - "name": "host-log", - "mountPath": "/var/log" - }, - { - "name": "containerlog-path", - "mountPath": "/var/lib/docker/containers" - }, - { - "name": "azure-json-path", - "mountPath": "/etc/kubernetes/host" - }, - { - "name": "omsagent-secret", - "readOnly": true, - "mountPath": "/etc/omsagent-secret" - }, - { - "name": "omsagent-rs-config", - 
"mountPath": "/etc/config" - } - ], - "livenessProbe": { - "exec": { - "command": [ - "/bin/bash", - "-c", - "ps -ef | grep main | grep -v \"grep\"" - ] - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 1, - "periodSeconds": 60, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux", - "kubernetes.io/role": "agent" - }, - "serviceAccountName": "omsagent", - "serviceAccount": "omsagent", - "securityContext": {}, - "schedulerName": "default-scheduler" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": 1, - "maxSurge": 1 - } - }, - "revisionHistoryLimit": 10, - "progressDeadlineSeconds": 2147483647 - }, - "status": { - "observedGeneration": 2, - "replicas": 1, - "updatedReplicas": 1, - "readyReplicas": 1, - "availableReplicas": 1, - "conditions": [ - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-04-26T22:57:51Z", - "lastTransitionTime": "2019-04-26T22:57:51Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum availability." 
- } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "tunnelfront", - "namespace": "kube-system", - "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/tunnelfront", - "uid": "45e524e6-44e5-11e9-9920-423525a6b683", - "resourceVersion": "9030842", - "generation": 3, - "creationTimestamp": "2019-03-12T16:38:32Z", - "labels": { - "addonmanager.kubernetes.io/mode": "Reconcile", - "component": "tunnel", - "kubernetes.io/cluster-service": "true", - "tier": "node" - }, - "annotations": { - "deployment.kubernetes.io/revision": "3", - "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"component\":\"tunnel\",\"kubernetes.io/cluster-service\":\"true\",\"tier\":\"node\"},\"name\":\"tunnelfront\",\"namespace\":\"kube-system\"},\"spec\":{\"replicas\":1,\"selector\":{\"matchLabels\":{\"component\":\"tunnel\"}},\"template\":{\"metadata\":{\"labels\":{\"component\":\"tunnel\"}},\"spec\":{\"affinity\":{\"nodeAffinity\":{\"requiredDuringSchedulingIgnoredDuringExecution\":{\"nodeSelectorTerms\":[{\"labelSelector\":null,\"matchExpressions\":[{\"key\":\"kubernetes.azure.com/cluster\",\"operator\":\"Exists\"}]}]}}},\"containers\":[{\"env\":[{\"name\":\"OVERRIDE_TUNNEL_SERVER_NAME\",\"value\":\"t_dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io\"},{\"name\":\"TUNNEL_CLUSTERUSER_NAME\",\"value\":\"28957308\"},{\"name\":\"TUNNELGATEWAY_SERVER_NAME\",\"value\":\"dilipr-hea-dilipr-health-te-72c8e8-0b16acad.tun.eastus.azmk8s.io\"},{\"name\":\"TUNNELGATEWAY_SSH_PORT\",\"value\":\"22\"},{\"name\":\"TUNNELGATEWAY_TLS_PORT\",\"value\":\"443\"},{\"name\":\"KUBE_CONFIG\",\"value\":\"/etc/kubernetes/kubeconfig/kubeconfig\"}],\"image\":\"docker.io/deis/hcp-tunnel-front:v1.9.2-v4.0.7\",\"imagePullPolicy\":\"IfNotPresent\",\"livenessProbe\":{\"exec\":{\"
command\":[\"/lib/tunnel-front/check-tunnel-connection.sh\"]},\"failureThreshold\":12,\"initialDelaySeconds\":10,\"periodSeconds\":60},\"name\":\"tunnel-front\",\"resources\":{\"requests\":{\"cpu\":\"10m\",\"memory\":\"64Mi\"}},\"securityContext\":{\"privileged\":true},\"volumeMounts\":[{\"mountPath\":\"/etc/kubernetes/kubeconfig\",\"name\":\"kubeconfig\",\"readOnly\":true},{\"mountPath\":\"/etc/kubernetes/certs\",\"name\":\"certificates\",\"readOnly\":true}]}],\"dnsPolicy\":\"Default\",\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"tunnelfront\",\"tolerations\":[{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Exists\"}],\"volumes\":[{\"configMap\":{\"name\":\"tunnelfront-kubecfg\",\"optional\":true},\"name\":\"kubeconfig\"},{\"hostPath\":{\"path\":\"/etc/kubernetes/certs\"},\"name\":\"certificates\"}]}}}}\n" - } - }, - "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "component": "tunnel" - } - }, - "template": { - "metadata": { - "creationTimestamp": null, - "labels": { - "component": "tunnel" - } - }, - "spec": { - "volumes": [ - { - "name": "kubeconfig", - "configMap": { - "name": "tunnelfront-kubecfg", - "defaultMode": 420, - "optional": true - } - }, - { - "name": "certificates", - "hostPath": { - "path": "/etc/kubernetes/certs", - "type": "" - } - } - ], - "containers": [ - { - "name": "tunnel-front", - "image": "docker.io/deis/hcp-tunnel-front:v1.9.2-v4.0.7", - "env": [ - { - "name": "OVERRIDE_TUNNEL_SERVER_NAME", - "value": "t_dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "TUNNEL_CLUSTERUSER_NAME", - "value": "28957308" - }, - { - "name": "TUNNELGATEWAY_SERVER_NAME", - "value": "dilipr-hea-dilipr-health-te-72c8e8-0b16acad.tun.eastus.azmk8s.io" - }, - { - "name": "TUNNELGATEWAY_SSH_PORT", - "value": "22" - }, - { - "name": "TUNNELGATEWAY_TLS_PORT", - "value": "443" - }, - { - "name": "KUBE_CONFIG", - "value": 
"/etc/kubernetes/kubeconfig/kubeconfig" - } - ], - "resources": { - "requests": { - "cpu": "10m", - "memory": "64Mi" - } - }, - "volumeMounts": [ - { - "name": "kubeconfig", - "readOnly": true, - "mountPath": "/etc/kubernetes/kubeconfig" - }, - { - "name": "certificates", - "readOnly": true, - "mountPath": "/etc/kubernetes/certs" - } - ], - "livenessProbe": { - "exec": { - "command": [ - "/lib/tunnel-front/check-tunnel-connection.sh" - ] - }, - "initialDelaySeconds": 10, - "timeoutSeconds": 1, - "periodSeconds": 60, - "successThreshold": 1, - "failureThreshold": 12 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "Default", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "tunnelfront", - "serviceAccount": "tunnelfront", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "kubernetes.azure.com/cluster", - "operator": "Exists" - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - } - ], - "priorityClassName": "system-node-critical" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": 1, - "maxSurge": 1 - } - }, - "revisionHistoryLimit": 10, - "progressDeadlineSeconds": 2147483647 - }, - "status": { - "observedGeneration": 3, - "replicas": 1, - "updatedReplicas": 1, - "readyReplicas": 1, - "availableReplicas": 1, - "conditions": [ - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-03-12T16:38:32Z", - "lastTransitionTime": "2019-03-12T16:38:32Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has minimum 
availability." - } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - }, - { - "metadata": { - "name": "nginx-deployment", - "namespace": "test", - "selfLink": "/apis/extensions/v1beta1/namespaces/test/deployments/nginx-deployment", - "uid": "841dd837-7ce1-11e9-8d23-32c49ee6f300", - "resourceVersion": "7952520", - "generation": 1, - "creationTimestamp": "2019-05-22T22:32:43Z", - "labels": { - "app": "nginx" - }, - "annotations": { - "deployment.kubernetes.io/revision": "1" - } - }, - "spec": { - "replicas": 2, - "selector": { - "matchLabels": { - "app": "nginx" - } - }, - "template": { - "metadata": { - "creationTimestamp": null, - "labels": { - "app": "nginx" - } - }, - "spec": { - "containers": [ - { - "name": "front-end", - "image": "nginx", - "ports": [ - { - "containerPort": 81, - "protocol": "TCP" - } - ], - "resources": {}, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "Always" - }, - { - "name": "rss-reader", - "image": "nickchase/rss-php-nginx:v1", - "ports": [ - { - "containerPort": 88, - "protocol": "TCP" - } - ], - "resources": {}, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "securityContext": {}, - "schedulerName": "default-scheduler" - } - }, - "strategy": { - "type": "RollingUpdate", - "rollingUpdate": { - "maxUnavailable": 1, - "maxSurge": 1 - } - }, - "revisionHistoryLimit": 10, - "progressDeadlineSeconds": 2147483647 - }, - "status": { - "observedGeneration": 1, - "replicas": 2, - "updatedReplicas": 2, - "readyReplicas": 2, - "availableReplicas": 2, - "conditions": [ - { - "type": "Available", - "status": "True", - "lastUpdateTime": "2019-05-22T22:32:51Z", - "lastTransitionTime": "2019-05-22T22:32:51Z", - "reason": "MinimumReplicasAvailable", - "message": "Deployment has 
minimum availability." - } - ] - }, - "apiVersion": "extensions/v1beta1", - "kind": "Deployment" - } - ] -} \ No newline at end of file diff --git a/inventory/nodes.json b/inventory/nodes.json deleted file mode 100644 index 9ccb0501e..000000000 --- a/inventory/nodes.json +++ /dev/null @@ -1,964 +0,0 @@ -{ - "items": [ - { - "metadata": { - "name": "aks-nodepool1-19574989-0", - "selfLink": "/api/v1/nodes/aks-nodepool1-19574989-0", - "uid": "9012b16c-44e5-11e9-9920-423525a6b683", - "resourceVersion": "9742037", - "creationTimestamp": "2019-03-12T16:40:36Z", - "labels": { - "agentpool": "nodepool1", - "beta.kubernetes.io/arch": "amd64", - "beta.kubernetes.io/instance-type": "Standard_DS1_v2", - "beta.kubernetes.io/os": "linux", - "failure-domain.beta.kubernetes.io/region": "eastus", - "failure-domain.beta.kubernetes.io/zone": "0", - "kubernetes.azure.com/cluster": "MC_dilipr-health-test_dilipr-health-test_eastus", - "kubernetes.io/hostname": "aks-nodepool1-19574989-0", - "kubernetes.io/role": "agent", - "node-role.kubernetes.io/agent": "", - "storageprofile": "managed", - "storagetier": "Premium_LRS" - }, - "annotations": { - "node.alpha.kubernetes.io/ttl": "0", - "volumes.kubernetes.io/controller-managed-attach-detach": "true" - } - }, - "spec": { - "podCIDR": "10.244.1.0/24", - "providerID": "azure:///subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/MC_dilipr-health-test_dilipr-health-test_eastus/providers/Microsoft.Compute/virtualMachines/aks-nodepool1-19574989-0" - }, - "status": { - "capacity": { - "cpu": "1", - "ephemeral-storage": "30428648Ki", - "hugepages-1Gi": "0", - "hugepages-2Mi": "0", - "memory": "3524612Ki", - "pods": "110" - }, - "allocatable": { - "cpu": "940m", - "ephemeral-storage": "28043041951", - "hugepages-1Gi": "0", - "hugepages-2Mi": "0", - "memory": "2504708Ki", - "pods": "110" - }, - "conditions": [ - { - "type": "NetworkUnavailable", - "status": "False", - "lastHeartbeatTime": "2019-03-12T16:42:18Z", - "lastTransitionTime": 
"2019-03-12T16:42:18Z", - "reason": "RouteCreated", - "message": "RouteController created a route" - }, - { - "type": "OutOfDisk", - "status": "False", - "lastHeartbeatTime": "2019-06-08T00:47:00Z", - "lastTransitionTime": "2019-04-22T19:28:17Z", - "reason": "KubeletHasSufficientDisk", - "message": "kubelet has sufficient disk space available" - }, - { - "type": "MemoryPressure", - "status": "False", - "lastHeartbeatTime": "2019-06-08T00:47:00Z", - "lastTransitionTime": "2019-04-22T19:28:17Z", - "reason": "KubeletHasSufficientMemory", - "message": "kubelet has sufficient memory available" - }, - { - "type": "DiskPressure", - "status": "False", - "lastHeartbeatTime": "2019-06-08T00:47:00Z", - "lastTransitionTime": "2019-04-22T19:28:17Z", - "reason": "KubeletHasNoDiskPressure", - "message": "kubelet has no disk pressure" - }, - { - "type": "PIDPressure", - "status": "False", - "lastHeartbeatTime": "2019-06-08T00:47:00Z", - "lastTransitionTime": "2019-03-12T16:40:36Z", - "reason": "KubeletHasSufficientPID", - "message": "kubelet has sufficient PID available" - }, - { - "type": "Ready", - "status": "True", - "lastHeartbeatTime": "2019-06-08T00:47:00Z", - "lastTransitionTime": "2019-04-22T19:28:17Z", - "reason": "KubeletReady", - "message": "kubelet is posting ready status. 
AppArmor enabled" - } - ], - "addresses": [ - { - "type": "Hostname", - "address": "aks-nodepool1-19574989-0" - }, - { - "type": "InternalIP", - "address": "10.240.0.4" - } - ], - "daemonEndpoints": { - "kubeletEndpoint": { - "Port": 10250 - } - }, - "nodeInfo": { - "machineID": "cc9ed99e383540a4b0379995bb779221", - "systemUUID": "301B3B88-C7BD-3D45-A3CB-3CD66A42EB6F", - "bootID": "d8f6c00f-a085-450e-bf5c-12e651a0fcfc", - "kernelVersion": "4.15.0-1037-azure", - "osImage": "Ubuntu 16.04.5 LTS", - "containerRuntimeVersion": "docker://3.0.4", - "kubeletVersion": "v1.11.8", - "kubeProxyVersion": "v1.11.8", - "operatingSystem": "linux", - "architecture": "amd64" - }, - "images": [ - { - "names": [ - "nickchase/rss-php-nginx@sha256:48da56a77fe4ecff4917121365d8e0ce615ebbdfe31f48a996255f5592894e2b", - "nickchase/rss-php-nginx:v1" - ], - "sizeBytes": 677038498 - }, - { - "names": [ - "rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", - "rdilip83/jsonlogger:v12" - ], - "sizeBytes": 676594134 - }, - { - "names": [ - "rdilip83/logeverysecond@sha256:6fe5624808609c507178a77f94384fb9794a4d6b7d102ed8016a4baf608164a1", - "rdilip83/logeverysecond:v2" - ], - "sizeBytes": 674931590 - }, - { - "names": [ - "deis/hcp-tunnel-front@sha256:a067679f0ab376197a344cd410821cf07d69fc322dcd9af4a9229250da725ce2", - "deis/hcp-tunnel-front:v1.9.2-v4.0.4" - ], - "sizeBytes": 640504769 - }, - { - "names": [ - "k8s.gcr.io/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", - "k8s.gcr.io/hyperkube-amd64:v1.11.8" - ], - "sizeBytes": 615263658 - }, - { - "names": [ - "rdilip83/ha@sha256:40208587069b52338eefc24627bab5d10aa6ddc4f3a2a15dee74ad442327765f", - "rdilip83/ha:0426" - ], - "sizeBytes": 514907213 - }, - { - "names": [ - "microsoft/oms@sha256:de83d1df24cb86a3a3110bd03abbd5704d7a7345565b1996f49ff001a3665385", - "microsoft/oms:healthpreview04262019" - ], - "sizeBytes": 514907213 - }, - { - "names": [ - 
"microsoft/oms@sha256:63efbe8fc36635e37aee8c2c631a9d251ab1c736f0c86007c2607987a4bdd8a5", - "microsoft/oms:ciprod04232019" - ], - "sizeBytes": 487282051 - }, - { - "names": [ - "rdilip83/kubehealth@sha256:2050713f627ac24dfd7d3c2594accc49491320711027668d96d3227b8f7ad721", - "rdilip83/kubehealth:2.0" - ], - "sizeBytes": 458285595 - }, - { - "names": [ - "rdilip83/kubehealth@sha256:be84b745efda62fd0d4960d385714737a9b87d02adbc4b841c4a0b5db0495d52", - "rdilip83/kubehealth:1.0" - ], - "sizeBytes": 458243187 - }, - { - "names": [ - "rdilip83/kubehealth@sha256:eb1c97ad840e4b8c84e9e15b7d148960af6a436f497834eda439fe2f9530435c", - "rdilip83/kubehealth:v3" - ], - "sizeBytes": 458243111 - }, - { - "names": [ - "rdilip83/kubehealth@sha256:c43697cca29a63b442ff1414cfa5e72ee1779c4314fac9431760e1973c649a97", - "rdilip83/kubehealth:v2" - ], - "sizeBytes": 458243081 - }, - { - "names": [ - "rdilip83/kubehealth@sha256:899ee16fed942a999a887b7f46702803a1a354517ea04e7191031cbdbc67e3c5", - "rdilip83/kubehealth:v1" - ], - "sizeBytes": 458242872 - }, - { - "names": [ - "deis/hcp-tunnel-front@sha256:ab4e468fe95b18c65dee93543d8d2ca115121728371b2fc467947a8cc9165272", - "deis/hcp-tunnel-front:v1.9.2-v4.0.5" - ], - "sizeBytes": 380477207 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:dc5744fd8c22aebfe40d6b62ab97d18d7bfbfc7ab1782509d69a5a9ec514df2c", - "k8s.gcr.io/cluster-autoscaler:v1.12.2" - ], - "sizeBytes": 232167833 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:156b7b9bcba24ed474f67d0feaf27f2506013f15b030341bbd41c630283161b8", - "k8s.gcr.io/cluster-autoscaler:v1.3.4" - ], - "sizeBytes": 217264129 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:97896235bf66bde573d6f2ee150e212ea7010d314eb5d2cfb2ff1af93335db30", - "k8s.gcr.io/cluster-autoscaler:v1.3.3" - ], - "sizeBytes": 217259793 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:36a369ca4643542d501bce0addf8b903f2141ae9e2608662b77a3d24f01d7780", - "k8s.gcr.io/cluster-autoscaler:v1.2.2" - 
], - "sizeBytes": 208688449 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:9a71e04fdb0be61f2087847b6c5d223db3de4768e0cf8941b550fe9d4a971f58", - "k8s.gcr.io/cluster-autoscaler:v1.1.2" - ], - "sizeBytes": 198265853 - }, - { - "names": [ - "k8s.gcr.io/cloud-controller-manager-amd64@sha256:bc7dc1bd3891ef77a19b5ecf1df50f0ee75266cd797c9f3ff508b40a86c737a0", - "k8s.gcr.io/cloud-controller-manager-amd64:v1.11.8" - ], - "sizeBytes": 139540150 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:7ff5a60304b344f2f29c804c7253632bbc818794f6932236a56db107a6a8f5af", - "k8s.gcr.io/cluster-autoscaler:v1.13.1" - ], - "sizeBytes": 136618018 - }, - { - "names": [ - "containernetworking/networkmonitor@sha256:944408a497c451b0e79d2596dc2e9fe5036cdbba7fa831bff024e1c9ed44190d", - "containernetworking/networkmonitor:v0.0.5" - ], - "sizeBytes": 122043325 - }, - { - "names": [ - "k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", - "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1" - ], - "sizeBytes": 121711221 - }, - { - "names": [ - "nginx@sha256:23b4dcdf0d34d4a129755fc6f52e1c6e23bb34ea011b315d87e193033bcd1b68", - "nginx:latest" - ], - "sizeBytes": 109331233 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:3da3f17cd4f02fe5696f29a5e6cd4aef7111f20dab9bec54ea35942346cfeb60", - "k8s.gcr.io/kube-addon-manager-amd64:v8.8" - ], - "sizeBytes": 99631084 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:2fd1daf3d3cf0e94a753f2263b60dbb0d42b107b5cde0c75ee3fc5c830e016e4", - "k8s.gcr.io/kube-addon-manager-amd64:v8.9" - ], - "sizeBytes": 99240637 - }, - { - "names": [ - "nginx@sha256:e3456c851a152494c3e4ff5fcc26f240206abac0c9d794affb40e0714846c451", - "nginx:1.7.9" - ], - "sizeBytes": 91664166 - }, - { - "names": [ - "microsoft/virtual-kubelet@sha256:9d2ac6238bb2b8b7a85a71ae6103c38bd387884519665f6f9d47fdc1fb8edb61", - "microsoft/virtual-kubelet:latest" - ], - "sizeBytes": 83395521 - }, - 
{ - "names": [ - "deis/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", - "deis/kube-svc-redirect:v1.0.2" - ], - "sizeBytes": 82897218 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:3519273916ba45cfc9b318448d4629819cb5fbccbb0822cce054dd8c1f68cb60", - "k8s.gcr.io/kube-addon-manager-amd64:v8.6" - ], - "sizeBytes": 78384272 - }, - { - "names": [ - "k8s.gcr.io/heapster-amd64@sha256:d4d10455d921802bdb004e7edfe423a2b2f88911319b48abf47e0af909f27f15", - "k8s.gcr.io/heapster-amd64:v1.5.1" - ], - "sizeBytes": 75318380 - }, - { - "names": [ - "k8s.gcr.io/heapster-amd64@sha256:dccaabb0c20cf05c29baefa1e9bf0358b083ccc0fab492b9b3b47fb7e4db5472", - "k8s.gcr.io/heapster-amd64:v1.5.4" - ], - "sizeBytes": 75318342 - }, - { - "names": [ - "k8s.gcr.io/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", - "k8s.gcr.io/heapster-amd64:v1.5.3" - ], - "sizeBytes": 75318342 - }, - { - "names": [ - "k8s.gcr.io/rescheduler@sha256:66a900b01c70d695e112d8fa7779255640aab77ccc31f2bb661e6c674fe0d162", - "k8s.gcr.io/rescheduler:v0.3.1" - ], - "sizeBytes": 74659350 - }, - { - "names": [ - "gcr.io/kubernetes-helm/tiller@sha256:f6d8f4ab9ba993b5f5b60a6edafe86352eabe474ffeb84cb6c79b8866dce45d1", - "gcr.io/kubernetes-helm/tiller:v2.11.0" - ], - "sizeBytes": 71821984 - }, - { - "names": [ - "gcr.io/kubernetes-helm/tiller@sha256:394fb7d5f2fbaca54f6a0dec387cef926f6ae359786c89f7da67db173b97a322", - "gcr.io/kubernetes-helm/tiller:v2.8.1" - ], - "sizeBytes": 71509364 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:d53486c3a0b49ebee019932878dc44232735d5622a51dbbdcec7124199020d09", - "k8s.gcr.io/kube-addon-manager-amd64:v8.7" - ], - "sizeBytes": 63322109 - }, - { - "names": [ - "nvidia/k8s-device-plugin@sha256:41b3531d338477d26eb1151c15d0bea130d31e690752315a5205d8094439b0a6", - "nvidia/k8s-device-plugin:1.11" - ], - "sizeBytes": 63138633 - }, - { - "names": [ - 
"nvidia/k8s-device-plugin@sha256:327487db623cc75bdff86e56942f4af280e5f3de907339d0141fdffaeef342b8", - "nvidia/k8s-device-plugin:1.10" - ], - "sizeBytes": 63130377 - }, - { - "names": [ - "vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", - "vishiy/hello:err100eps" - ], - "sizeBytes": 54649865 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", - "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13" - ], - "sizeBytes": 51157394 - }, - { - "names": [ - "quay.io/coreos/flannel@sha256:5fa9435c1e95be2ec4daa53a35c39d5e3cc99fce33ed4983f4bb707bc9fc175f", - "quay.io/coreos/flannel:v0.8.0" - ], - "sizeBytes": 50732259 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:1a3fc069de481ae690188f6f1ba4664b5cc7760af37120f70c86505c79eea61d", - "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.5" - ], - "sizeBytes": 49387411 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:e7f673b2c5ccd047c48b4eecd5452b2db1b9454daf07b23068ad239f98afaa29", - "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.15.0" - ], - "sizeBytes": 49052023 - }, - { - "names": [ - "k8s.gcr.io/rescheduler@sha256:156cfbfd05a5a815206fd2eeb6cbdaf1596d71ea4b415d3a6c43071dd7b99450", - "k8s.gcr.io/rescheduler:v0.4.0" - ], - "sizeBytes": 48973149 - }, - { - "names": [ - "gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", - "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0" - ], - "sizeBytes": 48645472 - }, - { - "names": [ - "quay.io/coreos/flannel@sha256:6ecef07be35e5e861016ee557f986f89ad8244df47198de379a1bf4e580185df", - "quay.io/coreos/flannel:v0.10.0" - ], - "sizeBytes": 44598861 - }, - { - "names": [ - "k8s.gcr.io/metrics-server-amd64@sha256:49a9f12f7067d11f42c803dbe61ed2c1299959ad85cb315b25ff7eef8e6b8892", - "k8s.gcr.io/metrics-server-amd64:v0.2.1" - ], - "sizeBytes": 42541759 - }, - { - "names": [ - 
"k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:23df717980b4aa08d2da6c4cfa327f1b730d92ec9cf740959d2d5911830d82fb", - "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.8" - ], - "sizeBytes": 42210862 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:f80f5f9328107dc516d67f7b70054354b9367d31d4946a3bffd3383d83d7efe8", - "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.7" - ], - "sizeBytes": 42033070 - } - ] - }, - "apiVersion": "v1", - "kind": "Node" - }, - { - "metadata": { - "name": "aks-nodepool1-19574989-1", - "selfLink": "/api/v1/nodes/aks-nodepool1-19574989-1", - "uid": "8e1b5c77-44e5-11e9-9920-423525a6b683", - "resourceVersion": "9742045", - "creationTimestamp": "2019-03-12T16:40:33Z", - "labels": { - "agentpool": "nodepool1", - "beta.kubernetes.io/arch": "amd64", - "beta.kubernetes.io/instance-type": "Standard_DS1_v2", - "beta.kubernetes.io/os": "linux", - "failure-domain.beta.kubernetes.io/region": "eastus", - "failure-domain.beta.kubernetes.io/zone": "1", - "kubernetes.azure.com/cluster": "MC_dilipr-health-test_dilipr-health-test_eastus", - "kubernetes.io/hostname": "aks-nodepool1-19574989-1", - "kubernetes.io/role": "agent", - "node-role.kubernetes.io/agent": "", - "storageprofile": "managed", - "storagetier": "Premium_LRS" - }, - "annotations": { - "node.alpha.kubernetes.io/ttl": "0", - "volumes.kubernetes.io/controller-managed-attach-detach": "true" - } - }, - "spec": { - "podCIDR": "10.244.0.0/24", - "providerID": "azure:///subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/MC_dilipr-health-test_dilipr-health-test_eastus/providers/Microsoft.Compute/virtualMachines/aks-nodepool1-19574989-1" - }, - "status": { - "capacity": { - "cpu": "1", - "ephemeral-storage": "30428648Ki", - "hugepages-1Gi": "0", - "hugepages-2Mi": "0", - "memory": "3524612Ki", - "pods": "110" - }, - "allocatable": { - "cpu": "940m", - "ephemeral-storage": "28043041951", - "hugepages-1Gi": "0", - "hugepages-2Mi": "0", - "memory": "2504708Ki", - "pods": "110" - }, - "conditions": [ - { 
- "type": "NetworkUnavailable", - "status": "False", - "lastHeartbeatTime": "2019-03-12T16:42:30Z", - "lastTransitionTime": "2019-03-12T16:42:30Z", - "reason": "RouteCreated", - "message": "RouteController created a route" - }, - { - "type": "OutOfDisk", - "status": "False", - "lastHeartbeatTime": "2019-06-08T00:47:06Z", - "lastTransitionTime": "2019-05-19T14:13:53Z", - "reason": "KubeletHasSufficientDisk", - "message": "kubelet has sufficient disk space available" - }, - { - "type": "MemoryPressure", - "status": "False", - "lastHeartbeatTime": "2019-06-08T00:47:06Z", - "lastTransitionTime": "2019-05-19T14:13:53Z", - "reason": "KubeletHasSufficientMemory", - "message": "kubelet has sufficient memory available" - }, - { - "type": "DiskPressure", - "status": "False", - "lastHeartbeatTime": "2019-06-08T00:47:06Z", - "lastTransitionTime": "2019-05-19T14:13:53Z", - "reason": "KubeletHasNoDiskPressure", - "message": "kubelet has no disk pressure" - }, - { - "type": "PIDPressure", - "status": "False", - "lastHeartbeatTime": "2019-06-08T00:47:06Z", - "lastTransitionTime": "2019-03-12T16:40:33Z", - "reason": "KubeletHasSufficientPID", - "message": "kubelet has sufficient PID available" - }, - { - "type": "Ready", - "status": "True", - "lastHeartbeatTime": "2019-06-08T00:47:06Z", - "lastTransitionTime": "2019-05-19T14:13:53Z", - "reason": "KubeletReady", - "message": "kubelet is posting ready status. 
AppArmor enabled" - } - ], - "addresses": [ - { - "type": "Hostname", - "address": "aks-nodepool1-19574989-1" - }, - { - "type": "InternalIP", - "address": "10.240.0.5" - } - ], - "daemonEndpoints": { - "kubeletEndpoint": { - "Port": 10250 - } - }, - "nodeInfo": { - "machineID": "1954026de5e6436788f214eb0dfd6a13", - "systemUUID": "17A6A78E-D3E2-2A4F-852B-C91D933C8D5B", - "bootID": "4c822e6d-c2e5-4697-9a01-467e04804fc1", - "kernelVersion": "4.15.0-1037-azure", - "osImage": "Ubuntu 16.04.5 LTS", - "containerRuntimeVersion": "docker://3.0.4", - "kubeletVersion": "v1.11.8", - "kubeProxyVersion": "v1.11.8", - "operatingSystem": "linux", - "architecture": "amd64" - }, - "images": [ - { - "names": [ - "perl@sha256:268e7af9853bcc6d2100e2ad76e928c2ca861518217c269b8a762849a8617c12", - "perl:latest" - ], - "sizeBytes": 890592834 - }, - { - "names": [ - "nickchase/rss-php-nginx@sha256:48da56a77fe4ecff4917121365d8e0ce615ebbdfe31f48a996255f5592894e2b", - "nickchase/rss-php-nginx:v1" - ], - "sizeBytes": 677038498 - }, - { - "names": [ - "rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", - "rdilip83/jsonlogger:v12" - ], - "sizeBytes": 676594134 - }, - { - "names": [ - "rdilip83/logeverysecond@sha256:6fe5624808609c507178a77f94384fb9794a4d6b7d102ed8016a4baf608164a1", - "rdilip83/logeverysecond:v2" - ], - "sizeBytes": 674931590 - }, - { - "names": [ - "deis/hcp-tunnel-front@sha256:a067679f0ab376197a344cd410821cf07d69fc322dcd9af4a9229250da725ce2", - "deis/hcp-tunnel-front:v1.9.2-v4.0.4" - ], - "sizeBytes": 640504769 - }, - { - "names": [ - "k8s.gcr.io/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", - "k8s.gcr.io/hyperkube-amd64:v1.11.8" - ], - "sizeBytes": 615263658 - }, - { - "names": [ - "microsoft/oms@sha256:de83d1df24cb86a3a3110bd03abbd5704d7a7345565b1996f49ff001a3665385", - "microsoft/oms:healthpreview04262019" - ], - "sizeBytes": 514907213 - }, - { - "names": [ - 
"rdilip83/ha@sha256:40208587069b52338eefc24627bab5d10aa6ddc4f3a2a15dee74ad442327765f", - "rdilip83/ha:0426" - ], - "sizeBytes": 514907213 - }, - { - "names": [ - "microsoft/oms@sha256:63efbe8fc36635e37aee8c2c631a9d251ab1c736f0c86007c2607987a4bdd8a5", - "microsoft/oms:ciprod04232019" - ], - "sizeBytes": 487282051 - }, - { - "names": [ - "rdilip83/kubehealth@sha256:2050713f627ac24dfd7d3c2594accc49491320711027668d96d3227b8f7ad721", - "rdilip83/kubehealth:2.0" - ], - "sizeBytes": 458285595 - }, - { - "names": [ - "rdilip83/kubehealth@sha256:be84b745efda62fd0d4960d385714737a9b87d02adbc4b841c4a0b5db0495d52", - "rdilip83/kubehealth:1.0" - ], - "sizeBytes": 458243187 - }, - { - "names": [ - "rdilip83/kubehealth@sha256:eb1c97ad840e4b8c84e9e15b7d148960af6a436f497834eda439fe2f9530435c", - "rdilip83/kubehealth:v3" - ], - "sizeBytes": 458243111 - }, - { - "names": [ - "rdilip83/kubehealth@sha256:c43697cca29a63b442ff1414cfa5e72ee1779c4314fac9431760e1973c649a97", - "rdilip83/kubehealth:v2" - ], - "sizeBytes": 458243081 - }, - { - "names": [ - "rdilip83/kubehealth@sha256:899ee16fed942a999a887b7f46702803a1a354517ea04e7191031cbdbc67e3c5", - "rdilip83/kubehealth:v1" - ], - "sizeBytes": 458242872 - }, - { - "names": [ - "deis/hcp-tunnel-front@sha256:68878ee3ea1781b322ea3952c3370e31dd89be8bb0864e2bf27bdba6dc904c41", - "deis/hcp-tunnel-front:v1.9.2-v4.0.7" - ], - "sizeBytes": 383483267 - }, - { - "names": [ - "deis/hcp-tunnel-front@sha256:ab4e468fe95b18c65dee93543d8d2ca115121728371b2fc467947a8cc9165272", - "deis/hcp-tunnel-front:v1.9.2-v4.0.5" - ], - "sizeBytes": 380477207 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:dc5744fd8c22aebfe40d6b62ab97d18d7bfbfc7ab1782509d69a5a9ec514df2c", - "k8s.gcr.io/cluster-autoscaler:v1.12.2" - ], - "sizeBytes": 232167833 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:156b7b9bcba24ed474f67d0feaf27f2506013f15b030341bbd41c630283161b8", - "k8s.gcr.io/cluster-autoscaler:v1.3.4" - ], - "sizeBytes": 217264129 - }, - { - "names": 
[ - "k8s.gcr.io/cluster-autoscaler@sha256:97896235bf66bde573d6f2ee150e212ea7010d314eb5d2cfb2ff1af93335db30", - "k8s.gcr.io/cluster-autoscaler:v1.3.3" - ], - "sizeBytes": 217259793 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:36a369ca4643542d501bce0addf8b903f2141ae9e2608662b77a3d24f01d7780", - "k8s.gcr.io/cluster-autoscaler:v1.2.2" - ], - "sizeBytes": 208688449 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:9a71e04fdb0be61f2087847b6c5d223db3de4768e0cf8941b550fe9d4a971f58", - "k8s.gcr.io/cluster-autoscaler:v1.1.2" - ], - "sizeBytes": 198265853 - }, - { - "names": [ - "k8s.gcr.io/cloud-controller-manager-amd64@sha256:bc7dc1bd3891ef77a19b5ecf1df50f0ee75266cd797c9f3ff508b40a86c737a0", - "k8s.gcr.io/cloud-controller-manager-amd64:v1.11.8" - ], - "sizeBytes": 139540150 - }, - { - "names": [ - "k8s.gcr.io/cluster-autoscaler@sha256:7ff5a60304b344f2f29c804c7253632bbc818794f6932236a56db107a6a8f5af", - "k8s.gcr.io/cluster-autoscaler:v1.13.1" - ], - "sizeBytes": 136618018 - }, - { - "names": [ - "containernetworking/networkmonitor@sha256:944408a497c451b0e79d2596dc2e9fe5036cdbba7fa831bff024e1c9ed44190d", - "containernetworking/networkmonitor:v0.0.5" - ], - "sizeBytes": 122043325 - }, - { - "names": [ - "k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", - "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1" - ], - "sizeBytes": 121711221 - }, - { - "names": [ - "nginx@sha256:23b4dcdf0d34d4a129755fc6f52e1c6e23bb34ea011b315d87e193033bcd1b68", - "nginx:latest" - ], - "sizeBytes": 109331233 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:3da3f17cd4f02fe5696f29a5e6cd4aef7111f20dab9bec54ea35942346cfeb60", - "k8s.gcr.io/kube-addon-manager-amd64:v8.8" - ], - "sizeBytes": 99631084 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:2fd1daf3d3cf0e94a753f2263b60dbb0d42b107b5cde0c75ee3fc5c830e016e4", - "k8s.gcr.io/kube-addon-manager-amd64:v8.9" - ], - "sizeBytes": 
99240637 - }, - { - "names": [ - "nginx@sha256:e3456c851a152494c3e4ff5fcc26f240206abac0c9d794affb40e0714846c451", - "nginx:1.7.9" - ], - "sizeBytes": 91664166 - }, - { - "names": [ - "microsoft/virtual-kubelet@sha256:9d2ac6238bb2b8b7a85a71ae6103c38bd387884519665f6f9d47fdc1fb8edb61", - "microsoft/virtual-kubelet:latest" - ], - "sizeBytes": 83395521 - }, - { - "names": [ - "deis/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", - "deis/kube-svc-redirect:v1.0.2" - ], - "sizeBytes": 82897218 - }, - { - "names": [ - "k8s.gcr.io/kube-addon-manager-amd64@sha256:3519273916ba45cfc9b318448d4629819cb5fbccbb0822cce054dd8c1f68cb60", - "k8s.gcr.io/kube-addon-manager-amd64:v8.6" - ], - "sizeBytes": 78384272 - }, - { - "names": [ - "k8s.gcr.io/heapster-amd64@sha256:d4d10455d921802bdb004e7edfe423a2b2f88911319b48abf47e0af909f27f15", - "k8s.gcr.io/heapster-amd64:v1.5.1" - ], - "sizeBytes": 75318380 - }, - { - "names": [ - "k8s.gcr.io/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", - "k8s.gcr.io/heapster-amd64:v1.5.3" - ], - "sizeBytes": 75318342 - }, - { - "names": [ - "k8s.gcr.io/heapster-amd64@sha256:dccaabb0c20cf05c29baefa1e9bf0358b083ccc0fab492b9b3b47fb7e4db5472", - "k8s.gcr.io/heapster-amd64:v1.5.4" - ], - "sizeBytes": 75318342 - }, - { - "names": [ - "k8s.gcr.io/rescheduler@sha256:66a900b01c70d695e112d8fa7779255640aab77ccc31f2bb661e6c674fe0d162", - "k8s.gcr.io/rescheduler:v0.3.1" - ], - "sizeBytes": 74659350 - }, - { - "names": [ - "gcr.io/kubernetes-helm/tiller@sha256:f6d8f4ab9ba993b5f5b60a6edafe86352eabe474ffeb84cb6c79b8866dce45d1", - "gcr.io/kubernetes-helm/tiller:v2.11.0" - ], - "sizeBytes": 71821984 - }, - { - "names": [ - "gcr.io/kubernetes-helm/tiller@sha256:394fb7d5f2fbaca54f6a0dec387cef926f6ae359786c89f7da67db173b97a322", - "gcr.io/kubernetes-helm/tiller:v2.8.1" - ], - "sizeBytes": 71509364 - }, - { - "names": [ - 
"k8s.gcr.io/kube-addon-manager-amd64@sha256:d53486c3a0b49ebee019932878dc44232735d5622a51dbbdcec7124199020d09", - "k8s.gcr.io/kube-addon-manager-amd64:v8.7" - ], - "sizeBytes": 63322109 - }, - { - "names": [ - "nvidia/k8s-device-plugin@sha256:41b3531d338477d26eb1151c15d0bea130d31e690752315a5205d8094439b0a6", - "nvidia/k8s-device-plugin:1.11" - ], - "sizeBytes": 63138633 - }, - { - "names": [ - "nvidia/k8s-device-plugin@sha256:327487db623cc75bdff86e56942f4af280e5f3de907339d0141fdffaeef342b8", - "nvidia/k8s-device-plugin:1.10" - ], - "sizeBytes": 63130377 - }, - { - "names": [ - "vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", - "vishiy/hello:err100eps" - ], - "sizeBytes": 54649865 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", - "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13" - ], - "sizeBytes": 51157394 - }, - { - "names": [ - "quay.io/coreos/flannel@sha256:5fa9435c1e95be2ec4daa53a35c39d5e3cc99fce33ed4983f4bb707bc9fc175f", - "quay.io/coreos/flannel:v0.8.0" - ], - "sizeBytes": 50732259 - }, - { - "names": [ - "k8s.gcr.io/cluster-proportional-autoscaler-amd64@sha256:003f98d9f411ddfa6ff6d539196355e03ddd69fa4ed38c7ffb8fec6f729afe2d", - "k8s.gcr.io/cluster-proportional-autoscaler-amd64:1.1.2-r2" - ], - "sizeBytes": 49648481 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:1a3fc069de481ae690188f6f1ba4664b5cc7760af37120f70c86505c79eea61d", - "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.5" - ], - "sizeBytes": 49387411 - }, - { - "names": [ - "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:e7f673b2c5ccd047c48b4eecd5452b2db1b9454daf07b23068ad239f98afaa29", - "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.15.0" - ], - "sizeBytes": 49052023 - }, - { - "names": [ - "k8s.gcr.io/rescheduler@sha256:156cfbfd05a5a815206fd2eeb6cbdaf1596d71ea4b415d3a6c43071dd7b99450", - "k8s.gcr.io/rescheduler:v0.4.0" - ], - "sizeBytes": 48973149 - }, - { - "names": [ - 
"gcr.io/google-containers/ip-masq-agent-amd64@sha256:de02f321dc002b767b0db0c476541fa2b6b9b8315aad83e8c36e4afd578ea4fc", - "gcr.io/google-containers/ip-masq-agent-amd64:v2.0.0" - ], - "sizeBytes": 48645472 - }, - { - "names": [ - "quay.io/coreos/flannel@sha256:6ecef07be35e5e861016ee557f986f89ad8244df47198de379a1bf4e580185df", - "quay.io/coreos/flannel:v0.10.0" - ], - "sizeBytes": 44598861 - } - ] - }, - "apiVersion": "v1", - "kind": "Node" - } - ] -} \ No newline at end of file diff --git a/inventory/pods.json b/inventory/pods.json deleted file mode 100644 index ab7ca36db..000000000 --- a/inventory/pods.json +++ /dev/null @@ -1,6971 +0,0 @@ -{ - "items": [ - { - "metadata": { - "name": "diliprdeploymentnodeapps-c4fdfb446-mzcsr", - "generateName": "diliprdeploymentnodeapps-c4fdfb446-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/diliprdeploymentnodeapps-c4fdfb446-mzcsr", - "uid": "ee31a9ce-526e-11e9-a899-6a5520730c61", - "resourceVersion": "4597573", - "creationTimestamp": "2019-03-29T22:06:40Z", - "labels": { - "app": "diliprsnodeapppod", - "diliprPodLabel1": "p1", - "diliprPodLabel2": "p2", - "pod-template-hash": "709896002" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "diliprdeploymentnodeapps-c4fdfb446", - "uid": "ee1e78e0-526e-11e9-a899-6a5520730c61", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "diliprcontainerhelloapp", - "image": "rdilip83/logeverysecond:v2", - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-0", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T22:06:40Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T22:06:47Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T22:06:40Z" - } - ], - "hostIP": "10.240.0.4", - "podIP": "10.244.1.98", - "startTime": "2019-03-29T22:06:40Z", - "containerStatuses": [ - { - "name": "diliprcontainerhelloapp", - "state": { - "running": { - "startedAt": "2019-03-29T22:06:47Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "rdilip83/logeverysecond:v2", - "imageID": 
"docker-pullable://rdilip83/logeverysecond@sha256:6fe5624808609c507178a77f94384fb9794a4d6b7d102ed8016a4baf608164a1", - "containerID": "docker://80562bb7c90ccdde41c7ba85a2a8e063a0f6f19a79d54895d82f3d7f1023ad8c" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "nginx-deployment-67594d6bf6-m2vwn", - "generateName": "nginx-deployment-67594d6bf6-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/nginx-deployment-67594d6bf6-m2vwn", - "uid": "6d9749a6-7ce1-11e9-8d23-32c49ee6f300", - "resourceVersion": "7952432", - "creationTimestamp": "2019-05-22T22:32:05Z", - "labels": { - "app": "nginx", - "pod-template-hash": "2315082692" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "nginx-deployment-67594d6bf6", - "uid": "6d8e3428-7ce1-11e9-8d23-32c49ee6f300", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "nginx", - "image": "nginx:1.7.9", - "ports": [ - { - "containerPort": 80, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - 
"terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-0", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:32:06Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:32:14Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:32:06Z" - } - ], - "hostIP": "10.240.0.4", - "podIP": "10.244.1.128", - "startTime": "2019-05-22T22:32:06Z", - "containerStatuses": [ - { - "name": "nginx", - "state": { - "running": { - "startedAt": "2019-05-22T22:32:12Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "nginx:1.7.9", - "imageID": "docker-pullable://nginx@sha256:e3456c851a152494c3e4ff5fcc26f240206abac0c9d794affb40e0714846c451", - "containerID": "docker://b9acafd3f81d61df5c7c51ac8ae081b115759e5aa4f744b23ba4d0641732ae1f" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "nginx-deployment-67594d6bf6-tl5v6", - "generateName": "nginx-deployment-67594d6bf6-", - "namespace": "default", - "selfLink": 
"/api/v1/namespaces/default/pods/nginx-deployment-67594d6bf6-tl5v6", - "uid": "6d940ac3-7ce1-11e9-8d23-32c49ee6f300", - "resourceVersion": "7952430", - "creationTimestamp": "2019-05-22T22:32:05Z", - "labels": { - "app": "nginx", - "pod-template-hash": "2315082692" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "nginx-deployment-67594d6bf6", - "uid": "6d8e3428-7ce1-11e9-8d23-32c49ee6f300", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "nginx", - "image": "nginx:1.7.9", - "ports": [ - { - "containerPort": 80, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-0", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": 
"NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:32:06Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:32:14Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:32:05Z" - } - ], - "hostIP": "10.240.0.4", - "podIP": "10.244.1.127", - "startTime": "2019-05-22T22:32:06Z", - "containerStatuses": [ - { - "name": "nginx", - "state": { - "running": { - "startedAt": "2019-05-22T22:32:12Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "nginx:1.7.9", - "imageID": "docker-pullable://nginx@sha256:e3456c851a152494c3e4ff5fcc26f240206abac0c9d794affb40e0714846c451", - "containerID": "docker://1a6ca08a1005680e0fc2789a5ae3dc0f8790eda724c8033f5271f8d50415cc04" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "nginx-deployment-67594d6bf6-wc6bj", - "generateName": "nginx-deployment-67594d6bf6-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/nginx-deployment-67594d6bf6-wc6bj", - "uid": "6d96e65b-7ce1-11e9-8d23-32c49ee6f300", - "resourceVersion": "7952421", - "creationTimestamp": "2019-05-22T22:32:05Z", - "labels": { - "app": "nginx", - "pod-template-hash": "2315082692" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "nginx-deployment-67594d6bf6", - "uid": "6d8e3428-7ce1-11e9-8d23-32c49ee6f300", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { 
- "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "nginx", - "image": "nginx:1.7.9", - "ports": [ - { - "containerPort": 80, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-1", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:32:06Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:32:12Z" - }, - { - "type": 
"ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:32:05Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.244.0.102", - "startTime": "2019-05-22T22:32:06Z", - "containerStatuses": [ - { - "name": "nginx", - "state": { - "running": { - "startedAt": "2019-05-22T22:32:11Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "nginx:1.7.9", - "imageID": "docker-pullable://nginx@sha256:e3456c851a152494c3e4ff5fcc26f240206abac0c9d794affb40e0714846c451", - "containerID": "docker://658d95bf8e28d6eda298b621e404f95925c7b0d92034ab149ff439aaeb839601" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "pi-m8ccw", - "generateName": "pi-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/pi-m8ccw", - "uid": "9fb16aaa-7ccc-11e9-8d23-32c49ee6f300", - "resourceVersion": "7940877", - "creationTimestamp": "2019-05-22T20:03:10Z", - "labels": { - "controller-uid": "9fad836f-7ccc-11e9-8d23-32c49ee6f300", - "job-name": "pi" - }, - "ownerReferences": [ - { - "apiVersion": "batch/v1", - "kind": "Job", - "name": "pi", - "uid": "9fad836f-7ccc-11e9-8d23-32c49ee6f300", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "pi", - "image": "perl", - "command": [ - "perl", - "-Mbignum=bpi", - "-wle", - "print bpi(2000)" - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "Always" - } - ], - "restartPolicy": "Never", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-1", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Succeeded", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T20:03:10Z", - "reason": "PodCompleted" - }, - { - "type": "Ready", - "status": "False", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T20:04:43Z", - "reason": "PodCompleted" - }, - { - "type": "ContainersReady", - "status": "False", - "lastProbeTime": null, - "lastTransitionTime": null, - "reason": "PodCompleted" - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T20:03:10Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.244.0.100", - "startTime": "2019-05-22T20:03:10Z", - "containerStatuses": [ - { - "name": "pi", - "state": { - "terminated": { - "exitCode": 0, - "reason": "Completed", - "startedAt": "2019-05-22T20:04:36Z", - "finishedAt": "2019-05-22T20:04:41Z", - 
"containerID": "docker://1772f34611835a233cd4b41c69ed0b9b23ed14003085e82e39e53d15ba8dd9d5" - } - }, - "lastState": {}, - "ready": false, - "restartCount": 0, - "image": "perl:latest", - "imageID": "docker-pullable://perl@sha256:268e7af9853bcc6d2100e2ad76e928c2ca861518217c269b8a762849a8617c12", - "containerID": "docker://1772f34611835a233cd4b41c69ed0b9b23ed14003085e82e39e53d15ba8dd9d5" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "rss-site", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/rss-site", - "uid": "68a34ea4-7ce4-11e9-8d23-32c49ee6f300", - "resourceVersion": "7954135", - "creationTimestamp": "2019-05-22T22:53:26Z", - "labels": { - "app": "web" - }, - "annotations": { - "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"v1\",\"kind\":\"Pod\",\"metadata\":{\"annotations\":{},\"labels\":{\"app\":\"web\"},\"name\":\"rss-site\",\"namespace\":\"default\"},\"spec\":{\"containers\":[{\"image\":\"nginx\",\"name\":\"front-end\",\"ports\":[{\"containerPort\":80}]},{\"image\":\"nickchase/rss-php-nginx:v1\",\"name\":\"rss-reader\",\"ports\":[{\"containerPort\":88}]}]}}\n" - } - }, - "spec": { - "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "front-end", - "image": "nginx", - "ports": [ - { - "containerPort": 80, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": 
"dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "Always" - }, - { - "name": "rss-reader", - "image": "nickchase/rss-php-nginx:v1", - "ports": [ - { - "containerPort": 88, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-1", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - 
"status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:53:26Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:53:33Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:53:26Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.244.0.104", - "startTime": "2019-05-22T22:53:26Z", - "containerStatuses": [ - { - "name": "front-end", - "state": { - "running": { - "startedAt": "2019-05-22T22:53:31Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "nginx:latest", - "imageID": "docker-pullable://nginx@sha256:23b4dcdf0d34d4a129755fc6f52e1c6e23bb34ea011b315d87e193033bcd1b68", - "containerID": "docker://b27414a5d79e4d94bb84b03944e0eea1b6af4e2f4e31033c541159ec70e08d49" - }, - { - "name": "rss-reader", - "state": { - "running": { - "startedAt": "2019-05-22T22:53:33Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "nickchase/rss-php-nginx:v1", - "imageID": "docker-pullable://nickchase/rss-php-nginx@sha256:48da56a77fe4ecff4917121365d8e0ce615ebbdfe31f48a996255f5592894e2b", - "containerID": "docker://be831ee23d69827386f29625751bae70d56b35b3a8b4ed02f242cbfd08ddd5d7" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "vishwadeploymentnodeapps-8686cf54db-bpcpf", - "generateName": "vishwadeploymentnodeapps-8686cf54db-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-bpcpf", - "uid": "cf23dfc8-5261-11e9-a899-6a5520730c61", - "resourceVersion": "4597543", - "creationTimestamp": "2019-03-29T20:32:45Z", - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "vishwasnodeapppod", - "pod-template-hash": "4242791086" - }, - 
"ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "vishwadeploymentnodeapps-8686cf54db", - "uid": "cf14b523-5261-11e9-a899-6a5520730c61", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "vishwacontainerhelloapp", - "image": "vishiy/hello:err100eps", - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-0", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - 
"lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:57Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - } - ], - "hostIP": "10.240.0.4", - "podIP": "10.244.1.92", - "startTime": "2019-03-29T20:32:45Z", - "containerStatuses": [ - { - "name": "vishwacontainerhelloapp", - "state": { - "running": { - "startedAt": "2019-03-29T20:32:55Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "vishiy/hello:err100eps", - "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", - "containerID": "docker://3d87ae8f8543c8bc19ebd2e98cb76cc283e8837e4fab2b2557113c35169ffaa3" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "vishwadeploymentnodeapps-8686cf54db-c6j7k", - "generateName": "vishwadeploymentnodeapps-8686cf54db-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-c6j7k", - "uid": "cf23bf6d-5261-11e9-a899-6a5520730c61", - "resourceVersion": "7580635", - "creationTimestamp": "2019-03-29T20:32:45Z", - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "vishwasnodeapppod", - "pod-template-hash": "4242791086" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "vishwadeploymentnodeapps-8686cf54db", - "uid": "cf14b523-5261-11e9-a899-6a5520730c61", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": 
"vishwacontainerhelloapp", - "image": "vishiy/hello:err100eps", - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-1", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:57Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" 
- } - ], - "hostIP": "10.240.0.5", - "podIP": "10.244.0.62", - "startTime": "2019-03-29T20:32:45Z", - "containerStatuses": [ - { - "name": "vishwacontainerhelloapp", - "state": { - "running": { - "startedAt": "2019-03-29T20:32:54Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "vishiy/hello:err100eps", - "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", - "containerID": "docker://24cbb8235f78a86b434cecd783403ca033c827be8a9c7e6c3948f0c2cb7781ca" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "vishwadeploymentnodeapps-8686cf54db-ft4lb", - "generateName": "vishwadeploymentnodeapps-8686cf54db-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-ft4lb", - "uid": "cf2b40bb-5261-11e9-a899-6a5520730c61", - "resourceVersion": "4597538", - "creationTimestamp": "2019-03-29T20:32:45Z", - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "vishwasnodeapppod", - "pod-template-hash": "4242791086" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "vishwadeploymentnodeapps-8686cf54db", - "uid": "cf14b523-5261-11e9-a899-6a5520730c61", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "vishwacontainerhelloapp", - "image": "vishiy/hello:err100eps", - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-0", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:55Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - } - ], - "hostIP": "10.240.0.4", - "podIP": "10.244.1.91", - "startTime": "2019-03-29T20:32:45Z", - "containerStatuses": [ - { - "name": "vishwacontainerhelloapp", - "state": { - "running": { - "startedAt": "2019-03-29T20:32:54Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "vishiy/hello:err100eps", - "imageID": 
"docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", - "containerID": "docker://d76d3179c2cee3149fb8d642adf232cdb6a7c61fb9e501115992d8e268e9b049" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "vishwadeploymentnodeapps-8686cf54db-gl26j", - "generateName": "vishwadeploymentnodeapps-8686cf54db-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-gl26j", - "uid": "cf2bc896-5261-11e9-a899-6a5520730c61", - "resourceVersion": "7580640", - "creationTimestamp": "2019-03-29T20:32:45Z", - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "vishwasnodeapppod", - "pod-template-hash": "4242791086" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "vishwadeploymentnodeapps-8686cf54db", - "uid": "cf14b523-5261-11e9-a899-6a5520730c61", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "vishwacontainerhelloapp", - "image": "vishiy/hello:err100eps", - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - 
"terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-1", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:58Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.244.0.66", - "startTime": "2019-03-29T20:32:45Z", - "containerStatuses": [ - { - "name": "vishwacontainerhelloapp", - "state": { - "running": { - "startedAt": "2019-03-29T20:32:57Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "vishiy/hello:err100eps", - "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", - "containerID": "docker://0e2458ebe42fbc427ccd83827aa96299e76b11c7b0aa1ff7a4622041485fb945" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "vishwadeploymentnodeapps-8686cf54db-j88g7", - "generateName": 
"vishwadeploymentnodeapps-8686cf54db-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-j88g7", - "uid": "cf31de9c-5261-11e9-a899-6a5520730c61", - "resourceVersion": "7580649", - "creationTimestamp": "2019-03-29T20:32:45Z", - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "vishwasnodeapppod", - "pod-template-hash": "4242791086" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "vishwadeploymentnodeapps-8686cf54db", - "uid": "cf14b523-5261-11e9-a899-6a5520730c61", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "vishwacontainerhelloapp", - "image": "vishiy/hello:err100eps", - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-1", - "securityContext": {}, - "schedulerName": 
"default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:57Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.244.0.63", - "startTime": "2019-03-29T20:32:45Z", - "containerStatuses": [ - { - "name": "vishwacontainerhelloapp", - "state": { - "running": { - "startedAt": "2019-03-29T20:32:55Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "vishiy/hello:err100eps", - "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", - "containerID": "docker://62afa45f4c898e8a5b67e83fb838dbfda2194f3c9b421046071459592b56ddf6" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "vishwadeploymentnodeapps-8686cf54db-kmk9k", - "generateName": "vishwadeploymentnodeapps-8686cf54db-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-kmk9k", - "uid": "cf317bb0-5261-11e9-a899-6a5520730c61", - "resourceVersion": "4597548", - "creationTimestamp": "2019-03-29T20:32:45Z", - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "vishwasnodeapppod", - "pod-template-hash": "4242791086" - 
}, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "vishwadeploymentnodeapps-8686cf54db", - "uid": "cf14b523-5261-11e9-a899-6a5520730c61", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "vishwacontainerhelloapp", - "image": "vishiy/hello:err100eps", - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-0", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - 
"lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:58Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - } - ], - "hostIP": "10.240.0.4", - "podIP": "10.244.1.93", - "startTime": "2019-03-29T20:32:45Z", - "containerStatuses": [ - { - "name": "vishwacontainerhelloapp", - "state": { - "running": { - "startedAt": "2019-03-29T20:32:57Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "vishiy/hello:err100eps", - "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", - "containerID": "docker://4b63f7603a1ab5db7dd23af2e5d76b5ad715b53abb957a9f97a5174ae3ce09e7" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "vishwadeploymentnodeapps-8686cf54db-ntfdd", - "generateName": "vishwadeploymentnodeapps-8686cf54db-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-ntfdd", - "uid": "cf2b6c48-5261-11e9-a899-6a5520730c61", - "resourceVersion": "4597569", - "creationTimestamp": "2019-03-29T20:32:45Z", - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "vishwasnodeapppod", - "pod-template-hash": "4242791086" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "vishwadeploymentnodeapps-8686cf54db", - "uid": "cf14b523-5261-11e9-a899-6a5520730c61", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": 
"vishwacontainerhelloapp", - "image": "vishiy/hello:err100eps", - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-0", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:58Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" 
- } - ], - "hostIP": "10.240.0.4", - "podIP": "10.244.1.94", - "startTime": "2019-03-29T20:32:45Z", - "containerStatuses": [ - { - "name": "vishwacontainerhelloapp", - "state": { - "running": { - "startedAt": "2019-03-29T20:32:57Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "vishiy/hello:err100eps", - "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", - "containerID": "docker://45a30da82055fc9f8e4000124b0a3ca1445809af4183ae4258b7e8edc829a46c" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "vishwadeploymentnodeapps-8686cf54db-rdnb2", - "generateName": "vishwadeploymentnodeapps-8686cf54db-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-rdnb2", - "uid": "cf1bf952-5261-11e9-a899-6a5520730c61", - "resourceVersion": "4597535", - "creationTimestamp": "2019-03-29T20:32:45Z", - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "vishwasnodeapppod", - "pod-template-hash": "4242791086" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "vishwadeploymentnodeapps-8686cf54db", - "uid": "cf14b523-5261-11e9-a899-6a5520730c61", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "vishwacontainerhelloapp", - "image": "vishiy/hello:err100eps", - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-0", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:55Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - } - ], - "hostIP": "10.240.0.4", - "podIP": "10.244.1.90", - "startTime": "2019-03-29T20:32:45Z", - "containerStatuses": [ - { - "name": "vishwacontainerhelloapp", - "state": { - "running": { - "startedAt": "2019-03-29T20:32:54Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "vishiy/hello:err100eps", - "imageID": 
"docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", - "containerID": "docker://4d79b7011d2e1916e7e565a99f9f6f72dcdc83597bdbd44ce218142465cd466e" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "vishwadeploymentnodeapps-8686cf54db-tjvff", - "generateName": "vishwadeploymentnodeapps-8686cf54db-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-tjvff", - "uid": "cf317302-5261-11e9-a899-6a5520730c61", - "resourceVersion": "7580633", - "creationTimestamp": "2019-03-29T20:32:45Z", - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "vishwasnodeapppod", - "pod-template-hash": "4242791086" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "vishwadeploymentnodeapps-8686cf54db", - "uid": "cf14b523-5261-11e9-a899-6a5520730c61", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "vishwacontainerhelloapp", - "image": "vishiy/hello:err100eps", - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - 
"terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-1", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:58Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.244.0.64", - "startTime": "2019-03-29T20:32:45Z", - "containerStatuses": [ - { - "name": "vishwacontainerhelloapp", - "state": { - "running": { - "startedAt": "2019-03-29T20:32:56Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "vishiy/hello:err100eps", - "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", - "containerID": "docker://7172a29b88e7631ef6ae3cc3361084fe7ef5ddf41d7a3a2c340f48b614b18aa4" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "vishwadeploymentnodeapps-8686cf54db-xjc55", - "generateName": 
"vishwadeploymentnodeapps-8686cf54db-", - "namespace": "default", - "selfLink": "/api/v1/namespaces/default/pods/vishwadeploymentnodeapps-8686cf54db-xjc55", - "uid": "cf2b7ddd-5261-11e9-a899-6a5520730c61", - "resourceVersion": "7580655", - "creationTimestamp": "2019-03-29T20:32:45Z", - "labels": { - "VishwaPodLabel1": "p1", - "VishwaPodLabel2": "p2", - "app": "vishwasnodeapppod", - "pod-template-hash": "4242791086" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "vishwadeploymentnodeapps-8686cf54db", - "uid": "cf14b523-5261-11e9-a899-6a5520730c61", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-kbbjr", - "secret": { - "secretName": "default-token-kbbjr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "vishwacontainerhelloapp", - "image": "vishiy/hello:err100eps", - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-kbbjr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-1", - "securityContext": {}, - "schedulerName": 
"default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:58Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-03-29T20:32:45Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.244.0.65", - "startTime": "2019-03-29T20:32:45Z", - "containerStatuses": [ - { - "name": "vishwacontainerhelloapp", - "state": { - "running": { - "startedAt": "2019-03-29T20:32:57Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "vishiy/hello:err100eps", - "imageID": "docker-pullable://vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", - "containerID": "docker://591d4eb3cbcc39d98b2d7d69f040b373b1fd5238ef618cadf82bd66e053bf847" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "heapster-85796b5fdd-47bwj", - "generateName": "heapster-85796b5fdd-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/heapster-85796b5fdd-47bwj", - "uid": "b13b1438-8787-11e9-9d68-7abe3d755a8f", - "resourceVersion": "9459173", - "creationTimestamp": "2019-06-05T11:47:27Z", - "labels": { - "k8s-app": "heapster", - "pod-template-hash": "4135261988" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": 
"heapster-85796b5fdd", - "uid": "b0e6a4f1-8787-11e9-9d68-7abe3d755a8f", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "heapster-config-volume", - "configMap": { - "name": "heapster-config", - "defaultMode": 420 - } - }, - { - "name": "heapster-token-7z7c5", - "secret": { - "secretName": "heapster-token-7z7c5", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "heapster", - "image": "k8s.gcr.io/heapster-amd64:v1.5.3", - "command": [ - "/heapster", - "--source=kubernetes.summary_api:\"\"" - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "limits": { - "cpu": "88m", - "memory": "204Mi" - }, - "requests": { - "cpu": "88m", - "memory": "204Mi" - } - }, - "volumeMounts": [ - { - "name": "heapster-token-7z7c5", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "httpGet": { - "path": "/healthz", - "port": 8082, - "scheme": "HTTP" - }, - "initialDelaySeconds": 180, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - }, - { - "name": "heapster-nanny", - "image": "k8s.gcr.io/addon-resizer:1.8.1", - "command": [ - "/pod_nanny", - "--config-dir=/etc/config", - "--cpu=80m", - "--extra-cpu=0.5m", - "--memory=140Mi", - "--extra-memory=4Mi", - "--threshold=5", - 
"--deployment=heapster", - "--container=heapster", - "--poll-period=300000", - "--estimator=exponential" - ], - "env": [ - { - "name": "MY_POD_NAME", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "metadata.name" - } - } - }, - { - "name": "MY_POD_NAMESPACE", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "metadata.namespace" - } - } - }, - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "limits": { - "cpu": "50m", - "memory": "90Mi" - }, - "requests": { - "cpu": "50m", - "memory": "90Mi" - } - }, - "volumeMounts": [ - { - "name": "heapster-config-volume", - "mountPath": "/etc/config" - }, - { - "name": "heapster-token-7z7c5", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "heapster", - "serviceAccount": "heapster", - "nodeName": "aks-nodepool1-19574989-1", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "kubernetes.azure.com/cluster", - "operator": "Exists" - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": 
"CriticalAddonsOnly", - "operator": "Exists" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-05T11:47:28Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-05T11:47:38Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-05T11:47:28Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.244.0.111", - "startTime": "2019-06-05T11:47:28Z", - "containerStatuses": [ - { - "name": "heapster", - "state": { - "running": { - "startedAt": "2019-06-05T11:47:36Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/heapster-amd64:v1.5.3", - "imageID": "docker-pullable://k8s.gcr.io/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", - "containerID": "docker://4e1172f79b32d748b594fcf1d28804ccee06137f717ad23f6d4231cba93b9ea7" - }, - { - "name": "heapster-nanny", - "state": { - "running": { - "startedAt": "2019-06-05T11:47:37Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/addon-resizer:1.8.1", - "imageID": "docker-pullable://k8s.gcr.io/addon-resizer@sha256:507aa9845ecce1fdde4d61f530c802f4dc2974c700ce0db7730866e442db958d", - "containerID": "docker://5f638d06b66eda2c9d371a2af7ea6483ecd618ce3db39667e95c3100cbda3880" - } - ], - "qosClass": "Guaranteed" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - 
{ - "metadata": { - "name": "kube-dns-autoscaler-544c7b6b44-p5k4f", - "generateName": "kube-dns-autoscaler-544c7b6b44-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/kube-dns-autoscaler-544c7b6b44-p5k4f", - "uid": "b248f888-7c31-11e9-a084-2a3503e86e0a", - "resourceVersion": "7854795", - "creationTimestamp": "2019-05-22T01:34:09Z", - "labels": { - "k8s-app": "kube-dns-autoscaler", - "pod-template-hash": "1007362600" - }, - "annotations": { - "scheduler.alpha.kubernetes.io/critical-pod": "", - "seccomp.security.alpha.kubernetes.io/pod": "docker/default" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "kube-dns-autoscaler-544c7b6b44", - "uid": "b2443a3a-7c31-11e9-a084-2a3503e86e0a", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "kube-dns-autoscaler-token-zkxt8", - "secret": { - "secretName": "kube-dns-autoscaler-token-zkxt8", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "autoscaler", - "image": "k8s.gcr.io/cluster-proportional-autoscaler-amd64:1.1.2-r2", - "command": [ - "/cluster-proportional-autoscaler", - "--namespace=kube-system", - "--configmap=kube-dns-autoscaler", - "--target=deployment/kube-dns-v20", - "--default-params={\"ladder\":{\"coresToReplicas\":[[1,2],[512,3],[1024,4],[2048,5]],\"nodesToReplicas\":[[1,2],[8,3],[16,4],[32,5]]}}", - "--logtostderr=true", - "--v=2" - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - 
"resources": { - "requests": { - "cpu": "20m", - "memory": "10Mi" - } - }, - "volumeMounts": [ - { - "name": "kube-dns-autoscaler-token-zkxt8", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "Default", - "serviceAccountName": "kube-dns-autoscaler", - "serviceAccount": "kube-dns-autoscaler", - "nodeName": "aks-nodepool1-19574989-1", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:09Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:24Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:09Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.244.0.92", - "startTime": "2019-05-22T01:34:09Z", - "containerStatuses": [ - { - "name": "autoscaler", - "state": { - "running": { - "startedAt": "2019-05-22T01:34:22Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/cluster-proportional-autoscaler-amd64:1.1.2-r2", - "imageID": 
"docker-pullable://k8s.gcr.io/cluster-proportional-autoscaler-amd64@sha256:003f98d9f411ddfa6ff6d539196355e03ddd69fa4ed38c7ffb8fec6f729afe2d", - "containerID": "docker://099452c4e8eb1934199cc973acf0d2cc3045048a46f37ddefbf3a2efc19636fe" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "kube-dns-v20-f96699fb4-b2wbl", - "generateName": "kube-dns-v20-f96699fb4-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/kube-dns-v20-f96699fb4-b2wbl", - "uid": "b2878580-7c31-11e9-a084-2a3503e86e0a", - "resourceVersion": "7854952", - "creationTimestamp": "2019-05-22T01:34:10Z", - "labels": { - "k8s-app": "kube-dns", - "kubernetes.io/cluster-service": "true", - "pod-template-hash": "952255960", - "version": "v20" - }, - "annotations": { - "prometheus.io/port": "10055", - "prometheus.io/scrape": "true" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "kube-dns-v20-f96699fb4", - "uid": "b2538f5f-7c31-11e9-a084-2a3503e86e0a", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "kube-dns-config", - "configMap": { - "name": "kube-dns", - "defaultMode": 420, - "optional": true - } - }, - { - "name": "kubedns-kubecfg", - "configMap": { - "name": "kubedns-kubecfg", - "defaultMode": 420 - } - }, - { - "name": "kube-dns-token-ghgtl", - "secret": { - "secretName": "kube-dns-token-ghgtl", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "kubedns", - "image": "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13", - "args": [ - "--kubecfg-file=/config/kubeconfig", - "--config-dir=/kube-dns-config", - "--domain=cluster.local.", - "--dns-port=10053", - "--v=2" - ], - "ports": [ - { - "name": "dns-local", - "containerPort": 10053, - "protocol": "UDP" - }, - { - "name": "dns-tcp-local", - "containerPort": 10053, - "protocol": "TCP" - }, - { - "name": "metrics", - "containerPort": 10055, - "protocol": 
"TCP" - } - ], - "env": [ - { - "name": "PROMETHEUS_PORT", - "value": "10055" - }, - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "limits": { - "memory": "170Mi" - }, - "requests": { - "cpu": "100m", - "memory": "70Mi" - } - }, - "volumeMounts": [ - { - "name": "kube-dns-config", - "mountPath": "/kube-dns-config" - }, - { - "name": "kubedns-kubecfg", - "readOnly": true, - "mountPath": "/config" - }, - { - "name": "kube-dns-token-ghgtl", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "httpGet": { - "path": "/healthcheck/kubedns", - "port": 10054, - "scheme": "HTTP" - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 5 - }, - "readinessProbe": { - "httpGet": { - "path": "/readiness", - "port": 8081, - "scheme": "HTTP" - }, - "initialDelaySeconds": 30, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - }, - { - "name": "dnsmasq", - "image": "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10", - "args": [ - "-v=2", - "-logtostderr", - "-configDir=/kube-dns-config", - "-restartDnsmasq=true", - "--", - "-k", - "--cache-size=1000", - "--no-negcache", - "--no-resolv", - "--server=127.0.0.1#10053", - "--server=/cluster.local/127.0.0.1#10053", - 
"--server=/in-addr.arpa/127.0.0.1#10053", - "--server=/ip6.arpa/127.0.0.1#10053", - "--log-facility=-" - ], - "ports": [ - { - "name": "dns", - "containerPort": 53, - "protocol": "UDP" - }, - { - "name": "dns-tcp", - "containerPort": 53, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "kube-dns-config", - "mountPath": "/kube-dns-config" - }, - { - "name": "kube-dns-token-ghgtl", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - }, - { - "name": "healthz", - "image": "k8s.gcr.io/exechealthz-amd64:1.2", - "args": [ - "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1 >/dev/null || exit 1; done", - "--url=/healthz-dnsmasq", - "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1:10053 >/dev/null || exit 1; done", - "--url=/healthz-kubedns", - "--port=8080", - "--quiet" - ], - "ports": [ - { - "containerPort": 8080, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "PROBE_DOMAINS", - "value": "bing.com kubernetes.default.svc.cluster.local" - }, - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - 
"value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "limits": { - "memory": "50Mi" - }, - "requests": { - "cpu": "10m", - "memory": "50Mi" - } - }, - "volumeMounts": [ - { - "name": "kube-dns-token-ghgtl", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "httpGet": { - "path": "/healthz-dnsmasq", - "port": 8080, - "scheme": "HTTP" - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 5 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - }, - { - "name": "sidecar", - "image": "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10", - "args": [ - "--v=2", - "--logtostderr", - "--probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.cluster.local,5,SRV", - "--probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.cluster.local,5,SRV" - ], - "ports": [ - { - "name": "metrics", - "containerPort": 10054, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "requests": { - "cpu": "10m", - "memory": "20Mi" - } - }, - "volumeMounts": [ - { - "name": "kube-dns-token-ghgtl", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": 
{ - "httpGet": { - "path": "/metrics", - "port": 10054, - "scheme": "HTTP" - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "Default", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "kube-dns", - "serviceAccount": "kube-dns", - "nodeName": "aks-nodepool1-19574989-1", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "kubernetes.azure.com/cluster", - "operator": "Exists" - } - ] - } - ] - } - }, - "podAntiAffinity": { - "preferredDuringSchedulingIgnoredDuringExecution": [ - { - "weight": 100, - "podAffinityTerm": { - "labelSelector": { - "matchExpressions": [ - { - "key": "k8s-app", - "operator": "In", - "values": [ - "kube-dns" - ] - } - ] - }, - "topologyKey": "kubernetes.io/hostname" - } - } - ] - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:11Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:35:00Z" - }, - { - "type": "ContainersReady", - "status": "True", - 
"lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:10Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.244.0.93", - "startTime": "2019-05-22T01:34:11Z", - "containerStatuses": [ - { - "name": "dnsmasq", - "state": { - "running": { - "startedAt": "2019-05-22T01:34:28Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10", - "imageID": "docker-pullable://k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:bbb2a290a568125b3b996028958eb773f33b5b87a6b37bf38a28f8b62dddb3c8", - "containerID": "docker://2851cb02128e1042279e414d6d2f6d5e5cbc0bfbb3656188ce1f8210bfbabb3f" - }, - { - "name": "healthz", - "state": { - "running": { - "startedAt": "2019-05-22T01:34:31Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/exechealthz-amd64:1.2", - "imageID": "docker-pullable://k8s.gcr.io/exechealthz-amd64@sha256:503e158c3f65ed7399f54010571c7c977ade7fe59010695f48d9650d83488c0a", - "containerID": "docker://a69227f3a15bf4e154206668ce2ed313630871981c89710916a3c8341bff1f02" - }, - { - "name": "kubedns", - "state": { - "running": { - "startedAt": "2019-05-22T01:34:24Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13", - "imageID": "docker-pullable://k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", - "containerID": "docker://65e83e304744f37ba779cc7313ae981e912d3fb5ae536f0f78e6d287db6c141a" - }, - { - "name": "sidecar", - "state": { - "running": { - "startedAt": "2019-05-22T01:34:33Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10", - "imageID": 
"docker-pullable://k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:4f1ab957f87b94a5ec1edc26fae50da2175461f00afecf68940c4aa079bd08a4", - "containerID": "docker://9a825dc6951f5db61af58fc028bb8ab64b06d0d4ec64f81b3dd0d586a913e34a" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "kube-dns-v20-f96699fb4-vltph", - "generateName": "kube-dns-v20-f96699fb4-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/kube-dns-v20-f96699fb4-vltph", - "uid": "b25c1fd9-7c31-11e9-a084-2a3503e86e0a", - "resourceVersion": "7854924", - "creationTimestamp": "2019-05-22T01:34:09Z", - "labels": { - "k8s-app": "kube-dns", - "kubernetes.io/cluster-service": "true", - "pod-template-hash": "952255960", - "version": "v20" - }, - "annotations": { - "prometheus.io/port": "10055", - "prometheus.io/scrape": "true" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "kube-dns-v20-f96699fb4", - "uid": "b2538f5f-7c31-11e9-a084-2a3503e86e0a", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "kube-dns-config", - "configMap": { - "name": "kube-dns", - "defaultMode": 420, - "optional": true - } - }, - { - "name": "kubedns-kubecfg", - "configMap": { - "name": "kubedns-kubecfg", - "defaultMode": 420 - } - }, - { - "name": "kube-dns-token-ghgtl", - "secret": { - "secretName": "kube-dns-token-ghgtl", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "kubedns", - "image": "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13", - "args": [ - "--kubecfg-file=/config/kubeconfig", - "--config-dir=/kube-dns-config", - "--domain=cluster.local.", - "--dns-port=10053", - "--v=2" - ], - "ports": [ - { - "name": "dns-local", - "containerPort": 10053, - "protocol": "UDP" - }, - { - "name": "dns-tcp-local", - "containerPort": 10053, - "protocol": "TCP" - }, - { - "name": "metrics", - "containerPort": 10055, - "protocol": "TCP" - } - ], - 
"env": [ - { - "name": "PROMETHEUS_PORT", - "value": "10055" - }, - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "limits": { - "memory": "170Mi" - }, - "requests": { - "cpu": "100m", - "memory": "70Mi" - } - }, - "volumeMounts": [ - { - "name": "kube-dns-config", - "mountPath": "/kube-dns-config" - }, - { - "name": "kubedns-kubecfg", - "readOnly": true, - "mountPath": "/config" - }, - { - "name": "kube-dns-token-ghgtl", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "httpGet": { - "path": "/healthcheck/kubedns", - "port": 10054, - "scheme": "HTTP" - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 5 - }, - "readinessProbe": { - "httpGet": { - "path": "/readiness", - "port": 8081, - "scheme": "HTTP" - }, - "initialDelaySeconds": 30, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - }, - { - "name": "dnsmasq", - "image": "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10", - "args": [ - "-v=2", - "-logtostderr", - "-configDir=/kube-dns-config", - "-restartDnsmasq=true", - "--", - "-k", - "--cache-size=1000", - "--no-negcache", - "--no-resolv", - "--server=127.0.0.1#10053", - "--server=/cluster.local/127.0.0.1#10053", - "--server=/in-addr.arpa/127.0.0.1#10053", - 
"--server=/ip6.arpa/127.0.0.1#10053", - "--log-facility=-" - ], - "ports": [ - { - "name": "dns", - "containerPort": 53, - "protocol": "UDP" - }, - { - "name": "dns-tcp", - "containerPort": 53, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "kube-dns-config", - "mountPath": "/kube-dns-config" - }, - { - "name": "kube-dns-token-ghgtl", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - }, - { - "name": "healthz", - "image": "k8s.gcr.io/exechealthz-amd64:1.2", - "args": [ - "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1 >/dev/null || exit 1; done", - "--url=/healthz-dnsmasq", - "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1:10053 >/dev/null || exit 1; done", - "--url=/healthz-kubedns", - "--port=8080", - "--quiet" - ], - "ports": [ - { - "containerPort": 8080, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "PROBE_DOMAINS", - "value": "bing.com kubernetes.default.svc.cluster.local" - }, - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "limits": { - "memory": "50Mi" - }, - "requests": { - "cpu": "10m", - "memory": "50Mi" - } - }, - "volumeMounts": [ - { - "name": "kube-dns-token-ghgtl", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "httpGet": { - "path": "/healthz-dnsmasq", - "port": 8080, - "scheme": "HTTP" - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 5 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - }, - { - "name": "sidecar", - "image": "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10", - "args": [ - "--v=2", - "--logtostderr", - "--probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.cluster.local,5,SRV", - "--probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.cluster.local,5,SRV" - ], - "ports": [ - { - "name": "metrics", - "containerPort": 10054, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "requests": { - "cpu": "10m", - "memory": "20Mi" - } - }, - "volumeMounts": [ - { - "name": "kube-dns-token-ghgtl", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - 
"httpGet": { - "path": "/metrics", - "port": 10054, - "scheme": "HTTP" - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 5, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "Default", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "kube-dns", - "serviceAccount": "kube-dns", - "nodeName": "aks-nodepool1-19574989-0", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "kubernetes.azure.com/cluster", - "operator": "Exists" - } - ] - } - ] - } - }, - "podAntiAffinity": { - "preferredDuringSchedulingIgnoredDuringExecution": [ - { - "weight": 100, - "podAffinityTerm": { - "labelSelector": { - "matchExpressions": [ - { - "key": "k8s-app", - "operator": "In", - "values": [ - "kube-dns" - ] - } - ] - }, - "topologyKey": "kubernetes.io/hostname" - } - } - ] - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:11Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:56Z" - }, - { - "type": "ContainersReady", - "status": "True", - 
"lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:09Z" - } - ], - "hostIP": "10.240.0.4", - "podIP": "10.244.1.118", - "startTime": "2019-05-22T01:34:11Z", - "containerStatuses": [ - { - "name": "dnsmasq", - "state": { - "running": { - "startedAt": "2019-05-22T01:34:25Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10", - "imageID": "docker-pullable://k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:bbb2a290a568125b3b996028958eb773f33b5b87a6b37bf38a28f8b62dddb3c8", - "containerID": "docker://6212f59ccc732480813f4a7e2fdf8eb84e7cebfab3c178135af8dbd1d0a26aba" - }, - { - "name": "healthz", - "state": { - "running": { - "startedAt": "2019-05-22T01:34:29Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/exechealthz-amd64:1.2", - "imageID": "docker-pullable://k8s.gcr.io/exechealthz-amd64@sha256:503e158c3f65ed7399f54010571c7c977ade7fe59010695f48d9650d83488c0a", - "containerID": "docker://d2ad8cbaa535e6587de5ffbe712194a6d8c9572166f7f6bd8be6b241cc064c83" - }, - { - "name": "kubedns", - "state": { - "running": { - "startedAt": "2019-05-22T01:34:20Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13", - "imageID": "docker-pullable://k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", - "containerID": "docker://9e049dae9ef2667ff5e6af168f9cf6fd1e820dd7c7cc2af02c71e0f4d8382291" - }, - { - "name": "sidecar", - "state": { - "running": { - "startedAt": "2019-05-22T01:34:31Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10", - "imageID": 
"docker-pullable://k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:4f1ab957f87b94a5ec1edc26fae50da2175461f00afecf68940c4aa079bd08a4", - "containerID": "docker://b71282c7c0793242c5b2bf0a8c35f0c4e82c7c6224651cbaa8bdd3f94554b49e" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "kube-proxy-4hjws", - "generateName": "kube-proxy-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-4hjws", - "uid": "8cf7c410-88f4-11e9-b1b0-5eb4a3e9de7d", - "resourceVersion": "9661065", - "creationTimestamp": "2019-06-07T07:19:12Z", - "labels": { - "component": "kube-proxy", - "controller-revision-hash": "1271944371", - "pod-template-generation": "16", - "tier": "node" - }, - "annotations": { - "aks.microsoft.com/release-time": "seconds:1559735217 nanos:797729016 ", - "remediator.aks.microsoft.com/kube-proxy-restart": "7" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "kube-proxy", - "uid": "45640bf6-44e5-11e9-9920-423525a6b683", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "kubeconfig", - "hostPath": { - "path": "/var/lib/kubelet", - "type": "" - } - }, - { - "name": "certificates", - "hostPath": { - "path": "/etc/kubernetes/certs", - "type": "" - } - }, - { - "name": "kube-proxy-token-f5vbg", - "secret": { - "secretName": "kube-proxy-token-f5vbg", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "kube-proxy", - "image": "k8s.gcr.io/hyperkube-amd64:v1.11.8", - "command": [ - "/hyperkube", - "proxy", - "--kubeconfig=/var/lib/kubelet/kubeconfig", - "--cluster-cidr=10.244.0.0/16", - "--feature-gates=ExperimentalCriticalPodAnnotation=true" - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "requests": { - "cpu": "100m" - } - }, - "volumeMounts": [ - { - "name": "kubeconfig", - "readOnly": true, - "mountPath": "/var/lib/kubelet" - }, - { - "name": "certificates", - "readOnly": true, - "mountPath": "/etc/kubernetes/certs" - }, - { - "name": "kube-proxy-token-f5vbg", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "kube-proxy", - "serviceAccount": "kube-proxy", - "nodeName": "aks-nodepool1-19574989-1", - "hostNetwork": true, - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "kubernetes.azure.com/cluster", - "operator": "Exists" - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "operator": "Exists", - "effect": "NoExecute" - }, - { - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": 
"NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/network-unavailable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-07T07:19:13Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-07T07:19:18Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-07T07:19:13Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.240.0.5", - "startTime": "2019-06-07T07:19:13Z", - "containerStatuses": [ - { - "name": "kube-proxy", - "state": { - "running": { - "startedAt": "2019-06-07T07:19:17Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/hyperkube-amd64:v1.11.8", - "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", - "containerID": "docker://c1c114cb18056d4f9820d127e53f1c4d3f976d52c8e6522fee07f4727db96c66" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "kube-proxy-j847t", - "generateName": "kube-proxy-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-j847t", - "uid": "a490c6e6-88f4-11e9-b1b0-5eb4a3e9de7d", - "resourceVersion": "9661132", - "creationTimestamp": "2019-06-07T07:19:52Z", - "labels": { - "component": "kube-proxy", - "controller-revision-hash": "1271944371", - 
"pod-template-generation": "16", - "tier": "node" - }, - "annotations": { - "aks.microsoft.com/release-time": "seconds:1559735217 nanos:797729016 ", - "remediator.aks.microsoft.com/kube-proxy-restart": "7" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "kube-proxy", - "uid": "45640bf6-44e5-11e9-9920-423525a6b683", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "kubeconfig", - "hostPath": { - "path": "/var/lib/kubelet", - "type": "" - } - }, - { - "name": "certificates", - "hostPath": { - "path": "/etc/kubernetes/certs", - "type": "" - } - }, - { - "name": "kube-proxy-token-f5vbg", - "secret": { - "secretName": "kube-proxy-token-f5vbg", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "kube-proxy", - "image": "k8s.gcr.io/hyperkube-amd64:v1.11.8", - "command": [ - "/hyperkube", - "proxy", - "--kubeconfig=/var/lib/kubelet/kubeconfig", - "--cluster-cidr=10.244.0.0/16", - "--feature-gates=ExperimentalCriticalPodAnnotation=true" - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "requests": { - "cpu": "100m" - } - }, - "volumeMounts": [ - { - "name": "kubeconfig", - "readOnly": true, - "mountPath": "/var/lib/kubelet" - }, - { - "name": "certificates", - "readOnly": true, - "mountPath": "/etc/kubernetes/certs" - }, - { - "name": "kube-proxy-token-f5vbg", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - 
"terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "kube-proxy", - "serviceAccount": "kube-proxy", - "nodeName": "aks-nodepool1-19574989-0", - "hostNetwork": true, - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "kubernetes.azure.com/cluster", - "operator": "Exists" - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "operator": "Exists", - "effect": "NoExecute" - }, - { - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/network-unavailable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-07T07:19:52Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-07T07:19:57Z" - }, - { - "type": 
"ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-07T07:19:52Z" - } - ], - "hostIP": "10.240.0.4", - "podIP": "10.240.0.4", - "startTime": "2019-06-07T07:19:52Z", - "containerStatuses": [ - { - "name": "kube-proxy", - "state": { - "running": { - "startedAt": "2019-06-07T07:19:57Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/hyperkube-amd64:v1.11.8", - "imageID": "docker-pullable://k8s.gcr.io/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", - "containerID": "docker://79872f04462bb77d8c9f121e39910af80fdbb73c874bfafdf29f16f8df1f5011" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "kube-svc-redirect-bfkqr", - "generateName": "kube-svc-redirect-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/kube-svc-redirect-bfkqr", - "uid": "cbfbc2f5-7c31-11e9-a710-f6837ce82b50", - "resourceVersion": "7854957", - "creationTimestamp": "2019-05-22T01:34:52Z", - "labels": { - "component": "kube-svc-redirect", - "controller-revision-hash": "3858141844", - "pod-template-generation": "3", - "tier": "node" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "kube-svc-redirect", - "uid": "45a5fc62-44e5-11e9-9920-423525a6b683", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "azureproxy-nginx", - "configMap": { - "name": "azureproxy-nginx", - "defaultMode": 420 - } - }, - { - "name": "azureproxy-configs", - "configMap": { - "name": "azureproxy-config", - "defaultMode": 420 - } - }, - { - "name": "azureproxy-certs", - "secret": { - "secretName": "azureproxy-certs", - "defaultMode": 420 - } - }, - { - "name": "kube-svc-redirector-token-ngjg2", - "secret": { - 
"secretName": "kube-svc-redirector-token-ngjg2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "redirector", - "image": "docker.io/deis/kube-svc-redirect:v1.0.2", - "env": [ - { - "name": "KUBERNETES_SVC_IP", - "value": "10.0.0.1" - }, - { - "name": "KUBE_SVC_REDIRECTOR_PROXY_IP", - "value": "127.0.0.1:14612" - }, - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "requests": { - "cpu": "5m", - "memory": "2Mi" - } - }, - "volumeMounts": [ - { - "name": "kube-svc-redirector-token-ngjg2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "capabilities": { - "add": [ - "NET_ADMIN" - ] - } - } - }, - { - "name": "azureproxy", - "image": "nginx:1.13.12-alpine", - "ports": [ - { - "hostPort": 14612, - "containerPort": 14612, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": 
{ - "requests": { - "cpu": "5m", - "memory": "32Mi" - } - }, - "volumeMounts": [ - { - "name": "azureproxy-nginx", - "readOnly": true, - "mountPath": "/etc/nginx/nginx.conf", - "subPath": "nginx.conf" - }, - { - "name": "azureproxy-configs", - "readOnly": true, - "mountPath": "/etc/nginx/conf.d" - }, - { - "name": "azureproxy-certs", - "readOnly": true, - "mountPath": "/etc/nginx/certs" - }, - { - "name": "kube-svc-redirector-token-ngjg2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "kube-svc-redirector", - "serviceAccount": "kube-svc-redirector", - "nodeName": "aks-nodepool1-19574989-1", - "hostNetwork": true, - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "kubernetes.azure.com/cluster", - "operator": "Exists" - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "operator": "Exists", - "effect": "NoExecute" - }, - { - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - }, - { 
- "key": "node.kubernetes.io/network-unavailable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:53Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:35:01Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:53Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.240.0.5", - "startTime": "2019-05-22T01:34:53Z", - "containerStatuses": [ - { - "name": "azureproxy", - "state": { - "running": { - "startedAt": "2019-05-22T01:35:00Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "nginx:1.13.12-alpine", - "imageID": "docker-pullable://nginx@sha256:9d46fd628d54ebe1633ee3cf0fe2acfcc419cfae541c63056530e39cd5620366", - "containerID": "docker://1e6a9c4bd079d1a8aecfb3f597550b80d1017be90ca14dd4438420320ff7858d" - }, - { - "name": "redirector", - "state": { - "running": { - "startedAt": "2019-05-22T01:34:59Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "deis/kube-svc-redirect:v1.0.2", - "imageID": "docker-pullable://deis/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", - "containerID": "docker://5fcd87041f2734df58b330a3516d64c4e3a25dc72542b20590ade3fecb9232f7" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "kube-svc-redirect-jbzcf", - "generateName": "kube-svc-redirect-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/kube-svc-redirect-jbzcf", - "uid": 
"ebc3015a-7c31-11e9-a710-f6837ce82b50", - "resourceVersion": "7855073", - "creationTimestamp": "2019-05-22T01:35:46Z", - "labels": { - "component": "kube-svc-redirect", - "controller-revision-hash": "3858141844", - "pod-template-generation": "3", - "tier": "node" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "kube-svc-redirect", - "uid": "45a5fc62-44e5-11e9-9920-423525a6b683", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "azureproxy-nginx", - "configMap": { - "name": "azureproxy-nginx", - "defaultMode": 420 - } - }, - { - "name": "azureproxy-configs", - "configMap": { - "name": "azureproxy-config", - "defaultMode": 420 - } - }, - { - "name": "azureproxy-certs", - "secret": { - "secretName": "azureproxy-certs", - "defaultMode": 420 - } - }, - { - "name": "kube-svc-redirector-token-ngjg2", - "secret": { - "secretName": "kube-svc-redirector-token-ngjg2", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "redirector", - "image": "docker.io/deis/kube-svc-redirect:v1.0.2", - "env": [ - { - "name": "KUBERNETES_SVC_IP", - "value": "10.0.0.1" - }, - { - "name": "KUBE_SVC_REDIRECTOR_PROXY_IP", - "value": "127.0.0.1:14612" - }, - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "requests": { - "cpu": "5m", - "memory": "2Mi" - } - }, - "volumeMounts": [ - { - "name": "kube-svc-redirector-token-ngjg2", - "readOnly": true, - "mountPath": 
"/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "capabilities": { - "add": [ - "NET_ADMIN" - ] - } - } - }, - { - "name": "azureproxy", - "image": "nginx:1.13.12-alpine", - "ports": [ - { - "hostPort": 14612, - "containerPort": 14612, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "requests": { - "cpu": "5m", - "memory": "32Mi" - } - }, - "volumeMounts": [ - { - "name": "azureproxy-nginx", - "readOnly": true, - "mountPath": "/etc/nginx/nginx.conf", - "subPath": "nginx.conf" - }, - { - "name": "azureproxy-configs", - "readOnly": true, - "mountPath": "/etc/nginx/conf.d" - }, - { - "name": "azureproxy-certs", - "readOnly": true, - "mountPath": "/etc/nginx/certs" - }, - { - "name": "kube-svc-redirector-token-ngjg2", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "kube-svc-redirector", - "serviceAccount": "kube-svc-redirector", - "nodeName": "aks-nodepool1-19574989-0", - "hostNetwork": true, - "securityContext": {}, - "affinity": { - 
"nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "kubernetes.azure.com/cluster", - "operator": "Exists" - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "operator": "Exists", - "effect": "NoExecute" - }, - { - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/network-unavailable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:35:46Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:35:53Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:35:46Z" - } - ], - "hostIP": "10.240.0.4", - "podIP": "10.240.0.4", - "startTime": "2019-05-22T01:35:46Z", - "containerStatuses": [ - { - "name": "azureproxy", - "state": { - "running": { - "startedAt": "2019-05-22T01:35:52Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": 
"nginx:1.13.12-alpine", - "imageID": "docker-pullable://nginx@sha256:9d46fd628d54ebe1633ee3cf0fe2acfcc419cfae541c63056530e39cd5620366", - "containerID": "docker://100b561a08a0163ac8c591af88b4bdbafa107923e3a2732aae73a3808b178f18" - }, - { - "name": "redirector", - "state": { - "running": { - "startedAt": "2019-05-22T01:35:50Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "deis/kube-svc-redirect:v1.0.2", - "imageID": "docker-pullable://deis/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", - "containerID": "docker://4a76cf31a6cd132443cbb2dc6f66aa806af2f5a73b8cee3ea8286d078485192b" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "kubernetes-dashboard-596c5bdf67-f64bg", - "generateName": "kubernetes-dashboard-596c5bdf67-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/kubernetes-dashboard-596c5bdf67-f64bg", - "uid": "b372caee-7c31-11e9-a084-2a3503e86e0a", - "resourceVersion": "7854783", - "creationTimestamp": "2019-05-22T01:34:11Z", - "labels": { - "k8s-app": "kubernetes-dashboard", - "kubernetes.io/cluster-service": "true", - "pod-template-hash": "1527168923" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "kubernetes-dashboard-596c5bdf67", - "uid": "b2f0c2ad-7c31-11e9-a084-2a3503e86e0a", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "kubernetes-dashboard-token-w4t8s", - "secret": { - "secretName": "kubernetes-dashboard-token-w4t8s", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "main", - "image": "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1", - "ports": [ - { - "name": "http", - "containerPort": 9090, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - 
"name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "limits": { - "cpu": "100m", - "memory": "500Mi" - }, - "requests": { - "cpu": "100m", - "memory": "50Mi" - } - }, - "volumeMounts": [ - { - "name": "kubernetes-dashboard-token-w4t8s", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "httpGet": { - "path": "/", - "port": 9090, - "scheme": "HTTP" - }, - "initialDelaySeconds": 30, - "timeoutSeconds": 30, - "periodSeconds": 10, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "kubernetes-dashboard", - "serviceAccount": "kubernetes-dashboard", - "nodeName": "aks-nodepool1-19574989-0", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "kubernetes.azure.com/cluster", - "operator": "Exists" - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priorityClassName": 
"system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:12Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:24Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:11Z" - } - ], - "hostIP": "10.240.0.4", - "podIP": "10.244.1.120", - "startTime": "2019-05-22T01:34:12Z", - "containerStatuses": [ - { - "name": "main", - "state": { - "running": { - "startedAt": "2019-05-22T01:34:22Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1", - "imageID": "docker-pullable://k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", - "containerID": "docker://165645b78e131f68e533cd5aca6e363b75ea80a188f0bff1b7fb614c84893ef6" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "metrics-server-589d467559-l866n", - "generateName": "metrics-server-589d467559-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/metrics-server-589d467559-l866n", - "uid": "b28e6bc3-7c31-11e9-a084-2a3503e86e0a", - "resourceVersion": "7854782", - "creationTimestamp": "2019-05-22T01:34:10Z", - "labels": { - "k8s-app": "metrics-server", - "pod-template-hash": "1458023115" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "metrics-server-589d467559", - "uid": "b284ce8e-7c31-11e9-a084-2a3503e86e0a", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "metrics-server-token-qtdgm", - 
"secret": { - "secretName": "metrics-server-token-qtdgm", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "metrics-server", - "image": "k8s.gcr.io/metrics-server-amd64:v0.2.1", - "command": [ - "/metrics-server", - "--source=kubernetes.summary_api:''" - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "metrics-server-token-qtdgm", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "metrics-server", - "serviceAccount": "metrics-server", - "nodeName": "aks-nodepool1-19574989-0", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "kubernetes.azure.com/cluster", - "operator": "Exists" - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": 
"NoExecute", - "tolerationSeconds": 300 - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:11Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:24Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:10Z" - } - ], - "hostIP": "10.240.0.4", - "podIP": "10.244.1.119", - "startTime": "2019-05-22T01:34:11Z", - "containerStatuses": [ - { - "name": "metrics-server", - "state": { - "running": { - "startedAt": "2019-05-22T01:34:21Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "k8s.gcr.io/metrics-server-amd64:v0.2.1", - "imageID": "docker-pullable://k8s.gcr.io/metrics-server-amd64@sha256:49a9f12f7067d11f42c803dbe61ed2c1299959ad85cb315b25ff7eef8e6b8892", - "containerID": "docker://bc98c2f03836b32fcf0e39d6b7b7980ceb67fadb472c6c4a4bfdf4d08ee92677" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "omsagent-9wqbf", - "generateName": "omsagent-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-9wqbf", - "uid": "128ded84-6b78-11e9-8b55-963bcaafdced", - "resourceVersion": "5486687", - "creationTimestamp": "2019-04-30T18:45:06Z", - "labels": { - "controller-revision-hash": "776703171", - "dsName": "omsagent-ds", - "pod-template-generation": "2" - }, - "annotations": { - "agentVersion": "1.10.0.1", - "dockerProviderVersion": "4.0.0-0" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "omsagent", - "uid": "b7f05c92-6876-11e9-8b55-963bcaafdced", - "controller": 
true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "host-root", - "hostPath": { - "path": "/", - "type": "" - } - }, - { - "name": "docker-sock", - "hostPath": { - "path": "/var/run", - "type": "" - } - }, - { - "name": "container-hostname", - "hostPath": { - "path": "/etc/hostname", - "type": "" - } - }, - { - "name": "host-log", - "hostPath": { - "path": "/var/log", - "type": "" - } - }, - { - "name": "containerlog-path", - "hostPath": { - "path": "/var/lib/docker/containers", - "type": "" - } - }, - { - "name": "azure-json-path", - "hostPath": { - "path": "/etc/kubernetes", - "type": "" - } - }, - { - "name": "omsagent-secret", - "secret": { - "secretName": "omsagent-secret", - "defaultMode": 420 - } - }, - { - "name": "omsagent-token-h5tmr", - "secret": { - "secretName": "omsagent-token-h5tmr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "omsagent", - "image": "microsoft/oms:healthpreview04262019", - "ports": [ - { - "containerPort": 25225, - "protocol": "TCP" - }, - { - "containerPort": 25224, - "protocol": "UDP" - } - ], - "env": [ - { - "name": "AKS_RESOURCE_ID", - "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "name": "AKS_REGION", - "value": "eastus" - }, - { - "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", - "value": "true" - }, - { - "name": "CONTROLLER_TYPE", - "value": "DaemonSet" - }, - { - "name": "NODE_IP", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "status.hostIP" - } - } - }, - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "limits": { - "cpu": "150m", - "memory": "300Mi" - }, - "requests": { - "cpu": "50m", - "memory": "225Mi" - } - }, - "volumeMounts": [ - { - "name": "host-root", - "readOnly": true, - "mountPath": "/hostfs" - }, - { - "name": "docker-sock", - "mountPath": "/var/run/host" - }, - { - "name": "host-log", - "mountPath": "/var/log" - }, - { - "name": "containerlog-path", - "mountPath": "/var/lib/docker/containers" - }, - { - "name": "azure-json-path", - "mountPath": "/etc/kubernetes/host" - }, - { - "name": "omsagent-secret", - "readOnly": true, - "mountPath": "/etc/omsagent-secret" - }, - { - "name": "omsagent-token-h5tmr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "exec": { - "command": [ - "/bin/bash", - "-c", - "(ps -ef | grep main | grep -v \"grep\") && (ps -ef | grep main | grep -v \"grep\")" - ] - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 1, - "periodSeconds": 60, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "omsagent", - "serviceAccount": "omsagent", - "nodeName": "aks-nodepool1-19574989-0", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node-role.kubernetes.io/master", - "operator": "Equal", - "value": "true", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, 
- { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-30T18:45:06Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-30T18:45:12Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-30T18:45:06Z" - } - ], - "hostIP": "10.240.0.4", - "podIP": "10.244.1.115", - "startTime": "2019-04-30T18:45:06Z", - "containerStatuses": [ - { - "name": "omsagent", - "state": { - "running": { - "startedAt": "2019-04-30T18:45:11Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "microsoft/oms:healthpreview04262019", - "imageID": "docker-pullable://microsoft/oms@sha256:de83d1df24cb86a3a3110bd03abbd5704d7a7345565b1996f49ff001a3665385", - "containerID": "docker://1b1f6cad1cf954612695754847981ce8e632f0233bebf3c89dea1c127b157bc0" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "omsagent-n5kss", - "generateName": "omsagent-", - "namespace": "kube-system", - "selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-n5kss", - "uid": "673da111-8270-11e9-8d23-32c49ee6f300", - "resourceVersion": "8740634", - "creationTimestamp": "2019-05-30T00:18:09Z", - "labels": { - "controller-revision-hash": "776703171", - "dsName": "omsagent-ds", - 
"pod-template-generation": "2" - }, - "annotations": { - "agentVersion": "1.10.0.1", - "dockerProviderVersion": "4.0.0-0" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "DaemonSet", - "name": "omsagent", - "uid": "b7f05c92-6876-11e9-8b55-963bcaafdced", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "host-root", - "hostPath": { - "path": "/", - "type": "" - } - }, - { - "name": "docker-sock", - "hostPath": { - "path": "/var/run", - "type": "" - } - }, - { - "name": "container-hostname", - "hostPath": { - "path": "/etc/hostname", - "type": "" - } - }, - { - "name": "host-log", - "hostPath": { - "path": "/var/log", - "type": "" - } - }, - { - "name": "containerlog-path", - "hostPath": { - "path": "/var/lib/docker/containers", - "type": "" - } - }, - { - "name": "azure-json-path", - "hostPath": { - "path": "/etc/kubernetes", - "type": "" - } - }, - { - "name": "omsagent-secret", - "secret": { - "secretName": "omsagent-secret", - "defaultMode": 420 - } - }, - { - "name": "omsagent-token-h5tmr", - "secret": { - "secretName": "omsagent-token-h5tmr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "omsagent", - "image": "microsoft/oms:healthpreview04262019", - "ports": [ - { - "containerPort": 25225, - "protocol": "TCP" - }, - { - "containerPort": 25224, - "protocol": "UDP" - } - ], - "env": [ - { - "name": "AKS_RESOURCE_ID", - "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { - "name": "AKS_REGION", - "value": "eastus" - }, - { - "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", - "value": "true" - }, - { - "name": "CONTROLLER_TYPE", - "value": "DaemonSet" - }, - { - "name": "NODE_IP", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "status.hostIP" - } - } - }, - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": 
"dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "limits": { - "cpu": "150m", - "memory": "300Mi" - }, - "requests": { - "cpu": "50m", - "memory": "225Mi" - } - }, - "volumeMounts": [ - { - "name": "host-root", - "readOnly": true, - "mountPath": "/hostfs" - }, - { - "name": "docker-sock", - "mountPath": "/var/run/host" - }, - { - "name": "host-log", - "mountPath": "/var/log" - }, - { - "name": "containerlog-path", - "mountPath": "/var/lib/docker/containers" - }, - { - "name": "azure-json-path", - "mountPath": "/etc/kubernetes/host" - }, - { - "name": "omsagent-secret", - "readOnly": true, - "mountPath": "/etc/omsagent-secret" - }, - { - "name": "omsagent-token-h5tmr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "exec": { - "command": [ - "/bin/bash", - "-c", - "(ps -ef | grep main | grep -v \"grep\") && (ps -ef | grep main | grep -v \"grep\")" - ] - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 1, - "periodSeconds": 60, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "omsagent", - "serviceAccount": "omsagent", - "nodeName": "aks-nodepool1-19574989-1", - "securityContext": {}, - "schedulerName": 
"default-scheduler", - "tolerations": [ - { - "key": "node-role.kubernetes.io/master", - "operator": "Equal", - "value": "true", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute" - }, - { - "key": "node.kubernetes.io/disk-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/memory-pressure", - "operator": "Exists", - "effect": "NoSchedule" - }, - { - "key": "node.kubernetes.io/unschedulable", - "operator": "Exists", - "effect": "NoSchedule" - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-30T00:18:09Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-30T00:18:14Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-30T00:18:09Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.244.0.106", - "startTime": "2019-05-30T00:18:09Z", - "containerStatuses": [ - { - "name": "omsagent", - "state": { - "running": { - "startedAt": "2019-05-30T00:18:13Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "microsoft/oms:healthpreview04262019", - "imageID": "docker-pullable://microsoft/oms@sha256:de83d1df24cb86a3a3110bd03abbd5704d7a7345565b1996f49ff001a3665385", - "containerID": "docker://5eae48a3ebee8d930c0940e069a4d8720b432e62eba4a6197b6a6b552a573eb9" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "omsagent-rs-6dc57b8544-jh5vw", - "generateName": "omsagent-rs-6dc57b8544-", - "namespace": "kube-system", - 
"selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-rs-6dc57b8544-jh5vw", - "uid": "0971836e-6b78-11e9-8b55-963bcaafdced", - "resourceVersion": "7580651", - "creationTimestamp": "2019-04-30T18:44:51Z", - "labels": { - "pod-template-hash": "2871364100", - "rsName": "omsagent-rs" - }, - "annotations": { - "agentVersion": "1.10.0.1", - "dockerProviderVersion": "4.0.0-0" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "omsagent-rs-6dc57b8544", - "uid": "096a6655-6b78-11e9-8b55-963bcaafdced", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "docker-sock", - "hostPath": { - "path": "/var/run", - "type": "" - } - }, - { - "name": "container-hostname", - "hostPath": { - "path": "/etc/hostname", - "type": "" - } - }, - { - "name": "host-log", - "hostPath": { - "path": "/var/log", - "type": "" - } - }, - { - "name": "containerlog-path", - "hostPath": { - "path": "/var/lib/docker/containers", - "type": "" - } - }, - { - "name": "azure-json-path", - "hostPath": { - "path": "/etc/kubernetes", - "type": "" - } - }, - { - "name": "omsagent-secret", - "secret": { - "secretName": "omsagent-secret", - "defaultMode": 420 - } - }, - { - "name": "omsagent-rs-config", - "configMap": { - "name": "omsagent-rs-config", - "defaultMode": 420 - } - }, - { - "name": "omsagent-token-h5tmr", - "secret": { - "secretName": "omsagent-token-h5tmr", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "omsagent", - "image": "microsoft/oms:healthpreview04262019", - "ports": [ - { - "containerPort": 25225, - "protocol": "TCP" - }, - { - "name": "in-rs-tcp", - "containerPort": 25235, - "protocol": "TCP" - }, - { - "containerPort": 25224, - "protocol": "UDP" - } - ], - "env": [ - { - "name": "AKS_RESOURCE_ID", - "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" - }, - { 
- "name": "AKS_REGION", - "value": "eastus" - }, - { - "name": "DISABLE_KUBE_SYSTEM_LOG_COLLECTION", - "value": "true" - }, - { - "name": "CONTROLLER_TYPE", - "value": "ReplicaSet" - }, - { - "name": "NODE_IP", - "valueFrom": { - "fieldRef": { - "apiVersion": "v1", - "fieldPath": "status.hostIP" - } - } - }, - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "limits": { - "cpu": "150m", - "memory": "500Mi" - }, - "requests": { - "cpu": "50m", - "memory": "100Mi" - } - }, - "volumeMounts": [ - { - "name": "docker-sock", - "mountPath": "/var/run/host" - }, - { - "name": "host-log", - "mountPath": "/var/log" - }, - { - "name": "containerlog-path", - "mountPath": "/var/lib/docker/containers" - }, - { - "name": "azure-json-path", - "mountPath": "/etc/kubernetes/host" - }, - { - "name": "omsagent-secret", - "readOnly": true, - "mountPath": "/etc/omsagent-secret" - }, - { - "name": "omsagent-rs-config", - "mountPath": "/etc/config" - }, - { - "name": "omsagent-token-h5tmr", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "exec": { - "command": [ - "/bin/bash", - "-c", - "ps -ef | grep main | grep -v \"grep\"" - ] - }, - "initialDelaySeconds": 60, - "timeoutSeconds": 1, - "periodSeconds": 60, - "successThreshold": 1, - "failureThreshold": 3 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true - } - } - ], - 
"restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "nodeSelector": { - "beta.kubernetes.io/os": "linux", - "kubernetes.io/role": "agent" - }, - "serviceAccountName": "omsagent", - "serviceAccount": "omsagent", - "nodeName": "aks-nodepool1-19574989-1", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-30T18:44:51Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-30T18:44:57Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-04-30T18:44:51Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.244.0.87", - "startTime": "2019-04-30T18:44:51Z", - "containerStatuses": [ - { - "name": "omsagent", - "state": { - "running": { - "startedAt": "2019-04-30T18:44:56Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "microsoft/oms:healthpreview04262019", - "imageID": "docker-pullable://microsoft/oms@sha256:de83d1df24cb86a3a3110bd03abbd5704d7a7345565b1996f49ff001a3665385", - "containerID": "docker://2ad967c98de3c3956481b419a78c9ffa2cf65e62ee2aa51cf22b48d5d1983d0c" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "tunnelfront-74599866d6-fkhbd", - "generateName": "tunnelfront-74599866d6-", - "namespace": "kube-system", - "selfLink": 
"/api/v1/namespaces/kube-system/pods/tunnelfront-74599866d6-fkhbd", - "uid": "b345ca4e-7c31-11e9-a084-2a3503e86e0a", - "resourceVersion": "9030840", - "creationTimestamp": "2019-05-22T01:34:11Z", - "labels": { - "component": "tunnel", - "pod-template-hash": "3015542282" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "tunnelfront-74599866d6", - "uid": "b31abdc6-7c31-11e9-a084-2a3503e86e0a", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "kubeconfig", - "configMap": { - "name": "tunnelfront-kubecfg", - "defaultMode": 420, - "optional": true - } - }, - { - "name": "certificates", - "hostPath": { - "path": "/etc/kubernetes/certs", - "type": "" - } - }, - { - "name": "tunnelfront-token-njgvg", - "secret": { - "secretName": "tunnelfront-token-njgvg", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "tunnel-front", - "image": "docker.io/deis/hcp-tunnel-front:v1.9.2-v4.0.7", - "env": [ - { - "name": "OVERRIDE_TUNNEL_SERVER_NAME", - "value": "t_dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "TUNNEL_CLUSTERUSER_NAME", - "value": "28957308" - }, - { - "name": "TUNNELGATEWAY_SERVER_NAME", - "value": "dilipr-hea-dilipr-health-te-72c8e8-0b16acad.tun.eastus.azmk8s.io" - }, - { - "name": "TUNNELGATEWAY_SSH_PORT", - "value": "22" - }, - { - "name": "TUNNELGATEWAY_TLS_PORT", - "value": "443" - }, - { - "name": "KUBE_CONFIG", - "value": "/etc/kubernetes/kubeconfig/kubeconfig" - }, - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": 
"dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": { - "requests": { - "cpu": "10m", - "memory": "64Mi" - } - }, - "volumeMounts": [ - { - "name": "kubeconfig", - "readOnly": true, - "mountPath": "/etc/kubernetes/kubeconfig" - }, - { - "name": "certificates", - "readOnly": true, - "mountPath": "/etc/kubernetes/certs" - }, - { - "name": "tunnelfront-token-njgvg", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "livenessProbe": { - "exec": { - "command": [ - "/lib/tunnel-front/check-tunnel-connection.sh" - ] - }, - "initialDelaySeconds": 10, - "timeoutSeconds": 1, - "periodSeconds": 60, - "successThreshold": 1, - "failureThreshold": 12 - }, - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent", - "securityContext": { - "privileged": true - } - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "Default", - "nodeSelector": { - "beta.kubernetes.io/os": "linux" - }, - "serviceAccountName": "tunnelfront", - "serviceAccount": "tunnelfront", - "nodeName": "aks-nodepool1-19574989-1", - "securityContext": {}, - "affinity": { - "nodeAffinity": { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "kubernetes.azure.com/cluster", - "operator": "Exists" - } - ] - } - ] - } - } - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "CriticalAddonsOnly", - "operator": "Exists" - }, - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priorityClassName": "system-node-critical", - "priority": 2000001000 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - 
"status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:13Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-06-01T14:56:47Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T01:34:11Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.244.0.94", - "startTime": "2019-05-22T01:34:13Z", - "containerStatuses": [ - { - "name": "tunnel-front", - "state": { - "running": { - "startedAt": "2019-06-01T14:56:46Z" - } - }, - "lastState": { - "terminated": { - "exitCode": 137, - "reason": "Error", - "startedAt": "2019-05-22T01:34:26Z", - "finishedAt": "2019-06-01T14:56:42Z", - "containerID": "docker://077af2518bf2bf16e136c9d71453799398a7a6d8f854a0af106d972922f0588a" - } - }, - "ready": true, - "restartCount": 1, - "image": "deis/hcp-tunnel-front:v1.9.2-v4.0.7", - "imageID": "docker-pullable://deis/hcp-tunnel-front@sha256:68878ee3ea1781b322ea3952c3370e31dd89be8bb0864e2bf27bdba6dc904c41", - "containerID": "docker://113cbbf2ad5a68809b0a1e133e88bc090731544ce230d67f0ae73692d95fd576" - } - ], - "qosClass": "Burstable" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "nginx-deployment-7c47c68ddb-dlkd9", - "generateName": "nginx-deployment-7c47c68ddb-", - "namespace": "test", - "selfLink": "/api/v1/namespaces/test/pods/nginx-deployment-7c47c68ddb-dlkd9", - "uid": "842aba73-7ce1-11e9-8d23-32c49ee6f300", - "resourceVersion": "7952515", - "creationTimestamp": "2019-05-22T22:32:43Z", - "labels": { - "app": "nginx", - "pod-template-hash": "3703724886" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "nginx-deployment-7c47c68ddb", - "uid": "8424475c-7ce1-11e9-8d23-32c49ee6f300", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - 
"volumes": [ - { - "name": "default-token-8slws", - "secret": { - "secretName": "default-token-8slws", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "front-end", - "image": "nginx", - "ports": [ - { - "containerPort": 81, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-8slws", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "Always" - }, - { - "name": "rss-reader", - "image": "nickchase/rss-php-nginx:v1", - "ports": [ - { - "containerPort": 88, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-8slws", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - 
"terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-1", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:32:43Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:32:50Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:32:43Z" - } - ], - "hostIP": "10.240.0.5", - "podIP": "10.244.0.103", - "startTime": "2019-05-22T22:32:43Z", - "containerStatuses": [ - { - "name": "front-end", - "state": { - "running": { - "startedAt": "2019-05-22T22:32:48Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "nginx:latest", - "imageID": "docker-pullable://nginx@sha256:23b4dcdf0d34d4a129755fc6f52e1c6e23bb34ea011b315d87e193033bcd1b68", - "containerID": "docker://5820180607dfee4af5839033758c0dd3d53eb844616b575b1eed3b509f806da9" - }, - { - "name": "rss-reader", - "state": { - "running": { - "startedAt": "2019-05-22T22:32:50Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "nickchase/rss-php-nginx:v1", - "imageID": 
"docker-pullable://nickchase/rss-php-nginx@sha256:48da56a77fe4ecff4917121365d8e0ce615ebbdfe31f48a996255f5592894e2b", - "containerID": "docker://e8fb5dfa7453640278a77eac9148d820d12eeddbe7951b68c988b573ca524c2b" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - }, - { - "metadata": { - "name": "nginx-deployment-7c47c68ddb-j2wmf", - "generateName": "nginx-deployment-7c47c68ddb-", - "namespace": "test", - "selfLink": "/api/v1/namespaces/test/pods/nginx-deployment-7c47c68ddb-j2wmf", - "uid": "842dd513-7ce1-11e9-8d23-32c49ee6f300", - "resourceVersion": "7952518", - "creationTimestamp": "2019-05-22T22:32:43Z", - "labels": { - "app": "nginx", - "pod-template-hash": "3703724886" - }, - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "kind": "ReplicaSet", - "name": "nginx-deployment-7c47c68ddb", - "uid": "8424475c-7ce1-11e9-8d23-32c49ee6f300", - "controller": true, - "blockOwnerDeletion": true - } - ] - }, - "spec": { - "volumes": [ - { - "name": "default-token-8slws", - "secret": { - "secretName": "default-token-8slws", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "front-end", - "image": "nginx", - "ports": [ - { - "containerPort": 81, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-8slws", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - 
"terminationMessagePolicy": "File", - "imagePullPolicy": "Always" - }, - { - "name": "rss-reader", - "image": "nickchase/rss-php-nginx:v1", - "ports": [ - { - "containerPort": 88, - "protocol": "TCP" - } - ], - "env": [ - { - "name": "KUBERNETES_PORT_443_TCP_ADDR", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - }, - { - "name": "KUBERNETES_PORT", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_PORT_443_TCP", - "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" - }, - { - "name": "KUBERNETES_SERVICE_HOST", - "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" - } - ], - "resources": {}, - "volumeMounts": [ - { - "name": "default-token-8slws", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "IfNotPresent" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "aks-nodepool1-19574989-0", - "securityContext": {}, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:32:43Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:32:51Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - 
"lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-05-22T22:32:43Z" - } - ], - "hostIP": "10.240.0.4", - "podIP": "10.244.1.129", - "startTime": "2019-05-22T22:32:43Z", - "containerStatuses": [ - { - "name": "front-end", - "state": { - "running": { - "startedAt": "2019-05-22T22:32:48Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "nginx:latest", - "imageID": "docker-pullable://nginx@sha256:23b4dcdf0d34d4a129755fc6f52e1c6e23bb34ea011b315d87e193033bcd1b68", - "containerID": "docker://dac553a97432f5c2afd757308d7703a037b35bbc562de1abf6a54e03e100a2ba" - }, - { - "name": "rss-reader", - "state": { - "running": { - "startedAt": "2019-05-22T22:32:50Z" - } - }, - "lastState": {}, - "ready": true, - "restartCount": 0, - "image": "nickchase/rss-php-nginx:v1", - "imageID": "docker-pullable://nickchase/rss-php-nginx@sha256:48da56a77fe4ecff4917121365d8e0ce615ebbdfe31f48a996255f5592894e2b", - "containerID": "docker://a173f4ed2e191702e35d8cfe074272bc0b6c5b1d5874bd1513451eb25308cf8d" - } - ], - "qosClass": "BestEffort" - }, - "apiVersion": "v1", - "kind": "Pod" - } - ] -} \ No newline at end of file diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 7c6d2ba20..0c1b378a0 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -16,17 +16,20 @@ class FilterHealthModelBuilder < Filter config_param :model_definition_path, :default => '/etc/opt/microsoft/docker-cimprov/health/health_model_definition.json' config_param :health_monitor_config_path, :default => '/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json' config_param :health_state_serialized_path, :default => '/mnt/azure/health_model_state.json' - attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_finalizers, :monitor_set, 
:model_builder, :hierarchy_builder, :resources, :kube_api_down_handler, :provider, :reducer, :state, :generator, :serializer, :deserializer + attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_finalizers, :monitor_set, :model_builder, :hierarchy_builder, :resources, :kube_api_down_handler, :provider, :reducer, :state, :generator include HealthModel @@rewrite_tag = 'oms.api.KubeHealth.AgentCollectionTime' @@cluster_id = KubernetesApiClient.getClusterId + @@token_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/token" + @@cert_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" @@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled def initialize begin super @buffer = HealthModel::HealthModelBuffer.new + @cluster_health_state = ClusterHealthState.new(@@token_file_path, @@cert_file_path) @health_model_definition = HealthModel::ParentMonitorProvider.new(HealthModel::HealthModelDefinitionParser.new(@model_definition_path).parse_file) @monitor_factory = HealthModel::MonitorFactory.new @hierarchy_builder = HealthHierarchyBuilder.new(@health_model_definition, @monitor_factory) @@ -41,14 +44,7 @@ def initialize @generator = HealthMissingSignalGenerator.new #TODO: cluster_labels needs to be initialized @provider = HealthMonitorProvider.new(@@cluster_id, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) - @serializer = HealthStateSerializer.new(@health_state_serialized_path) - @deserializer = HealthStateDeserializer.new(@health_state_serialized_path) - # TODO: in_kube_api_health should set these values - # resources.node_inventory = node_inventory - # resources.pod_inventory = pod_inventory - # resources.deployment_inventory = deployment_inventory - #TODO: check if the path exists - deserialized_state_info = @deserializer.deserialize + deserialized_state_info = @cluster_health_state.get_state @state = HealthMonitorState.new 
@state.initialize_state(deserialized_state_info) @cluster_old_state = 'none' @@ -208,11 +204,16 @@ def filter_stream(tag, es) new_es.add(time, record) } - @serializer.serialize(@state) + #emit the stream + router.emit_stream(@@rewrite_tag, new_es) + + #initialize monitor_set and model_builder @monitor_set = HealthModel::MonitorSet.new @model_builder = HealthModel::HealthModelBuilder.new(@hierarchy_builder, @state_finalizers, @monitor_set) - router.emit_stream(@@rewrite_tag, new_es) + #update cluster state custom resource + @cluster_health_state.update_state(@state.to_h) + # return an empty event stream, else the match will throw a NoMethodError return [] elsif tag.start_with?("oms.api.KubeHealth.AgentCollectionTime") @@ -223,6 +224,7 @@ def filter_stream(tag, es) end rescue => e + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) @log.warn "Message: #{e.message} Backtrace: #{e.backtrace}" return nil end diff --git a/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb b/source/code/plugin/health/agg_monitor_id_labels.rb similarity index 55% rename from source/code/plugin/health/aggregate_monitor_instance_id_labels.rb rename to source/code/plugin/health/agg_monitor_id_labels.rb index d1eb86daf..48ca46184 100644 --- a/source/code/plugin/health/aggregate_monitor_instance_id_labels.rb +++ b/source/code/plugin/health/agg_monitor_id_labels.rb @@ -1,11 +1,11 @@ module HealthModel class AggregateMonitorInstanceIdLabels @@id_labels_mapping = { - MonitorId::SYSTEM_WORKLOAD => ["container.azm.ms/namespace", "container.azm.ms/workload-name"], - MonitorId::USER_WORKLOAD => ["container.azm.ms/namespace", "container.azm.ms/workload-name"], - MonitorId::NODE => ["agentpool", "kubernetes.io/role", "kubernetes.io/hostname"], - MonitorId::NAMESPACE => ["container.azm.ms/namespace"], - MonitorId::AGENT_NODE_POOL => ["agentpool"], + MonitorId::SYSTEM_WORKLOAD => [HealthMonitorLabels::NAMESPACE, HealthMonitorLabels::WORKLOAD_NAME], + 
MonitorId::USER_WORKLOAD => [HealthMonitorLabels::NAMESPACE, HealthMonitorLabels::WORKLOAD_NAME], + MonitorId::NODE => [HealthMonitorLabels::AGENTPOOL, HealthMonitorLabels::ROLE, HealthMonitorLabels::HOSTNAME], + MonitorId::NAMESPACE => [HealthMonitorLabels::NAMESPACE], + MonitorId::AGENT_NODE_POOL => [HealthMonitorLabels::AGENTPOOL], # MonitorId::ALL_AGENT_NODE_POOLS => [], # MonitorId::ALL_NODE_POOLS => [], # MonitorId::ALL_NODES => [], diff --git a/source/code/plugin/health/cluster_health_state.rb b/source/code/plugin/health/cluster_health_state.rb new file mode 100644 index 000000000..35d204b12 --- /dev/null +++ b/source/code/plugin/health/cluster_health_state.rb @@ -0,0 +1,116 @@ +require "net/http" +require "net/https" +require "uri" + +module HealthModel + class ClusterHealthState + + attr_reader :token_file_path, :cert_file_path, :log, :http_client, :uri, :token + @@resource_uri_template = "%{kube_api_server_url}/apis/azmon.container.insights/v1/namespaces/kube-system/healthstates/cluster-health-state" + + def initialize(token_file_path, cert_file_path) + @token_file_path = token_file_path + @cert_file_path = cert_file_path + @log = HealthMonitorHelpers.get_log_handle + @http_client = get_http_client + @token = get_token + end + + def update_state(state) + get_request = Net::HTTP::Get.new(@uri.request_uri) + + get_request["Authorization"] = "Bearer #{@token}" + @log.info "Making GET request to #{@uri.request_uri} @ #{Time.now.utc.iso8601}" + get_response = @http_client.request(get_request) + @log.info "Got response of #{get_response.code} for #{@uri.request_uri} @ #{Time.now.utc.iso8601}" + + if get_response.code.to_i == 404 # NOT found + #POST + update_request = Net::HTTP::Post.new(@uri.request_uri) + update_request["Content-Type"] = "application/json" + + elsif get_response.code.to_i == 200 # Update == Patch + #PATCH + update_request = Net::HTTP::Patch.new(@uri.request_uri) + update_request["Content-Type"] = "application/merge-patch+json" + end + 
update_request["Authorization"] = "Bearer #{@token}" + + update_request_body = get_update_request_body + update_request_body["state"] = state.to_json + update_request.body = update_request_body.to_json + + @log.debug "Making an update request with #{update_request.method} " + update_response = @http_client.request(update_request) + @log.info "Got a response of #{update_response.code}" + end + + def get_state + get_request = Net::HTTP::Get.new(@uri.request_uri) + get_request["Authorization"] = "Bearer #{@token}" + @log.info "Making GET request to #{@uri.request_uri} @ #{Time.now.utc.iso8601}" + get_response = @http_client.request(get_request) + @log.info "Got response of #{get_response.code} for #{@uri.request_uri} @ #{Time.now.utc.iso8601}" + + if get_response.code.to_i == 200 + return JSON.parse(JSON.parse(get_response.body)["state"]) + else + return {} + end + end + + private + def get_token() + begin + if File.exist?(@token_file_path) && File.readable?(@token_file_path) + token_str = File.read(@token_file_path).strip + return token_str + else + @log.info ("Unable to read token string from #{@token_file_path}") + return nil + end + end + end + + def get_http_client() + kube_api_server_url = get_kube_api_server_url + resource_uri = @@resource_uri_template % { + kube_api_server_url: kube_api_server_url + } + @uri = URI.parse(resource_uri) + http = Net::HTTP.new(@uri.host, @uri.port) + http.use_ssl = true + if !File.exist?(@cert_file_path) + raise "#{@cert_file_path} doesnt exist" + else + http.ca_file = @cert_file_path + end + http.verify_mode = OpenSSL::SSL::VERIFY_PEER + return http + end + + def get_kube_api_server_url + if ENV["KUBERNETES_SERVICE_HOST"] && ENV["KUBERNETES_PORT_443_TCP_PORT"] + return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}" + else + @log.warn ("Kubernetes environment variable not set KUBERNETES_SERVICE_HOST: #{ENV["KUBERNETES_SERVICE_HOST"]} KUBERNETES_PORT_443_TCP_PORT: 
#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}. Unable to form resourceUri") + if Gem.win_platform? #unit testing on windows dev machine + value = %x( kubectl -n default get endpoints kubernetes --no-headers) + url = "https://#{value.split(' ')[1]}" + return "https://localhost:8080" # This is NEVER used. this is just to return SOME value + end + return nil + end + end + + def get_update_request_body + body = {} + body["apiVersion"] = "azmon.container.insights/v1" + body["kind"] = "HealthState" + body["metadata"] = {} + body["metadata"]["name"] = "cluster-health-state" + body["metadata"]["namespace"] = "kube-system" + return body + end + end +end diff --git a/source/code/plugin/health/health_missing_signal_generator.rb b/source/code/plugin/health/health_missing_signal_generator.rb index 67b9f6b1b..ff7f6a390 100644 --- a/source/code/plugin/health/health_missing_signal_generator.rb +++ b/source/code/plugin/health/health_missing_signal_generator.rb @@ -51,7 +51,7 @@ def get_missing_signals(cluster_id, health_monitor_records, health_k8s_inventory missing_signals_map[monitor_instance_id] = new_monitor log.info "Added missing signal #{new_monitor.monitor_instance_id} #{new_monitor.state}" elsif HealthMonitorHelpers.is_pods_ready_monitor(monitor.monitor_id) - lookup = "#{monitor.labels['container.azm.ms/namespace']}~~#{monitor.labels['container.azm.ms/workload-name']}" + lookup = "#{monitor.labels[HealthMonitorLabels::NAMESPACE]}~~#{monitor.labels[HealthMonitorLabels::WORKLOAD_NAME]}" new_monitor = HealthMonitorRecord.new( monitor.monitor_id, monitor.monitor_instance_id, diff --git a/source/code/plugin/health/health_model_constants.rb b/source/code/plugin/health/health_model_constants.rb index 9598e3a9c..82ae569f3 100644 --- a/source/code/plugin/health/health_model_constants.rb +++ b/source/code/plugin/health/health_model_constants.rb @@ -69,4 +69,13 @@ class HealthMonitorStates NONE = "none" UNKNOWN = "unknown" end + + class HealthMonitorLabels + WORKLOAD_NAME = 
"container.azm.ms/workload-name" + WORKLOAD_KIND = "container.azm.ms/workload-kind" + NAMESPACE = "container.azm.ms/namespace" + AGENTPOOL = "agentpool" + ROLE = "kubernetes.io/role" + HOSTNAME = "kubernetes.io/hostname" + end end \ No newline at end of file diff --git a/source/code/plugin/health/health_monitor_provider.rb b/source/code/plugin/health/health_monitor_provider.rb index 5a20ba31f..0c1cbf7f2 100644 --- a/source/code/plugin/health/health_monitor_provider.rb +++ b/source/code/plugin/health/health_monitor_provider.rb @@ -91,9 +91,9 @@ def get_labels(health_monitor_record) workload_name = health_monitor_record[HealthMonitorRecordFields::DETAILS]['details']['workloadName'] workload_kind = health_monitor_record[HealthMonitorRecordFields::DETAILS]['details']['workloadKind'] - monitor_labels['container.azm.ms/workload-name'] = workload_name.split('~~')[1] - monitor_labels['container.azm.ms/workload-kind'] = workload_kind - monitor_labels['container.azm.ms/namespace'] = namespace + monitor_labels[HealthMonitorLabels::WORKLOAD_NAME] = workload_name.split('~~')[1] + monitor_labels[HealthMonitorLabels::WORKLOAD_KIND] = workload_kind + monitor_labels[HealthMonitorLabels::NAMESPACE] = namespace when HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID node_name = health_monitor_record[HealthMonitorRecordFields::NODE_NAME] diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index beffb6f4a..c3df5e3a9 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -33,8 +33,18 @@ def to_h def initialize_state(deserialized_state) @@monitor_states = {} deserialized_state.each{|k,v| - @@monitor_states[k] = v + health_monitor_instance_state_hash = JSON.parse(v) + state = 
HealthMonitorInstanceState.new(*health_monitor_instance_state_hash.values_at(*HealthMonitorInstanceState.members)) + state.prev_sent_record_time = health_monitor_instance_state_hash["prev_sent_record_time"] + state.old_state = health_monitor_instance_state_hash["old_state"] + state.new_state = health_monitor_instance_state_hash["new_state"] + state.state_change_time = health_monitor_instance_state_hash["state_change_time"] + state.prev_records = health_monitor_instance_state_hash["prev_records"] + state.is_state_change_consistent = health_monitor_instance_state_hash["is_state_change_consistent"] || false + state.should_send = health_monitor_instance_state_hash["should_send"] + @@monitor_states[k] = state @@first_record_sent[k] = true + } end diff --git a/source/code/plugin/health/health_signal_reducer.rb b/source/code/plugin/health/health_signal_reducer.rb index a30755efe..4cf53e82c 100644 --- a/source/code/plugin/health/health_signal_reducer.rb +++ b/source/code/plugin/health/health_signal_reducer.rb @@ -28,8 +28,8 @@ def reduce_signals(health_monitor_records, health_k8s_inventory) end reduced_signals_map[monitor_instance_id] = health_monitor_record elsif HealthMonitorHelpers.is_pods_ready_monitor(monitor_id) - workload_name = health_monitor_record.labels['container.azm.ms/workload-name'] - namespace = health_monitor_record.labels['container.azm.ms/namespace'] + workload_name = health_monitor_record.labels[HealthMonitorLabels::WORKLOAD_NAME] + namespace = health_monitor_record.labels[HealthMonitorLabels::NAMESPACE] lookup = "#{namespace}~~#{workload_name}" if (workload_name.nil? 
|| !workload_names.include?(lookup)) #only add pod record if present in the inventory next diff --git a/source/code/plugin/health/health_state_deserializer.rb b/source/code/plugin/health/health_state_deserializer.rb deleted file mode 100644 index 0a3ac3563..000000000 --- a/source/code/plugin/health/health_state_deserializer.rb +++ /dev/null @@ -1,36 +0,0 @@ -module HealthModel - class HealthStateDeserializer - - attr_reader :deserialize_path - - def initialize(path) - @deserialize_path = path - end - - def deserialize - if !File.file?(@deserialize_path) - return {} - end - - file = File.read(@deserialize_path) #File.read(@deserialize_path) - - deserialized_state = {} - if !file.nil? || !file.empty? - records = JSON.parse(file) - - records.each{|monitor_instance_id, health_monitor_instance_state_hash| - state = HealthMonitorInstanceState.new(*health_monitor_instance_state_hash.values_at(*HealthMonitorInstanceState.members)) - state.prev_sent_record_time = health_monitor_instance_state_hash["prev_sent_record_time"] - state.old_state = health_monitor_instance_state_hash["old_state"] - state.new_state = health_monitor_instance_state_hash["new_state"] - state.state_change_time = health_monitor_instance_state_hash["state_change_time"] - state.prev_records = health_monitor_instance_state_hash["prev_records"] - state.is_state_change_consistent = health_monitor_instance_state_hash["is_state_change_consistent"] || false - state.should_send = health_monitor_instance_state_hash["should_send"] - deserialized_state[monitor_instance_id] = state - } - return deserialized_state - end - end - end -end \ No newline at end of file diff --git a/source/code/plugin/health/health_state_serializer.rb b/source/code/plugin/health/health_state_serializer.rb deleted file mode 100644 index 7ffb445c2..000000000 --- a/source/code/plugin/health/health_state_serializer.rb +++ /dev/null @@ -1,20 +0,0 @@ -module HealthModel - class HealthStateSerializer - - attr_reader :serialized_path - def 
initialize(path) - @serialized_path = path - end - - def serialize(state) - File.open(@serialized_path, 'w') do |f| - states = state.to_h - states_hash = {} - states.each{|id, value| - states_hash[id] = value.to_h - } - f.write(JSON.pretty_generate(states_hash)) - end - end - end -end \ No newline at end of file diff --git a/test/code/plugin/health/ca.crt b/test/code/plugin/health/ca.crt new file mode 100644 index 000000000..9daeafb98 --- /dev/null +++ b/test/code/plugin/health/ca.crt @@ -0,0 +1 @@ +test diff --git a/test/code/plugin/health/cluster_health_state_spec.rb b/test/code/plugin/health/cluster_health_state_spec.rb new file mode 100644 index 000000000..897291fe2 --- /dev/null +++ b/test/code/plugin/health/cluster_health_state_spec.rb @@ -0,0 +1,37 @@ +require_relative '../test_helpers' +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } +require 'time' +include HealthModel +include Minitest + +describe "Cluster Health State Spec" do + + it "ClusterHealthState.new throws if cert file is NOT present" do + state = { + "m1" => { + "state" => "pass", + "time" => Time.now.utc.iso8601 + } + } + + token_file_path = 'token' + cert_file_path = '/var/ca.crt' + + proc {ClusterHealthState.new(token_file_path, cert_file_path)}.must_raise + + end + + it "ClusterHealthState.new returns nil if token is NOT present" do + state = { + "m1" => { + "state" => "pass", + "time" => Time.now.utc.iso8601 + } + } + token_file_path = 'token' + cert_file_path = File.join(File.expand_path(File.dirname(__FILE__)), "ca.crt") + + chs = ClusterHealthState.new(token_file_path, cert_file_path) + chs.token.must_be_nil + end +end diff --git a/test/code/plugin/health/health_missing_signal_generator_spec.rb b/test/code/plugin/health/health_missing_signal_generator_spec.rb index 5a27bc1e4..98d65416d 100644 --- 
a/test/code/plugin/health/health_missing_signal_generator_spec.rb +++ b/test/code/plugin/health/health_missing_signal_generator_spec.rb @@ -11,26 +11,26 @@ resources.expect(:get_workload_names, ["default~~workload1"]) provider = Mock.new - provider.expect(:get_node_labels, {"kubernetes.io/hostname" => "node1"}, ["node1"]) + provider.expect(:get_node_labels, {HealthMonitorLabels::HOSTNAME => "node1"}, ["node1"]) node1_cpu_record = Mock.new def node1_cpu_record.monitor_id; "node_cpu_utilization"; end def node1_cpu_record.monitor_instance_id; "node_cpu_utilization"; end - def node1_cpu_record.labels; {"kubernetes.io/hostname" => "node1"}; end + def node1_cpu_record.labels; {HealthMonitorLabels::HOSTNAME => "node1"}; end def node1_cpu_record.config; {}; end def node1_cpu_record.state; "pass"; end node1_memory_record = Mock.new def node1_memory_record.monitor_id; "node_memory_utilization"; end def node1_memory_record.monitor_instance_id; "node_memory_utilization"; end - def node1_memory_record.labels; {"kubernetes.io/hostname" => "node1"}; end + def node1_memory_record.labels; {HealthMonitorLabels::HOSTNAME => "node1"}; end def node1_memory_record.config; {}; end def node1_memory_record.state; "pass"; end node1_condition_record = Mock.new def node1_condition_record.monitor_id; "node_condition"; end def node1_condition_record.monitor_instance_id; "node_condition-0c593682737a955dc8e0947ad12754fe"; end - def node1_condition_record.labels; {"kubernetes.io/hostname" => "node1"}; end + def node1_condition_record.labels; {HealthMonitorLabels::HOSTNAME => "node1"}; end def node1_condition_record.config; {}; end def node1_condition_record.state; "pass"; end @@ -38,7 +38,7 @@ def node1_condition_record.state; "pass"; end workload1_pods_ready_record = Mock.new def workload1_pods_ready_record.monitor_id; "user_workload_pods_ready"; end def workload1_pods_ready_record.monitor_instance_id; "user_workload_pods_ready-workload1"; end - def workload1_pods_ready_record.labels; 
{"container.azm.ms/namespace" => "default", "container.azm.ms/workload-name" => "workload1"}; end + def workload1_pods_ready_record.labels; {HealthMonitorLabels::NAMESPACE => "default", HealthMonitorLabels::WORKLOAD_NAME => "workload1"}; end def workload1_pods_ready_record.config; {}; end def workload1_pods_ready_record.state; "pass"; end @@ -62,7 +62,7 @@ def workload1_pods_ready_record.state; "pass"; end #arrange resources.expect(:get_nodes, ["node1"]) resources.expect(:get_workload_names, ["default~~workload1"]) - provider.expect(:get_node_labels, {"kubernetes.io/hostname" => "node1"}, ["node1"]) + provider.expect(:get_node_labels, {HealthMonitorLabels::HOSTNAME => "node1"}, ["node1"]) generator.update_last_received_records([node1_cpu_record, node1_memory_record]) #act missing = generator.get_missing_signals('fake_cluster_id', [node1_cpu_record, node1_memory_record], resources, provider) diff --git a/test/code/plugin/health/health_signal_reducer_spec.rb b/test/code/plugin/health/health_signal_reducer_spec.rb index d074748b0..f71a5c509 100644 --- a/test/code/plugin/health/health_signal_reducer_spec.rb +++ b/test/code/plugin/health/health_signal_reducer_spec.rb @@ -10,7 +10,7 @@ record1 = Mock.new def record1.monitor_id; "node_cpu_utilization"; end def record1.monitor_instance_id; "node_cpu_utilization-node1"; end - def record1.labels; {"kubernetes.io/hostname" => "node1"}; end + def record1.labels; {HealthMonitorLabels::HOSTNAME => "node1"}; end inventory = Mock.new def inventory.get_nodes; ["node1"]; end def inventory.get_workload_names; []; end @@ -26,14 +26,14 @@ def inventory.get_workload_names; []; end record1 = Mock.new def record1.monitor_id; "node_cpu_utilization"; end def record1.monitor_instance_id; "node_cpu_utilization-node1"; end - def record1.labels; {"kubernetes.io/hostname" => "node1"}; end + def record1.labels; {HealthMonitorLabels::HOSTNAME => "node1"}; end def record1.transition_date_time; Time.now.utc.iso8601 ; end record2 = Mock.new def 
record2.monitor_id; "node_cpu_utilization"; end def record2.monitor_instance_id; "node_cpu_utilization-node1"; end - def record2.labels; {"kubernetes.io/hostname" => "node1"}; end + def record2.labels; {HealthMonitorLabels::HOSTNAME => "node1"}; end def record2.transition_date_time; "#{Time.now.utc.iso8601}" ; end inventory = Mock.new @@ -51,7 +51,7 @@ def inventory.get_workload_names; []; end record1 = Mock.new def record1.monitor_id; "node_cpu_utilization"; end def record1.monitor_instance_id; "node_cpu_utilization-node1"; end - def record1.labels; {"kubernetes.io/hostname" => "node1"}; end + def record1.labels; {HealthMonitorLabels::HOSTNAME => "node1"}; end inventory = Mock.new def inventory.get_nodes; ["node2"]; end def inventory.get_workload_names; []; end @@ -67,7 +67,7 @@ def inventory.get_workload_names; []; end record1 = Mock.new def record1.monitor_id; "user_workload_pods_ready"; end def record1.monitor_instance_id; "user_workload_pods_ready-workload1"; end - def record1.labels; {"container.azm.ms/namespace" => "default", "container.azm.ms/workload-name" => "workload1"}; end + def record1.labels; {HealthMonitorLabels::NAMESPACE => "default", HealthMonitorLabels::WORKLOAD_NAME => "workload1"}; end def record1.transition_date_time; Time.now.utc.iso8601 ; end inventory = Mock.new @@ -85,7 +85,7 @@ def inventory.get_workload_names; ["default~~workload1"]; end record2 = Mock.new def record2.monitor_id; "user_workload_pods_ready"; end def record2.monitor_instance_id; "user_workload_pods_ready-workload2"; end - def record2.labels; {"container.azm.ms/namespace" => "default1", "container.azm.ms/workload-name" => "workload2"}; end + def record2.labels; {HealthMonitorLabels::NAMESPACE => "default1", HealthMonitorLabels::WORKLOAD_NAME => "workload2"}; end def record1.transition_date_time; Time.now.utc.iso8601 ; end #act reduced = reducer.reduce_signals([record1, record2], inventory) diff --git a/test/code/plugin/health/parent_monitor_provider_spec.rb 
b/test/code/plugin/health/parent_monitor_provider_spec.rb index 4e5c4eb63..a83db50fc 100644 --- a/test/code/plugin/health/parent_monitor_provider_spec.rb +++ b/test/code/plugin/health/parent_monitor_provider_spec.rb @@ -89,7 +89,7 @@ def monitor.monitor_instance_id; "monitor_instance_id"; end monitor = Mock.new def monitor.monitor_id; "conditional_monitor_id"; end def monitor.monitor_instance_id; "conditional_monitor_instance_id"; end - def monitor.labels; {"kubernetes.io/role" => "master"}; end + def monitor.labels; {HealthMonitorLabels::ROLE => "master"}; end #act parent_id = health_model_definition.get_parent_monitor_id(monitor) @@ -134,7 +134,7 @@ def monitor.labels; {"kubernetes.io/role" => "master"}; end monitor = Mock.new def monitor.monitor_id; "conditional_monitor_id"; end def monitor.monitor_instance_id; "conditional_monitor_instance_id"; end - def monitor.labels; {"kubernetes.io/role" => "master1"}; end + def monitor.labels; {HealthMonitorLabels::ROLE => "master1"}; end #act and assert assert_raises do From 8d1402635a3ecf0b5c742ad7f78db2188dbffded Mon Sep 17 00:00:00 2001 From: r-dilip Date: Wed, 14 Aug 2019 10:43:47 -0700 Subject: [PATCH 90/90] Dummy update --- source/code/plugin/health/cluster_health_state.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/code/plugin/health/cluster_health_state.rb b/source/code/plugin/health/cluster_health_state.rb index 35d204b12..ac7e05675 100644 --- a/source/code/plugin/health/cluster_health_state.rb +++ b/source/code/plugin/health/cluster_health_state.rb @@ -40,9 +40,8 @@ def update_state(state) update_request_body["state"] = state.to_json update_request.body = update_request_body.to_json - @log.debug "Making an update request with #{update_request.method} " update_response = @http_client.request(update_request) - @log.info "Got a response of #{update_response.code}" + @log.info "Got a response of #{update_response.code} for #{update_request.method}" end def get_state