From 2b1866f195e528dbd47dca832183e9accd81afe5 Mon Sep 17 00:00:00 2001 From: rashmy Date: Tue, 4 Dec 2018 18:33:11 -0800 Subject: [PATCH 01/23] containernodeinventory changes --- source/code/plugin/in_kube_nodes.rb | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index edbbdd37f..f23d2eec7 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -55,6 +55,11 @@ def enumerate #get node inventory nodeInventory['items'].each do |items| record = {} + # Sending records for ContainerNodeInventory + containerInventory = {} + containerInventory['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated + containerInventory['Computer'] = items['metadata']['name'] + record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated record['Computer'] = items['metadata']['name'] record['ClusterName'] = KubernetesApiClient.getClusterName @@ -89,8 +94,14 @@ def enumerate end - record['KubeletVersion'] = items['status']['nodeInfo']['kubeletVersion'] - record['KubeProxyVersion'] = items['status']['nodeInfo']['kubeProxyVersion'] + nodeInfo = items['status']['nodeInfo'] + record['KubeletVersion'] = nodeInfo['kubeletVersion'] + record['KubeProxyVersion'] = nodeInfo['kubeProxyVersion'] + containerInventory['OperatingSystem'] = nodeInfo['osImage'] + dockerVersion = nodeInfo['containerRuntimeVersion'] + dockerVersion.slice! "docker://" + containerInventory['DockerVersion'] = dockerVersion + wrapper = { "DataType"=>"KUBE_NODE_INVENTORY_BLOB", "IPName"=>"ContainerInsights", From b5b6c0eed6ba72ea046beb924565aa5f0cce9aca Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 6 Dec 2018 13:10:07 -0800 Subject: [PATCH 02/23] changes for containernodeinventory --- installer/conf/container.conf | 23 ----------------------- installer/conf/kube.conf | 13 +++++++++++++ source/code/plugin/in_kube_nodes.rb | 16 +++++++++++----- 3 files changed, 24 insertions(+), 28 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 798bd8eb6..091753230 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -15,16 +15,6 @@ log_level debug -# Container host inventory - - type omi - run_interval 60s - tag oms.api.ContainerNodeInventory - items [ - ["root/cimv2","Container_HostInventory"] - ] - - #cadvisor perf type cadvisorperf @@ -33,19 +23,6 @@ log_level debug - - type out_oms_api - log_level debug - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer - buffer_queue_limit 20 - flush_interval 20s - retry_limit 10 - retry_wait 15s - max_retry_wait 9m - - type out_oms log_level debug diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 94fe2ef0b..22c51ad0e 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -118,6 +118,19 @@ max_retry_wait 9m + + type out_oms_api + log_level debug + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer + buffer_queue_limit 20 + flush_interval 20s + retry_limit 10 + retry_wait 15s + max_retry_wait 9m + + type out_oms log_level debug diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index f23d2eec7..a621b5bc0 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -6,6 +6,8 @@ module Fluent class Kube_nodeInventory_Input < Input Plugin.register_input('kubenodeinventory', self) + @@ContainerNodeInventoryTag = 'oms.api.ContainerNodeInventory' + def initialize super require 'yaml' @@ -52,13 +54,14 @@ def enumerate begin if(!nodeInventory.empty?) eventStream = MultiEventStream.new + containerNodeInventoryEventStream = MultiEventStream.new #get node inventory nodeInventory['items'].each do |items| record = {} # Sending records for ContainerNodeInventory - containerInventory = {} - containerInventory['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated - containerInventory['Computer'] = items['metadata']['name'] + containerNodeInventoryRecord = {} + containerNodeInventoryRecord['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated + containerNodeInventoryRecord['Computer'] = items['metadata']['name'] record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated record['Computer'] = items['metadata']['name'] @@ -97,10 +100,12 @@ def enumerate nodeInfo = items['status']['nodeInfo'] record['KubeletVersion'] = nodeInfo['kubeletVersion'] record['KubeProxyVersion'] = nodeInfo['kubeProxyVersion'] - containerInventory['OperatingSystem'] = nodeInfo['osImage'] + containerNodeInventoryRecord['OperatingSystem'] = nodeInfo['osImage'] dockerVersion = nodeInfo['containerRuntimeVersion'] dockerVersion.slice! "docker://" - containerInventory['DockerVersion'] = dockerVersion + containerNodeInventoryRecord['DockerVersion'] = dockerVersion + # ContainerNodeInventory data for docker version and operating system. + containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryRecord) if containerNodeInventoryRecord wrapper = { "DataType"=>"KUBE_NODE_INVENTORY_BLOB", @@ -110,6 +115,7 @@ def enumerate eventStream.add(emitTime, wrapper) if wrapper end router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") From f48c26a64ed046ba58e1bf1869ca0b533794700f Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 14 Dec 2018 17:23:37 -0800 Subject: [PATCH 03/23] changes to add node telemetry --- .../code/plugin/ApplicationInsightsUtility.rb | 6 ++--- source/code/plugin/in_kube_nodes.rb | 24 ++++++++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 78553a83f..76e0b2926 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -83,7 +83,7 @@ def sendHeartBeatEvent(pluginName) end end - def sendCustomEvent(pluginName, properties) + def sendCustomMetric(pluginName, properties) begin if !(@@Tc.nil?) @@Tc.track_metric 'LastProcessedContainerInventoryCount', properties['ContainerCount'], @@ -93,7 +93,7 @@ def sendCustomEvent(pluginName, properties) $log.info("AppInsights Container Count Telemetry sent successfully") end rescue => errorStr - $log.warn("Exception in AppInsightsUtility: sendCustomEvent - error: #{errorStr}") + $log.warn("Exception in AppInsightsUtility: sendCustomMetric - error: #{errorStr}") end end @@ -120,7 +120,7 @@ def sendTelemetry(pluginName, properties) end @@CustomProperties['Computer'] = properties['Computer'] sendHeartBeatEvent(pluginName) - sendCustomEvent(pluginName, properties) + sendCustomMetric(pluginName, properties) rescue => errorStr $log.warn("Exception in AppInsightsUtility: sendTelemetry - error: #{errorStr}") end diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index a621b5bc0..ee7440e5f 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -14,6 +14,7 @@ def initialize require 'json' require_relative 'KubernetesApiClient' + require_relative 'ApplicationInsightsUtility' require_relative 'oms_common' require_relative 'omslog' end @@ -31,6 +32,7 @@ def start @condition = ConditionVariable.new @mutex = Mutex.new @thread = Thread.new(&method(:run_periodic)) + @@telemetryTimeTracker = DateTime.now.to_time.to_i end end @@ -48,9 +50,10 @@ def enumerate currentTime = Time.now emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 - $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo('nodes').body) - $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + @@telemetrySent = false + $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") + nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo('nodes').body) + $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") begin if(!nodeInventory.empty?) eventStream = MultiEventStream.new @@ -113,9 +116,24 @@ def enumerate "DataItems"=>[record.each{|k,v| record[k]=v}] } eventStream.add(emitTime, wrapper) if wrapper + # Adding telemetry to send node telemetry every 5 minutes + timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference/60 + if (timeDifferenceInMinutes >= 5) + properties = {} + properties["Computer"] = record["Computer"] + ApplicationInsightsUtility.sendMetricTelemetry("KubeletVersion", record["KubeletVersion"] , properties) + capacityInfo = items['status']['capacity'] + ApplicationInsightsUtility.sendMetricTelemetry("CoreCapacity", capacityInfo["cpu"] , properties) + ApplicationInsightsUtility.sendMetricTelemetry("Memory", capacityInfo["memory"] , properties) + @@telemetrySent = true + end end router.emit_stream(@tag, eventStream) if eventStream router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream + if @@telemetrySent == true + @@telemetryTimeTracker = DateTime.now.to_time.to_i + end @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") From 5336e55be74096197532ebff2401e6713c0de805 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 14 Dec 2018 17:58:27 -0800 Subject: [PATCH 04/23] pod telemetry cahnges --- source/code/plugin/in_kube_nodes.rb | 6 +++--- source/code/plugin/in_kube_podinventory.rb | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index ee7440e5f..a8c4b3f6f 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -32,7 +32,7 @@ def start @condition = ConditionVariable.new @mutex = Mutex.new @thread = Thread.new(&method(:run_periodic)) - @@telemetryTimeTracker = DateTime.now.to_time.to_i + @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i end end @@ -117,7 +117,7 @@ def enumerate } eventStream.add(emitTime, wrapper) if wrapper # Adding telemetry to send node telemetry every 5 minutes - timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs + timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs timeDifferenceInMinutes = timeDifference/60 if (timeDifferenceInMinutes >= 5) properties = {} @@ -132,7 +132,7 @@ def enumerate router.emit_stream(@tag, eventStream) if eventStream router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream if @@telemetrySent == true - @@telemetryTimeTracker = DateTime.now.to_time.to_i + @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i end @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index ec76bac61..51b1d56e0 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -10,8 +10,10 @@ def initialize super require 'yaml' require 'json' + require 'set' require_relative 'KubernetesApiClient' + require_relative 'ApplicationInsightsUtility' require_relative 'oms_common' require_relative 'omslog' end @@ -29,6 +31,7 @@ def start @condition = ConditionVariable.new @mutex = Mutex.new @thread = Thread.new(&method(:run_periodic)) + @@podTelemetryTimeTracker = DateTime.now.to_time.to_i end end @@ -71,6 +74,8 @@ def parse_and_emit_records(podInventory, serviceList) emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 eventStream = MultiEventStream.new + controllerSet = Set.new [] + telemetryFlush = false begin #begin block start podInventory['items'].each do |items| #podInventory block start records = [] @@ -78,6 +83,12 @@ def parse_and_emit_records(podInventory, serviceList) record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated record['Name'] = items['metadata']['name'] podNameSpace = items['metadata']['namespace'] + # Adding telemetry to send node telemetry every 5 minutes + timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference/60 + if (timeDifferenceInMinutes >= 5) + telemetryFlush = true + end if podNameSpace.eql?("kube-system") && !items['metadata'].key?("ownerReferences") # The above case seems to be the only case where you have horizontal scaling of pods # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash @@ -85,8 +96,14 @@ def parse_and_emit_records(podInventory, serviceList) # its ok to use this. # Use kubernetes.io/config.hash to be able to correlate with cadvisor data podUid = items['metadata']['annotations']['kubernetes.io/config.hash'] + if telemetryFlush == true + controllerSet.add(podUid) + end else podUid = items['metadata']['uid'] + if telemetryFlush == true + controllerSet.add(podUid) + end end record['PodUid'] = podUid record['PodLabel'] = [items['metadata']['labels']] @@ -189,6 +206,11 @@ def parse_and_emit_records(podInventory, serviceList) eventStream.add(emitTime, wrapper) if wrapper end end + if telemetryFlush == true + ApplicationInsightsUtility.sendMetricTelemetry("PodCount", records.count , {}) + ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length , {}) + @@podTelemetryTimeTracker = DateTime.now.to_time.to_i + end end #podInventory block end router.emit_stream(@tag, eventStream) if eventStream @@istestvar = ENV['ISTEST'] From 7c67c046dc76573deddecf854ea140b5802a84c7 Mon Sep 17 00:00:00 2001 From: rashmy Date: Sat, 15 Dec 2018 19:02:42 -0800 Subject: [PATCH 05/23] updated telemetry changes --- source/code/plugin/in_kube_nodes.rb | 10 +++++----- source/code/plugin/in_kube_podinventory.rb | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index a8c4b3f6f..1c792d0da 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -50,7 +50,7 @@ def enumerate currentTime = Time.now emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 - @@telemetrySent = false + telemetrySent = false $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo('nodes').body) $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") @@ -124,14 +124,14 @@ def enumerate properties["Computer"] = record["Computer"] ApplicationInsightsUtility.sendMetricTelemetry("KubeletVersion", record["KubeletVersion"] , properties) capacityInfo = items['status']['capacity'] - ApplicationInsightsUtility.sendMetricTelemetry("CoreCapacity", capacityInfo["cpu"] , properties) - ApplicationInsightsUtility.sendMetricTelemetry("Memory", capacityInfo["memory"] , properties) - @@telemetrySent = true + ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"] , properties) + ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"] , properties) + telemetrySent = true end end router.emit_stream(@tag, eventStream) if eventStream router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream - if @@telemetrySent == true + if telemetrySent == true @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i end @@istestvar = ENV['ISTEST'] diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 51b1d56e0..2b8b2625d 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -83,8 +83,8 @@ def parse_and_emit_records(podInventory, serviceList) record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated record['Name'] = items['metadata']['name'] podNameSpace = items['metadata']['namespace'] - # Adding telemetry to send node telemetry every 5 minutes - timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs + # Adding telemetry to send pod telemetry every 5 minutes + timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs timeDifferenceInMinutes = timeDifference/60 if (timeDifferenceInMinutes >= 5) telemetryFlush = true @@ -206,13 +206,13 @@ def parse_and_emit_records(podInventory, serviceList) eventStream.add(emitTime, wrapper) if wrapper end end - if telemetryFlush == true - ApplicationInsightsUtility.sendMetricTelemetry("PodCount", records.count , {}) - ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length , {}) - @@podTelemetryTimeTracker = DateTime.now.to_time.to_i - end end #podInventory block end router.emit_stream(@tag, eventStream) if eventStream + if telemetryFlush == true + ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory['items'].length , {}) + ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length , {}) + @@podTelemetryTimeTracker = DateTime.now.to_time.to_i + end @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") From 0c3ccef8eb119a36dd24e47ffb3b4f3c4ac05286 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 17 Dec 2018 13:50:17 -0800 Subject: [PATCH 06/23] changes to get uid of owner references as controller id --- source/code/plugin/in_kube_podinventory.rb | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 2b8b2625d..c6873e8fe 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -83,12 +83,7 @@ def parse_and_emit_records(podInventory, serviceList) record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated record['Name'] = items['metadata']['name'] podNameSpace = items['metadata']['namespace'] - # Adding telemetry to send pod telemetry every 5 minutes - timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference/60 - if (timeDifferenceInMinutes >= 5) - telemetryFlush = true - end + if podNameSpace.eql?("kube-system") && !items['metadata'].key?("ownerReferences") # The above case seems to be the only case where you have horizontal scaling of pods # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash @@ -96,14 +91,8 @@ def parse_and_emit_records(podInventory, serviceList) # its ok to use this. # Use kubernetes.io/config.hash to be able to correlate with cadvisor data podUid = items['metadata']['annotations']['kubernetes.io/config.hash'] - if telemetryFlush == true - controllerSet.add(podUid) - end else podUid = items['metadata']['uid'] - if telemetryFlush == true - controllerSet.add(podUid) - end end record['PodUid'] = podUid record['PodLabel'] = [items['metadata']['labels']] @@ -146,9 +135,18 @@ def parse_and_emit_records(podInventory, serviceList) record['ClusterId'] = KubernetesApiClient.getClusterId record['ClusterName'] = KubernetesApiClient.getClusterName record['ServiceName'] = getServiceNameFromLabels(items['metadata']['namespace'], items['metadata']['labels'], serviceList) + # Adding telemetry to send pod telemetry every 5 minutes + timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference/60 + if (timeDifferenceInMinutes >= 5) + telemetryFlush = true + end if !items['metadata']['ownerReferences'].nil? record['ControllerKind'] = items['metadata']['ownerReferences'][0]['kind'] record['ControllerName'] = items['metadata']['ownerReferences'][0]['name'] + if telemetryFlush == true + controllerSet.add(record['ControllerKind'] + record['ControllerName']) + end end podRestartCount = 0 record['PodRestartCount'] = 0 From 7f7b98b41a1a219fe63dbbaaefc047630feeea30 Mon Sep 17 00:00:00 2001 From: rashmy Date: Tue, 18 Dec 2018 10:54:22 -0800 Subject: [PATCH 07/23] updating socket to the new mount location --- source/code/plugin/DockerApiClient.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index e12ef13ec..014dece57 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -10,7 +10,7 @@ class DockerApiClient require_relative 'DockerApiRestHelper' require_relative 'ApplicationInsightsUtility' - @@SocketPath = "/var/run/docker.sock" + @@SocketPath = "/var/run/host/docker.sock" @@ChunkSize = 4096 @@TimeoutInSeconds = 5 @@PluginName = 'ContainerInventory' From 7a4c2bed631f3b7ac450fb341737de7f8fe8b9f4 Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 19 Dec 2018 14:49:56 -0800 Subject: [PATCH 08/23] Adding exception telemetry and heartbeat --- source/code/plugin/in_kube_events.rb | 5 +++++ source/code/plugin/in_kube_nodes.rb | 2 ++ source/code/plugin/in_kube_podinventory.rb | 5 +++++ source/code/plugin/in_kube_services.rb | 3 +++ 4 files changed, 15 insertions(+) diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index 5df31df95..ca8f7ce63 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -15,6 +15,7 @@ def initialize require_relative 'KubernetesApiClient' require_relative 'oms_common' require_relative 'omslog' + require_relative 'ApplicationInsightsUtility' end config_param :run_interval, :time, :default => '1m' @@ -94,6 +95,7 @@ def enumerate(eventList = nil) rescue => errorStr $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -110,6 +112,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_events::run_periodic: enumerate Failed to retrieve kube events: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock @@ -129,6 +132,7 @@ def getEventQueryState rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end return eventQueryState end @@ -144,6 +148,7 @@ def writeEventQueryState(eventQueryState) rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 1c792d0da..b0df4ea63 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -142,6 +142,7 @@ def enumerate rescue => errorStr $log.warn "Failed to retrieve node inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -158,6 +159,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_nodes::run_periodic: enumerate Failed to retrieve node inventory: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index c6873e8fe..bcd03d236 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -66,6 +66,7 @@ def enumerate(podList = nil) rescue => errorStr $log.warn "Failed in enumerate pod inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -207,6 +208,7 @@ def parse_and_emit_records(podInventory, serviceList) end #podInventory block end router.emit_stream(@tag, eventStream) if eventStream if telemetryFlush == true + ApplicationInsightsUtility.sendHeartBeatEvent("KubePodInventory") ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory['items'].length , {}) ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length , {}) @@podTelemetryTimeTracker = DateTime.now.to_time.to_i @@ -218,6 +220,7 @@ def parse_and_emit_records(podInventory, serviceList) rescue => errorStr $log.warn "Failed in parse_and_emit_record pod inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end #begin block end end @@ -234,6 +237,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_podinventory::run_periodic: enumerate Failed to retrieve pod inventory: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock @@ -268,6 +272,7 @@ def getServiceNameFromLabels(namespace, labels, serviceList) rescue => errorStr $log.warn "Failed to retrieve service name from labels: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end return serviceName end diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb index 9a33f4581..b6f03be04 100644 --- a/source/code/plugin/in_kube_services.rb +++ b/source/code/plugin/in_kube_services.rb @@ -14,6 +14,7 @@ def initialize require_relative 'KubernetesApiClient' require_relative 'oms_common' require_relative 'omslog' + require_relative 'ApplicationInsightsUtility' end config_param :run_interval, :time, :default => '1m' @@ -70,6 +71,7 @@ def enumerate rescue => errorStr $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(e.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -86,6 +88,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_services::run_periodic: enumerate Failed to kube services: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock From d2b043cc40511f6502ad3c91288a7002b56d709d Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 19 Dec 2018 18:04:06 -0800 Subject: [PATCH 09/23] changes to fix controller type --- .../code/plugin/ApplicationInsightsUtility.rb | 35 +++++++++++-------- .../code/plugin/CAdvisorMetricsAPIClient.rb | 5 +-- source/code/plugin/DockerApiClient.rb | 6 ++-- source/code/plugin/in_containerinventory.rb | 4 ++- source/code/plugin/in_kube_events.rb | 10 +++--- source/code/plugin/in_kube_nodes.rb | 12 ++++--- source/code/plugin/in_kube_podinventory.rb | 16 +++++---- source/code/plugin/in_kube_services.rb | 7 ++-- 8 files changed, 58 insertions(+), 37 deletions(-) diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 76e0b2926..d75b28832 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -14,6 +14,7 @@ class ApplicationInsightsUtility @@AcsClusterType = 'ACS' @@AksClusterType = 'AKS' @@DaemonsetControllerType = 'DaemonSet' + @@ReplicasetControllerType = 'ReplicaSet' @OmsAdminFilePath = '/etc/opt/microsoft/omsagent/conf/omsadmin.conf' @@EnvAcsResourceName = 'ACS_RESOURCE_NAME' @@EnvAksRegion = 'AKS_REGION' @@ -54,10 +55,11 @@ def initializeUtility() @@CustomProperties["ClusterName"] = clusterName @@CustomProperties["Region"] = ENV[@@EnvAksRegion] end - @@CustomProperties['ControllerType'] = @@DaemonsetControllerType dockerInfo = DockerApiClient.dockerInfo - @@CustomProperties['DockerVersion'] = dockerInfo['Version'] - @@CustomProperties['DockerApiVersion'] = dockerInfo['ApiVersion'] + if (!dockerInfo.empty? && !dockerInfo.nil?) + @@CustomProperties['DockerVersion'] = dockerInfo['Version'] + @@CustomProperties['DockerApiVersion'] = dockerInfo['ApiVersion'] + end @@CustomProperties['WorkspaceID'] = getWorkspaceId @@CustomProperties['AgentVersion'] = ENV[@@EnvAgentVersion] encodedAppInsightsKey = ENV[@@EnvApplicationInsightsKey] @@ -70,9 +72,10 @@ def initializeUtility() end end - def sendHeartBeatEvent(pluginName) + def sendHeartBeatEvent(pluginName, controllerType) begin eventName = pluginName + @@HeartBeat + @@CustomProperties['ControllerType'] = controllerType if !(@@Tc.nil?) @@Tc.track_event eventName , :properties => @@CustomProperties @@Tc.flush @@ -83,9 +86,10 @@ def sendHeartBeatEvent(pluginName) end end - def sendCustomMetric(pluginName, properties) + def sendCustomMetric(pluginName, properties, controllerType) begin if !(@@Tc.nil?) + @@CustomProperties['ControllerType'] = controllerType @@Tc.track_metric 'LastProcessedContainerInventoryCount', properties['ContainerCount'], :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, :properties => @@CustomProperties @@ -97,12 +101,13 @@ def sendCustomMetric(pluginName, properties) end end - def sendExceptionTelemetry(errorStr) + def sendExceptionTelemetry(errorStr, controllerType) begin - if @@CustomProperties.empty? || @@CustomProperties.nil? - initializeUtility + if @@CustomProperties.empty? || @@CustomProperties.nil? || || @@CustomProperties['DockerVersion'].nil? + initializeUtility() end if !(@@Tc.nil?) + @@CustomProperties['ControllerType'] = controllerType @@Tc.track_exception errorStr , :properties => @@CustomProperties @@Tc.flush $log.info("AppInsights Exception Telemetry sent successfully") @@ -113,11 +118,12 @@ def sendExceptionTelemetry(errorStr) end #Method to send heartbeat and container inventory count - def sendTelemetry(pluginName, properties) + def sendTelemetry(pluginName, properties, controllerType) begin - if @@CustomProperties.empty? || @@CustomProperties.nil? - initializeUtility + if @@CustomProperties.empty? || @@CustomProperties.nil? || @@CustomProperties['DockerVersion'].nil? + initializeUtility() end + @@CustomProperties['ControllerType'] = controllerType @@CustomProperties['Computer'] = properties['Computer'] sendHeartBeatEvent(pluginName) sendCustomMetric(pluginName, properties) @@ -127,17 +133,18 @@ def sendTelemetry(pluginName, properties) end #Method to send metric. It will merge passed-in properties with common custom properties - def sendMetricTelemetry(metricName, metricValue, properties) + def sendMetricTelemetry(metricName, metricValue, properties, controllerType) begin if (metricName.empty? || metricName.nil?) $log.warn("SendMetricTelemetry: metricName is missing") return end - if @@CustomProperties.empty? || @@CustomProperties.nil? - initializeUtility + if @@CustomProperties.empty? || @@CustomProperties.nil? || @@CustomProperties['DockerVersion'].nil? + initializeUtility() end telemetryProps = {} telemetryProps["Computer"] = @@hostName + @@CustomProperties['ControllerType'] = controllerType # add common dimensions @@CustomProperties.each{ |k,v| telemetryProps[k]=v} # add passed-in dimensions if any diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index 9e47e5a9e..7be173280 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -22,6 +22,7 @@ class CAdvisorMetricsAPIClient @@txBytesTimeLast = nil @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i + @@ReplicasetControllerType = 'ReplicaSet' def initialize end @@ -142,7 +143,7 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met telemetryProps = {} telemetryProps['PodName'] = podName telemetryProps['ContainerName'] = containerName - ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) + ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps, @@ReplicasetControllerType) end end rescue => errorStr @@ -205,7 +206,7 @@ def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollec telemetryProps = {} telemetryProps['PodName'] = podName telemetryProps['ContainerName'] = containerName - ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) + ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps, @@ReplicasetControllerType) end end rescue => errorStr diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index 014dece57..5fcd44063 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -14,6 +14,8 @@ class DockerApiClient @@ChunkSize = 4096 @@TimeoutInSeconds = 5 @@PluginName = 'ContainerInventory' + @@DaemonsetControllerType = 'DaemonSet' + def initialize end @@ -44,7 +46,7 @@ def getResponse(request, isMultiJson, isVersion) return (isTimeOut)? nil : parseResponse(dockerResponse, isMultiJson) rescue => errorStr $log.warn("Socket call failed for request: #{request} error: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@DaemonsetControllerType) end end @@ -63,7 +65,7 @@ def parseResponse(dockerResponse, isMultiJson) end rescue => errorStr $log.warn("Json parsing for docker response failed: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@DaemonsetControllerType) end return parsedJsonResponse end diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb index f501421a2..6db461ef4 100644 --- a/source/code/plugin/in_containerinventory.rb +++ b/source/code/plugin/in_containerinventory.rb @@ -19,6 +19,8 @@ def initialize require_relative 'ContainerInventoryState' require_relative 'ApplicationInsightsUtility' require_relative 'omslog' + + @@DaemonsetControllerType = 'DaemonSet' end config_param :run_interval, :time, :default => '1m' @@ -237,7 +239,7 @@ def enumerate telemetryProperties = {} telemetryProperties['Computer'] = hostname telemetryProperties['ContainerCount'] = containerInventory.length - ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) + ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties, @@DaemonsetControllerType) end $log.info("in_container_inventory::enumerate : Processing complete - emitted stream @ #{Time.now.utc.iso8601}") end diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index ca8f7ce63..ac565896d 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -16,6 +16,8 @@ def initialize require_relative 'oms_common' require_relative 'omslog' require_relative 'ApplicationInsightsUtility' + + @@ReplicasetControllerType = 'ReplicaSet' end config_param :run_interval, :time, :default => '1m' @@ -95,7 +97,7 @@ def enumerate(eventList = nil) rescue => errorStr $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) end end @@ -112,7 +114,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_events::run_periodic: enumerate Failed to retrieve kube events: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) end end @mutex.lock @@ -132,7 +134,7 @@ def getEventQueryState rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) end return eventQueryState end @@ -148,7 +150,7 @@ def writeEventQueryState(eventQueryState) rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) end end diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index b0df4ea63..f11e429bb 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -17,6 +17,8 @@ def initialize require_relative 'ApplicationInsightsUtility' require_relative 'oms_common' require_relative 'omslog' + + @@ReplicasetControllerType = 'ReplicaSet' end config_param :run_interval, :time, :default => '1m' @@ -122,10 +124,10 @@ def enumerate if (timeDifferenceInMinutes >= 5) properties = {} properties["Computer"] = record["Computer"] - ApplicationInsightsUtility.sendMetricTelemetry("KubeletVersion", record["KubeletVersion"] , properties) + ApplicationInsightsUtility.sendMetricTelemetry("KubeletVersion", record["KubeletVersion"] , properties, @@ReplicasetControllerType) capacityInfo = items['status']['capacity'] - ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"] , properties) - ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"] , properties) + ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"] , properties, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"] , properties, @@ReplicasetControllerType) telemetrySent = true end end @@ -142,7 +144,7 @@ def enumerate rescue => errorStr $log.warn "Failed to retrieve node inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) end end @@ -159,7 +161,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_nodes::run_periodic: enumerate Failed to retrieve node inventory: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) end end @mutex.lock diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index bcd03d236..4b2cdbc83 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -16,6 +16,8 @@ def initialize require_relative 'ApplicationInsightsUtility' require_relative 'oms_common' require_relative 'omslog' + + @@ReplicasetControllerType = 'ReplicaSet' end config_param :run_interval, :time, :default => '1m' @@ -66,7 +68,7 @@ def enumerate(podList = nil) rescue => errorStr $log.warn "Failed in enumerate pod inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) end end @@ -208,9 +210,9 @@ def parse_and_emit_records(podInventory, serviceList) end #podInventory block end router.emit_stream(@tag, eventStream) if eventStream if telemetryFlush == true - ApplicationInsightsUtility.sendHeartBeatEvent("KubePodInventory") - ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory['items'].length , {}) - ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length , {}) + ApplicationInsightsUtility.sendHeartBeatEvent("KubePodInventory", @@ReplicasetControllerType) + ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory['items'].length , {}, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length , {}, @@ReplicasetControllerType) @@podTelemetryTimeTracker = DateTime.now.to_time.to_i end @@istestvar = ENV['ISTEST'] @@ -220,7 +222,7 @@ def parse_and_emit_records(podInventory, serviceList) rescue => errorStr $log.warn "Failed in parse_and_emit_record pod inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) end #begin block end end @@ -237,7 +239,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_podinventory::run_periodic: enumerate Failed to retrieve pod inventory: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) end end @mutex.lock @@ -272,7 +274,7 @@ def getServiceNameFromLabels(namespace, labels, serviceList) rescue => errorStr $log.warn "Failed to retrieve service name from labels: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) end return serviceName end diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb index b6f03be04..9d5186561 100644 --- a/source/code/plugin/in_kube_services.rb +++ b/source/code/plugin/in_kube_services.rb @@ -15,6 +15,9 @@ def initialize require_relative 'oms_common' require_relative 'omslog' require_relative 'ApplicationInsightsUtility' + + @@ReplicasetControllerType = 'ReplicaSet' + end config_param :run_interval, :time, :default => '1m' @@ -71,7 +74,7 @@ def enumerate rescue => errorStr $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(e.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) end end @@ -88,7 +91,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_services::run_periodic: enumerate Failed to kube services: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) end end @mutex.lock From e048d9bf8daf7079a4c134d589859c08eb31da12 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 20 Dec 2018 10:45:42 -0800 Subject: [PATCH 10/23] Fixing typo --- source/code/plugin/ApplicationInsightsUtility.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index d75b28832..2e807eb9b 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -103,7 +103,7 @@ def sendCustomMetric(pluginName, properties, controllerType) def sendExceptionTelemetry(errorStr, controllerType) begin - if @@CustomProperties.empty? || @@CustomProperties.nil? || || @@CustomProperties['DockerVersion'].nil? + if @@CustomProperties.empty? || @@CustomProperties.nil? || @@CustomProperties['DockerVersion'].nil? initializeUtility() end if !(@@Tc.nil?) From e4242d035c9e745133ef9c10fd6d0a5e97d8a539 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 20 Dec 2018 14:18:34 -0800 Subject: [PATCH 11/23] fixing method signature --- source/code/plugin/ApplicationInsightsUtility.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 2e807eb9b..5101c1430 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -125,8 +125,8 @@ def sendTelemetry(pluginName, properties, controllerType) end @@CustomProperties['ControllerType'] = controllerType @@CustomProperties['Computer'] = properties['Computer'] - sendHeartBeatEvent(pluginName) - sendCustomMetric(pluginName, properties) + sendHeartBeatEvent(pluginName, controllerType) + sendCustomMetric(pluginName, properties, controllerType) rescue => errorStr $log.warn("Exception in AppInsightsUtility: sendTelemetry - error: #{errorStr}") end From d6f746ac02d03d01f86ada6b312e5b77d43d7c86 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 20 Dec 2018 15:01:22 -0800 Subject: [PATCH 12/23] updating plugins to get controller type from env --- .../code/plugin/ApplicationInsightsUtility.rb | 23 ++++++++----------- .../code/plugin/CAdvisorMetricsAPIClient.rb | 5 ++-- source/code/plugin/DockerApiClient.rb | 5 ++-- source/code/plugin/in_containerinventory.rb | 3 +-- source/code/plugin/in_kube_events.rb | 9 ++++---- source/code/plugin/in_kube_nodes.rb | 11 ++++----- source/code/plugin/in_kube_podinventory.rb | 15 ++++++------ source/code/plugin/in_kube_services.rb | 6 ++--- 8 files changed, 32 insertions(+), 45 deletions(-) diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 5101c1430..d2754c65a 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -13,13 +13,12 @@ class ApplicationInsightsUtility @@Exception = 'ExceptionEvent' @@AcsClusterType = 'ACS' @@AksClusterType = 'AKS' - @@DaemonsetControllerType = 'DaemonSet' - @@ReplicasetControllerType = 'ReplicaSet' @OmsAdminFilePath = '/etc/opt/microsoft/omsagent/conf/omsadmin.conf' @@EnvAcsResourceName = 'ACS_RESOURCE_NAME' @@EnvAksRegion = 'AKS_REGION' @@EnvAgentVersion = 'AGENT_VERSION' @@EnvApplicationInsightsKey = 'APPLICATIONINSIGHTS_AUTH' + @@EnvControllerType = 'CONTROLLER_TYPE' @@CustomProperties = {} @@Tc = nil @@hostName = (OMS::Common.get_hostname) @@ -62,6 +61,7 @@ def initializeUtility() end @@CustomProperties['WorkspaceID'] = getWorkspaceId @@CustomProperties['AgentVersion'] = ENV[@@EnvAgentVersion] + @@CustomProperties['ControllerType'] = ENV[@@EnvControllerType] encodedAppInsightsKey = ENV[@@EnvApplicationInsightsKey] if !encodedAppInsightsKey.nil? decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey) @@ -72,10 +72,9 @@ def initializeUtility() end end - def sendHeartBeatEvent(pluginName, controllerType) + def sendHeartBeatEvent(pluginName) begin eventName = pluginName + @@HeartBeat - @@CustomProperties['ControllerType'] = controllerType if !(@@Tc.nil?) @@Tc.track_event eventName , :properties => @@CustomProperties @@Tc.flush @@ -86,10 +85,9 @@ def sendHeartBeatEvent(pluginName, controllerType) end end - def sendCustomMetric(pluginName, properties, controllerType) + def sendCustomMetric(pluginName, properties) begin if !(@@Tc.nil?) - @@CustomProperties['ControllerType'] = controllerType @@Tc.track_metric 'LastProcessedContainerInventoryCount', properties['ContainerCount'], :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, :properties => @@CustomProperties @@ -101,13 +99,12 @@ def sendCustomMetric(pluginName, properties, controllerType) end end - def sendExceptionTelemetry(errorStr, controllerType) + def sendExceptionTelemetry(errorStr) begin if @@CustomProperties.empty? || @@CustomProperties.nil? || @@CustomProperties['DockerVersion'].nil? initializeUtility() end if !(@@Tc.nil?) - @@CustomProperties['ControllerType'] = controllerType @@Tc.track_exception errorStr , :properties => @@CustomProperties @@Tc.flush $log.info("AppInsights Exception Telemetry sent successfully") @@ -118,22 +115,21 @@ def sendExceptionTelemetry(errorStr, controllerType) end #Method to send heartbeat and container inventory count - def sendTelemetry(pluginName, properties, controllerType) + def sendTelemetry(pluginName, properties) begin if @@CustomProperties.empty? || @@CustomProperties.nil? || @@CustomProperties['DockerVersion'].nil? initializeUtility() end - @@CustomProperties['ControllerType'] = controllerType @@CustomProperties['Computer'] = properties['Computer'] - sendHeartBeatEvent(pluginName, controllerType) - sendCustomMetric(pluginName, properties, controllerType) + sendHeartBeatEvent(pluginName) + sendCustomMetric(pluginName, properties) rescue => errorStr $log.warn("Exception in AppInsightsUtility: sendTelemetry - error: #{errorStr}") end end #Method to send metric. It will merge passed-in properties with common custom properties - def sendMetricTelemetry(metricName, metricValue, properties, controllerType) + def sendMetricTelemetry(metricName, metricValue, properties) begin if (metricName.empty? || metricName.nil?) $log.warn("SendMetricTelemetry: metricName is missing") @@ -144,7 +140,6 @@ def sendMetricTelemetry(metricName, metricValue, properties, controllerType) end telemetryProps = {} telemetryProps["Computer"] = @@hostName - @@CustomProperties['ControllerType'] = controllerType # add common dimensions @@CustomProperties.each{ |k,v| telemetryProps[k]=v} # add passed-in dimensions if any diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index 7be173280..9e47e5a9e 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -22,7 +22,6 @@ class CAdvisorMetricsAPIClient @@txBytesTimeLast = nil @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i - @@ReplicasetControllerType = 'ReplicaSet' def initialize end @@ -143,7 +142,7 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met telemetryProps = {} telemetryProps['PodName'] = podName telemetryProps['ContainerName'] = containerName - ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) end end rescue => errorStr @@ -206,7 +205,7 @@ def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollec telemetryProps = {} telemetryProps['PodName'] = podName telemetryProps['ContainerName'] = containerName - ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) end end rescue => errorStr diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index 5fcd44063..903256f6d 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -14,7 +14,6 @@ class DockerApiClient @@ChunkSize = 4096 @@TimeoutInSeconds = 5 @@PluginName = 'ContainerInventory' - @@DaemonsetControllerType = 'DaemonSet' def initialize end @@ -46,7 +45,7 @@ def getResponse(request, isMultiJson, isVersion) return (isTimeOut)? nil : parseResponse(dockerResponse, isMultiJson) rescue => errorStr $log.warn("Socket call failed for request: #{request} error: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@DaemonsetControllerType) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -65,7 +64,7 @@ def parseResponse(dockerResponse, isMultiJson) end rescue => errorStr $log.warn("Json parsing for docker response failed: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@DaemonsetControllerType) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end return parsedJsonResponse end diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb index 6db461ef4..a38697741 100644 --- a/source/code/plugin/in_containerinventory.rb +++ b/source/code/plugin/in_containerinventory.rb @@ -20,7 +20,6 @@ def initialize require_relative 'ApplicationInsightsUtility' require_relative 'omslog' - @@DaemonsetControllerType = 'DaemonSet' end config_param :run_interval, :time, :default => '1m' @@ -239,7 +238,7 @@ def enumerate telemetryProperties = {} telemetryProperties['Computer'] = hostname telemetryProperties['ContainerCount'] = containerInventory.length - ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties, @@DaemonsetControllerType) + ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) end $log.info("in_container_inventory::enumerate : Processing complete - emitted stream @ #{Time.now.utc.iso8601}") end diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index ac565896d..b7be24510 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -17,7 +17,6 @@ def initialize require_relative 'omslog' require_relative 'ApplicationInsightsUtility' - @@ReplicasetControllerType = 'ReplicaSet' end config_param :run_interval, :time, :default => '1m' @@ -97,7 +96,7 @@ def enumerate(eventList = nil) rescue => errorStr $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -114,7 +113,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_events::run_periodic: enumerate Failed to retrieve kube events: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock @@ -134,7 +133,7 @@ def getEventQueryState rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end return eventQueryState end @@ -150,7 +149,7 @@ def writeEventQueryState(eventQueryState) rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index f11e429bb..85153b21c 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -18,7 +18,6 @@ def initialize require_relative 'oms_common' require_relative 'omslog' - @@ReplicasetControllerType = 'ReplicaSet' end config_param :run_interval, :time, :default => '1m' @@ -124,10 +123,10 @@ def enumerate if (timeDifferenceInMinutes >= 5) properties = {} properties["Computer"] = record["Computer"] - ApplicationInsightsUtility.sendMetricTelemetry("KubeletVersion", record["KubeletVersion"] , properties, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendMetricTelemetry("KubeletVersion", record["KubeletVersion"] , properties) capacityInfo = items['status']['capacity'] - ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"] , properties, @@ReplicasetControllerType) - ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"] , properties, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"] , properties) + ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"] , properties) telemetrySent = true end end @@ -144,7 +143,7 @@ def enumerate rescue => errorStr $log.warn "Failed to retrieve node inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -161,7 +160,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_nodes::run_periodic: enumerate Failed to retrieve node inventory: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 4b2cdbc83..eaf14b035 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -17,7 +17,6 @@ def initialize require_relative 'oms_common' require_relative 'omslog' - @@ReplicasetControllerType = 'ReplicaSet' end config_param :run_interval, :time, :default => '1m' @@ -68,7 +67,7 @@ def enumerate(podList = nil) rescue => errorStr $log.warn "Failed in enumerate pod inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -210,9 +209,9 @@ def parse_and_emit_records(podInventory, serviceList) end #podInventory block end router.emit_stream(@tag, eventStream) if eventStream if telemetryFlush == true - ApplicationInsightsUtility.sendHeartBeatEvent("KubePodInventory", @@ReplicasetControllerType) - ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory['items'].length , {}, @@ReplicasetControllerType) - ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length , {}, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendHeartBeatEvent("KubePodInventory") + ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory['items'].length , {}) + ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length , {}) @@podTelemetryTimeTracker = DateTime.now.to_time.to_i end @@istestvar = ENV['ISTEST'] @@ -222,7 +221,7 @@ def parse_and_emit_records(podInventory, serviceList) rescue => errorStr $log.warn "Failed in parse_and_emit_record pod inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end #begin block end end @@ -239,7 +238,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_podinventory::run_periodic: enumerate Failed to retrieve pod inventory: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock @@ -274,7 +273,7 @@ def getServiceNameFromLabels(namespace, labels, serviceList) rescue => errorStr $log.warn "Failed to retrieve service name from labels: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end return serviceName end diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb index 9d5186561..655beef59 100644 --- a/source/code/plugin/in_kube_services.rb +++ b/source/code/plugin/in_kube_services.rb @@ -16,8 +16,6 @@ def initialize require_relative 'omslog' require_relative 'ApplicationInsightsUtility' - @@ReplicasetControllerType = 'ReplicaSet' - end config_param :run_interval, :time, :default => '1m' @@ -74,7 +72,7 @@ def enumerate rescue => errorStr $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(e.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -91,7 +89,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_services::run_periodic: enumerate Failed to kube services: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr, @@ReplicasetControllerType) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock From 081ba8a12fbfbb4a1bb25ed30b6a0ab200be596a Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 20 Dec 2018 17:20:55 -0800 Subject: [PATCH 13/23] fixing bugs --- .../code/plugin/ApplicationInsightsUtility.rb | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index d2754c65a..2b2db673b 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -54,11 +54,7 @@ def initializeUtility() @@CustomProperties["ClusterName"] = clusterName @@CustomProperties["Region"] = ENV[@@EnvAksRegion] end - dockerInfo = DockerApiClient.dockerInfo - if (!dockerInfo.empty? && !dockerInfo.nil?) - @@CustomProperties['DockerVersion'] = dockerInfo['Version'] - @@CustomProperties['DockerApiVersion'] = dockerInfo['ApiVersion'] - end + getDockerInfo() @@CustomProperties['WorkspaceID'] = getWorkspaceId @@CustomProperties['AgentVersion'] = ENV[@@EnvAgentVersion] @@CustomProperties['ControllerType'] = ENV[@@EnvControllerType] @@ -72,6 +68,14 @@ def initializeUtility() end end + def getDockerInfo() + dockerInfo = DockerApiClient.dockerInfo + if (!dockerInfo.nil? && !dockerInfo.empty?) + @@CustomProperties['DockerVersion'] = dockerInfo['Version'] + @@CustomProperties['DockerApiVersion'] = dockerInfo['ApiVersion'] + end + end + def sendHeartBeatEvent(pluginName) begin eventName = pluginName + @@HeartBeat @@ -101,8 +105,10 @@ def sendCustomMetric(pluginName, properties) def sendExceptionTelemetry(errorStr) begin - if @@CustomProperties.empty? || @@CustomProperties.nil? || @@CustomProperties['DockerVersion'].nil? + if @@CustomProperties.empty? || @@CustomProperties.nil? initializeUtility() + elsif @@CustomProperties['DockerVersion'].nil? + getDockerInfo() end if !(@@Tc.nil?) @@Tc.track_exception errorStr , :properties => @@CustomProperties @@ -117,8 +123,10 @@ def sendExceptionTelemetry(errorStr) #Method to send heartbeat and container inventory count def sendTelemetry(pluginName, properties) begin - if @@CustomProperties.empty? || @@CustomProperties.nil? || @@CustomProperties['DockerVersion'].nil? + if @@CustomProperties.empty? || @@CustomProperties.nil? initializeUtility() + elsif @@CustomProperties['DockerVersion'].nil? + getDockerInfo() end @@CustomProperties['Computer'] = properties['Computer'] sendHeartBeatEvent(pluginName) @@ -135,8 +143,10 @@ def sendMetricTelemetry(metricName, metricValue, properties) $log.warn("SendMetricTelemetry: metricName is missing") return end - if @@CustomProperties.empty? || @@CustomProperties.nil? || @@CustomProperties['DockerVersion'].nil? + if @@CustomProperties.empty? || @@CustomProperties.nil? initializeUtility() + elsif @@CustomProperties['DockerVersion'].nil? + getDockerInfo() end telemetryProps = {} telemetryProps["Computer"] = @@hostName From f3577b81c32ab08ef2843b0f61f3620497c1e082 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 21 Dec 2018 11:36:09 -0800 Subject: [PATCH 14/23] changes to fixed type --- source/code/plugin/in_kube_events.rb | 8 +++++++- source/code/plugin/in_kube_services.rb | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index b7be24510..ad2a76f1a 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -88,7 +88,13 @@ def enumerate(eventList = nil) end record['ClusterName'] = KubernetesApiClient.getClusterName record['ClusterId'] = KubernetesApiClient.getClusterId - eventStream.add(emitTime, record) if record + #eventStream.add(emitTime, record) if record + wrapper = { + "DataType"=>"KUBE_EVENTS_BLOB", + "IPName"=>"ContainerInsights", + "DataItems"=>[record.each{|k,v| record[k]=v}] + } + eventStream.add(emitTime, wrapper) if wrapper end router.emit_stream(@tag, eventStream) if eventStream end diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb index 655beef59..be492afa9 100644 --- a/source/code/plugin/in_kube_services.rb +++ b/source/code/plugin/in_kube_services.rb @@ -65,7 +65,13 @@ def enumerate record['ClusterIP'] = items['spec']['clusterIP'] record['ServiceType'] = items['spec']['type'] # : Add ports and status fields - eventStream.add(emitTime, record) if record + #eventStream.add(emitTime, record) if record + wrapper = { + "DataType"=>"KUBE_SERVICES_BLOB", + "IPName"=>"ContainerInsights", + "DataItems"=>[record.each{|k,v| record[k]=v}] + } + eventStream.add(emitTime, wrapper) if wrapper end router.emit_stream(@tag, eventStream) if eventStream end From 5ea2d17f597ea9e7c0eeabb670edc554f16d7721 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 21 Dec 2018 11:42:19 -0800 Subject: [PATCH 15/23] removing comments --- source/code/plugin/in_kube_events.rb | 1 - source/code/plugin/in_kube_services.rb | 1 - 2 files changed, 2 deletions(-) diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index ad2a76f1a..660ecb1b6 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -88,7 +88,6 @@ def enumerate(eventList = nil) end record['ClusterName'] = KubernetesApiClient.getClusterName record['ClusterId'] = KubernetesApiClient.getClusterId - #eventStream.add(emitTime, record) if record wrapper = { "DataType"=>"KUBE_EVENTS_BLOB", "IPName"=>"ContainerInsights", diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb index be492afa9..6ca65e8b0 100644 --- a/source/code/plugin/in_kube_services.rb +++ b/source/code/plugin/in_kube_services.rb @@ -65,7 +65,6 @@ def enumerate record['ClusterIP'] = items['spec']['clusterIP'] record['ServiceType'] = items['spec']['type'] # : Add ports and status fields - #eventStream.add(emitTime, record) if record wrapper = { "DataType"=>"KUBE_SERVICES_BLOB", "IPName"=>"ContainerInsights", From 6d64894437d38043e4fb1f5e0424d60022de0818 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 21 Dec 2018 11:58:37 -0800 Subject: [PATCH 16/23] changes for fixed type --- installer/conf/kube.conf | 19 ++++++++++--------- source/code/plugin/in_kube_events.rb | 2 +- source/code/plugin/in_kube_services.rb | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 22c51ad0e..6331d257e 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -11,7 +11,7 @@ #Kubernetes events type kubeevents - tag oms.api.KubeEvents.CollectionTime + tag oms.containerinsights.KubeEvents run_interval 60s log_level debug @@ -26,7 +26,7 @@ #Kubernetes services type kubeservices - tag oms.api.KubeServices.CollectionTime + tag oms.containerinsights.KubeServices run_interval 60s log_level debug @@ -62,18 +62,19 @@ max_retry_wait 9m - - type out_oms_api + + type out_oms log_level debug - num_threads 5 + num_threads 5 buffer_chunk_limit 5m buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_kubeevents*.buffer + buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer buffer_queue_limit 10 - buffer_queue_full_action drop_oldest_chunk + buffer_queue_full_action drop_oldest_chunk flush_interval 20s retry_limit 10 retry_wait 30s + max_retry_wait 9m @@ -88,8 +89,8 @@ retry_wait 30s - - type out_oms_api + + type out_oms log_level debug num_threads 5 buffer_chunk_limit 20m diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index 660ecb1b6..309dd8034 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -20,7 +20,7 @@ def initialize end config_param :run_interval, :time, :default => '1m' - config_param :tag, :string, :default => "oms.api.KubeEvents.CollectionTime" + config_param :tag, :string, :default => "oms.containerinsights.KubeEvents" def configure (conf) super diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb index 6ca65e8b0..e1bb93f30 100644 --- a/source/code/plugin/in_kube_services.rb +++ b/source/code/plugin/in_kube_services.rb @@ -19,7 +19,7 @@ def initialize end config_param :run_interval, :time, :default => '1m' - config_param :tag, :string, :default => "oms.api.KubeServices.CollectionTime" + config_param :tag, :string, :default => "oms.containerinsights.KubeServices" def configure (conf) super From 5d22e5fd382adb428179d7a03c2c8abad8807251 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 28 Dec 2018 12:12:25 -0800 Subject: [PATCH 17/23] adding kubelet version as a dimension --- source/code/plugin/in_kube_nodes.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 85153b21c..a6908fc99 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -123,7 +123,7 @@ def enumerate if (timeDifferenceInMinutes >= 5) properties = {} properties["Computer"] = record["Computer"] - ApplicationInsightsUtility.sendMetricTelemetry("KubeletVersion", record["KubeletVersion"] , properties) + properties["KubeletVersion"] = record["KubeletVersion"] capacityInfo = items['status']['capacity'] ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"] , properties) ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"] , properties) From 3c2f791a1fd97a11be22e2e487416d223e7d9c58 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 4 Jan 2019 15:25:21 -0800 Subject: [PATCH 18/23] Excluding raw docker containers from container inventory --- source/code/plugin/DockerApiClient.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index 903256f6d..bbe7dc0b6 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -86,7 +86,10 @@ def listContainers() containers = getResponse(request, true, false) if !containers.nil? && !containers.empty? containers.each do |container| - ids.push(container['Id']) + labels = container['Labels'] + if !labels.nil? && !labels['io.kubernetes.pod.uid'].nil? + ids.push(container['Id']) + end end end return ids From 3f4aafb28210c0f247b323fd54bb2ee1313f4e2e Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 7 Jan 2019 13:16:12 -0800 Subject: [PATCH 19/23] making labels key case insensitive --- source/code/plugin/DockerApiClient.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index bbe7dc0b6..54ba7d531 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -86,7 +86,7 @@ def listContainers() containers = getResponse(request, true, false) if !containers.nil? && !containers.empty? containers.each do |container| - labels = container['Labels'] + labels = (!container['Labels'].nil?)? container['Labels'] : container['labels'] if !labels.nil? && !labels['io.kubernetes.pod.uid'].nil? ids.push(container['Id']) end From 4b353010b6e99937257b7b725d541379600258d3 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 7 Jan 2019 15:42:39 -0800 Subject: [PATCH 20/23] make poduid label case insensitive --- source/code/plugin/DockerApiClient.rb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index 54ba7d531..d04bf0589 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -87,8 +87,13 @@ def listContainers() if !containers.nil? && !containers.empty? containers.each do |container| labels = (!container['Labels'].nil?)? container['Labels'] : container['labels'] - if !labels.nil? && !labels['io.kubernetes.pod.uid'].nil? - ids.push(container['Id']) + if !labels.nil? + labelKeys = labels.keys + #Case insensitive lookup for pod uid label + keyValue = labelKeys.find {|k| 'io.kubernetes.pod.uid'.downcase == k.downcase} + if !labels[keyValue].nil? + ids.push(container['Id']) + end end end end From 1ad4419548b48600948dc1b467f2d2b50066926b Mon Sep 17 00:00:00 2001 From: rashmy Date: Tue, 8 Jan 2019 13:47:32 -0800 Subject: [PATCH 21/23] changes to exclude pause amd 64 containers --- source/code/plugin/DockerApiClient.rb | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index d04bf0589..5a46b5fdb 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -89,10 +89,18 @@ def listContainers() labels = (!container['Labels'].nil?)? container['Labels'] : container['labels'] if !labels.nil? labelKeys = labels.keys - #Case insensitive lookup for pod uid label - keyValue = labelKeys.find {|k| 'io.kubernetes.pod.uid'.downcase == k.downcase} - if !labels[keyValue].nil? - ids.push(container['Id']) + dockerTypeLabel = labelKeys.find {|k| 'io.kubernetes.docker.type'.downcase == k.downcase} + if !dockerTypeLabel.nil? + dockerTypeLabelValue = labels[dockerTypeLabel] + # Checking for 'io.kubernetes.docker.type' label for docker containers to exclude the pause-amd64 containers + if !(dockerTypeLabelValue.downcase == "podsandbox".downcase) + # Case insensitive lookup for pod uid label - This is to exclude containers created using docker run and only include containers that + # are created in the pods for ContainerInventory + keyValue = labelKeys.find {|k| 'io.kubernetes.pod.uid'.downcase == k.downcase} + if !labels[keyValue].nil? + ids.push(container['Id']) + end + end end end end From 7118d0221a5b34196f87fced139a5d24f8c7d39b Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 11 Jan 2019 11:54:33 -0800 Subject: [PATCH 22/23] fixing indentation so that kube.conf contents can be used in config map in the yaml --- installer/conf/kube.conf | 270 +++++++++++++++++++-------------------- 1 file changed, 135 insertions(+), 135 deletions(-) diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 6331d257e..164865022 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -1,148 +1,148 @@ -# Fluentd config file for OMS Docker - cluster components (kubeAPI) + # Fluentd config file for OMS Docker - cluster components (kubeAPI) -#Kubernetes pod inventory - - type kubepodinventory - tag oms.containerinsights.KubePodInventory - run_interval 60s - log_level debug - + #Kubernetes pod inventory + + type kubepodinventory + tag oms.containerinsights.KubePodInventory + run_interval 60s + log_level debug + -#Kubernetes events - - type kubeevents - tag oms.containerinsights.KubeEvents - run_interval 60s - log_level debug - + #Kubernetes events + + type kubeevents + tag oms.containerinsights.KubeEvents + run_interval 60s + log_level debug + -#Kubernetes logs - - type kubelogs - tag oms.api.KubeLogs - run_interval 60s - + #Kubernetes logs + + type kubelogs + tag oms.api.KubeLogs + run_interval 60s + -#Kubernetes services - - type kubeservices - tag oms.containerinsights.KubeServices - run_interval 60s - log_level debug - + #Kubernetes services + + type kubeservices + tag oms.containerinsights.KubeServices + run_interval 60s + log_level debug + -#Kubernetes Nodes - - type kubenodeinventory - tag oms.containerinsights.KubeNodeInventory - run_interval 60s - log_level debug - + #Kubernetes Nodes + + type kubenodeinventory + tag oms.containerinsights.KubeNodeInventory + run_interval 60s + log_level debug + -#Kubernetes perf - - type kubeperf - tag oms.api.KubePerf - run_interval 60s - log_level debug - + #Kubernetes perf + + type kubeperf + tag oms.api.KubePerf + run_interval 60s + log_level debug + - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 5m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer - buffer_queue_limit 10 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 5m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer + buffer_queue_limit 10 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + - - type out_oms_api - log_level debug - buffer_chunk_limit 10m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_kubernetes_logs*.buffer - buffer_queue_limit 10 - flush_interval 20s - retry_limit 10 - retry_wait 30s - + + type out_oms_api + log_level debug + buffer_chunk_limit 10m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_api_kubernetes_logs*.buffer + buffer_queue_limit 10 + flush_interval 20s + retry_limit 10 + retry_wait 30s + - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + - - type out_oms_api - log_level debug - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer - buffer_queue_limit 20 - flush_interval 20s - retry_limit 10 - retry_wait 15s - max_retry_wait 9m - + + type out_oms_api + log_level debug + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer + buffer_queue_limit 20 + flush_interval 20s + retry_limit 10 + retry_wait 15s + max_retry_wait 9m + - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + \ No newline at end of file From d38aaf9d7e0c7cae311ab967c04a06bc79ce1650 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 11 Jan 2019 12:03:20 -0800 Subject: [PATCH 23/23] updating readme to fix date and agent version --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5c65308fb..dd55f810e 100644 --- a/README.md +++ b/README.md @@ -11,21 +11,21 @@ additional questions or comments. Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) -### 10/09/2018 - Version microsoft/oms:ciprod01092019 +### 01/09/2018 - Version microsoft/oms:ciprod01092019-2 - Omsagent - 1.8.1.256 (nov 2018 release) - Persist fluentbit state between container restarts - Populate 'TimeOfCommand' for agent ingest time for container logs - Get node cpu usage from cpuusagenanoseconds (and convert to cpuusgaenanocores) - Container Node Inventory - move to fluentD from OMI - Mount docker.sock (Daemon set) as /var/run/host -- Liveness probe (Daemon set) - check for omsagent user permissions in docker.sock and update as necessary (required when docker daemon gets restarted) +- Add omsagent user to docker group - Move to fixed type for kubeevents & kubeservices - Disable collecting ENV for our oms agent container (daemonset & replicaset) - Disable container inventory collection for 'sandbox' containers & non kubernetes managed containers - Agent telemetry - ContainerLogsAgentSideLatencyMs - Agent telemetry - PodCount - Agent telemetry - ControllerCount -- Agent telemetry - K8S Version +- Agent telemetry - K8S Version - Agent telemetry - NodeCoreCapacity - Agent telemetry - NodeMemoryCapacity - Agent telemetry - KubeEvents (exceptions)