From 82d91c2da509c4f702e11626997390c86c4b0cc6 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sun, 29 Nov 2020 15:03:31 -0800 Subject: [PATCH 01/45] optimize kpi --- kubernetes/omsagent.yaml | 17 +- source/plugins/ruby/KubernetesApiClient.rb | 387 ++++++----- source/plugins/ruby/in_kube_podinventory.rb | 714 +++++++++++--------- 3 files changed, 611 insertions(+), 507 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 2155361e9..85c383ec2 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -134,7 +134,7 @@ data: type out_oms log_level debug - num_threads 5 + num_threads 2 buffer_chunk_limit 4m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer @@ -149,7 +149,7 @@ data: type out_oms log_level debug - num_threads 5 + num_threads 2 buffer_chunk_limit 4m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer @@ -179,7 +179,7 @@ data: type out_oms log_level debug - num_threads 5 + num_threads 2 buffer_chunk_limit 4m buffer_type file buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer @@ -208,7 +208,7 @@ data: type out_oms log_level debug - num_threads 5 + num_threads 2 buffer_chunk_limit 4m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer @@ -564,6 +564,15 @@ spec: periodSeconds: 60 affinity: nodeAffinity: + # affinity to schedule on to ephemeral os node if its available + # preferredDuringSchedulingIgnoredDuringExecution: + # - weight: 1 + # preference: + # matchExpressions: + # - key: storageprofile + # operator: NotIn + # values: + # - managed requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - labelSelector: diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index 073eb0417..13c084a5c 100644 --- a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -172,6 +172,10 @@ def isAROV3Cluster return @@IsAROV3Cluster end + def 
isAROv3MasterOrInfraPod(nodeName) + return isAROV3Cluster() && (!nodeName.nil? && (nodeName.downcase.start_with?("infra-") || nodeName.downcase.start_with?("master-"))) + end + def isNodeMaster return @@IsNodeMaster if !@@IsNodeMaster.nil? @@IsNodeMaster = false @@ -276,7 +280,8 @@ def getPods(namespace) def getWindowsNodes winNodes = [] begin - resourceUri = getNodesResourceUri("nodes") + # get only windows nodes + resourceUri = getNodesResourceUri("nodes?labelSelector=kubernetes.io%2Fos%3Dwindows") nodeInventory = JSON.parse(getKubeResourceInfo(resourceUri).body) @Log.info "KubernetesAPIClient::getWindowsNodes : Got nodes from kube api" # Resetting the windows node cache @@ -396,42 +401,67 @@ def getPodUid(podNameSpace, podMetadata) return podUid end - def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) + def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) metricItems = [] begin clusterId = getClusterId - metricInfo = metricJSON - metricInfo["items"].each do |pod| - podNameSpace = pod["metadata"]["namespace"] - podUid = getPodUid(podNameSpace, pod["metadata"]) - if podUid.nil? - next - end - - # For ARO, skip the pods scheduled on to master or infra nodes to ingest - if isAROV3Cluster() && !pod["spec"].nil? && !pod["spec"]["nodeName"].nil? && - (pod["spec"]["nodeName"].downcase.start_with?("infra-") || - pod["spec"]["nodeName"].downcase.start_with?("master-")) - next - end + podNameSpace = pod["metadata"]["namespace"] + podUid = getPodUid(podNameSpace, pod["metadata"]) + if podUid.nil? + return metricItems + end - podContainers = [] - if !pod["spec"]["containers"].nil? && !pod["spec"]["containers"].empty? - podContainers = podContainers + pod["spec"]["containers"] - end - # Adding init containers to the record list as well. - if !pod["spec"]["initContainers"].nil? 
&& !pod["spec"]["initContainers"].empty? - podContainers = podContainers + pod["spec"]["initContainers"] - end + nodeName = "" + #for unscheduled (non-started) pods nodeName does NOT exist + if !pod["spec"]["nodeName"].nil? + nodeName = pod["spec"]["nodeName"] + end + # For ARO, skip the pods scheduled on to master or infra nodes to ingest + if isAROv3MasterOrInfraPod(nodeName) + return metricItems + end - if (!podContainers.nil? && !podContainers.empty? && !pod["spec"]["nodeName"].nil?) - nodeName = pod["spec"]["nodeName"] - podContainers.each do |container| - containerName = container["name"] - #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z - if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?) - metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect]) + podContainers = [] + if !pod["spec"]["containers"].nil? && !pod["spec"]["containers"].empty? + podContainers = podContainers + pod["spec"]["containers"] + end + # Adding init containers to the record list as well. + if !pod["spec"]["initContainers"].nil? && !pod["spec"]["initContainers"].empty? + podContainers = podContainers + pod["spec"]["initContainers"] + end + if (!podContainers.nil? && !podContainers.empty? && !pod["spec"]["nodeName"].nil?) + podContainers.each do |container| + containerName = container["name"] + #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?) 
+ metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect]) + + metricItem = {} + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = nodeName + # Adding this so that it is not set by base omsagent since it was not set earlier and being set by base omsagent + metricProps["Computer"] = nodeName + metricProps["ObjectName"] = "K8SContainer" + metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) + #No container level limit for the given metric, so default to node level limit + else + nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect + if (metricCategory == "limits" && @@NodeMetrics.has_key?(nodeMetricsHashKey)) + metricValue = @@NodeMetrics[nodeMetricsHashKey] + #@Log.info("Limits not set for container #{clusterId + "/" + podUid + "/" + containerName} using node level limits: #{nodeMetricsHashKey}=#{metricValue} ") metricItem = {} metricItem["DataItems"] = [] @@ -451,32 +481,6 @@ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName metricProps["Collections"].push(metricCollections) metricItem["DataItems"].push(metricProps) metricItems.push(metricItem) - #No container level limit for the given metric, so default to node level limit - else - nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect - if (metricCategory == "limits" && @@NodeMetrics.has_key?(nodeMetricsHashKey)) - metricValue = @@NodeMetrics[nodeMetricsHashKey] - #@Log.info("Limits not set for container #{clusterId + "/" + podUid + "/" + containerName} using node 
level limits: #{nodeMetricsHashKey}=#{metricValue} ") - metricItem = {} - metricItem["DataItems"] = [] - - metricProps = {} - metricProps["Timestamp"] = metricTime - metricProps["Host"] = nodeName - # Adding this so that it is not set by base omsagent since it was not set earlier and being set by base omsagent - metricProps["Computer"] = nodeName - metricProps["ObjectName"] = "K8SContainer" - metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName - - metricProps["Collections"] = [] - metricCollections = {} - metricCollections["CounterName"] = metricNametoReturn - metricCollections["Value"] = metricValue - - metricProps["Collections"].push(metricCollections) - metricItem["DataItems"].push(metricProps) - metricItems.push(metricItem) - end end end end @@ -488,78 +492,74 @@ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName return metricItems end #getContainerResourceRequestAndLimits - def getContainerResourceRequestsAndLimitsAsInsightsMetrics(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) + def getContainerResourceRequestsAndLimitsAsInsightsMetrics(pod, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) metricItems = [] begin clusterId = getClusterId clusterName = getClusterName - - metricInfo = metricJSON - metricInfo["items"].each do |pod| - podNameSpace = pod["metadata"]["namespace"] - if podNameSpace.eql?("kube-system") && !pod["metadata"].key?("ownerReferences") - # The above case seems to be the only case where you have horizontal scaling of pods - # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash - # instead of the actual poduid. Since this uid is not being surface into the UX - # its ok to use this. - # Use kubernetes.io/config.hash to be able to correlate with cadvisor data - if pod["metadata"]["annotations"].nil? 
- next - else - podUid = pod["metadata"]["annotations"]["kubernetes.io/config.hash"] - end + podNameSpace = pod["metadata"]["namespace"] + if podNameSpace.eql?("kube-system") && !pod["metadata"].key?("ownerReferences") + # The above case seems to be the only case where you have horizontal scaling of pods + # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash + # instead of the actual poduid. Since this uid is not being surface into the UX + # its ok to use this. + # Use kubernetes.io/config.hash to be able to correlate with cadvisor data + if pod["metadata"]["annotations"].nil? + return metricItems else - podUid = pod["metadata"]["uid"] + podUid = pod["metadata"]["annotations"]["kubernetes.io/config.hash"] end + else + podUid = pod["metadata"]["uid"] + end - podContainers = [] - if !pod["spec"]["containers"].nil? && !pod["spec"]["containers"].empty? - podContainers = podContainers + pod["spec"]["containers"] - end - # Adding init containers to the record list as well. - if !pod["spec"]["initContainers"].nil? && !pod["spec"]["initContainers"].empty? - podContainers = podContainers + pod["spec"]["initContainers"] - end + podContainers = [] + if !pod["spec"]["containers"].nil? && !pod["spec"]["containers"].empty? + podContainers = podContainers + pod["spec"]["containers"] + end + # Adding init containers to the record list as well. + if !pod["spec"]["initContainers"].nil? && !pod["spec"]["initContainers"].empty? + podContainers = podContainers + pod["spec"]["initContainers"] + end - if (!podContainers.nil? && !podContainers.empty?) - if (!pod["spec"]["nodeName"].nil?) - nodeName = pod["spec"]["nodeName"] + if (!podContainers.nil? && !podContainers.empty?) + if (!pod["spec"]["nodeName"].nil?) + nodeName = pod["spec"]["nodeName"] + else + nodeName = "" #unscheduled pod. 
We still want to collect limits & requests for GPU + end + podContainers.each do |container| + metricValue = nil + containerName = container["name"] + #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?) + metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect]) else - nodeName = "" #unscheduled pod. We still want to collect limits & requests for GPU - end - podContainers.each do |container| - metricValue = nil - containerName = container["name"] - #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z - if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?) - metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect]) - else - #No container level limit for the given metric, so default to node level limit for non-gpu metrics - if (metricNameToCollect.downcase != "nvidia.com/gpu") && (metricNameToCollect.downcase != "amd.com/gpu") - nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect - metricValue = @@NodeMetrics[nodeMetricsHashKey] - end - end - if (!metricValue.nil?) 
- metricItem = {} - metricItem["CollectionTime"] = metricTime - metricItem["Computer"] = nodeName - metricItem["Name"] = metricNametoReturn - metricItem["Value"] = metricValue - metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN - metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_GPU_NAMESPACE - - metricTags = {} - metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = clusterId - metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = clusterName - metricTags[Constants::INSIGHTSMETRICS_TAGS_CONTAINER_NAME] = podUid + "/" + containerName - #metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = podNameSpace - - metricItem["Tags"] = metricTags - - metricItems.push(metricItem) + #No container level limit for the given metric, so default to node level limit for non-gpu metrics + if (metricNameToCollect.downcase != "nvidia.com/gpu") && (metricNameToCollect.downcase != "amd.com/gpu") + nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect + metricValue = @@NodeMetrics[nodeMetricsHashKey] end end + if (!metricValue.nil?) 
+ metricItem = {} + metricItem["CollectionTime"] = metricTime + metricItem["Computer"] = nodeName + metricItem["Name"] = metricNametoReturn + metricItem["Value"] = metricValue + metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN + metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_GPU_NAMESPACE + + metricTags = {} + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = clusterId + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = clusterName + metricTags[Constants::INSIGHTSMETRICS_TAGS_CONTAINER_NAME] = podUid + "/" + containerName + #metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = podNameSpace + + metricItem["Tags"] = metricTags + + metricItems.push(metricItem) + end end end rescue => error @@ -569,92 +569,82 @@ def getContainerResourceRequestsAndLimitsAsInsightsMetrics(metricJSON, metricCat return metricItems end #getContainerResourceRequestAndLimitsAsInsightsMetrics - def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) - metricItems = [] + def parseNodeLimits(node, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) + metricItem = {} begin - metricInfo = metricJSON clusterId = getClusterId #Since we are getting all node data at the same time and kubernetes doesnt specify a timestamp for the capacity and allocation metrics, #if we are coming up with the time it should be same for all nodes #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z - metricInfo["items"].each do |node| - if (!node["status"][metricCategory].nil?) 
- - # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" - metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect]) - - metricItem = {} - metricItem["DataItems"] = [] - metricProps = {} - metricProps["Timestamp"] = metricTime - metricProps["Host"] = node["metadata"]["name"] - # Adding this so that it is not set by base omsagent since it was not set earlier and being set by base omsagent - metricProps["Computer"] = node["metadata"]["name"] - metricProps["ObjectName"] = "K8SNode" - metricProps["InstanceName"] = clusterId + "/" + node["metadata"]["name"] - metricProps["Collections"] = [] - metricCollections = {} - metricCollections["CounterName"] = metricNametoReturn - metricCollections["Value"] = metricValue - - metricProps["Collections"].push(metricCollections) - metricItem["DataItems"].push(metricProps) - metricItems.push(metricItem) - #push node level metrics to a inmem hash so that we can use it looking up at container level. - #Currently if container level cpu & memory limits are not defined we default to node level limits - @@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue - #@Log.info ("Node metric hash: #{@@NodeMetrics}") - end + if (!node["status"][metricCategory].nil?) 
+ # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" + metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect]) + + metricItem["DataItems"] = [] + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = node["metadata"]["name"] + # Adding this so that it is not set by base omsagent since it was not set earlier and being set by base omsagent + metricProps["Computer"] = node["metadata"]["name"] + metricProps["ObjectName"] = "K8SNode" + metricProps["InstanceName"] = clusterId + "/" + node["metadata"]["name"] + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + + #push node level metrics to a inmem hash so that we can use it looking up at container level. + #Currently if container level cpu & memory limits are not defined we default to node level limits + @@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue + #@Log.info ("Node metric hash: #{@@NodeMetrics}") end rescue => error @Log.warn("parseNodeLimits failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") end - return metricItems + return metricItem end #parseNodeLimits - def parseNodeLimitsAsInsightsMetrics(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) - metricItems = [] + def parseNodeLimitsAsInsightsMetrics(node, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) + metricItem = {} begin - metricInfo = metricJSON - clusterId = getClusterId - clusterName = getClusterName #Since we are getting all node data at the same time and kubernetes doesnt specify a timestamp for the capacity and allocation metrics, 
#if we are coming up with the time it should be same for all nodes #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z - metricInfo["items"].each do |node| - if (!node["status"][metricCategory].nil?) && (!node["status"][metricCategory][metricNameToCollect].nil?) - - # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" or "amd.com/gpu" or "nvidia.com/gpu" - metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect]) - - metricItem = {} - metricItem["CollectionTime"] = metricTime - metricItem["Computer"] = node["metadata"]["name"] - metricItem["Name"] = metricNametoReturn - metricItem["Value"] = metricValue - metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN - metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_GPU_NAMESPACE - - metricTags = {} - metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = clusterId - metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = clusterName - metricTags[Constants::INSIGHTSMETRICS_TAGS_GPU_VENDOR] = metricNameToCollect - - metricItem["Tags"] = metricTags - - metricItems.push(metricItem) - #push node level metrics (except gpu ones) to a inmem hash so that we can use it looking up at container level. - #Currently if container level cpu & memory limits are not defined we default to node level limits - if (metricNameToCollect.downcase != "nvidia.com/gpu") && (metricNameToCollect.downcase != "amd.com/gpu") - @@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue - #@Log.info ("Node metric hash: #{@@NodeMetrics}") - end + if (!node["status"][metricCategory].nil?) && (!node["status"][metricCategory][metricNameToCollect].nil?) 
+ clusterId = getClusterId + clusterName = getClusterName + + # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" or "amd.com/gpu" or "nvidia.com/gpu" + metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect]) + + metricItem["CollectionTime"] = metricTime + metricItem["Computer"] = node["metadata"]["name"] + metricItem["Name"] = metricNametoReturn + metricItem["Value"] = metricValue + metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN + metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_GPU_NAMESPACE + + metricTags = {} + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = clusterId + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = clusterName + metricTags[Constants::INSIGHTSMETRICS_TAGS_GPU_VENDOR] = metricNameToCollect + + metricItem["Tags"] = metricTags + + #push node level metrics (except gpu ones) to a inmem hash so that we can use it looking up at container level. + #Currently if container level cpu & memory limits are not defined we default to node level limits + if (metricNameToCollect.downcase != "nvidia.com/gpu") && (metricNameToCollect.downcase != "amd.com/gpu") + @@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue + #@Log.info ("Node metric hash: #{@@NodeMetrics}") end end rescue => error @Log.warn("parseNodeLimitsAsInsightsMetrics failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") end - return metricItems + return metricItem end def getMetricNumericValue(metricName, metricVal) @@ -777,5 +767,34 @@ def getKubeAPIServerUrl end return apiServerUrl end + + def getKubeServicesInventoryRecords(serviceList, batchTime = Time.utc.iso8601) + kubeServiceRecords = [] + begin + if (!serviceList.nil? && !serviceList.empty?) 
+ servicesCount = serviceList["items"].length + @Log.info("KubernetesApiClient::getKubeServicesInventoryRecords : number of services in serviceList #{servicesCount} @ #{Time.now.utc.iso8601}") + servicesSizeInKB = (serviceList["items"].to_s.length) / 1024 + @Log.info("KubernetesApiClient::getKubeServicesInventoryRecords : size of serviceList in KB #{servicesSizeInKB} @ #{Time.now.utc.iso8601}") + serviceList["items"].each do |item| + kubeServiceRecord = {} + kubeServiceRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + kubeServiceRecord["ServiceName"] = item["metadata"]["name"] + kubeServiceRecord["Namespace"] = item["metadata"]["namespace"] + kubeServiceRecord["SelectorLabels"] = [item["spec"]["selector"]] + # add these before emit to avoid memory foot print + # kubeServiceRecord["ClusterId"] = KubernetesApiClient.getClusterId + # kubeServiceRecord["ClusterName"] = KubernetesApiClient.getClusterName + kubeServiceRecord["ClusterIP"] = item["spec"]["clusterIP"] + kubeServiceRecord["ServiceType"] = item["spec"]["type"] + kubeServiceRecords.push(kubeServiceRecord.dup) + end + end + rescue => errorStr + @Log.warn "KubernetesApiClient::getKubeServicesInventoryRecords:Failed with an error : #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + return kubeServiceRecords + end end end diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index bba3e920f..0eead7782 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -2,7 +2,7 @@ # frozen_string_literal: true module Fluent - require_relative "podinventory_to_mdm" + require_relative "podinventory_to_mdm" class Kube_PodInventory_Input < Input Plugin.register_input("kubepodinventory", self) @@ -19,7 +19,7 @@ def initialize require "yajl" require "set" require "time" - + require_relative "kubernetes_container_inventory" require_relative 
"KubernetesApiClient" require_relative "ApplicationInsightsUtility" @@ -27,11 +27,18 @@ def initialize require_relative "omslog" require_relative "constants" + @PODS_EMIT_STREAM = true + @CONTAINER_PERF_EMIT_STREAM = true + @GPU_PERF_EMIT_STREAM = true + @SERVICES_EMIT_STREAM = true + @PODS_CHUNK_SIZE = "1500" @podCount = 0 @controllerSet = Set.new [] @winContainerCount = 0 @controllerData = {} + # 0 indicates no batch enabled for stream emit + @PODS_EMIT_STREAM_BATCH_SIZE = 0 end config_param :run_interval, :time, :default => 60 @@ -44,6 +51,36 @@ def configure(conf) def start if @run_interval + if !ENV["PODS_EMIT_STREAM"].nil? && !ENV["PODS_EMIT_STREAM"].empty? + @PODS_EMIT_STREAM = ENV["PODS_EMIT_STREAM"].to_s.downcase == "true" ? true : false + end + $log.info("in_kube_podinventory::start : PODS_EMIT_STREAM @ #{@PODS_EMIT_STREAM}") + + if !ENV["SERVICES_EMIT_STREAM"].nil? && !ENV["SERVICES_EMIT_STREAM"].empty? + @SERVICES_EMIT_STREAM = ENV["SERVICES_EMIT_STREAM"].to_s.downcase == "true" ? true : false + end + $log.info("in_kube_podinventory::start : SERVICES_EMIT_STREAM @ #{@SERVICES_EMIT_STREAM}") + + if !ENV["CONTAINER_PERF_EMIT_STREAM"].nil? && !ENV["CONTAINER_PERF_EMIT_STREAM"].empty? + @CONTAINER_PERF_EMIT_STREAM = ENV["CONTAINER_PERF_EMIT_STREAM"].to_s.downcase == "true" ? true : false + end + $log.info("in_kube_podinventory::start : CONTAINER_PERF_EMIT_STREAM @ #{@CONTAINER_PERF_EMIT_STREAM}") + + if !ENV["GPU_PERF_EMIT_STREAM"].nil? && !ENV["GPU_PERF_EMIT_STREAM"].empty? + @GPU_PERF_EMIT_STREAM = ENV["GPU_PERF_EMIT_STREAM"].to_s.downcase == "true" ? true : false + end + $log.info("in_kube_podinventory::start : GPU_PERF_EMIT_STREAM @ #{@GPU_PERF_EMIT_STREAM}") + + if !ENV["PODS_CHUNK_SIZE"].nil? && !ENV["PODS_CHUNK_SIZE"].empty? + @PODS_CHUNK_SIZE = ENV["PODS_CHUNK_SIZE"] + end + $log.info("in_kube_podinventory::start : PODS_CHUNK_SIZE @ #{@PODS_CHUNK_SIZE}") + + if !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].nil? && !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].empty? 
+ @PODS_EMIT_STREAM_BATCH_SIZE = ENV["PODS_EMIT_STREAM_BATCH_SIZE"].to_i + end + $log.info("in_kube_podinventory::start : PODS_EMIT_STREAM_BATCH_SIZE @ #{@PODS_EMIT_STREAM_BATCH_SIZE}") + @finished = false @condition = ConditionVariable.new @mutex = Mutex.new @@ -72,6 +109,7 @@ def enumerate(podList = nil) @controllerData = {} currentTime = Time.now batchTime = currentTime.utc.iso8601 + serviceRecords = [] # Get services first so that we dont need to make a call for very chunk $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") @@ -84,6 +122,9 @@ def enumerate(podList = nil) serviceList = Yajl::Parser.parse(StringIO.new(serviceInfo.body)) $log.info("in_kube_podinventory::enumerate:End:Parsing services data using yajl @ #{Time.now.utc.iso8601}") serviceInfo = nil + # service inventory records much smaller size and fixed compared to serviceList + serviceRecords = KubernetesApiClient.getKubeServicesInventoryRecords(serviceList, batchTime) + serviceList = nil end # Initializing continuation token to nil @@ -92,7 +133,7 @@ def enumerate(podList = nil) continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}") $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) - parse_and_emit_records(podInventory, serviceList, continuationToken, batchTime) + parse_and_emit_records(podInventory, serviceRecords, continuationToken, batchTime) else $log.warn "in_kube_podinventory::enumerate:Received empty podInventory" end @@ -101,7 +142,7 @@ def enumerate(podList = nil) while (!continuationToken.nil? && !continuationToken.empty?) 
continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}&continue=#{continuationToken}") if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) - parse_and_emit_records(podInventory, serviceList, continuationToken, batchTime) + parse_and_emit_records(podInventory, serviceRecords, continuationToken, batchTime) else $log.warn "in_kube_podinventory::enumerate:Received empty podInventory" end @@ -109,7 +150,7 @@ def enumerate(podList = nil) # Setting these to nil so that we dont hold memory until GC kicks in podInventory = nil - serviceList = nil + serviceRecords = nil # Adding telemetry to send pod telemetry every 5 minutes timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs @@ -137,258 +178,151 @@ def enumerate(podList = nil) $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end - end + end - def parse_and_emit_records(podInventory, serviceList, continuationToken, batchTime = Time.utc.iso8601) + def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batchTime = Time.utc.iso8601) currentTime = Time.now emitTime = currentTime.to_f #batchTime = currentTime.utc.iso8601 eventStream = MultiEventStream.new + kubePerfEventStream = MultiEventStream.new + insightsMetricsEventStream = MultiEventStream.new @@istestvar = ENV["ISTEST"] begin #begin block start # Getting windows nodes from kubeapi winNodes = KubernetesApiClient.getWindowsNodesArray - - podInventory["items"].each do |items| #podInventory block start - containerInventoryRecords = [] - records = [] - record = {} - record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated - record["Name"] = items["metadata"]["name"] - podNameSpace = items["metadata"]["namespace"] - - # For ARO v3 cluster, skip the pods scheduled on to master or infra nodes - 
if KubernetesApiClient.isAROV3Cluster && !items["spec"].nil? && !items["spec"]["nodeName"].nil? && - (items["spec"]["nodeName"].downcase.start_with?("infra-") || - items["spec"]["nodeName"].downcase.start_with?("master-")) - next - end - - podUid = KubernetesApiClient.getPodUid(podNameSpace, items["metadata"]) - if podUid.nil? - next - end - record["PodUid"] = podUid - record["PodLabel"] = [items["metadata"]["labels"]] - record["Namespace"] = podNameSpace - record["PodCreationTimeStamp"] = items["metadata"]["creationTimestamp"] - #for unscheduled (non-started) pods startTime does NOT exist - if !items["status"]["startTime"].nil? - record["PodStartTime"] = items["status"]["startTime"] - else - record["PodStartTime"] = "" - end - #podStatus - # the below is for accounting 'NodeLost' scenario, where-in the pod(s) in the lost node is still being reported as running - podReadyCondition = true - if !items["status"]["reason"].nil? && items["status"]["reason"] == "NodeLost" && !items["status"]["conditions"].nil? - items["status"]["conditions"].each do |condition| - if condition["type"] == "Ready" && condition["status"] == "False" - podReadyCondition = false - break - end + podInventory["items"].each do |item| #podInventory block start + # pod inventory records + podInventoryRecords = getPodInventoryRecords(item, serviceRecords, batchTime) + podInventoryRecords.each do |record| + if !record.nil? + wrapper = { + "DataType" => "KUBE_POD_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [record.each { |k, v| record[k] = v }], + } + eventStream.add(emitTime, wrapper) if wrapper + @inventoryToMdmConvertor.process_pod_inventory_record(wrapper) end end - - if podReadyCondition == false - record["PodStatus"] = "Unknown" - # ICM - https://portal.microsofticm.com/imp/v3/incidents/details/187091803/home - elsif !items["metadata"]["deletionTimestamp"].nil? && !items["metadata"]["deletionTimestamp"].empty? 
- record["PodStatus"] = Constants::POD_STATUS_TERMINATING - else - record["PodStatus"] = items["status"]["phase"] - end - #for unscheduled (non-started) pods podIP does NOT exist - if !items["status"]["podIP"].nil? - record["PodIp"] = items["status"]["podIP"] - else - record["PodIp"] = "" - end - #for unscheduled (non-started) pods nodeName does NOT exist - if !items["spec"]["nodeName"].nil? - record["Computer"] = items["spec"]["nodeName"] - else - record["Computer"] = "" - end - # Setting this flag to true so that we can send ContainerInventory records for containers # on windows nodes and parse environment variables for these containers if winNodes.length > 0 - if (!record["Computer"].empty? && (winNodes.include? record["Computer"])) + nodeName = "" + if !item["spec"]["nodeName"].nil? + nodeName = item["spec"]["nodeName"] + end + if (!nodeName.empty? && (winNodes.include? nodeName)) clusterCollectEnvironmentVar = ENV["AZMON_CLUSTER_COLLECT_ENV_VAR"] #Generate ContainerInventory records for windows nodes so that we can get image and image tag in property panel - containerInventoryRecordsInPodItem = KubernetesContainerInventory.getContainerInventoryRecords(items, batchTime, clusterCollectEnvironmentVar, true) - containerInventoryRecordsInPodItem.each do |containerRecord| - containerInventoryRecords.push(containerRecord) - end + containerInventoryRecords = KubernetesContainerInventory.getContainerInventoryRecords(item, batchTime, clusterCollectEnvironmentVar, true) + # Send container inventory records for containers on windows nodes + @winContainerCount += containerInventoryRecords.length + containerInventoryRecords.each do |cirecord| + if !cirecord.nil? 
+ ciwrapper = { + "DataType" => "CONTAINER_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [cirecord.each { |k, v| cirecord[k] = v }], + } + eventStream.add(emitTime, ciwrapper) if ciwrapper + end + end end end - record["ClusterId"] = KubernetesApiClient.getClusterId - record["ClusterName"] = KubernetesApiClient.getClusterName - record["ServiceName"] = getServiceNameFromLabels(items["metadata"]["namespace"], items["metadata"]["labels"], serviceList) - - if !items["metadata"]["ownerReferences"].nil? - record["ControllerKind"] = items["metadata"]["ownerReferences"][0]["kind"] - record["ControllerName"] = items["metadata"]["ownerReferences"][0]["name"] - @controllerSet.add(record["ControllerKind"] + record["ControllerName"]) - #Adding controller kind to telemetry ro information about customer workload - if (@controllerData[record["ControllerKind"]].nil?) - @controllerData[record["ControllerKind"]] = 1 - else - controllerValue = @controllerData[record["ControllerKind"]] - @controllerData[record["ControllerKind"]] += 1 + if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && eventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE + if @PODS_EMIT_STREAM + $log.info("in_kube_podinventory::parse_and_emit_records: number of pod inventory records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) + $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + router.emit_stream(@tag, eventStream) if eventStream end + eventStream = MultiEventStream.new end - podRestartCount = 0 - record["PodRestartCount"] = 0 - #Invoke the helper method to compute ready/not ready mdm metric - @inventoryToMdmConvertor.process_record_for_pods_ready_metric(record["ControllerName"], record["Namespace"], items["status"]["conditions"]) + #container perf records + containerMetricDataItems = [] + containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(item, "requests", "cpu", "cpuRequestNanoCores", batchTime)) + containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(item, "requests", "memory", "memoryRequestBytes", batchTime)) + containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(item, "limits", "cpu", "cpuLimitNanoCores", batchTime)) + containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(item, "limits", "memory", "memoryLimitBytes", batchTime)) - podContainers = [] - if items["status"].key?("containerStatuses") && !items["status"]["containerStatuses"].empty? - podContainers = podContainers + items["status"]["containerStatuses"] - end - # Adding init containers to the record list as well. - if items["status"].key?("initContainerStatuses") && !items["status"]["initContainerStatuses"].empty? - podContainers = podContainers + items["status"]["initContainerStatuses"] + containerMetricDataItems.each do |record| + record["DataType"] = "LINUX_PERF_BLOB" + record["IPName"] = "LogManagement" + kubePerfEventStream.add(emitTime, record) if record end - # if items["status"].key?("containerStatuses") && !items["status"]["containerStatuses"].empty? #container status block start - if !podContainers.empty? 
#container status block start - podContainers.each do |container| - containerRestartCount = 0 - lastFinishedTime = nil - # Need this flag to determine if we need to process container data for mdm metrics like oomkilled and container restart - #container Id is of the form - #docker://dfd9da983f1fd27432fb2c1fe3049c0a1d25b1c697b2dc1a530c986e58b16527 - if !container["containerID"].nil? - record["ContainerID"] = container["containerID"].split("//")[1] - else - # for containers that have image issues (like invalid image/tag etc..) this will be empty. do not make it all 0 - record["ContainerID"] = "" - end - #keeping this as which is same as InstanceName in perf table - if podUid.nil? || container["name"].nil? - next - else - record["ContainerName"] = podUid + "/" + container["name"] - end - #Pod restart count is a sumtotal of restart counts of individual containers - #within the pod. The restart count of a container is maintained by kubernetes - #itself in the form of a container label. - containerRestartCount = container["restartCount"] - record["ContainerRestartCount"] = containerRestartCount - - containerStatus = container["state"] - record["ContainerStatusReason"] = "" - # state is of the following form , so just picking up the first key name - # "state": { - # "waiting": { - # "reason": "CrashLoopBackOff", - # "message": "Back-off 5m0s restarting failed container=metrics-server pod=metrics-server-2011498749-3g453_kube-system(5953be5f-fcae-11e7-a356-000d3ae0e432)" - # } - # }, - # the below is for accounting 'NodeLost' scenario, where-in the containers in the lost node/pod(s) is still being reported as running - if podReadyCondition == false - record["ContainerStatus"] = "Unknown" - else - record["ContainerStatus"] = containerStatus.keys[0] - end - #TODO : Remove ContainerCreationTimeStamp from here since we are sending it as a metric - #Picking up both container and node start time from cAdvisor to be consistent - if containerStatus.keys[0] == "running" - 
record["ContainerCreationTimeStamp"] = container["state"]["running"]["startedAt"] - else - if !containerStatus[containerStatus.keys[0]]["reason"].nil? && !containerStatus[containerStatus.keys[0]]["reason"].empty? - record["ContainerStatusReason"] = containerStatus[containerStatus.keys[0]]["reason"] - end - # Process the record to see if job was completed 6 hours ago. If so, send metric to mdm - if !record["ControllerKind"].nil? && record["ControllerKind"].downcase == Constants::CONTROLLER_KIND_JOB - @inventoryToMdmConvertor.process_record_for_terminated_job_metric(record["ControllerName"], record["Namespace"], containerStatus) - end - end - - # Record the last state of the container. This may have information on why a container was killed. - begin - if !container["lastState"].nil? && container["lastState"].keys.length == 1 - lastStateName = container["lastState"].keys[0] - lastStateObject = container["lastState"][lastStateName] - if !lastStateObject.is_a?(Hash) - raise "expected a hash object. 
This could signify a bug or a kubernetes API change" - end + if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && kubePerfEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE + if @CONTAINER_PERF_EMIT_STREAM + $log.info("in_kube_podinventory::parse_and_emit_records: number of container perf records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream + end + kubePerfEventStream = MultiEventStream.new + end - if lastStateObject.key?("reason") && lastStateObject.key?("startedAt") && lastStateObject.key?("finishedAt") - newRecord = Hash.new - newRecord["lastState"] = lastStateName # get the name of the last state (ex: terminated) - lastStateReason = lastStateObject["reason"] - # newRecord["reason"] = lastStateObject["reason"] # (ex: OOMKilled) - newRecord["reason"] = lastStateReason # (ex: OOMKilled) - newRecord["startedAt"] = lastStateObject["startedAt"] # (ex: 2019-07-02T14:58:51Z) - lastFinishedTime = lastStateObject["finishedAt"] - newRecord["finishedAt"] = lastFinishedTime # (ex: 2019-07-02T14:58:52Z) - - # only write to the output field if everything previously ran without error - record["ContainerLastStatus"] = newRecord - - #Populate mdm metric for OOMKilled container count if lastStateReason is OOMKilled - if lastStateReason.downcase == Constants::REASON_OOM_KILLED - @inventoryToMdmConvertor.process_record_for_oom_killed_metric(record["ControllerName"], record["Namespace"], lastFinishedTime) - end - lastStateReason = nil - else - record["ContainerLastStatus"] = Hash.new - end - else - record["ContainerLastStatus"] = Hash.new - end + # container GPU records + containerGPUInsightsMetricsDataItems = [] + containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "requests", "nvidia.com/gpu", "containerGpuRequests", batchTime)) + 
containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "limits", "nvidia.com/gpu", "containerGpuLimits", batchTime)) + containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "requests", "amd.com/gpu", "containerGpuRequests", batchTime)) + containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "limits", "amd.com/gpu", "containerGpuLimits", batchTime)) + containerGPUInsightsMetricsDataItems.each do |insightsMetricsRecord| + wrapper = { + "DataType" => "INSIGHTS_METRICS_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], + } + insightsMetricsEventStream.add(emitTime, wrapper) if wrapper + end - #Populate mdm metric for container restart count if greater than 0 - if (!containerRestartCount.nil? && (containerRestartCount.is_a? Integer) && containerRestartCount > 0) - @inventoryToMdmConvertor.process_record_for_container_restarts_metric(record["ControllerName"], record["Namespace"], lastFinishedTime) - end - rescue => errorStr - $log.warn "Failed in parse_and_emit_record pod inventory while processing ContainerLastStatus: #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - record["ContainerLastStatus"] = Hash.new + if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && insightsMetricsEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE + if @GPU_PERF_EMIT_STREAM + $log.info("in_kube_podinventory::parse_and_emit_records: number of GPU insights metrics records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) + $log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") end - - podRestartCount += containerRestartCount - records.push(record.dup) - end - else # for unscheduled pods there are no status.containerStatuses, in this case we still want the pod - records.push(record) - end #container status block end - records.each do |record| - if !record.nil? - record["PodRestartCount"] = podRestartCount - wrapper = { - "DataType" => "KUBE_POD_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [record.each { |k, v| record[k] = v }], - } - eventStream.add(emitTime, wrapper) if wrapper - @inventoryToMdmConvertor.process_pod_inventory_record(wrapper) - end - end - # Send container inventory records for containers on windows nodes - @winContainerCount += containerInventoryRecords.length - containerInventoryRecords.each do |cirecord| - if !cirecord.nil? - ciwrapper = { - "DataType" => "CONTAINER_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [cirecord.each { |k, v| cirecord[k] = v }], - } - eventStream.add(emitTime, ciwrapper) if ciwrapper + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream end + insightsMetricsEventStream = MultiEventStream.new end end #podInventory block end - router.emit_stream(@tag, eventStream) if eventStream + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) + if eventStream.count > 0 + $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + if insightsMetricsEventStream.count > 0 + $log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + end + + if eventStream.count > 0 + if @PODS_EMIT_STREAM + $log.info("in_kube_podinventory::parse_and_emit_records: number of pod inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}") + router.emit_stream(@tag, eventStream) if eventStream + end + eventStream = nil + end + + if kubePerfEventStream.count > 0 + if @CONTAINER_PERF_EMIT_STREAM + $log.info("in_kube_podinventory::parse_and_emit_records: number of perf records emitted #{kubePerfEventStream.count} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream + end + kubePerfEventStream = nil + end + + if insightsMetricsEventStream.count > 0 + if @GPU_PERF_EMIT_STREAM + $log.info("in_kube_podinventory::parse_and_emit_records: number of insights metrics records emitted #{insightsMetricsEventStream.count} @ #{Time.now.utc.iso8601}") + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + end + insightsMetricsEventStream = nil + end if continuationToken.nil? #no more chunks in this batch to be sent, get all pod inventory records to send @log.info "Sending pod inventory mdm records to out_mdm" @@ -401,101 +335,39 @@ def parse_and_emit_records(podInventory, serviceList, continuationToken, batchTi router.emit_stream(@@MDMKubePodInventoryTag, mdm_pod_inventory_es) if mdm_pod_inventory_es end - #:optimize:kubeperf merge - begin - #if(!podInventory.empty?) 
- containerMetricDataItems = [] - #hostName = (OMS::Common.get_hostname) - containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "cpu", "cpuRequestNanoCores", batchTime)) - containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "memory", "memoryRequestBytes", batchTime)) - containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "cpu", "cpuLimitNanoCores", batchTime)) - containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "memory", "memoryLimitBytes", batchTime)) - - kubePerfEventStream = MultiEventStream.new - insightsMetricsEventStream = MultiEventStream.new - - containerMetricDataItems.each do |record| - record["DataType"] = "LINUX_PERF_BLOB" - record["IPName"] = "LogManagement" - kubePerfEventStream.add(emitTime, record) if record - end - #end - router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream - - begin - #start GPU InsightsMetrics items - - containerGPUInsightsMetricsDataItems = [] - containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(podInventory, "requests", "nvidia.com/gpu", "containerGpuRequests", batchTime)) - containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(podInventory, "limits", "nvidia.com/gpu", "containerGpuLimits", batchTime)) - - containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(podInventory, "requests", "amd.com/gpu", "containerGpuRequests", batchTime)) - containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(podInventory, "limits", "amd.com/gpu", "containerGpuLimits", batchTime)) - - 
containerGPUInsightsMetricsDataItems.each do |insightsMetricsRecord| - wrapper = { - "DataType" => "INSIGHTS_METRICS_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], - } - insightsMetricsEventStream.add(emitTime, wrapper) if wrapper - - if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0) - $log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") - end - end - - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream - #end GPU InsightsMetrics items - rescue => errorStr - $log.warn "Failed when processing GPU metrics in_kube_podinventory : #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - rescue => errorStr - $log.warn "Failed in parse_and_emit_record for KubePerf from in_kube_podinventory : #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - #:optimize:end kubeperf merge - - #:optimize:start kubeservices merge - begin - if (!serviceList.nil? && !serviceList.empty?) - kubeServicesEventStream = MultiEventStream.new - serviceList["items"].each do |items| - kubeServiceRecord = {} - kubeServiceRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated - kubeServiceRecord["ServiceName"] = items["metadata"]["name"] - kubeServiceRecord["Namespace"] = items["metadata"]["namespace"] - kubeServiceRecord["SelectorLabels"] = [items["spec"]["selector"]] + if continuationToken.nil? # sending kube services inventory records + kubeServicesEventStream = MultiEventStream.new + serviceRecords.each do |kubeServiceRecord| + if !kubeServiceRecord.nil? 
+ # adding before emit to reduce memory foot print kubeServiceRecord["ClusterId"] = KubernetesApiClient.getClusterId kubeServiceRecord["ClusterName"] = KubernetesApiClient.getClusterName - kubeServiceRecord["ClusterIP"] = items["spec"]["clusterIP"] - kubeServiceRecord["ServiceType"] = items["spec"]["type"] - # : Add ports and status fields kubeServicewrapper = { "DataType" => "KUBE_SERVICES_BLOB", "IPName" => "ContainerInsights", "DataItems" => [kubeServiceRecord.each { |k, v| kubeServiceRecord[k] = v }], } kubeServicesEventStream.add(emitTime, kubeServicewrapper) if kubeServicewrapper + + if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && kubeServicesEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE + if @SERVICES_EMIT_STREAM + $log.info("in_kube_podinventory::parse_and_emit_records: number of service records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream + end + kubeServicesEventStream = MultiEventStream.new + end end + end + + if @SERVICES_EMIT_STREAM && kubeServicesEventStream.count > 0 + $log.info("in_kube_podinventory::parse_and_emit_records : number of service records emitted #{kubeServicesEventStream.count} @ #{Time.now.utc.iso8601}") router.emit_stream(@@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream end - rescue => errorStr - $log.warn "Failed in parse_and_emit_record for KubeServices from in_kube_podinventory : #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + kubeServicesEventStream = nil end - #:optimize:end kubeservices merge #Updating value for AppInsights telemetry @podCount += podInventory["items"].length - - if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && eventStream.count > 0) - $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") - end rescue => errorStr $log.warn "Failed in parse_and_emit_record pod inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) @@ -535,26 +407,230 @@ def run_periodic @mutex.unlock end - def getServiceNameFromLabels(namespace, labels, serviceList) + def getPodInventoryRecords(item, serviceRecords, batchTime = Time.utc.iso8601) + records = [] + record = {} + + begin + record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + record["Name"] = item["metadata"]["name"] + podNameSpace = item["metadata"]["namespace"] + nodeName = "" + #for unscheduled (non-started) pods nodeName does NOT exist + if !item["spec"]["nodeName"].nil? + nodeName = item["spec"]["nodeName"] + end + podUid = KubernetesApiClient.getPodUid(podNameSpace, item["metadata"]) + if podUid.nil? + return records + end + if KubernetesApiClient.isAROv3MasterOrInfraPod(nodeName) + return records + end + + record["PodUid"] = podUid + record["PodLabel"] = [item["metadata"]["labels"]] + record["Namespace"] = podNameSpace + record["PodCreationTimeStamp"] = item["metadata"]["creationTimestamp"] + #for unscheduled (non-started) pods startTime does NOT exist + if !item["status"]["startTime"].nil? + record["PodStartTime"] = item["status"]["startTime"] + else + record["PodStartTime"] = "" + end + #podStatus + # the below is for accounting 'NodeLost' scenario, where-in the pod(s) in the lost node is still being reported as running + podReadyCondition = true + if !item["status"]["reason"].nil? && item["status"]["reason"] == "NodeLost" && !item["status"]["conditions"].nil? 
+ item["status"]["conditions"].each do |condition| + if condition["type"] == "Ready" && condition["status"] == "False" + podReadyCondition = false + break + end + end + end + if podReadyCondition == false + record["PodStatus"] = "Unknown" + # ICM - https://portal.microsofticm.com/imp/v3/incidents/details/187091803/home + elsif !item["metadata"]["deletionTimestamp"].nil? && !item["metadata"]["deletionTimestamp"].empty? + record["PodStatus"] = Constants::POD_STATUS_TERMINATING + else + record["PodStatus"] = item["status"]["phase"] + end + #for unscheduled (non-started) pods podIP does NOT exist + if !item["status"]["podIP"].nil? + record["PodIp"] = item["status"]["podIP"] + else + record["PodIp"] = "" + end + + record["Computer"] = nodeName + record["ClusterId"] = KubernetesApiClient.getClusterId + record["ClusterName"] = KubernetesApiClient.getClusterName + record["ServiceName"] = getServiceNameFromLabels(item["metadata"]["namespace"], item["metadata"]["labels"], serviceRecords) + + if !item["metadata"]["ownerReferences"].nil? + record["ControllerKind"] = item["metadata"]["ownerReferences"][0]["kind"] + record["ControllerName"] = item["metadata"]["ownerReferences"][0]["name"] + @controllerSet.add(record["ControllerKind"] + record["ControllerName"]) + #Adding controller kind to telemetry ro information about customer workload + if (@controllerData[record["ControllerKind"]].nil?) + @controllerData[record["ControllerKind"]] = 1 + else + controllerValue = @controllerData[record["ControllerKind"]] + @controllerData[record["ControllerKind"]] += 1 + end + end + podRestartCount = 0 + record["PodRestartCount"] = 0 + + #Invoke the helper method to compute ready/not ready mdm metric + @inventoryToMdmConvertor.process_record_for_pods_ready_metric(record["ControllerName"], record["Namespace"], item["status"]["conditions"]) + + podContainers = [] + if item["status"].key?("containerStatuses") && !item["status"]["containerStatuses"].empty? 
+ podContainers = podContainers + item["status"]["containerStatuses"] + end + # Adding init containers to the record list as well. + if item["status"].key?("initContainerStatuses") && !item["status"]["initContainerStatuses"].empty? + podContainers = podContainers + item["status"]["initContainerStatuses"] + end + # if items["status"].key?("containerStatuses") && !items["status"]["containerStatuses"].empty? #container status block start + if !podContainers.empty? #container status block start + podContainers.each do |container| + containerRestartCount = 0 + lastFinishedTime = nil + # Need this flag to determine if we need to process container data for mdm metrics like oomkilled and container restart + #container Id is of the form + #docker://dfd9da983f1fd27432fb2c1fe3049c0a1d25b1c697b2dc1a530c986e58b16527 + if !container["containerID"].nil? + record["ContainerID"] = container["containerID"].split("//")[1] + else + # for containers that have image issues (like invalid image/tag etc..) this will be empty. do not make it all 0 + record["ContainerID"] = "" + end + #keeping this as which is same as InstanceName in perf table + if podUid.nil? || container["name"].nil? + next + else + record["ContainerName"] = podUid + "/" + container["name"] + end + #Pod restart count is a sumtotal of restart counts of individual containers + #within the pod. The restart count of a container is maintained by kubernetes + #itself in the form of a container label. 
+ containerRestartCount = container["restartCount"] + record["ContainerRestartCount"] = containerRestartCount + + containerStatus = container["state"] + record["ContainerStatusReason"] = "" + # state is of the following form , so just picking up the first key name + # "state": { + # "waiting": { + # "reason": "CrashLoopBackOff", + # "message": "Back-off 5m0s restarting failed container=metrics-server pod=metrics-server-2011498749-3g453_kube-system(5953be5f-fcae-11e7-a356-000d3ae0e432)" + # } + # }, + # the below is for accounting 'NodeLost' scenario, where-in the containers in the lost node/pod(s) is still being reported as running + if podReadyCondition == false + record["ContainerStatus"] = "Unknown" + else + record["ContainerStatus"] = containerStatus.keys[0] + end + #TODO : Remove ContainerCreationTimeStamp from here since we are sending it as a metric + #Picking up both container and node start time from cAdvisor to be consistent + if containerStatus.keys[0] == "running" + record["ContainerCreationTimeStamp"] = container["state"]["running"]["startedAt"] + else + if !containerStatus[containerStatus.keys[0]]["reason"].nil? && !containerStatus[containerStatus.keys[0]]["reason"].empty? + record["ContainerStatusReason"] = containerStatus[containerStatus.keys[0]]["reason"] + end + # Process the record to see if job was completed 6 hours ago. If so, send metric to mdm + if !record["ControllerKind"].nil? && record["ControllerKind"].downcase == Constants::CONTROLLER_KIND_JOB + @inventoryToMdmConvertor.process_record_for_terminated_job_metric(record["ControllerName"], record["Namespace"], containerStatus) + end + end + + # Record the last state of the container. This may have information on why a container was killed. + begin + if !container["lastState"].nil? 
&& container["lastState"].keys.length == 1 + lastStateName = container["lastState"].keys[0] + lastStateObject = container["lastState"][lastStateName] + if !lastStateObject.is_a?(Hash) + raise "expected a hash object. This could signify a bug or a kubernetes API change" + end + + if lastStateObject.key?("reason") && lastStateObject.key?("startedAt") && lastStateObject.key?("finishedAt") + newRecord = Hash.new + newRecord["lastState"] = lastStateName # get the name of the last state (ex: terminated) + lastStateReason = lastStateObject["reason"] + # newRecord["reason"] = lastStateObject["reason"] # (ex: OOMKilled) + newRecord["reason"] = lastStateReason # (ex: OOMKilled) + newRecord["startedAt"] = lastStateObject["startedAt"] # (ex: 2019-07-02T14:58:51Z) + lastFinishedTime = lastStateObject["finishedAt"] + newRecord["finishedAt"] = lastFinishedTime # (ex: 2019-07-02T14:58:52Z) + + # only write to the output field if everything previously ran without error + record["ContainerLastStatus"] = newRecord + + #Populate mdm metric for OOMKilled container count if lastStateReason is OOMKilled + if lastStateReason.downcase == Constants::REASON_OOM_KILLED + @inventoryToMdmConvertor.process_record_for_oom_killed_metric(record["ControllerName"], record["Namespace"], lastFinishedTime) + end + lastStateReason = nil + else + record["ContainerLastStatus"] = Hash.new + end + else + record["ContainerLastStatus"] = Hash.new + end + + #Populate mdm metric for container restart count if greater than 0 + if (!containerRestartCount.nil? && (containerRestartCount.is_a? 
Integer) && containerRestartCount > 0) + @inventoryToMdmConvertor.process_record_for_container_restarts_metric(record["ControllerName"], record["Namespace"], lastFinishedTime) + end + rescue => errorStr + $log.warn "Failed in parse_and_emit_record pod inventory while processing ContainerLastStatus: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + record["ContainerLastStatus"] = Hash.new + end + + podRestartCount += containerRestartCount + records.push(record.dup) + end + else # for unscheduled pods there are no status.containerStatuses, in this case we still want the pod + records.push(record) + end #container status block end + + records.each do |record| + if !record.nil? + record["PodRestartCount"] = podRestartCount + end + end + rescue => error + $log.warn("getPodInventoryRecords failed: #{error}") + end + return records + end + + def getServiceNameFromLabels(namespace, labels, serviceRecords) serviceName = "" begin if !labels.nil? && !labels.empty? - if (!serviceList.nil? && !serviceList.empty? && serviceList.key?("items") && !serviceList["items"].empty?) - serviceList["items"].each do |item| - found = 0 - if !item["spec"].nil? && !item["spec"]["selector"].nil? && item["metadata"]["namespace"] == namespace - selectorLabels = item["spec"]["selector"] - if !selectorLabels.empty? - selectorLabels.each do |key, value| - if !(labels.select { |k, v| k == key && v == value }.length > 0) - break - end - found = found + 1 + serviceRecords.each do |kubeServiceRecord| + found = 0 + if kubeServiceRecord["Namespace"] == namespace + selectorLabels = kubeServiceRecord["SelectorLabels"] + if !selectorLabels.empty? 
+ selectorLabels.each do |key, value| + if !(labels.select { |k, v| k == key && v == value }.length > 0) + break end + found = found + 1 end - if found == selectorLabels.length - return item["metadata"]["name"] - end + end + if found == selectorLabels.length + return kubeServiceRecord["ServiceName"] end end end From bede6efb4a818a451142172a59b133669850ab42 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sun, 29 Nov 2020 16:22:34 -0800 Subject: [PATCH 02/45] optimize kube node inventory --- kubernetes/omsagent.yaml | 50 ++ source/plugins/ruby/in_kube_nodes.rb | 476 ++++++++++++-------- source/plugins/ruby/in_kube_podinventory.rb | 33 +- 3 files changed, 365 insertions(+), 194 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 85c383ec2..49a9235de 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -510,6 +510,56 @@ spec: memory: 250Mi env: # azure devops pipeline uses AKS_RESOURCE_ID and AKS_REGION hence ensure to uncomment these + # pod inventory plugin settings + - name: PODS_CHUNK_SIZE + value: "500" + - name: PODS_EMIT_STREAM_BATCH_SIZE + value: "250" + - name: PODS_EMIT_STREAM + value: "true" + - name: MDM_PODS_INVENTORY_EMIT_STREAM + value: "true" + - name: CONTAINER_PERF_EMIT_STREAM + value: "true" + - name: SERVICES_EMIT_STREAM + value: "true" + - name: GPU_PERF_EMIT_STREAM + value: "true" + + # node inventory plugin settings + - name: NODES_CHUNK_SIZE + value: "200" + - name: NODES_EMIT_STREAM_BATCH_SIZE + value: "100" + - name: NODES_EMIT_STREAM + value: "true" + - name: NODES_PERF_EMIT_STREAM + value: "true" + - name: GPU_NODES_PERF_EMIT_STREAM + value: "true" + - name: CONTAINER_NODE_INVENTORY_EMIT_STREAM + value: "true" + - name: MDM_KUBE_NODE_INVENTORY_EMIT_STREAM + value: "true" + + # event inventory plugin settings + - name: EVENTS_CHUNK_SIZE + value: "30000" + - name: EVENTS_EMIT_STREAM + value: "true" + + # kube state deployments + - name: DEPLOYMENTS_CHUNK_SIZE + value: "1000" + - name: 
DEPLOYMENTS_EMIT_STREAM + value: "true" + + # kube hpa + - name: HPA_CHUNK_SIZE + value: "2000" + - name: HPA_EMIT_STREAM + value: "true" + - name: AKS_RESOURCE_ID value: "VALUE_AKS_RESOURCE_ID_VALUE" - name: AKS_REGION diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index 4d58382f5..8346a1a2b 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -33,6 +33,13 @@ def initialize require_relative "oms_common" require_relative "omslog" @NODES_CHUNK_SIZE = "400" + # 0 indicates no batch enabled for stream emit + @NODES_EMIT_STREAM_BATCH_SIZE = 0 + @NODES_EMIT_STREAM = true + @NODES_PERF_EMIT_STREAM = true + @GPU_NODES_PERF_EMIT_STREAM = true + @CONTAINER_NODE_INVENTORY_EMIT_STREAM = true + @MDM_KUBE_NODE_INVENTORY_EMIT_STREAM = true require_relative "constants" end @@ -45,6 +52,46 @@ def configure(conf) def start if @run_interval + if !ENV["NODES_CHUNK_SIZE"].nil? && !ENV["NODES_CHUNK_SIZE"].empty? + @NODES_CHUNK_SIZE = ENV["NODES_CHUNK_SIZE"] + end + $log.info("in_kube_nodes::start : NODES_CHUNK_SIZE @ #{@NODES_CHUNK_SIZE}") + + if !ENV["NODES_EMIT_STREAM_BATCH_SIZE"].nil? && !ENV["NODES_EMIT_STREAM_BATCH_SIZE"].empty? + @NODES_EMIT_STREAM_BATCH_SIZE = ENV["NODES_EMIT_STREAM_BATCH_SIZE"].to_i + NodesChunkSize = @NODES_CHUNK_SIZE.to_i + if @NODES_EMIT_STREAM_BATCH_SIZE > NodesChunkSize + $log.info("in_kube_nodes::start : NODES_EMIT_STREAM_BATCH_SIZE cant be greater than nodes chunksize @ #{@NODES_CHUNK_SIZE}") + @NODES_EMIT_STREAM_BATCH_SIZE = NodesChunkSize + end + end + $log.info("in_kube_nodes::start : NODES_EMIT_STREAM_BATCH_SIZE @ #{@NODES_EMIT_STREAM_BATCH_SIZE}") + + if !ENV["NODES_EMIT_STREAM"].nil? && !ENV["NODES_EMIT_STREAM"].empty? + @NODES_EMIT_STREAM = ENV["NODES_EMIT_STREAM"].to_s.downcase == "true" ? true : false + end + $log.info("in_kube_nodes::start : NODES_EMIT_STREAM @ #{@NODES_EMIT_STREAM}") + + if !ENV["CONTAINER_NODE_INVENTORY_EMIT_STREAM"].nil? 
&& !ENV["CONTAINER_NODE_INVENTORY_EMIT_STREAM"].empty? + @CONTAINER_NODE_INVENTORY_EMIT_STREAM = ENV["CONTAINER_NODE_INVENTORY_EMIT_STREAM"].to_s.downcase == "true" ? true : false + end + $log.info("in_kube_nodes::start : CONTAINER_NODE_INVENTORY_EMIT_STREAM @ #{@CONTAINER_NODE_INVENTORY_EMIT_STREAM}") + + if !ENV["MDM_KUBE_NODE_INVENTORY_EMIT_STREAM"].nil? && !ENV["MDM_KUBE_NODE_INVENTORY_EMIT_STREAM"].empty? + @MDM_KUBE_NODE_INVENTORY_EMIT_STREAM = ENV["MDM_KUBE_NODE_INVENTORY_EMIT_STREAM"].to_s.downcase == "true" ? true : false + end + $log.info("in_kube_nodes::start : MDM_KUBE_NODE_INVENTORY_EMIT_STREAM @ #{@MDM_KUBE_NODE_INVENTORY_EMIT_STREAM}") + + if !ENV["NODES_PERF_EMIT_STREAM"].nil? && !ENV["NODES_PERF_EMIT_STREAM"].empty? + @NODES_PERF_EMIT_STREAM = ENV["NODES_PERF_EMIT_STREAM"].to_s.downcase == "true" ? true : false + end + $log.info("in_kube_nodes::start : NODES_PERF_EMIT_STREAM @ #{@NODES_PERF_EMIT_STREAM}") + + if !ENV["GPU_NODES_PERF_EMIT_STREAM"].nil? && !ENV["GPU_NODES_PERF_EMIT_STREAM"].empty? + @GPU_NODES_PERF_EMIT_STREAM = ENV["GPU_NODES_PERF_EMIT_STREAM"].to_s.downcase == "true" ? 
true : false + end + $log.info("in_kube_nodes::start : GPU_NODES_PERF_EMIT_STREAM @ #{@GPU_NODES_PERF_EMIT_STREAM}") + @finished = false @condition = ConditionVariable.new @mutex = Mutex.new @@ -109,210 +156,179 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) eventStream = MultiEventStream.new containerNodeInventoryEventStream = MultiEventStream.new insightsMetricsEventStream = MultiEventStream.new + kubePerfEventStream = MultiEventStream.new @@istestvar = ENV["ISTEST"] #get node inventory - nodeInventory["items"].each do |items| - record = {} - # Sending records for ContainerNodeInventory - containerNodeInventoryRecord = {} - containerNodeInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated - containerNodeInventoryRecord["Computer"] = items["metadata"]["name"] - - record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated - record["Computer"] = items["metadata"]["name"] - record["ClusterName"] = KubernetesApiClient.getClusterName - record["ClusterId"] = KubernetesApiClient.getClusterId - record["CreationTimeStamp"] = items["metadata"]["creationTimestamp"] - record["Labels"] = [items["metadata"]["labels"]] - record["Status"] = "" - - if !items["spec"]["providerID"].nil? && !items["spec"]["providerID"].empty? - if File.file?(@@AzStackCloudFileName) # existence of this file indicates agent running on azstack - record["KubernetesProviderID"] = "azurestack" - else - #Multicluster kusto query is filtering after splitting by ":" to the left, so do the same here - #https://msazure.visualstudio.com/One/_git/AzureUX-Monitoring?path=%2Fsrc%2FMonitoringExtension%2FClient%2FInfraInsights%2FData%2FQueryTemplates%2FMultiClusterKustoQueryTemplate.ts&_a=contents&version=GBdev - provider = items["spec"]["providerID"].split(":")[0] - if !provider.nil? && !provider.empty? 
- record["KubernetesProviderID"] = provider - else - record["KubernetesProviderID"] = items["spec"]["providerID"] + nodeInventory["items"].each do |item| + # node inventory + nodeInventoryRecord = getNodeInventoryRecord(item, batchTime) + wrapper = { + "DataType" => "KUBE_NODE_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [nodeInventoryRecord.each { |k, v| nodeInventoryRecord[k] = v }], + } + eventStream.add(emitTime, wrapper) if wrapper + if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && eventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE + if @NODES_EMIT_STREAM + $log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@tag, eventStream) if eventStream end - end - else - record["KubernetesProviderID"] = "onprem" - end - - # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions. - # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we - # populate the KubeNodeInventory Status field. A possible value for this field could be "Ready OutofDisk" - # implying that the node is ready for hosting pods, however its out of disk. - - if items["status"].key?("conditions") && !items["status"]["conditions"].empty? - allNodeConditions = "" - items["status"]["conditions"].each do |condition| - if condition["status"] == "True" - if !allNodeConditions.empty? 
- allNodeConditions = allNodeConditions + "," + condition["type"] - else - allNodeConditions = condition["type"] - end + if @MDM_KUBE_NODE_INVENTORY_EMIT_STREAM + $log.info("in_kube_node::parse_and_emit_records: number of mdm node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream end - #collect last transition to/from ready (no matter ready is true/false) - if condition["type"] == "Ready" && !condition["lastTransitionTime"].nil? - record["LastTransitionTimeReady"] = condition["lastTransitionTime"] + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) + $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end - end - if !allNodeConditions.empty? - record["Status"] = allNodeConditions - end + eventStream = MultiEventStream.new end - nodeInfo = items["status"]["nodeInfo"] - record["KubeletVersion"] = nodeInfo["kubeletVersion"] - record["KubeProxyVersion"] = nodeInfo["kubeProxyVersion"] - containerNodeInventoryRecord["OperatingSystem"] = nodeInfo["osImage"] - containerRuntimeVersion = nodeInfo["containerRuntimeVersion"] - if containerRuntimeVersion.downcase.start_with?("docker://") - containerNodeInventoryRecord["DockerVersion"] = containerRuntimeVersion.split("//")[1] - else - # using containerRuntimeVersion as DockerVersion as is for non docker runtimes - containerNodeInventoryRecord["DockerVersion"] = containerRuntimeVersion - end - # ContainerNodeInventory data for docker version and operating system. 
- containerNodeInventoryWrapper = { - "DataType" => "CONTAINER_NODE_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }], - } - containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper + # container node inventory + containerNodeInventoryRecord = getContainerNodeInventoryRecord(item, batchTime) + containerNodeInventoryWrapper = { + "DataType" => "CONTAINER_NODE_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }], + } + containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper + + if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && containerNodeInventoryEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE + if @CONTAINER_NODE_INVENTORY_EMIT_STREAM + $log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream + end + containerNodeInventoryEventStream = MultiEventStream.new + end - wrapper = { - "DataType" => "KUBE_NODE_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [record.each { |k, v| record[k] = v }], - } - eventStream.add(emitTime, wrapper) if wrapper - # Adding telemetry to send node telemetry every 10 minutes - timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference / 60 - if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) - properties = {} - properties["Computer"] = record["Computer"] - properties["KubeletVersion"] = record["KubeletVersion"] - properties["OperatingSystem"] = nodeInfo["operatingSystem"] - # DockerVersion field holds docker 
version if runtime is docker/moby else :// - if containerRuntimeVersion.downcase.start_with?("docker://") - properties["DockerVersion"] = containerRuntimeVersion.split("//")[1] - else - properties["DockerVersion"] = containerRuntimeVersion + # node metrics records + nodeMetricRecords = [] + nodeMetricRecord = KubernetesApiClient.parseNodeLimits(item, "allocatable", "cpu", "cpuAllocatableNanoCores", batchTime) + if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? + nodeMetricRecords.push(nodeMetricRecord) + end + nodeMetricRecord = KubernetesApiClient.parseNodeLimits(item, "allocatable", "memory", "memoryAllocatableBytes", batchTime) + if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? + nodeMetricRecords.push(nodeMetricRecord) + end + nodeMetricRecord = KubernetesApiClient.parseNodeLimits(item, "capacity", "cpu", "cpuCapacityNanoCores", batchTime) + if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? + nodeMetricRecords.push(nodeMetricRecord) + end + nodeMetricRecord = KubernetesApiClient.parseNodeLimits(item, "capacity", "memory", "memoryCapacityBytes", batchTime) + if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? 
+ nodeMetricRecords.push(nodeMetricRecord) + end + nodeMetricRecords.each do |metricRecord| + metricRecord["DataType"] = "LINUX_PERF_BLOB" + metricRecord["IPName"] = "LogManagement" + kubePerfEventStream.add(emitTime, metricRecord) if metricRecord + end + if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && kubePerfEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE + if @NODES_PERF_EMIT_STREAM + $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream + end + kubePerfEventStream = MultiEventStream.new end - properties["KubernetesProviderID"] = record["KubernetesProviderID"] - properties["KernelVersion"] = nodeInfo["kernelVersion"] - properties["OSImage"] = nodeInfo["osImage"] - capacityInfo = items["status"]["capacity"] - ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties) + # node GPU metrics record + nodeGPUInsightsMetricsRecords = [] + insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "allocatable", "nvidia.com/gpu", "nodeGpuAllocatable", batchTime) + if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty? + nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord) + end + insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "capacity", "nvidia.com/gpu", "nodeGpuCapacity", batchTime) + if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty? + nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord) + end + insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "allocatable", "amd.com/gpu", "nodeGpuAllocatable", batchTime) + if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty? 
+ nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord) + end + insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "capacity", "amd.com/gpu", "nodeGpuCapacity", batchTime) + if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty? + nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord) + end + nodeGPUInsightsMetricsRecords.each do |insightsMetricsRecord| + wrapper = { + "DataType" => "INSIGHTS_METRICS_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], + } + insightsMetricsEventStream.add(emitTime, wrapper) if wrapper + end + if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && insightsMetricsEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE + if @GPU_NODES_PERF_EMIT_STREAM + $log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + end + insightsMetricsEventStream = MultiEventStream.new + end + # Adding telemetry to send node telemetry every 10 minutes + timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) + properties = getNodeTelemetryProps(item) + properties["KubernetesProviderID"] = nodeInventoryRecord["KubernetesProviderID"] + capacityInfo = item["status"]["capacity"] + ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties) + begin + if (!capacityInfo["nvidia.com/gpu"].nil?) && (!capacityInfo["nvidia.com/gpu"].empty?) + properties["nvigpus"] = capacityInfo["nvidia.com/gpu"] + end - begin - if (!capacityInfo["nvidia.com/gpu"].nil?) && (!capacityInfo["nvidia.com/gpu"].empty?) 
- properties["nvigpus"] = capacityInfo["nvidia.com/gpu"] + if (!capacityInfo["amd.com/gpu"].nil?) && (!capacityInfo["amd.com/gpu"].empty?) + properties["amdgpus"] = capacityInfo["amd.com/gpu"] + end + rescue => errorStr + $log.warn "Failed in getting GPU telemetry in_kube_nodes : #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end - if (!capacityInfo["amd.com/gpu"].nil?) && (!capacityInfo["amd.com/gpu"].empty?) - properties["amdgpus"] = capacityInfo["amd.com/gpu"] + # Telemetry for data collection config for replicaset + if (File.file?(@@configMapMountPath)) + properties["collectAllKubeEvents"] = @@collectAllKubeEvents end - rescue => errorStr - $log.warn "Failed in getting GPU telemetry in_kube_nodes : #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - - # Telemetry for data collection config for replicaset - if (File.file?(@@configMapMountPath)) - properties["collectAllKubeEvents"] = @@collectAllKubeEvents - end - #telemetry about prometheus metric collections settings for replicaset - if (File.file?(@@promConfigMountPath)) - properties["rsPromInt"] = @@rsPromInterval - properties["rsPromFPC"] = @@rsPromFieldPassCount - properties["rsPromFDC"] = @@rsPromFieldDropCount - properties["rsPromServ"] = @@rsPromK8sServiceCount - properties["rsPromUrl"] = @@rsPromUrlCount - properties["rsPromMonPods"] = @@rsPromMonitorPods - properties["rsPromMonPodsNs"] = @@rsPromMonitorPodsNamespaceLength + #telemetry about prometheus metric collections settings for replicaset + if (File.file?(@@promConfigMountPath)) + properties["rsPromInt"] = @@rsPromInterval + properties["rsPromFPC"] = @@rsPromFieldPassCount + properties["rsPromFDC"] = @@rsPromFieldDropCount + properties["rsPromServ"] = @@rsPromK8sServiceCount + properties["rsPromUrl"] = @@rsPromUrlCount + properties["rsPromMonPods"] = @@rsPromMonitorPods + properties["rsPromMonPodsNs"] = 
@@rsPromMonitorPodsNamespaceLength + end + ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties) + telemetrySent = true end - ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties) - telemetrySent = true - end end - router.emit_stream(@tag, eventStream) if eventStream - router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream - router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream if telemetrySent == true @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i end - - if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0) - $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + if eventStream.count > 0 + if @NODES_EMIT_STREAM + $log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}") + router.emit_stream(@tag, eventStream) if eventStream + end + eventStream = nil end - #:optimize:kubeperf merge - begin - #if(!nodeInventory.empty?) 
- nodeMetricDataItems = [] - #allocatable metrics @ node level - nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "cpu", "cpuAllocatableNanoCores", batchTime)) - nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "memory", "memoryAllocatableBytes", batchTime)) - #capacity metrics @ node level - nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores", batchTime)) - nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes", batchTime)) - - kubePerfEventStream = MultiEventStream.new - - nodeMetricDataItems.each do |record| - record["DataType"] = "LINUX_PERF_BLOB" - record["IPName"] = "LogManagement" - kubePerfEventStream.add(emitTime, record) if record + if kubePerfEventStream.count > 0 + if @NODES_PERF_EMIT_STREAM + $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{kubePerfEventStream.count} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream end - #end - router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream - - #start GPU InsightsMetrics items - begin - nodeGPUInsightsMetricsDataItems = [] - nodeGPUInsightsMetricsDataItems.concat(KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(nodeInventory, "allocatable", "nvidia.com/gpu", "nodeGpuAllocatable", batchTime)) - nodeGPUInsightsMetricsDataItems.concat(KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(nodeInventory, "capacity", "nvidia.com/gpu", "nodeGpuCapacity", batchTime)) - - nodeGPUInsightsMetricsDataItems.concat(KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(nodeInventory, "allocatable", "amd.com/gpu", "nodeGpuAllocatable", batchTime)) - nodeGPUInsightsMetricsDataItems.concat(KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(nodeInventory, "capacity", "amd.com/gpu", 
"nodeGpuCapacity", batchTime)) - - nodeGPUInsightsMetricsDataItems.each do |insightsMetricsRecord| - wrapper = { - "DataType" => "INSIGHTS_METRICS_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], - } - insightsMetricsEventStream.add(emitTime, wrapper) if wrapper - end - + kubePerfEventStream = nil + end + if insightsMetricsEventStream.count > 0 + if @GPU_NODES_PERF_EMIT_STREAM + $log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{insightsMetricsEventStream.count} @ #{Time.now.utc.iso8601}") router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream - if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0) - $log.info("kubeNodeInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") - end - rescue => errorStr - $log.warn "Failed when processing GPU metrics in_kube_nodes : #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end - #end GPU InsightsMetrics items - rescue => errorStr - $log.warn "Failed in enumerate for KubePerf from in_kube_nodes : #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + insightsMetricsEventStream = nil end - #:optimize:end kubeperf merge - rescue => errorStr $log.warn "Failed to retrieve node inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) @@ -352,5 +368,107 @@ def run_periodic end @mutex.unlock end + + def getNodeInventoryRecord(item, batchTime = Time.utc.iso8601) + record = {} + begin + record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + record["Computer"] = item["metadata"]["name"] + record["ClusterName"] = KubernetesApiClient.getClusterName + record["ClusterId"] = 
KubernetesApiClient.getClusterId + record["CreationTimeStamp"] = item["metadata"]["creationTimestamp"] + record["Labels"] = [item["metadata"]["labels"]] + record["Status"] = "" + + if !item["spec"]["providerID"].nil? && !item["spec"]["providerID"].empty? + if File.file?(@@AzStackCloudFileName) # existence of this file indicates agent running on azstack + record["KubernetesProviderID"] = "azurestack" + else + #Multicluster kusto query is filtering after splitting by ":" to the left, so do the same here + #https://msazure.visualstudio.com/One/_git/AzureUX-Monitoring?path=%2Fsrc%2FMonitoringExtension%2FClient%2FInfraInsights%2FData%2FQueryTemplates%2FMultiClusterKustoQueryTemplate.ts&_a=contents&version=GBdev + provider = item["spec"]["providerID"].split(":")[0] + if !provider.nil? && !provider.empty? + record["KubernetesProviderID"] = provider + else + record["KubernetesProviderID"] = item["spec"]["providerID"] + end + end + else + record["KubernetesProviderID"] = "onprem" + end + + # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions. + # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we + # populate the KubeNodeInventory Status field. A possible value for this field could be "Ready OutofDisk" + # implying that the node is ready for hosting pods, however its out of disk. + if item["status"].key?("conditions") && !item["status"]["conditions"].empty? + allNodeConditions = "" + item["status"]["conditions"].each do |condition| + if condition["status"] == "True" + if !allNodeConditions.empty? + allNodeConditions = allNodeConditions + "," + condition["type"] + else + allNodeConditions = condition["type"] + end + end + #collect last transition to/from ready (no matter ready is true/false) + if condition["type"] == "Ready" && !condition["lastTransitionTime"].nil? 
+ record["LastTransitionTimeReady"] = condition["lastTransitionTime"] + end + end + if !allNodeConditions.empty? + record["Status"] = allNodeConditions + end + end + nodeInfo = item["status"]["nodeInfo"] + record["KubeletVersion"] = nodeInfo["kubeletVersion"] + record["KubeProxyVersion"] = nodeInfo["kubeProxyVersion"] + rescue => errorStr + $log.warn "in_kube_nodes::getNodeInventoryRecord:Failed: #{errorStr}" + end + return record + end + + def getContainerNodeInventoryRecord(item, batchTime = Time.utc.iso8601) + containerNodeInventoryRecord = {} + begin + containerNodeInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + containerNodeInventoryRecord["Computer"] = item["metadata"]["name"] + nodeInfo = item["status"]["nodeInfo"] + containerNodeInventoryRecord["OperatingSystem"] = nodeInfo["osImage"] + containerRuntimeVersion = nodeInfo["containerRuntimeVersion"] + if containerRuntimeVersion.downcase.start_with?("docker://") + containerNodeInventoryRecord["DockerVersion"] = containerRuntimeVersion.split("//")[1] + else + # using containerRuntimeVersion as DockerVersion as is for non docker runtimes + containerNodeInventoryRecord["DockerVersion"] = containerRuntimeVersion + end + rescue => errorStr + $log.warn "in_kube_nodes::getContainerNodeInventoryRecord:Failed: #{errorStr}" + end + return containerNodeInventoryRecord + end + + def getNodeTelemetryProps(item) + properties = {} + begin + properties["Computer"] = item["metadata"]["name"] + nodeInfo = item["status"]["nodeInfo"] + properties["KubeletVersion"] = nodeInfo["kubeletVersion"] + properties["OperatingSystem"] = nodeInfo["osImage"] + properties["KernelVersion"] = nodeInfo["kernelVersion"] + properties["OSImage"] = nodeInfo["osImage"] + containerRuntimeVersion = nodeInfo["containerRuntimeVersion"] + if containerRuntimeVersion.downcase.start_with?("docker://") + properties["DockerVersion"] = containerRuntimeVersion.split("//")[1] + else + # using 
containerRuntimeVersion as DockerVersion as is for non docker runtimes + properties["DockerVersion"] = containerRuntimeVersion + end + rescue => errorStr + $log.warn "in_kube_nodes::getContainerNodeIngetNodeTelemetryPropsventoryRecord:Failed: #{errorStr}" + end + return properties + end end # Kube_Node_Input end # module diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 0eead7782..9a20be62d 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -78,6 +78,11 @@ def start if !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].nil? && !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].empty? @PODS_EMIT_STREAM_BATCH_SIZE = ENV["PODS_EMIT_STREAM_BATCH_SIZE"].to_i + PodsChunkSize = @PODS_CHUNK_SIZE.to_i + if @PODS_EMIT_STREAM_BATCH_SIZE > PodsChunkSize + $log.info("in_kube_podinventory::start : PODS_EMIT_STREAM_BATCH_SIZE shouldnt be greater than @ #{@PODS_CHUNK_SIZE} ") + @PODS_EMIT_STREAM_BATCH_SIZE = PodsChunkSize + end end $log.info("in_kube_podinventory::start : PODS_EMIT_STREAM_BATCH_SIZE @ #{@PODS_EMIT_STREAM_BATCH_SIZE}") @@ -291,20 +296,14 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc end end #podInventory block end - if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) - if eventStream.count > 0 - $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") - end - if insightsMetricsEventStream.count > 0 - $log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") - end - end - if eventStream.count > 0 if @PODS_EMIT_STREAM $log.info("in_kube_podinventory::parse_and_emit_records: number of pod inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}") router.emit_stream(@tag, eventStream) if eventStream end + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) + $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end eventStream = nil end @@ -321,10 +320,13 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc $log.info("in_kube_podinventory::parse_and_emit_records: number of insights metrics records emitted #{insightsMetricsEventStream.count} @ #{Time.now.utc.iso8601}") router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream end + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) + $log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end insightsMetricsEventStream = nil end - if continuationToken.nil? #no more chunks in this batch to be sent, get all pod inventory records to send + if continuationToken.nil? #no more chunks in this batch to be sent, get all mdm pod inventory records to send @log.info "Sending pod inventory mdm records to out_mdm" pod_inventory_mdm_records = @inventoryToMdmConvertor.get_pod_inventory_mdm_records(batchTime) @log.info "pod_inventory_mdm_records.size #{pod_inventory_mdm_records.size}" @@ -348,7 +350,6 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc "DataItems" => [kubeServiceRecord.each { |k, v| kubeServiceRecord[k] = v }], } kubeServicesEventStream.add(emitTime, kubeServicewrapper) if kubeServicewrapper - if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && kubeServicesEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE if @SERVICES_EMIT_STREAM $log.info("in_kube_podinventory::parse_and_emit_records: number of service records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") @@ -415,15 +416,17 @@ def getPodInventoryRecords(item, serviceRecords, batchTime = Time.utc.iso8601) record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated record["Name"] = item["metadata"]["name"] podNameSpace = 
item["metadata"]["namespace"] + podUid = KubernetesApiClient.getPodUid(podNameSpace, item["metadata"]) + if podUid.nil? + return records + end + nodeName = "" #for unscheduled (non-started) pods nodeName does NOT exist if !item["spec"]["nodeName"].nil? nodeName = item["spec"]["nodeName"] end - podUid = KubernetesApiClient.getPodUid(podNameSpace, item["metadata"]) - if podUid.nil? - return records - end + # For ARO v3 cluster, skip the pods scheduled on to master or infra nodes if KubernetesApiClient.isAROv3MasterOrInfraPod(nodeName) return records end From 9f7759e37334acb6ef43c370107db4ec3d1c2383 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sun, 29 Nov 2020 16:36:21 -0800 Subject: [PATCH 03/45] add flags for events, deployments and hpa --- source/plugins/ruby/in_kube_events.rb | 18 +- .../plugins/ruby/in_kubestate_deployments.rb | 424 +++++++++--------- source/plugins/ruby/in_kubestate_hpa.rb | 423 ++++++++--------- 3 files changed, 448 insertions(+), 417 deletions(-) diff --git a/source/plugins/ruby/in_kube_events.rb b/source/plugins/ruby/in_kube_events.rb index 6f59a3fc1..561909246 100644 --- a/source/plugins/ruby/in_kube_events.rb +++ b/source/plugins/ruby/in_kube_events.rb @@ -19,6 +19,7 @@ def initialize # 30000 events account to approximately 5MB @EVENTS_CHUNK_SIZE = 30000 + @EVENTS_EMIT_STREAM = true # Initializing events count for telemetry @eventsCount = 0 @@ -36,6 +37,15 @@ def configure(conf) def start if @run_interval + if !ENV["EVENTS_CHUNK_SIZE"].nil? && !ENV["EVENTS_CHUNK_SIZE"].empty? + @EVENTS_CHUNK_SIZE = ENV["EVENTS_CHUNK_SIZE"] + end + $log.info("in_kube_events::start : EVENTS_CHUNK_SIZE @ #{@EVENTS_CHUNK_SIZE}") + + if !ENV["EVENTS_EMIT_STREAM"].nil? && !ENV["EVENTS_EMIT_STREAM"].empty? + @EVENTS_EMIT_STREAM = ENV["EVENTS_EMIT_STREAM"].to_s.downcase == "true" ? 
true : false + end + $log.info("in_kube_events::start : EVENTS_EMIT_STREAM @ #{@EVENTS_EMIT_STREAM}") @finished = false @condition = ConditionVariable.new @mutex = Mutex.new @@ -82,6 +92,8 @@ def enumerate end $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}") if (!eventList.nil? && !eventList.empty? && eventList.key?("items") && !eventList["items"].nil? && !eventList["items"].empty?) + eventsCount = eventList["items"].length + $log.info "in_kube_events::enumerate:Received number of events is eventList is #{eventsCount} @ #{Time.now.utc.iso8601}" newEventQueryState = parse_and_emit_records(eventList, eventQueryState, newEventQueryState, batchTime) else $log.warn "in_kube_events::enumerate:Received empty eventList" @@ -91,6 +103,8 @@ def enumerate while (!continuationToken.nil? && !continuationToken.empty?) continuationToken, eventList = KubernetesApiClient.getResourcesAndContinuationToken("events?fieldSelector=type!=Normal&limit=#{@EVENTS_CHUNK_SIZE}&continue=#{continuationToken}") if (!eventList.nil? && !eventList.empty? && eventList.key?("items") && !eventList["items"].nil? && !eventList["items"].empty?) 
+ eventsCount = eventList["items"].length + $log.info "in_kube_events::enumerate:Received number of events is eventList is #{eventsCount} @ #{Time.now.utc.iso8601}" newEventQueryState = parse_and_emit_records(eventList, eventQueryState, newEventQueryState, batchTime) else $log.warn "in_kube_events::enumerate:Received empty eventList" @@ -156,7 +170,9 @@ def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTim eventStream.add(emitTime, wrapper) if wrapper @eventsCount += 1 end - router.emit_stream(@tag, eventStream) if eventStream + if @EVENTS_EMIT_STREAM + router.emit_stream(@tag, eventStream) if eventStream + end rescue => errorStr $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) diff --git a/source/plugins/ruby/in_kubestate_deployments.rb b/source/plugins/ruby/in_kubestate_deployments.rb index bcf397150..e1679626d 100644 --- a/source/plugins/ruby/in_kubestate_deployments.rb +++ b/source/plugins/ruby/in_kubestate_deployments.rb @@ -2,230 +2,238 @@ # frozen_string_literal: true module Fluent - class Kube_Kubestate_Deployments_Input < Input - Plugin.register_input("kubestatedeployments", self) - @@istestvar = ENV["ISTEST"] - # telemetry - To keep telemetry cost reasonable, we keep track of the max deployments over a period of 15m - @@deploymentsCount = 0 - - - - def initialize - super - require "yajl/json_gem" - require "yajl" - require "date" - require "time" - - require_relative "KubernetesApiClient" - require_relative "oms_common" - require_relative "omslog" - require_relative "ApplicationInsightsUtility" - require_relative "constants" - - # roughly each deployment is 8k - # 1000 deployments account to approximately 8MB - @DEPLOYMENTS_CHUNK_SIZE = 1000 - @DEPLOYMENTS_API_GROUP = "apps" - @@telemetryLastSentTime = DateTime.now.to_time.to_i - - - @deploymentsRunningTotal = 0 - - @NodeName = OMS::Common.get_hostname - @ClusterId = KubernetesApiClient.getClusterId - @ClusterName = 
KubernetesApiClient.getClusterName - end - - config_param :run_interval, :time, :default => 60 - config_param :tag, :string, :default => Constants::INSIGHTSMETRICS_FLUENT_TAG - - def configure(conf) - super - end - - def start - if @run_interval - @finished = false - @condition = ConditionVariable.new - @mutex = Mutex.new - @thread = Thread.new(&method(:run_periodic)) + class Kube_Kubestate_Deployments_Input < Input + Plugin.register_input("kubestatedeployments", self) + @@istestvar = ENV["ISTEST"] + # telemetry - To keep telemetry cost reasonable, we keep track of the max deployments over a period of 15m + @@deploymentsCount = 0 + + def initialize + super + require "yajl/json_gem" + require "yajl" + require "date" + require "time" + + require_relative "KubernetesApiClient" + require_relative "oms_common" + require_relative "omslog" + require_relative "ApplicationInsightsUtility" + require_relative "constants" + + # roughly each deployment is 8k + # 1000 deployments account to approximately 8MB + @DEPLOYMENTS_CHUNK_SIZE = 1000 + @DEPLOYMENTS_EMIT_STREAM = true + @DEPLOYMENTS_API_GROUP = "apps" + @@telemetryLastSentTime = DateTime.now.to_time.to_i + + @deploymentsRunningTotal = 0 + + @NodeName = OMS::Common.get_hostname + @ClusterId = KubernetesApiClient.getClusterId + @ClusterName = KubernetesApiClient.getClusterName + end + + config_param :run_interval, :time, :default => 60 + config_param :tag, :string, :default => Constants::INSIGHTSMETRICS_FLUENT_TAG + + def configure(conf) + super + end + + def start + if @run_interval + if !ENV["DEPLOYMENTS_CHUNK_SIZE"].nil? && !ENV["DEPLOYMENTS_CHUNK_SIZE"].empty? + @DEPLOYMENTS_CHUNK_SIZE = ENV["DEPLOYMENTS_CHUNK_SIZE"] end - end - - def shutdown - if @run_interval - @mutex.synchronize { - @finished = true - @condition.signal - } - @thread.join + $log.info("in_kubestate_deployments::start : DEPLOYMENTS_CHUNK_SIZE @ #{@DEPLOYMENTS_CHUNK_SIZE}") + + if !ENV["DEPLOYMENTS_EMIT_STREAM"].nil? 
&& !ENV["DEPLOYMENTS_EMIT_STREAM"].empty? + @DEPLOYMENTS_EMIT_STREAM = ENV["DEPLOYMENTS_EMIT_STREAM"].to_s.downcase == "true" ? true : false end + $log.info("in_kubestate_deployments::start : DEPLOYMENTS_EMIT_STREAM @ #{@DEPLOYMENTS_EMIT_STREAM}") + + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) end - - def enumerate - begin - deploymentList = nil - currentTime = Time.now - batchTime = currentTime.utc.iso8601 - - #set the running total for this batch to 0 - @deploymentsRunningTotal = 0 - - # Initializing continuation token to nil - continuationToken = nil - $log.info("in_kubestate_deployments::enumerate : Getting deployments from Kube API @ #{Time.now.utc.iso8601}") - continuationToken, deploymentList = KubernetesApiClient.getResourcesAndContinuationToken("deployments?limit=#{@DEPLOYMENTS_CHUNK_SIZE}", api_group: @DEPLOYMENTS_API_GROUP) - $log.info("in_kubestate_deployments::enumerate : Done getting deployments from Kube API @ #{Time.now.utc.iso8601}") + end + + def shutdown + if @run_interval + @mutex.synchronize { + @finished = true + @condition.signal + } + @thread.join + end + end + + def enumerate + begin + deploymentList = nil + currentTime = Time.now + batchTime = currentTime.utc.iso8601 + + #set the running total for this batch to 0 + @deploymentsRunningTotal = 0 + + # Initializing continuation token to nil + continuationToken = nil + $log.info("in_kubestate_deployments::enumerate : Getting deployments from Kube API @ #{Time.now.utc.iso8601}") + continuationToken, deploymentList = KubernetesApiClient.getResourcesAndContinuationToken("deployments?limit=#{@DEPLOYMENTS_CHUNK_SIZE}", api_group: @DEPLOYMENTS_API_GROUP) + $log.info("in_kubestate_deployments::enumerate : Done getting deployments from Kube API @ #{Time.now.utc.iso8601}") + if (!deploymentList.nil? && !deploymentList.empty? && deploymentList.key?("items") && !deploymentList["items"].nil? && !deploymentList["items"].empty?) 
+ parse_and_emit_records(deploymentList, batchTime) + else + $log.warn "in_kubestate_deployments::enumerate:Received empty deploymentList" + end + + #If we receive a continuation token, make calls, process and flush data until we have processed all data + while (!continuationToken.nil? && !continuationToken.empty?) + continuationToken, deploymentList = KubernetesApiClient.getResourcesAndContinuationToken("deployments?limit=#{@DEPLOYMENTS_CHUNK_SIZE}&continue=#{continuationToken}", api_group: @DEPLOYMENTS_API_GROUP) if (!deploymentList.nil? && !deploymentList.empty? && deploymentList.key?("items") && !deploymentList["items"].nil? && !deploymentList["items"].empty?) parse_and_emit_records(deploymentList, batchTime) else $log.warn "in_kubestate_deployments::enumerate:Received empty deploymentList" end - - #If we receive a continuation token, make calls, process and flush data until we have processed all data - while (!continuationToken.nil? && !continuationToken.empty?) - continuationToken, deploymentList = KubernetesApiClient.getResourcesAndContinuationToken("deployments?limit=#{@DEPLOYMENTS_CHUNK_SIZE}&continue=#{continuationToken}", api_group: @DEPLOYMENTS_API_GROUP) - if (!deploymentList.nil? && !deploymentList.empty? && deploymentList.key?("items") && !deploymentList["items"].nil? && !deploymentList["items"].empty?) 
- parse_and_emit_records(deploymentList, batchTime) - else - $log.warn "in_kubestate_deployments::enumerate:Received empty deploymentList" - end + end + + # Setting this to nil so that we dont hold memory until GC kicks in + deploymentList = nil + + $log.info("successfully emitted a total of #{@deploymentsRunningTotal} kube_state_deployment metrics") + # Flush AppInsights telemetry once all the processing is done, only if the number of events flushed is greater than 0 + if (@deploymentsRunningTotal > @@deploymentsCount) + @@deploymentsCount = @deploymentsRunningTotal + end + if (((DateTime.now.to_time.to_i - @@telemetryLastSentTime).abs) / 60) >= Constants::KUBE_STATE_TELEMETRY_FLUSH_INTERVAL_IN_MINUTES + #send telemetry + $log.info "sending deployemt telemetry..." + ApplicationInsightsUtility.sendMetricTelemetry("MaxDeploymentCount", @@deploymentsCount, {}) + #reset last sent value & time + @@deploymentsCount = 0 + @@telemetryLastSentTime = DateTime.now.to_time.to_i + end + rescue => errorStr + $log.warn "in_kubestate_deployments::enumerate:Failed in enumerate: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_deployments::enumerate:Failed in enumerate: #{errorStr}") + end + end # end enumerate + + def parse_and_emit_records(deployments, batchTime = Time.utc.iso8601) + metricItems = [] + insightsMetricsEventStream = MultiEventStream.new + begin + metricInfo = deployments + metricInfo["items"].each do |deployment| + deploymentName = deployment["metadata"]["name"] + deploymentNameSpace = deployment["metadata"]["namespace"] + deploymentCreatedTime = "" + if !deployment["metadata"]["creationTimestamp"].nil? + deploymentCreatedTime = deployment["metadata"]["creationTimestamp"] + end + deploymentStrategy = "RollingUpdate" #default when not specified as per spec + if !deployment["spec"]["strategy"].nil? && !deployment["spec"]["strategy"]["type"].nil? 
+ deploymentStrategy = deployment["spec"]["strategy"]["type"] end - - # Setting this to nil so that we dont hold memory until GC kicks in - deploymentList = nil - - $log.info("successfully emitted a total of #{@deploymentsRunningTotal} kube_state_deployment metrics") - # Flush AppInsights telemetry once all the processing is done, only if the number of events flushed is greater than 0 - if (@deploymentsRunningTotal > @@deploymentsCount) - @@deploymentsCount = @deploymentsRunningTotal + deploymentSpecReplicas = 1 #default is 1 as per k8s spec + if !deployment["spec"]["replicas"].nil? + deploymentSpecReplicas = deployment["spec"]["replicas"] end - if (((DateTime.now.to_time.to_i - @@telemetryLastSentTime).abs)/60 ) >= Constants::KUBE_STATE_TELEMETRY_FLUSH_INTERVAL_IN_MINUTES - #send telemetry - $log.info "sending deployemt telemetry..." - ApplicationInsightsUtility.sendMetricTelemetry("MaxDeploymentCount", @@deploymentsCount, {}) - #reset last sent value & time - @@deploymentsCount = 0 - @@telemetryLastSentTime = DateTime.now.to_time.to_i + deploymentStatusReadyReplicas = 0 + if !deployment["status"]["readyReplicas"].nil? + deploymentStatusReadyReplicas = deployment["status"]["readyReplicas"] end - rescue => errorStr - $log.warn "in_kubestate_deployments::enumerate:Failed in enumerate: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_deployments::enumerate:Failed in enumerate: #{errorStr}") + deploymentStatusUpToDateReplicas = 0 + if !deployment["status"]["updatedReplicas"].nil? + deploymentStatusUpToDateReplicas = deployment["status"]["updatedReplicas"] + end + deploymentStatusAvailableReplicas = 0 + if !deployment["status"]["availableReplicas"].nil? 
+ deploymentStatusAvailableReplicas = deployment["status"]["availableReplicas"] + end + + metricItem = {} + metricItem["CollectionTime"] = batchTime + metricItem["Computer"] = @NodeName + metricItem["Name"] = Constants::INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_DEPLOYMENT_STATE + metricItem["Value"] = deploymentStatusReadyReplicas + metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN + metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_KUBESTATE_NAMESPACE + + metricTags = {} + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = @ClusterId + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = @ClusterName + metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_NAME] = deploymentName + metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = deploymentNameSpace + metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STRATEGY] = deploymentStrategy + metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_CREATIONTIME] = deploymentCreatedTime + metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_SPEC_REPLICAS] = deploymentSpecReplicas + metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_UPDATED] = deploymentStatusUpToDateReplicas + metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_AVAILABLE] = deploymentStatusAvailableReplicas + + metricItem["Tags"] = metricTags + + metricItems.push(metricItem) end - end # end enumerate - - def parse_and_emit_records(deployments, batchTime = Time.utc.iso8601) - metricItems = [] - insightsMetricsEventStream = MultiEventStream.new - begin - metricInfo = deployments - metricInfo["items"].each do |deployment| - deploymentName = deployment["metadata"]["name"] - deploymentNameSpace = deployment["metadata"]["namespace"] - deploymentCreatedTime = "" - if !deployment["metadata"]["creationTimestamp"].nil? 
- deploymentCreatedTime = deployment["metadata"]["creationTimestamp"] - end - deploymentStrategy = "RollingUpdate" #default when not specified as per spec - if !deployment["spec"]["strategy"].nil? && !deployment["spec"]["strategy"]["type"].nil? - deploymentStrategy = deployment["spec"]["strategy"]["type"] - end - deploymentSpecReplicas = 1 #default is 1 as per k8s spec - if !deployment["spec"]["replicas"].nil? - deploymentSpecReplicas = deployment["spec"]["replicas"] - end - deploymentStatusReadyReplicas = 0 - if !deployment["status"]["readyReplicas"].nil? - deploymentStatusReadyReplicas = deployment["status"]["readyReplicas"] - end - deploymentStatusUpToDateReplicas = 0 - if !deployment["status"]["updatedReplicas"].nil? - deploymentStatusUpToDateReplicas = deployment["status"]["updatedReplicas"] - end - deploymentStatusAvailableReplicas = 0 - if !deployment["status"]["availableReplicas"].nil? - deploymentStatusAvailableReplicas = deployment["status"]["availableReplicas"] - end - - metricItem = {} - metricItem["CollectionTime"] = batchTime - metricItem["Computer"] = @NodeName - metricItem["Name"] = Constants::INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_DEPLOYMENT_STATE - metricItem["Value"] = deploymentStatusReadyReplicas - metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN - metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_KUBESTATE_NAMESPACE - - metricTags = {} - metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = @ClusterId - metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = @ClusterName - metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_NAME] = deploymentName - metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = deploymentNameSpace - metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STRATEGY ] = deploymentStrategy - metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_CREATIONTIME] = deploymentCreatedTime - metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_SPEC_REPLICAS] = 
deploymentSpecReplicas - metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_UPDATED] = deploymentStatusUpToDateReplicas - metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_AVAILABLE] = deploymentStatusAvailableReplicas - - - metricItem["Tags"] = metricTags - - metricItems.push(metricItem) - end - - time = Time.now.to_f - metricItems.each do |insightsMetricsRecord| - wrapper = { - "DataType" => "INSIGHTS_METRICS_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], - } - insightsMetricsEventStream.add(time, wrapper) if wrapper - end - - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream - $log.info("successfully emitted #{metricItems.length()} kube_state_deployment metrics") - @deploymentsRunningTotal = @deploymentsRunningTotal + metricItems.length() - if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0) - $log.info("kubestatedeploymentsInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") - end - rescue => error - $log.warn("in_kubestate_deployments::parse_and_emit_records failed: #{error} ") - ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_deployments::parse_and_emit_records failed: #{error}") + + time = Time.now.to_f + metricItems.each do |insightsMetricsRecord| + wrapper = { + "DataType" => "INSIGHTS_METRICS_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], + } + insightsMetricsEventStream.add(time, wrapper) if wrapper + end + + if @DEPLOYMENTS_EMIT_STREAM + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + $log.info("successfully emitted #{metricItems.length()} kube_state_deployment metrics") + end + @deploymentsRunningTotal = @deploymentsRunningTotal + metricItems.length() + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0) + $log.info("kubestatedeploymentsInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") end - + rescue => error + $log.warn("in_kubestate_deployments::parse_and_emit_records failed: #{error} ") + ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_deployments::parse_and_emit_records failed: #{error}") end - - def run_periodic - @mutex.lock + end + + def run_periodic + @mutex.lock + done = @finished + @nextTimeToRun = Time.now + @waitTimeout = @run_interval + until done + @nextTimeToRun = @nextTimeToRun + @run_interval + @now = Time.now + if @nextTimeToRun <= @now + @waitTimeout = 1 + @nextTimeToRun = @now + else + @waitTimeout = @nextTimeToRun - @now + end + @condition.wait(@mutex, @waitTimeout) done = @finished - @nextTimeToRun = Time.now - @waitTimeout = @run_interval - until done - @nextTimeToRun = @nextTimeToRun + @run_interval - @now = Time.now - if @nextTimeToRun <= @now - @waitTimeout = 1 - @nextTimeToRun = @now - else - @waitTimeout = @nextTimeToRun - @now - end - @condition.wait(@mutex, @waitTimeout) - done = @finished - @mutex.unlock - if !done - begin - $log.info("in_kubestate_deployments::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}") - enumerate - $log.info("in_kubestate_deployments::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}") - rescue => errorStr - $log.warn "in_kubestate_deployments::run_periodic: enumerate Failed to retrieve kube deployments: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_deployments::run_periodic: enumerate Failed to retrieve kube deployments: #{errorStr}") - end + @mutex.unlock + if !done + begin + $log.info("in_kubestate_deployments::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}") + enumerate + $log.info("in_kubestate_deployments::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}") + rescue => errorStr + $log.warn "in_kubestate_deployments::run_periodic: enumerate Failed 
to retrieve kube deployments: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_deployments::run_periodic: enumerate Failed to retrieve kube deployments: #{errorStr}") end - @mutex.lock end - @mutex.unlock + @mutex.lock end + @mutex.unlock end -end \ No newline at end of file + end +end diff --git a/source/plugins/ruby/in_kubestate_hpa.rb b/source/plugins/ruby/in_kubestate_hpa.rb index 3ce63a75a..ac7d1e853 100644 --- a/source/plugins/ruby/in_kubestate_hpa.rb +++ b/source/plugins/ruby/in_kubestate_hpa.rb @@ -2,231 +2,238 @@ # frozen_string_literal: true module Fluent - class Kube_Kubestate_HPA_Input < Input - Plugin.register_input("kubestatehpa", self) - @@istestvar = ENV["ISTEST"] - - - def initialize - super - require "yajl/json_gem" - require "yajl" - require "time" - - require_relative "KubernetesApiClient" - require_relative "oms_common" - require_relative "omslog" - require_relative "ApplicationInsightsUtility" - require_relative "constants" - - # roughly each HPA is 3k - # 2000 HPAs account to approximately 6-7MB - @HPA_CHUNK_SIZE = 2000 - @HPA_API_GROUP = "autoscaling" - - # telemetry - @hpaCount = 0 - - @NodeName = OMS::Common.get_hostname - @ClusterId = KubernetesApiClient.getClusterId - @ClusterName = KubernetesApiClient.getClusterName - end - - config_param :run_interval, :time, :default => 60 - config_param :tag, :string, :default => Constants::INSIGHTSMETRICS_FLUENT_TAG - - def configure(conf) - super - end - - def start - if @run_interval - @finished = false - @condition = ConditionVariable.new - @mutex = Mutex.new - @thread = Thread.new(&method(:run_periodic)) + class Kube_Kubestate_HPA_Input < Input + Plugin.register_input("kubestatehpa", self) + @@istestvar = ENV["ISTEST"] + + def initialize + super + require "yajl/json_gem" + require "yajl" + require "time" + + require_relative "KubernetesApiClient" + require_relative "oms_common" + require_relative "omslog" + require_relative "ApplicationInsightsUtility" + 
require_relative "constants" + + # roughly each HPA is 3k + # 2000 HPAs account to approximately 6-7MB + @HPA_CHUNK_SIZE = 2000 + @HPA_API_GROUP = "autoscaling" + @HPA_EMIT_STREAM = true + + # telemetry + @hpaCount = 0 + + @NodeName = OMS::Common.get_hostname + @ClusterId = KubernetesApiClient.getClusterId + @ClusterName = KubernetesApiClient.getClusterName + end + + config_param :run_interval, :time, :default => 60 + config_param :tag, :string, :default => Constants::INSIGHTSMETRICS_FLUENT_TAG + + def configure(conf) + super + end + + def start + if @run_interval + if !ENV["HPA_CHUNK_SIZE"].nil? && !ENV["HPA_CHUNK_SIZE"].empty? + @HPA_CHUNK_SIZE = ENV["HPA_CHUNK_SIZE"] end - end - - def shutdown - if @run_interval - @mutex.synchronize { - @finished = true - @condition.signal - } - @thread.join + $log.info("in_kubestate_hpa::start : HPA_CHUNK_SIZE @ #{@HPA_CHUNK_SIZE}") + + if !ENV["HPA_EMIT_STREAM"].nil? && !ENV["HPA_EMIT_STREAM"].empty? + @HPA_EMIT_STREAM = ENV["HPA_EMIT_STREAM"].to_s.downcase == "true" ? 
true : false end + $log.info("in_kubestate_hpa::start : HPA_EMIT_STREAM @ #{@HPA_EMIT_STREAM}") + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) + end + end + + def shutdown + if @run_interval + @mutex.synchronize { + @finished = true + @condition.signal + } + @thread.join end - - def enumerate - begin - hpaList = nil - currentTime = Time.now - batchTime = currentTime.utc.iso8601 - - @hpaCount = 0 - - # Initializing continuation token to nil - continuationToken = nil - $log.info("in_kubestate_hpa::enumerate : Getting HPAs from Kube API @ #{Time.now.utc.iso8601}") - continuationToken, hpaList = KubernetesApiClient.getResourcesAndContinuationToken("horizontalpodautoscalers?limit=#{@HPA_CHUNK_SIZE}", api_group: @HPA_API_GROUP) - $log.info("in_kubestate_hpa::enumerate : Done getting HPAs from Kube API @ #{Time.now.utc.iso8601}") + end + + def enumerate + begin + hpaList = nil + currentTime = Time.now + batchTime = currentTime.utc.iso8601 + + @hpaCount = 0 + + # Initializing continuation token to nil + continuationToken = nil + $log.info("in_kubestate_hpa::enumerate : Getting HPAs from Kube API @ #{Time.now.utc.iso8601}") + continuationToken, hpaList = KubernetesApiClient.getResourcesAndContinuationToken("horizontalpodautoscalers?limit=#{@HPA_CHUNK_SIZE}", api_group: @HPA_API_GROUP) + $log.info("in_kubestate_hpa::enumerate : Done getting HPAs from Kube API @ #{Time.now.utc.iso8601}") + if (!hpaList.nil? && !hpaList.empty? && hpaList.key?("items") && !hpaList["items"].nil? && !hpaList["items"].empty?) + parse_and_emit_records(hpaList, batchTime) + else + $log.warn "in_kubestate_hpa::enumerate:Received empty hpaList" + end + + #If we receive a continuation token, make calls, process and flush data until we have processed all data + while (!continuationToken.nil? && !continuationToken.empty?) 
+ continuationToken, hpaList = KubernetesApiClient.getResourcesAndContinuationToken("horizontalpodautoscalers?limit=#{@HPA_CHUNK_SIZE}&continue=#{continuationToken}", api_group: @HPA_API_GROUP) if (!hpaList.nil? && !hpaList.empty? && hpaList.key?("items") && !hpaList["items"].nil? && !hpaList["items"].empty?) parse_and_emit_records(hpaList, batchTime) else $log.warn "in_kubestate_hpa::enumerate:Received empty hpaList" end - - #If we receive a continuation token, make calls, process and flush data until we have processed all data - while (!continuationToken.nil? && !continuationToken.empty?) - continuationToken, hpaList = KubernetesApiClient.getResourcesAndContinuationToken("horizontalpodautoscalers?limit=#{@HPA_CHUNK_SIZE}&continue=#{continuationToken}", api_group: @HPA_API_GROUP) - if (!hpaList.nil? && !hpaList.empty? && hpaList.key?("items") && !hpaList["items"].nil? && !hpaList["items"].empty?) - parse_and_emit_records(hpaList, batchTime) - else - $log.warn "in_kubestate_hpa::enumerate:Received empty hpaList" + end + + # Setting this to nil so that we dont hold memory until GC kicks in + hpaList = nil + + # Flush AppInsights telemetry once all the processing is done, only if the number of events flushed is greater than 0 + if (@hpaCount > 0) + # this will not be a useful telemetry, as hpa counts will not be huge, just log for now + $log.info("in_kubestate_hpa::hpaCount= #{hpaCount}") + #ApplicationInsightsUtility.sendMetricTelemetry("HPACount", @hpaCount, {}) + end + rescue => errorStr + $log.warn "in_kubestate_hpa::enumerate:Failed in enumerate: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_hpa::enumerate:Failed in enumerate: #{errorStr}") + end + end # end enumerate + + def parse_and_emit_records(hpas, batchTime = Time.utc.iso8601) + metricItems = [] + insightsMetricsEventStream = MultiEventStream.new + begin + metricInfo = hpas + metricInfo["items"].each do |hpa| + hpaName = hpa["metadata"]["name"] + hpaNameSpace = 
hpa["metadata"]["namespace"] + hpaCreatedTime = "" + if !hpa["metadata"]["creationTimestamp"].nil? + hpaCreatedTime = hpa["metadata"]["creationTimestamp"] + end + hpaSpecMinReplicas = 1 #default is 1 as per k8s spec + if !hpa["spec"]["minReplicas"].nil? + hpaSpecMinReplicas = hpa["spec"]["minReplicas"] + end + hpaSpecMaxReplicas = 0 + if !hpa["spec"]["maxReplicas"].nil? + hpaSpecMaxReplicas = hpa["spec"]["maxReplicas"] + end + hpaSpecScaleTargetKind = "" + hpaSpecScaleTargetName = "" + if !hpa["spec"]["scaleTargetRef"].nil? + if !hpa["spec"]["scaleTargetRef"]["kind"].nil? + hpaSpecScaleTargetKind = hpa["spec"]["scaleTargetRef"]["kind"] + end + if !hpa["spec"]["scaleTargetRef"]["name"].nil? + hpaSpecScaleTargetName = hpa["spec"]["scaleTargetRef"]["name"] end end - - # Setting this to nil so that we dont hold memory until GC kicks in - hpaList = nil - - # Flush AppInsights telemetry once all the processing is done, only if the number of events flushed is greater than 0 - if (@hpaCount > 0) - # this will not be a useful telemetry, as hpa counts will not be huge, just log for now - $log.info("in_kubestate_hpa::hpaCount= #{hpaCount}") - #ApplicationInsightsUtility.sendMetricTelemetry("HPACount", @hpaCount, {}) + hpaStatusCurrentReplicas = 0 + if !hpa["status"]["currentReplicas"].nil? + hpaStatusCurrentReplicas = hpa["status"]["currentReplicas"] end - rescue => errorStr - $log.warn "in_kubestate_hpa::enumerate:Failed in enumerate: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_hpa::enumerate:Failed in enumerate: #{errorStr}") + hpaStatusDesiredReplicas = 0 + if !hpa["status"]["desiredReplicas"].nil? + hpaStatusDesiredReplicas = hpa["status"]["desiredReplicas"] + end + + hpaStatuslastScaleTime = "" + if !hpa["status"]["lastScaleTime"].nil? 
+ hpaStatuslastScaleTime = hpa["status"]["lastScaleTime"] + end + + metricItem = {} + metricItem["CollectionTime"] = batchTime + metricItem["Computer"] = @NodeName + metricItem["Name"] = Constants::INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_HPA_STATE + metricItem["Value"] = hpaStatusCurrentReplicas + metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN + metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_KUBESTATE_NAMESPACE + + metricTags = {} + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = @ClusterId + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = @ClusterName + metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_NAME] = hpaName + metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = hpaNameSpace + metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_CREATIONTIME] = hpaCreatedTime + metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MIN_REPLICAS] = hpaSpecMinReplicas + metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MAX_REPLICAS] = hpaSpecMaxReplicas + metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_KIND] = hpaSpecScaleTargetKind + metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_NAME] = hpaSpecScaleTargetName + metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_DESIRED_REPLICAS] = hpaStatusDesiredReplicas + metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_LAST_SCALE_TIME] = hpaStatuslastScaleTime + + metricItem["Tags"] = metricTags + + metricItems.push(metricItem) end - end # end enumerate - - def parse_and_emit_records(hpas, batchTime = Time.utc.iso8601) - metricItems = [] - insightsMetricsEventStream = MultiEventStream.new - begin - metricInfo = hpas - metricInfo["items"].each do |hpa| - hpaName = hpa["metadata"]["name"] - hpaNameSpace = hpa["metadata"]["namespace"] - hpaCreatedTime = "" - if !hpa["metadata"]["creationTimestamp"].nil? 
- hpaCreatedTime = hpa["metadata"]["creationTimestamp"] - end - hpaSpecMinReplicas = 1 #default is 1 as per k8s spec - if !hpa["spec"]["minReplicas"].nil? - hpaSpecMinReplicas = hpa["spec"]["minReplicas"] - end - hpaSpecMaxReplicas = 0 - if !hpa["spec"]["maxReplicas"].nil? - hpaSpecMaxReplicas = hpa["spec"]["maxReplicas"] - end - hpaSpecScaleTargetKind = "" - hpaSpecScaleTargetName = "" - if !hpa["spec"]["scaleTargetRef"].nil? - if !hpa["spec"]["scaleTargetRef"]["kind"].nil? - hpaSpecScaleTargetKind = hpa["spec"]["scaleTargetRef"]["kind"] - end - if !hpa["spec"]["scaleTargetRef"]["name"].nil? - hpaSpecScaleTargetName = hpa["spec"]["scaleTargetRef"]["name"] - end - - end - hpaStatusCurrentReplicas = 0 - if !hpa["status"]["currentReplicas"].nil? - hpaStatusCurrentReplicas = hpa["status"]["currentReplicas"] - end - hpaStatusDesiredReplicas = 0 - if !hpa["status"]["desiredReplicas"].nil? - hpaStatusDesiredReplicas = hpa["status"]["desiredReplicas"] - end - - hpaStatuslastScaleTime = "" - if !hpa["status"]["lastScaleTime"].nil? 
- hpaStatuslastScaleTime = hpa["status"]["lastScaleTime"] - end - - - metricItem = {} - metricItem["CollectionTime"] = batchTime - metricItem["Computer"] = @NodeName - metricItem["Name"] = Constants::INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_HPA_STATE - metricItem["Value"] = hpaStatusCurrentReplicas - metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN - metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_KUBESTATE_NAMESPACE - - metricTags = {} - metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = @ClusterId - metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = @ClusterName - metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_NAME] = hpaName - metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = hpaNameSpace - metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_CREATIONTIME] = hpaCreatedTime - metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MIN_REPLICAS] = hpaSpecMinReplicas - metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MAX_REPLICAS] = hpaSpecMaxReplicas - metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_KIND] = hpaSpecScaleTargetKind - metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_NAME] = hpaSpecScaleTargetName - metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_DESIRED_REPLICAS] = hpaStatusDesiredReplicas - metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_LAST_SCALE_TIME] = hpaStatuslastScaleTime - - - metricItem["Tags"] = metricTags - - metricItems.push(metricItem) - end - time = Time.now.to_f - metricItems.each do |insightsMetricsRecord| - wrapper = { - "DataType" => "INSIGHTS_METRICS_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], - } - insightsMetricsEventStream.add(time, wrapper) if wrapper - end - - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream - 
$log.info("successfully emitted #{metricItems.length()} kube_state_hpa metrics") - if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0) - $log.info("kubestatehpaInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") - end - rescue => error - $log.warn("in_kubestate_hpa::parse_and_emit_records failed: #{error} ") - ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_hpa::parse_and_emit_records failed: #{error}") + time = Time.now.to_f + metricItems.each do |insightsMetricsRecord| + wrapper = { + "DataType" => "INSIGHTS_METRICS_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], + } + insightsMetricsEventStream.add(time, wrapper) if wrapper + end + + if @HPA_EMIT_STREAM + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + $log.info("successfully emitted #{metricItems.length()} kube_state_hpa metrics") end - + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0) + $log.info("kubestatehpaInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + rescue => error + $log.warn("in_kubestate_hpa::parse_and_emit_records failed: #{error} ") + ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_hpa::parse_and_emit_records failed: #{error}") end - - def run_periodic - @mutex.lock + end + + def run_periodic + @mutex.lock + done = @finished + @nextTimeToRun = Time.now + @waitTimeout = @run_interval + until done + @nextTimeToRun = @nextTimeToRun + @run_interval + @now = Time.now + if @nextTimeToRun <= @now + @waitTimeout = 1 + @nextTimeToRun = @now + else + @waitTimeout = @nextTimeToRun - @now + end + @condition.wait(@mutex, @waitTimeout) done = @finished - @nextTimeToRun = Time.now - @waitTimeout = @run_interval - until done - @nextTimeToRun = @nextTimeToRun + @run_interval - @now = Time.now - if @nextTimeToRun <= @now - @waitTimeout = 1 - @nextTimeToRun = @now - else - @waitTimeout = @nextTimeToRun - @now - end - @condition.wait(@mutex, @waitTimeout) - done = @finished - @mutex.unlock - if !done - begin - $log.info("in_kubestate_hpa::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}") - enumerate - $log.info("in_kubestate_hpa::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}") - rescue => errorStr - $log.warn "in_kubestate_hpa::run_periodic: enumerate Failed to retrieve kube hpas: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_hpa::run_periodic: enumerate Failed to retrieve kube hpas: #{errorStr}") - end + @mutex.unlock + if !done + begin + $log.info("in_kubestate_hpa::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}") + enumerate + $log.info("in_kubestate_hpa::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}") + rescue => errorStr + $log.warn "in_kubestate_hpa::run_periodic: enumerate Failed to retrieve kube hpas: #{errorStr}" + 
ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_hpa::run_periodic: enumerate Failed to retrieve kube hpas: #{errorStr}") end - @mutex.lock end - @mutex.unlock + @mutex.lock end + @mutex.unlock end -end \ No newline at end of file + end +end From 6073fed8d0c32e4607519ac6075457beb1b4ca49 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sun, 29 Nov 2020 16:46:06 -0800 Subject: [PATCH 04/45] have separate function parseNodeLimits --- source/plugins/ruby/KubernetesApiClient.rb | 49 ++++++++++++++++++++-- source/plugins/ruby/in_kube_nodes.rb | 8 ++-- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index 13c084a5c..893fd438f 100644 --- a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -569,7 +569,50 @@ def getContainerResourceRequestsAndLimitsAsInsightsMetrics(pod, metricCategory, return metricItems end #getContainerResourceRequestAndLimitsAsInsightsMetrics - def parseNodeLimits(node, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) + def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) + metricItems = [] + begin + metricInfo = metricJSON + clusterId = getClusterId + #Since we are getting all node data at the same time and kubernetes doesnt specify a timestamp for the capacity and allocation metrics, + #if we are coming up with the time it should be same for all nodes + #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + metricInfo["items"].each do |node| + if (!node["status"][metricCategory].nil?) 
+ + # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" + metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect]) + + metricItem = {} + metricItem["DataItems"] = [] + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = node["metadata"]["name"] + # Adding this so that it is not set by base omsagent since it was not set earlier and being set by base omsagent + metricProps["Computer"] = node["metadata"]["name"] + metricProps["ObjectName"] = "K8SNode" + metricProps["InstanceName"] = clusterId + "/" + node["metadata"]["name"] + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) + #push node level metrics to a inmem hash so that we can use it looking up at container level. 
+ #Currently if container level cpu & memory limits are not defined we default to node level limits + @@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue + #@Log.info ("Node metric hash: #{@@NodeMetrics}") + end + end + rescue => error + @Log.warn("parseNodeLimits failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") + end + return metricItems + end #parseNodeLimits + + def parseNodeLimitsFromNodeItem(node, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) metricItem = {} begin clusterId = getClusterId @@ -602,10 +645,10 @@ def parseNodeLimits(node, metricCategory, metricNameToCollect, metricNametoRetur #@Log.info ("Node metric hash: #{@@NodeMetrics}") end rescue => error - @Log.warn("parseNodeLimits failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") + @Log.warn("parseNodeLimitsFromNodeItem failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") end return metricItem - end #parseNodeLimits + end #parseNodeLimitsFromNodeItem def parseNodeLimitsAsInsightsMetrics(node, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) metricItem = {} diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index 8346a1a2b..e9c00a642 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -202,19 +202,19 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) # node metrics records nodeMetricRecords = [] - nodeMetricRecord = KubernetesApiClient.parseNodeLimits(item, "allocatable", "cpu", "cpuAllocatableNanoCores", batchTime) + nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "allocatable", "cpu", "cpuAllocatableNanoCores", batchTime) if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? 
nodeMetricRecords.push(nodeMetricRecord) end - nodeMetricRecord = KubernetesApiClient.parseNodeLimits(item, "allocatable", "memory", "memoryAllocatableBytes", batchTime) + nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "allocatable", "memory", "memoryAllocatableBytes", batchTime) if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? nodeMetricRecords.push(nodeMetricRecord) end - nodeMetricRecord = KubernetesApiClient.parseNodeLimits(item, "capacity", "cpu", "cpuCapacityNanoCores", batchTime) + nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "capacity", "cpu", "cpuCapacityNanoCores", batchTime) if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? nodeMetricRecords.push(nodeMetricRecord) end - nodeMetricRecord = KubernetesApiClient.parseNodeLimits(item, "capacity", "memory", "memoryCapacityBytes", batchTime) + nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "capacity", "memory", "memoryCapacityBytes", batchTime) if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? nodeMetricRecords.push(nodeMetricRecord) end From 97f55f75d7c60b7ebf01659754e215b2e6090b85 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sun, 29 Nov 2020 16:53:18 -0800 Subject: [PATCH 05/45] refactor code --- source/plugins/ruby/KubernetesApiClient.rb | 27 ++-------------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index 893fd438f..eb2aa3425 100644 --- a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -578,32 +578,9 @@ def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNamet #if we are coming up with the time it should be same for all nodes #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z metricInfo["items"].each do |node| - if (!node["status"][metricCategory].nil?) 
- - # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" - metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect]) - - metricItem = {} - metricItem["DataItems"] = [] - metricProps = {} - metricProps["Timestamp"] = metricTime - metricProps["Host"] = node["metadata"]["name"] - # Adding this so that it is not set by base omsagent since it was not set earlier and being set by base omsagent - metricProps["Computer"] = node["metadata"]["name"] - metricProps["ObjectName"] = "K8SNode" - metricProps["InstanceName"] = clusterId + "/" + node["metadata"]["name"] - metricProps["Collections"] = [] - metricCollections = {} - metricCollections["CounterName"] = metricNametoReturn - metricCollections["Value"] = metricValue - - metricProps["Collections"].push(metricCollections) - metricItem["DataItems"].push(metricProps) + metricItem = parseNodeLimitsFromNodeItem(node, metricCategory, metricNameToCollect, metricNametoReturn, metricTime) + if !metricItem.nil? && !metricItem.empty? metricItems.push(metricItem) - #push node level metrics to a inmem hash so that we can use it looking up at container level. 
- #Currently if container level cpu & memory limits are not defined we default to node level limits - @@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue - #@Log.info ("Node metric hash: #{@@NodeMetrics}") end end rescue => error From abc28c27142959a10832aacb138f73a85a9935db Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sun, 29 Nov 2020 17:41:14 -0800 Subject: [PATCH 06/45] fix crash --- source/plugins/ruby/in_kube_nodes.rb | 257 ++++++++++---------- source/plugins/ruby/in_kube_podinventory.rb | 5 - 2 files changed, 126 insertions(+), 136 deletions(-) diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index e9c00a642..89a2ff7ba 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -59,11 +59,6 @@ def start if !ENV["NODES_EMIT_STREAM_BATCH_SIZE"].nil? && !ENV["NODES_EMIT_STREAM_BATCH_SIZE"].empty? @NODES_EMIT_STREAM_BATCH_SIZE = ENV["NODES_EMIT_STREAM_BATCH_SIZE"].to_i - NodesChunkSize = @NODES_CHUNK_SIZE.to_i - if @NODES_EMIT_STREAM_BATCH_SIZE > NodesChunkSize - $log.info("in_kube_nodes::start : NODES_EMIT_STREAM_BATCH_SIZE cant be greater than nodes chunksize @ #{@NODES_CHUNK_SIZE}") - @NODES_EMIT_STREAM_BATCH_SIZE = NodesChunkSize - end end $log.info("in_kube_nodes::start : NODES_EMIT_STREAM_BATCH_SIZE @ #{@NODES_EMIT_STREAM_BATCH_SIZE}") @@ -163,147 +158,147 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) # node inventory nodeInventoryRecord = getNodeInventoryRecord(item, batchTime) wrapper = { - "DataType" => "KUBE_NODE_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [nodeInventoryRecord.each { |k, v| nodeInventoryRecord[k] = v }], + "DataType" => "KUBE_NODE_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [nodeInventoryRecord.each { |k, v| nodeInventoryRecord[k] = v }], } eventStream.add(emitTime, wrapper) if wrapper if 
@NODES_EMIT_STREAM_BATCH_SIZE > 0 && eventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE - if @NODES_EMIT_STREAM - $log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(@tag, eventStream) if eventStream - end - if @MDM_KUBE_NODE_INVENTORY_EMIT_STREAM - $log.info("in_kube_node::parse_and_emit_records: number of mdm node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream - end - if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) - $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") - end - eventStream = MultiEventStream.new + if @NODES_EMIT_STREAM + $log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@tag, eventStream) if eventStream + end + if @MDM_KUBE_NODE_INVENTORY_EMIT_STREAM + $log.info("in_kube_node::parse_and_emit_records: number of mdm node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream + end + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) + $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + eventStream = MultiEventStream.new end - # container node inventory - containerNodeInventoryRecord = getContainerNodeInventoryRecord(item, batchTime) - containerNodeInventoryWrapper = { - "DataType" => "CONTAINER_NODE_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }], - } - containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper + # container node inventory + containerNodeInventoryRecord = getContainerNodeInventoryRecord(item, batchTime) + containerNodeInventoryWrapper = { + "DataType" => "CONTAINER_NODE_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }], + } + containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper - if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && containerNodeInventoryEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE - if @CONTAINER_NODE_INVENTORY_EMIT_STREAM - $log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream - end - containerNodeInventoryEventStream = MultiEventStream.new + if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && containerNodeInventoryEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE + if @CONTAINER_NODE_INVENTORY_EMIT_STREAM + $log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if 
containerNodeInventoryEventStream end + containerNodeInventoryEventStream = MultiEventStream.new + end - # node metrics records - nodeMetricRecords = [] - nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "allocatable", "cpu", "cpuAllocatableNanoCores", batchTime) - if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? - nodeMetricRecords.push(nodeMetricRecord) - end - nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "allocatable", "memory", "memoryAllocatableBytes", batchTime) - if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? - nodeMetricRecords.push(nodeMetricRecord) - end - nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "capacity", "cpu", "cpuCapacityNanoCores", batchTime) - if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? - nodeMetricRecords.push(nodeMetricRecord) - end - nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "capacity", "memory", "memoryCapacityBytes", batchTime) - if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? - nodeMetricRecords.push(nodeMetricRecord) - end - nodeMetricRecords.each do |metricRecord| - metricRecord["DataType"] = "LINUX_PERF_BLOB" - metricRecord["IPName"] = "LogManagement" - kubePerfEventStream.add(emitTime, metricRecord) if metricRecord - end - if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && kubePerfEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE - if @NODES_PERF_EMIT_STREAM - $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream - end - kubePerfEventStream = MultiEventStream.new + # node metrics records + nodeMetricRecords = [] + nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "allocatable", "cpu", "cpuAllocatableNanoCores", batchTime) + if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? 
+ nodeMetricRecords.push(nodeMetricRecord) + end + nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "allocatable", "memory", "memoryAllocatableBytes", batchTime) + if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? + nodeMetricRecords.push(nodeMetricRecord) + end + nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "capacity", "cpu", "cpuCapacityNanoCores", batchTime) + if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? + nodeMetricRecords.push(nodeMetricRecord) + end + nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "capacity", "memory", "memoryCapacityBytes", batchTime) + if !nodeMetricRecord.nil? && !nodeMetricRecord.empty? + nodeMetricRecords.push(nodeMetricRecord) + end + nodeMetricRecords.each do |metricRecord| + metricRecord["DataType"] = "LINUX_PERF_BLOB" + metricRecord["IPName"] = "LogManagement" + kubePerfEventStream.add(emitTime, metricRecord) if metricRecord + end + if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && kubePerfEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE + if @NODES_PERF_EMIT_STREAM + $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream end + kubePerfEventStream = MultiEventStream.new + end - # node GPU metrics record - nodeGPUInsightsMetricsRecords = [] - insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "allocatable", "nvidia.com/gpu", "nodeGpuAllocatable", batchTime) - if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty? - nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord) - end - insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "capacity", "nvidia.com/gpu", "nodeGpuCapacity", batchTime) - if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty? 
- nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord) - end - insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "allocatable", "amd.com/gpu", "nodeGpuAllocatable", batchTime) - if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty? - nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord) - end - insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "capacity", "amd.com/gpu", "nodeGpuCapacity", batchTime) - if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty? - nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord) - end - nodeGPUInsightsMetricsRecords.each do |insightsMetricsRecord| - wrapper = { - "DataType" => "INSIGHTS_METRICS_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], - } - insightsMetricsEventStream.add(emitTime, wrapper) if wrapper - end - if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && insightsMetricsEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE - if @GPU_NODES_PERF_EMIT_STREAM - $log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream - end - insightsMetricsEventStream = MultiEventStream.new + # node GPU metrics record + nodeGPUInsightsMetricsRecords = [] + insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "allocatable", "nvidia.com/gpu", "nodeGpuAllocatable", batchTime) + if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty? + nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord) + end + insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "capacity", "nvidia.com/gpu", "nodeGpuCapacity", batchTime) + if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty? 
+ nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord) + end + insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "allocatable", "amd.com/gpu", "nodeGpuAllocatable", batchTime) + if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty? + nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord) + end + insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "capacity", "amd.com/gpu", "nodeGpuCapacity", batchTime) + if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty? + nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord) + end + nodeGPUInsightsMetricsRecords.each do |insightsMetricsRecord| + wrapper = { + "DataType" => "INSIGHTS_METRICS_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], + } + insightsMetricsEventStream.add(emitTime, wrapper) if wrapper + end + if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && insightsMetricsEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE + if @GPU_NODES_PERF_EMIT_STREAM + $log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream end - # Adding telemetry to send node telemetry every 10 minutes - timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference / 60 - if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) - properties = getNodeTelemetryProps(item) - properties["KubernetesProviderID"] = nodeInventoryRecord["KubernetesProviderID"] - capacityInfo = item["status"]["capacity"] - ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties) - begin - if (!capacityInfo["nvidia.com/gpu"].nil?) && (!capacityInfo["nvidia.com/gpu"].empty?) 
- properties["nvigpus"] = capacityInfo["nvidia.com/gpu"] - end - - if (!capacityInfo["amd.com/gpu"].nil?) && (!capacityInfo["amd.com/gpu"].empty?) - properties["amdgpus"] = capacityInfo["amd.com/gpu"] - end - rescue => errorStr - $log.warn "Failed in getting GPU telemetry in_kube_nodes : #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + insightsMetricsEventStream = MultiEventStream.new + end + # Adding telemetry to send node telemetry every 10 minutes + timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) + properties = getNodeTelemetryProps(item) + properties["KubernetesProviderID"] = nodeInventoryRecord["KubernetesProviderID"] + capacityInfo = item["status"]["capacity"] + ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties) + begin + if (!capacityInfo["nvidia.com/gpu"].nil?) && (!capacityInfo["nvidia.com/gpu"].empty?) + properties["nvigpus"] = capacityInfo["nvidia.com/gpu"] end - # Telemetry for data collection config for replicaset - if (File.file?(@@configMapMountPath)) - properties["collectAllKubeEvents"] = @@collectAllKubeEvents + if (!capacityInfo["amd.com/gpu"].nil?) && (!capacityInfo["amd.com/gpu"].empty?) 
+ properties["amdgpus"] = capacityInfo["amd.com/gpu"] end + rescue => errorStr + $log.warn "Failed in getting GPU telemetry in_kube_nodes : #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end - #telemetry about prometheus metric collections settings for replicaset - if (File.file?(@@promConfigMountPath)) - properties["rsPromInt"] = @@rsPromInterval - properties["rsPromFPC"] = @@rsPromFieldPassCount - properties["rsPromFDC"] = @@rsPromFieldDropCount - properties["rsPromServ"] = @@rsPromK8sServiceCount - properties["rsPromUrl"] = @@rsPromUrlCount - properties["rsPromMonPods"] = @@rsPromMonitorPods - properties["rsPromMonPodsNs"] = @@rsPromMonitorPodsNamespaceLength - end - ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties) - telemetrySent = true + # Telemetry for data collection config for replicaset + if (File.file?(@@configMapMountPath)) + properties["collectAllKubeEvents"] = @@collectAllKubeEvents + end + + #telemetry about prometheus metric collections settings for replicaset + if (File.file?(@@promConfigMountPath)) + properties["rsPromInt"] = @@rsPromInterval + properties["rsPromFPC"] = @@rsPromFieldPassCount + properties["rsPromFDC"] = @@rsPromFieldDropCount + properties["rsPromServ"] = @@rsPromK8sServiceCount + properties["rsPromUrl"] = @@rsPromUrlCount + properties["rsPromMonPods"] = @@rsPromMonitorPods + properties["rsPromMonPodsNs"] = @@rsPromMonitorPodsNamespaceLength end + ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties) + telemetrySent = true + end end if telemetrySent == true @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i @@ -468,7 +463,7 @@ def getNodeTelemetryProps(item) rescue => errorStr $log.warn "in_kube_nodes::getContainerNodeIngetNodeTelemetryPropsventoryRecord:Failed: #{errorStr}" end - return properties + return properties end end # Kube_Node_Input end # module diff 
--git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 9a20be62d..13903cd4a 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -78,11 +78,6 @@ def start if !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].nil? && !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].empty? @PODS_EMIT_STREAM_BATCH_SIZE = ENV["PODS_EMIT_STREAM_BATCH_SIZE"].to_i - PodsChunkSize = @PODS_CHUNK_SIZE.to_i - if @PODS_EMIT_STREAM_BATCH_SIZE > PodsChunkSize - $log.info("in_kube_podinventory::start : PODS_EMIT_STREAM_BATCH_SIZE shouldnt be greater than @ #{@PODS_CHUNK_SIZE} ") - @PODS_EMIT_STREAM_BATCH_SIZE = PodsChunkSize - end end $log.info("in_kube_podinventory::start : PODS_EMIT_STREAM_BATCH_SIZE @ #{@PODS_EMIT_STREAM_BATCH_SIZE}") From 259a95c30759f034f1f61a1623913a3020367822 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sun, 29 Nov 2020 18:45:12 -0800 Subject: [PATCH 07/45] fix bug with service name --- source/plugins/ruby/in_kube_podinventory.rb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 13903cd4a..00b721424 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -122,7 +122,7 @@ def enumerate(podList = nil) serviceList = Yajl::Parser.parse(StringIO.new(serviceInfo.body)) $log.info("in_kube_podinventory::enumerate:End:Parsing services data using yajl @ #{Time.now.utc.iso8601}") serviceInfo = nil - # service inventory records much smaller size and fixed compared to serviceList + # service inventory records much smaller and fixed size compared to serviceList serviceRecords = KubernetesApiClient.getKubeServicesInventoryRecords(serviceList, batchTime) serviceList = nil end @@ -618,7 +618,11 @@ def getServiceNameFromLabels(namespace, labels, serviceRecords) serviceRecords.each do |kubeServiceRecord| found = 0 if 
kubeServiceRecord["Namespace"] == namespace - selectorLabels = kubeServiceRecord["SelectorLabels"] + selectorLabels = {} + # selector labels wrapped in array in kube service records so unwrapping here + if !kubeServiceRecord["SelectorLabels"].nil? && kubeServiceRecord["SelectorLabels"].length > 0 + selectorLabels = kubeServiceRecord["SelectorLabels"][0] + end if !selectorLabels.empty? selectorLabels.each do |key, value| if !(labels.select { |k, v| k == key && v == value }.length > 0) From b37529b382fba256ca62e49e243a892d2dc09e51 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sun, 29 Nov 2020 20:34:32 -0800 Subject: [PATCH 08/45] fix bugs related to get service name --- source/plugins/ruby/in_kube_nodes.rb | 8 ++++++++ source/plugins/ruby/in_kube_podinventory.rb | 5 +++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index 89a2ff7ba..c36042927 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -310,6 +310,14 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) end eventStream = nil end + if containerNodeInventoryEventStream.count > 0 + if @CONTAINER_NODE_INVENTORY_EMIT_STREAM + $log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{containerNodeInventoryEventStream.count} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream + end + containerNodeInventoryEventStream = nil + end + if kubePerfEventStream.count > 0 if @NODES_PERF_EMIT_STREAM $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{kubePerfEventStream.count} @ #{Time.now.utc.iso8601}") diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 00b721424..702b0f130 100644 --- 
a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -623,7 +623,7 @@ def getServiceNameFromLabels(namespace, labels, serviceRecords) if !kubeServiceRecord["SelectorLabels"].nil? && kubeServiceRecord["SelectorLabels"].length > 0 selectorLabels = kubeServiceRecord["SelectorLabels"][0] end - if !selectorLabels.empty? + if !selectorLabels.nil? && !selectorLabels.empty? selectorLabels.each do |key, value| if !(labels.select { |k, v| k == key && v == value }.length > 0) break @@ -631,7 +631,8 @@ def getServiceNameFromLabels(namespace, labels, serviceRecords) found = found + 1 end end - if found == selectorLabels.length + # service can have no selectors to avoid mapping to wrong service check found > 0 + if found > 0 && found == selectorLabels.length return kubeServiceRecord["ServiceName"] end end From 7375e3388a7db18502a7e248e390495d592ae062 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sun, 29 Nov 2020 22:48:24 -0800 Subject: [PATCH 09/45] update oom fix test agent --- kubernetes/omsagent.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 49a9235de..fc95d49f3 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -340,7 +340,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod11092020" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020" imagePullPolicy: IfNotPresent resources: limits: @@ -499,7 +499,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod11092020" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020" imagePullPolicy: IfNotPresent resources: limits: From ed0857b77cb56d4658f1a7184129e00c154d8534 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 30 Nov 2020 
06:43:30 -0800 Subject: [PATCH 10/45] debug logs --- source/plugins/ruby/in_kube_events.rb | 8 ++++++-- source/plugins/ruby/in_kube_nodes.rb | 6 ++++++ source/plugins/ruby/in_kube_podinventory.rb | 12 ++++++++++-- source/plugins/ruby/in_kubestate_deployments.rb | 6 ++++++ 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/source/plugins/ruby/in_kube_events.rb b/source/plugins/ruby/in_kube_events.rb index 561909246..4f532ff52 100644 --- a/source/plugins/ruby/in_kube_events.rb +++ b/source/plugins/ruby/in_kube_events.rb @@ -92,8 +92,10 @@ def enumerate end $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}") if (!eventList.nil? && !eventList.empty? && eventList.key?("items") && !eventList["items"].nil? && !eventList["items"].empty?) + # debug logs to track the payload size eventsCount = eventList["items"].length - $log.info "in_kube_events::enumerate:Received number of events is eventList is #{eventsCount} @ #{Time.now.utc.iso8601}" + eventsInventorySizeInKB = (eventList.to_s.length) / 1024 + $log.info "in_kube_events::enumerate:Received number of events in eventList is #{eventsCount} and size in KB #{eventsInventorySizeInKB} @ #{Time.now.utc.iso8601}" newEventQueryState = parse_and_emit_records(eventList, eventQueryState, newEventQueryState, batchTime) else $log.warn "in_kube_events::enumerate:Received empty eventList" @@ -103,8 +105,10 @@ def enumerate while (!continuationToken.nil? && !continuationToken.empty?) continuationToken, eventList = KubernetesApiClient.getResourcesAndContinuationToken("events?fieldSelector=type!=Normal&limit=#{@EVENTS_CHUNK_SIZE}&continue=#{continuationToken}") if (!eventList.nil? && !eventList.empty? && eventList.key?("items") && !eventList["items"].nil? && !eventList["items"].empty?) 
+ # debug logs to track the payload size eventsCount = eventList["items"].length - $log.info "in_kube_events::enumerate:Received number of events is eventList is #{eventsCount} @ #{Time.now.utc.iso8601}" + eventsInventorySizeInKB = (eventList.to_s.length) / 1024 + $log.info "in_kube_events::enumerate:Received number of events in eventList is #{eventsCount} and size in KB #{eventsInventorySizeInKB} @ #{Time.now.utc.iso8601}" newEventQueryState = parse_and_emit_records(eventList, eventQueryState, newEventQueryState, batchTime) else $log.warn "in_kube_events::enumerate:Received empty eventList" diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index c36042927..f357da87e 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -119,6 +119,9 @@ def enumerate $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") if (!nodeInventory.nil? && !nodeInventory.empty? && nodeInventory.key?("items") && !nodeInventory["items"].nil? && !nodeInventory["items"].empty?) + # debug logs to track the payload size + nodeInventorySizeInKB = (nodeInventory.to_s.length) / 1024 + $log.info("in_kube_nodes::enumerate : number of node items :#{nodeInventory["items"].length} and size in KB: #{nodeInventorySizeInKB} from Kube API @ #{Time.now.utc.iso8601}") parse_and_emit_records(nodeInventory, batchTime) else $log.warn "in_kube_nodes::enumerate:Received empty nodeInventory" @@ -128,6 +131,9 @@ def enumerate while (!continuationToken.nil? && !continuationToken.empty?) continuationToken, nodeInventory = KubernetesApiClient.getResourcesAndContinuationToken(resourceUri + "&continue=#{continuationToken}") if (!nodeInventory.nil? && !nodeInventory.empty? && nodeInventory.key?("items") && !nodeInventory["items"].nil? && !nodeInventory["items"].empty?) 
+ # debug logs to track the payload size + nodeInventorySizeInKB = (nodeInventory.to_s.length) / 1024 + $log.info("in_kube_nodes::enumerate : number of node items :#{nodeInventory["items"].length} and size in KB: #{nodeInventorySizeInKB} from Kube API @ #{Time.now.utc.iso8601}") parse_and_emit_records(nodeInventory, batchTime) else $log.warn "in_kube_nodes::enumerate:Received empty nodeInventory" diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 702b0f130..a70f1aab6 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -118,9 +118,11 @@ def enumerate(podList = nil) $log.info("in_kube_podinventory::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}") if !serviceInfo.nil? - $log.info("in_kube_podinventory::enumerate:Start:Parsing services data using yajl @ #{Time.now.utc.iso8601}") + # debug logs to track the payload size + serviceInfoResponseSizeInKB = (serviceInfo.body.length) / 1024 + $log.info("in_kube_podinventory::enumerate:Start:Parsing services data using yajl serviceInfo size in KB #{serviceInfoResponseSizeInKB} @ #{Time.now.utc.iso8601}") serviceList = Yajl::Parser.parse(StringIO.new(serviceInfo.body)) - $log.info("in_kube_podinventory::enumerate:End:Parsing services data using yajl @ #{Time.now.utc.iso8601}") + $log.info("in_kube_podinventory::enumerate:End:Parsing services data using yajl serviceInfo size in KB #{serviceInfoResponseSizeInKB} @ #{Time.now.utc.iso8601}") serviceInfo = nil # service inventory records much smaller and fixed size compared to serviceList serviceRecords = KubernetesApiClient.getKubeServicesInventoryRecords(serviceList, batchTime) @@ -133,6 +135,9 @@ def enumerate(podList = nil) continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}") $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ 
#{Time.now.utc.iso8601}") if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) + # debug logs to track the payload size + podInventorySizeInKB = (podInventory.to_s.length) / 1024 + $log.info("in_kube_podinventory::enumerate : number of pod items :#{podInventory["items"].length} and size in KB: #{podInventorySizeInKB} from Kube API @ #{Time.now.utc.iso8601}") parse_and_emit_records(podInventory, serviceRecords, continuationToken, batchTime) else $log.warn "in_kube_podinventory::enumerate:Received empty podInventory" @@ -142,6 +147,9 @@ def enumerate(podList = nil) while (!continuationToken.nil? && !continuationToken.empty?) continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}&continue=#{continuationToken}") if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) 
+ # debug logs to track the payload size + podInventorySizeInKB = (podInventory.to_s.length) / 1024 + $log.info("in_kube_podinventory::enumerate : number of pod items :#{podInventory["items"].length} and size in KB: #{podInventorySizeInKB} from Kube API @ #{Time.now.utc.iso8601}") parse_and_emit_records(podInventory, serviceRecords, continuationToken, batchTime) else $log.warn "in_kube_podinventory::enumerate:Received empty podInventory" diff --git a/source/plugins/ruby/in_kubestate_deployments.rb b/source/plugins/ruby/in_kubestate_deployments.rb index e1679626d..cd9f279e1 100644 --- a/source/plugins/ruby/in_kubestate_deployments.rb +++ b/source/plugins/ruby/in_kubestate_deployments.rb @@ -86,6 +86,9 @@ def enumerate continuationToken, deploymentList = KubernetesApiClient.getResourcesAndContinuationToken("deployments?limit=#{@DEPLOYMENTS_CHUNK_SIZE}", api_group: @DEPLOYMENTS_API_GROUP) $log.info("in_kubestate_deployments::enumerate : Done getting deployments from Kube API @ #{Time.now.utc.iso8601}") if (!deploymentList.nil? && !deploymentList.empty? && deploymentList.key?("items") && !deploymentList["items"].nil? && !deploymentList["items"].empty?) + # debug logs to track the payload size + deploymentsSizeInKB = (deploymentList.to_s.length) / 1024 + $log.info("in_kubestate_deployments::enumerate : number of deployment items :#{deploymentList["items"].length} and size in KB: #{deploymentsSizeInKB} from Kube API @ #{Time.now.utc.iso8601}") parse_and_emit_records(deploymentList, batchTime) else $log.warn "in_kubestate_deployments::enumerate:Received empty deploymentList" @@ -95,6 +98,9 @@ def enumerate while (!continuationToken.nil? && !continuationToken.empty?) continuationToken, deploymentList = KubernetesApiClient.getResourcesAndContinuationToken("deployments?limit=#{@DEPLOYMENTS_CHUNK_SIZE}&continue=#{continuationToken}", api_group: @DEPLOYMENTS_API_GROUP) if (!deploymentList.nil? && !deploymentList.empty? 
&& deploymentList.key?("items") && !deploymentList["items"].nil? && !deploymentList["items"].empty?) + # debug logs to track the payload size + deploymentsSizeInKB = (deploymentList.to_s.length) / 1024 + $log.info("in_kubestate_deployments::enumerate : number of deployment items :#{deploymentList["items"].length} and size in KB: #{deploymentsSizeInKB} from Kube API @ #{Time.now.utc.iso8601}") parse_and_emit_records(deploymentList, batchTime) else $log.warn "in_kubestate_deployments::enumerate:Received empty deploymentList" From b69f0320ef09e970aeb6645a8dafdacdd903c8e8 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 30 Nov 2020 08:05:24 -0800 Subject: [PATCH 11/45] fix service label issue --- source/plugins/ruby/in_kube_podinventory.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index a70f1aab6..7a25dbdc4 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -638,10 +638,10 @@ def getServiceNameFromLabels(namespace, labels, serviceRecords) end found = found + 1 end - end - # service can have no selectors to avoid mapping to wrong service check found > 0 - if found > 0 && found == selectorLabels.length - return kubeServiceRecord["ServiceName"] + # service can have no selectors + if found == selectorLabels.length + return kubeServiceRecord["ServiceName"] + end end end end From 2eeaed4e8d3c7472978a77bb98cf010c2e5f5cd9 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 30 Nov 2020 09:15:51 -0800 Subject: [PATCH 12/45] update to latest agent and enable ephemeral annotation --- kubernetes/omsagent.yaml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index fc95d49f3..893432dad 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -340,7 +340,7 @@ spec: 
serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-2" imagePullPolicy: IfNotPresent resources: limits: @@ -499,7 +499,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-2" imagePullPolicy: IfNotPresent resources: limits: @@ -615,14 +615,14 @@ spec: affinity: nodeAffinity: # affinity to schedule on to ephemeral os node if its available - # preferredDuringSchedulingIgnoredDuringExecution: - # - weight: 1 - # preference: - # matchExpressions: - # - key: storageprofile - # operator: NotIn - # values: - # - managed + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: storageprofile + operator: NotIn + values: + - managed requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - labelSelector: From 10e4b71e9408908dd1b60b0148b1b4602383fa2a Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 30 Nov 2020 10:54:43 -0800 Subject: [PATCH 13/45] change stream size to 200 from 250 --- kubernetes/omsagent.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 893432dad..3310cda66 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -514,7 +514,7 @@ spec: - name: PODS_CHUNK_SIZE value: "500" - name: PODS_EMIT_STREAM_BATCH_SIZE - value: "250" + value: "200" - name: PODS_EMIT_STREAM value: "true" - name: MDM_PODS_INVENTORY_EMIT_STREAM From d003daa1d0eeb7439467be08cb25e5b042e95b7d Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 30 Nov 2020 15:29:08 -0800 Subject: [PATCH 14/45] update yaml --- kubernetes/omsagent.yaml | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 3310cda66..e03455fdb 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -544,7 +544,7 @@ spec: # event inventory plugin settings - name: EVENTS_CHUNK_SIZE - value: "30000" + value: "3000" - name: EVENTS_EMIT_STREAM value: "true" @@ -614,8 +614,8 @@ spec: periodSeconds: 60 affinity: nodeAffinity: - # affinity to schedule on to ephemeral os node if its available - preferredDuringSchedulingIgnoredDuringExecution: + # affinity to schedule on to ephemeral os node if its available + preferredDuringSchedulingIgnoredDuringExecution: - weight: 1 preference: matchExpressions: From 0ba06108eb445dcf8eec341b8fe196798774d89d Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 30 Nov 2020 19:09:07 -0800 Subject: [PATCH 15/45] adjust chunksizes --- source/plugins/ruby/in_kube_events.rb | 13 +-- source/plugins/ruby/in_kube_nodes.rb | 88 +++++-------------- source/plugins/ruby/in_kube_podinventory.rb | 80 +++++------------ .../plugins/ruby/in_kubestate_deployments.rb | 17 ++-- source/plugins/ruby/in_kubestate_hpa.rb | 11 +-- 5 files changed, 52 insertions(+), 157 deletions(-) diff --git a/source/plugins/ruby/in_kube_events.rb b/source/plugins/ruby/in_kube_events.rb index 4f532ff52..6cea5e996 100644 --- a/source/plugins/ruby/in_kube_events.rb +++ b/source/plugins/ruby/in_kube_events.rb @@ -17,9 +17,8 @@ def initialize require_relative "omslog" require_relative "ApplicationInsightsUtility" - # 30000 events account to approximately 5MB - @EVENTS_CHUNK_SIZE = 30000 - @EVENTS_EMIT_STREAM = true + # 4000 events (1KB per event) account to approximately 4MB + @EVENTS_CHUNK_SIZE = 4000 # Initializing events count for telemetry @eventsCount = 0 @@ -42,10 +41,6 @@ def start end $log.info("in_kube_events::start : EVENTS_CHUNK_SIZE @ #{@EVENTS_CHUNK_SIZE}") - if !ENV["EVENTS_EMIT_STREAM"].nil? && !ENV["EVENTS_EMIT_STREAM"].empty? 
- @EVENTS_EMIT_STREAM = ENV["EVENTS_EMIT_STREAM"].to_s.downcase == "true" ? true : false - end - $log.info("in_kube_events::start : EVENTS_EMIT_STREAM @ #{@EVENTS_EMIT_STREAM}") @finished = false @condition = ConditionVariable.new @mutex = Mutex.new @@ -174,9 +169,7 @@ def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTim eventStream.add(emitTime, wrapper) if wrapper @eventsCount += 1 end - if @EVENTS_EMIT_STREAM - router.emit_stream(@tag, eventStream) if eventStream - end + router.emit_stream(@tag, eventStream) if eventStream rescue => errorStr $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index f357da87e..c77db2791 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -32,14 +32,10 @@ def initialize require_relative "ApplicationInsightsUtility" require_relative "oms_common" require_relative "omslog" - @NODES_CHUNK_SIZE = "400" + # 250 Node items (15KB per node) account to approximately 4MB + @NODES_CHUNK_SIZE = "250" # 0 indicates no batch enabled for stream emit @NODES_EMIT_STREAM_BATCH_SIZE = 0 - @NODES_EMIT_STREAM = true - @NODES_PERF_EMIT_STREAM = true - @GPU_NODES_PERF_EMIT_STREAM = true - @CONTAINER_NODE_INVENTORY_EMIT_STREAM = true - @MDM_KUBE_NODE_INVENTORY_EMIT_STREAM = true require_relative "constants" end @@ -62,31 +58,6 @@ def start end $log.info("in_kube_nodes::start : NODES_EMIT_STREAM_BATCH_SIZE @ #{@NODES_EMIT_STREAM_BATCH_SIZE}") - if !ENV["NODES_EMIT_STREAM"].nil? && !ENV["NODES_EMIT_STREAM"].empty? - @NODES_EMIT_STREAM = ENV["NODES_EMIT_STREAM"].to_s.downcase == "true" ? true : false - end - $log.info("in_kube_nodes::start : NODES_EMIT_STREAM @ #{@NODES_EMIT_STREAM}") - - if !ENV["CONTAINER_NODE_INVENTORY_EMIT_STREAM"].nil? && !ENV["CONTAINER_NODE_INVENTORY_EMIT_STREAM"].empty? 
- @CONTAINER_NODE_INVENTORY_EMIT_STREAM = ENV["CONTAINER_NODE_INVENTORY_EMIT_STREAM"].to_s.downcase == "true" ? true : false - end - $log.info("in_kube_nodes::start : CONTAINER_NODE_INVENTORY_EMIT_STREAM @ #{@CONTAINER_NODE_INVENTORY_EMIT_STREAM}") - - if !ENV["MDM_KUBE_NODE_INVENTORY_EMIT_STREAM"].nil? && !ENV["MDM_KUBE_NODE_INVENTORY_EMIT_STREAM"].empty? - @MDM_KUBE_NODE_INVENTORY_EMIT_STREAM = ENV["MDM_KUBE_NODE_INVENTORY_EMIT_STREAM"].to_s.downcase == "true" ? true : false - end - $log.info("in_kube_nodes::start : MDM_KUBE_NODE_INVENTORY_EMIT_STREAM @ #{@MDM_KUBE_NODE_INVENTORY_EMIT_STREAM}") - - if !ENV["NODES_PERF_EMIT_STREAM"].nil? && !ENV["NODES_PERF_EMIT_STREAM"].empty? - @NODES_PERF_EMIT_STREAM = ENV["NODES_PERF_EMIT_STREAM"].to_s.downcase == "true" ? true : false - end - $log.info("in_kube_nodes::start : NODES_PERF_EMIT_STREAM @ #{@NODES_PERF_EMIT_STREAM}") - - if !ENV["GPU_NODES_PERF_EMIT_STREAM"].nil? && !ENV["GPU_NODES_PERF_EMIT_STREAM"].empty? - @GPU_NODES_PERF_EMIT_STREAM = ENV["GPU_NODES_PERF_EMIT_STREAM"].to_s.downcase == "true" ? 
true : false - end - $log.info("in_kube_nodes::start : GPU_NODES_PERF_EMIT_STREAM @ #{@GPU_NODES_PERF_EMIT_STREAM}") - @finished = false @condition = ConditionVariable.new @mutex = Mutex.new @@ -170,14 +141,11 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) } eventStream.add(emitTime, wrapper) if wrapper if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && eventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE - if @NODES_EMIT_STREAM - $log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(@tag, eventStream) if eventStream - end - if @MDM_KUBE_NODE_INVENTORY_EMIT_STREAM - $log.info("in_kube_node::parse_and_emit_records: number of mdm node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream - end + $log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@tag, eventStream) if eventStream + $log.info("in_kube_node::parse_and_emit_records: number of mdm node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end @@ -194,10 +162,8 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && containerNodeInventoryEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE - if @CONTAINER_NODE_INVENTORY_EMIT_STREAM - $log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream - end + $log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream containerNodeInventoryEventStream = MultiEventStream.new end @@ -225,10 +191,8 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) kubePerfEventStream.add(emitTime, metricRecord) if metricRecord end if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && kubePerfEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE - if @NODES_PERF_EMIT_STREAM - $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream - end + $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream kubePerfEventStream = MultiEventStream.new end @@ -259,10 +223,8 @@ def parse_and_emit_records(nodeInventory, batchTime 
= Time.utc.iso8601) insightsMetricsEventStream.add(emitTime, wrapper) if wrapper end if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && insightsMetricsEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE - if @GPU_NODES_PERF_EMIT_STREAM - $log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream - end + $log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream insightsMetricsEventStream = MultiEventStream.new end # Adding telemetry to send node telemetry every 10 minutes @@ -310,32 +272,24 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i end if eventStream.count > 0 - if @NODES_EMIT_STREAM - $log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}") - router.emit_stream(@tag, eventStream) if eventStream - end + $log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}") + router.emit_stream(@tag, eventStream) if eventStream eventStream = nil end if containerNodeInventoryEventStream.count > 0 - if @CONTAINER_NODE_INVENTORY_EMIT_STREAM - $log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{containerNodeInventoryEventStream.count} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream - end + $log.info("in_kube_node::parse_and_emit_records: number of container node inventory 
records emitted #{containerNodeInventoryEventStream.count} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream containerNodeInventoryEventStream = nil end if kubePerfEventStream.count > 0 - if @NODES_PERF_EMIT_STREAM - $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{kubePerfEventStream.count} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream - end + $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{kubePerfEventStream.count} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream kubePerfEventStream = nil end if insightsMetricsEventStream.count > 0 - if @GPU_NODES_PERF_EMIT_STREAM - $log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{insightsMetricsEventStream.count} @ #{Time.now.utc.iso8601}") - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream - end + $log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{insightsMetricsEventStream.count} @ #{Time.now.utc.iso8601}") + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream insightsMetricsEventStream = nil end rescue => errorStr diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 7a25dbdc4..07655c22c 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -27,12 +27,8 @@ def initialize require_relative "omslog" require_relative "constants" - @PODS_EMIT_STREAM = true - @CONTAINER_PERF_EMIT_STREAM = true - @GPU_PERF_EMIT_STREAM = true - @SERVICES_EMIT_STREAM = true - - @PODS_CHUNK_SIZE = "1500" + # 
500 pod (10KB per pod) account to approximately 5MB + @PODS_CHUNK_SIZE = "500" @podCount = 0 @controllerSet = Set.new [] @winContainerCount = 0 @@ -51,26 +47,6 @@ def configure(conf) def start if @run_interval - if !ENV["PODS_EMIT_STREAM"].nil? && !ENV["PODS_EMIT_STREAM"].empty? - @PODS_EMIT_STREAM = ENV["PODS_EMIT_STREAM"].to_s.downcase == "true" ? true : false - end - $log.info("in_kube_podinventory::start : PODS_EMIT_STREAM @ #{@PODS_EMIT_STREAM}") - - if !ENV["SERVICES_EMIT_STREAM"].nil? && !ENV["SERVICES_EMIT_STREAM"].empty? - @SERVICES_EMIT_STREAM = ENV["SERVICES_EMIT_STREAM"].to_s.downcase == "true" ? true : false - end - $log.info("in_kube_podinventory::start : SERVICES_EMIT_STREAM @ #{@SERVICES_EMIT_STREAM}") - - if !ENV["CONTAINER_PERF_EMIT_STREAM"].nil? && !ENV["CONTAINER_PERF_EMIT_STREAM"].empty? - @CONTAINER_PERF_EMIT_STREAM = ENV["CONTAINER_PERF_EMIT_STREAM"].to_s.downcase == "true" ? true : false - end - $log.info("in_kube_podinventory::start : CONTAINER_PERF_EMIT_STREAM @ #{@CONTAINER_PERF_EMIT_STREAM}") - - if !ENV["GPU_PERF_EMIT_STREAM"].nil? && !ENV["GPU_PERF_EMIT_STREAM"].empty? - @GPU_PERF_EMIT_STREAM = ENV["GPU_PERF_EMIT_STREAM"].to_s.downcase == "true" ? true : false - end - $log.info("in_kube_podinventory::start : GPU_PERF_EMIT_STREAM @ #{@GPU_PERF_EMIT_STREAM}") - if !ENV["PODS_CHUNK_SIZE"].nil? && !ENV["PODS_CHUNK_SIZE"].empty? @PODS_CHUNK_SIZE = ENV["PODS_CHUNK_SIZE"] end @@ -241,13 +217,11 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc end if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && eventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE - if @PODS_EMIT_STREAM - $log.info("in_kube_podinventory::parse_and_emit_records: number of pod inventory records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) - $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") - end - router.emit_stream(@tag, eventStream) if eventStream + $log.info("in_kube_podinventory::parse_and_emit_records: number of pod inventory records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) + $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end + router.emit_stream(@tag, eventStream) if eventStream eventStream = MultiEventStream.new end @@ -265,10 +239,8 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc end if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && kubePerfEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE - if @CONTAINER_PERF_EMIT_STREAM - $log.info("in_kube_podinventory::parse_and_emit_records: number of container perf records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream - end + $log.info("in_kube_podinventory::parse_and_emit_records: number of container perf records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream kubePerfEventStream = MultiEventStream.new end @@ -288,22 +260,18 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc end if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && insightsMetricsEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE - if @GPU_PERF_EMIT_STREAM - $log.info("in_kube_podinventory::parse_and_emit_records: number of GPU insights metrics records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) - $log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") - end - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + $log.info("in_kube_podinventory::parse_and_emit_records: number of GPU insights metrics records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0) + $log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") end + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream insightsMetricsEventStream = MultiEventStream.new end end #podInventory block end if eventStream.count > 0 - if @PODS_EMIT_STREAM - $log.info("in_kube_podinventory::parse_and_emit_records: number of pod inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}") - router.emit_stream(@tag, eventStream) if eventStream - end + $log.info("in_kube_podinventory::parse_and_emit_records: number of pod inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}") + router.emit_stream(@tag, eventStream) if eventStream if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end @@ -311,18 +279,14 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc end if kubePerfEventStream.count > 0 - if @CONTAINER_PERF_EMIT_STREAM - $log.info("in_kube_podinventory::parse_and_emit_records: number of perf records emitted #{kubePerfEventStream.count} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream - end + $log.info("in_kube_podinventory::parse_and_emit_records: number of perf records emitted #{kubePerfEventStream.count} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream kubePerfEventStream = nil end if insightsMetricsEventStream.count > 0 - if @GPU_PERF_EMIT_STREAM - $log.info("in_kube_podinventory::parse_and_emit_records: number of insights metrics records emitted #{insightsMetricsEventStream.count} @ #{Time.now.utc.iso8601}") - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream - end + $log.info("in_kube_podinventory::parse_and_emit_records: number of insights metrics records emitted #{insightsMetricsEventStream.count} @ #{Time.now.utc.iso8601}") + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0) $log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") end @@ -354,16 +318,14 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc } kubeServicesEventStream.add(emitTime, kubeServicewrapper) if kubeServicewrapper if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && kubeServicesEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE - if @SERVICES_EMIT_STREAM - $log.info("in_kube_podinventory::parse_and_emit_records: number of service records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") - router.emit_stream(@@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream - end + $log.info("in_kube_podinventory::parse_and_emit_records: number of service records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream kubeServicesEventStream = MultiEventStream.new end end end - if @SERVICES_EMIT_STREAM && kubeServicesEventStream.count > 0 + if kubeServicesEventStream.count > 0 $log.info("in_kube_podinventory::parse_and_emit_records : number of service records emitted #{kubeServicesEventStream.count} @ #{Time.now.utc.iso8601}") router.emit_stream(@@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream end diff --git a/source/plugins/ruby/in_kubestate_deployments.rb b/source/plugins/ruby/in_kubestate_deployments.rb index cd9f279e1..ffbe8ae4f 100644 --- a/source/plugins/ruby/in_kubestate_deployments.rb +++ b/source/plugins/ruby/in_kubestate_deployments.rb @@ -22,9 +22,8 @@ def initialize require_relative "constants" # roughly each deployment is 8k - # 1000 deployments account to approximately 8MB - @DEPLOYMENTS_CHUNK_SIZE = 1000 - @DEPLOYMENTS_EMIT_STREAM = true + # 500 deployments account to approximately 4MB + @DEPLOYMENTS_CHUNK_SIZE = 500 @DEPLOYMENTS_API_GROUP = "apps" @@telemetryLastSentTime = DateTime.now.to_time.to_i @@ -49,11 +48,6 @@ def start end 
$log.info("in_kubestate_deployments::start : DEPLOYMENTS_CHUNK_SIZE @ #{@DEPLOYMENTS_CHUNK_SIZE}") - if !ENV["DEPLOYMENTS_EMIT_STREAM"].nil? && !ENV["DEPLOYMENTS_EMIT_STREAM"].empty? - @DEPLOYMENTS_EMIT_STREAM = ENV["DEPLOYMENTS_EMIT_STREAM"].to_s.downcase == "true" ? true : false - end - $log.info("in_kubestate_deployments::start : DEPLOYMENTS_EMIT_STREAM @ #{@DEPLOYMENTS_EMIT_STREAM}") - @finished = false @condition = ConditionVariable.new @mutex = Mutex.new @@ -196,10 +190,9 @@ def parse_and_emit_records(deployments, batchTime = Time.utc.iso8601) insightsMetricsEventStream.add(time, wrapper) if wrapper end - if @DEPLOYMENTS_EMIT_STREAM - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream - $log.info("successfully emitted #{metricItems.length()} kube_state_deployment metrics") - end + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + $log.info("successfully emitted #{metricItems.length()} kube_state_deployment metrics") + @deploymentsRunningTotal = @deploymentsRunningTotal + metricItems.length() if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0) $log.info("kubestatedeploymentsInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") diff --git a/source/plugins/ruby/in_kubestate_hpa.rb b/source/plugins/ruby/in_kubestate_hpa.rb index ac7d1e853..736f17250 100644 --- a/source/plugins/ruby/in_kubestate_hpa.rb +++ b/source/plugins/ruby/in_kubestate_hpa.rb @@ -22,7 +22,6 @@ def initialize # 2000 HPAs account to approximately 6-7MB @HPA_CHUNK_SIZE = 2000 @HPA_API_GROUP = "autoscaling" - @HPA_EMIT_STREAM = true # telemetry @hpaCount = 0 @@ -46,10 +45,6 @@ def start end $log.info("in_kubestate_hpa::start : HPA_CHUNK_SIZE @ #{@HPA_CHUNK_SIZE}") - if !ENV["HPA_EMIT_STREAM"].nil? && !ENV["HPA_EMIT_STREAM"].empty? 
- @HPA_EMIT_STREAM = ENV["HPA_EMIT_STREAM"].to_s.downcase == "true" ? true : false - end - $log.info("in_kubestate_hpa::start : HPA_EMIT_STREAM @ #{@HPA_EMIT_STREAM}") @finished = false @condition = ConditionVariable.new @mutex = Mutex.new @@ -191,10 +186,8 @@ def parse_and_emit_records(hpas, batchTime = Time.utc.iso8601) insightsMetricsEventStream.add(time, wrapper) if wrapper end - if @HPA_EMIT_STREAM - router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream - $log.info("successfully emitted #{metricItems.length()} kube_state_hpa metrics") - end + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + $log.info("successfully emitted #{metricItems.length()} kube_state_hpa metrics") if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0) $log.info("kubestatehpaInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") end From 43975d9689b959af758a14381de3e895641351d2 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 30 Nov 2020 20:26:40 -0800 Subject: [PATCH 16/45] add ruby gc env --- kubernetes/linux/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/kubernetes/linux/Dockerfile b/kubernetes/linux/Dockerfile index d04e86128..34ab133da 100644 --- a/kubernetes/linux/Dockerfile +++ b/kubernetes/linux/Dockerfile @@ -15,6 +15,7 @@ ENV HOST_VAR /hostfs/var ENV AZMON_COLLECT_ENV False ENV KUBE_CLIENT_BACKOFF_BASE 1 ENV KUBE_CLIENT_BACKOFF_DURATION 0 +ENV RUBY_GC_HEAP_OLDOBJECT_LIMIT_FACTOR 0.9 RUN /usr/bin/apt-get update && /usr/bin/apt-get install -y libc-bin wget openssl curl sudo python-ctypes init-system-helpers net-tools rsyslog cron vim dmidecode apt-transport-https gnupg && rm -rf /var/lib/apt/lists/* COPY setup.sh main.sh defaultpromenvvariables defaultpromenvvariables-rs mdsd.xml envmdsd $tmpdir/ WORKDIR ${tmpdir} From 
2b8660ba2552d2968e1aba2b86dafb5b9555d817 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 30 Nov 2020 22:17:37 -0800 Subject: [PATCH 17/45] yaml changes for cioomtest11282020-3 --- kubernetes/omsagent.yaml | 62 ++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index e03455fdb..21df718bb 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -340,7 +340,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-2" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-3" imagePullPolicy: IfNotPresent resources: limits: @@ -499,7 +499,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-2" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-3" imagePullPolicy: IfNotPresent resources: limits: @@ -515,50 +515,50 @@ spec: value: "500" - name: PODS_EMIT_STREAM_BATCH_SIZE value: "200" - - name: PODS_EMIT_STREAM - value: "true" - - name: MDM_PODS_INVENTORY_EMIT_STREAM - value: "true" - - name: CONTAINER_PERF_EMIT_STREAM - value: "true" - - name: SERVICES_EMIT_STREAM - value: "true" - - name: GPU_PERF_EMIT_STREAM - value: "true" + # - name: PODS_EMIT_STREAM + # value: "true" + # - name: MDM_PODS_INVENTORY_EMIT_STREAM + # value: "true" + # - name: CONTAINER_PERF_EMIT_STREAM + # value: "true" + # - name: SERVICES_EMIT_STREAM + # value: "true" + # - name: GPU_PERF_EMIT_STREAM + # value: "true" # node inventory plugin settings - name: NODES_CHUNK_SIZE - value: "200" + value: "250" - name: NODES_EMIT_STREAM_BATCH_SIZE value: "100" - - name: NODES_EMIT_STREAM - value: "true" - - name: NODES_PERF_EMIT_STREAM - value: "true" - - name: GPU_NODES_PERF_EMIT_STREAM - value: "true" - - name: 
CONTAINER_NODE_INVENTORY_EMIT_STREAM - value: "true" - - name: MDM_KUBE_NODE_INVENTORY_EMIT_STREAM - value: "true" + # - name: NODES_EMIT_STREAM + # value: "true" + # - name: NODES_PERF_EMIT_STREAM + # value: "true" + # - name: GPU_NODES_PERF_EMIT_STREAM + # value: "true" + # - name: CONTAINER_NODE_INVENTORY_EMIT_STREAM + # value: "true" + # - name: MDM_KUBE_NODE_INVENTORY_EMIT_STREAM + # value: "true" # event inventory plugin settings - name: EVENTS_CHUNK_SIZE - value: "3000" - - name: EVENTS_EMIT_STREAM - value: "true" + value: "4000" + # - name: EVENTS_EMIT_STREAM + # value: "true" # kube state deployments - name: DEPLOYMENTS_CHUNK_SIZE - value: "1000" - - name: DEPLOYMENTS_EMIT_STREAM - value: "true" + value: "500" + # - name: DEPLOYMENTS_EMIT_STREAM + # value: "true" # kube hpa - name: HPA_CHUNK_SIZE value: "2000" - - name: HPA_EMIT_STREAM - value: "true" + # - name: HPA_EMIT_STREAM + # value: "true" - name: AKS_RESOURCE_ID value: "VALUE_AKS_RESOURCE_ID_VALUE" From 8e378faf319bee9fb89592e026a8c784133ec17e Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 1 Dec 2020 12:52:23 -0800 Subject: [PATCH 18/45] telemetry to track pods latency --- source/plugins/ruby/in_kube_podinventory.rb | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 07655c22c..af3506f0d 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -35,6 +35,8 @@ def initialize @controllerData = {} # 0 indicates no batch enabled for stream emit @PODS_EMIT_STREAM_BATCH_SIZE = 0 + @podInventoryE2EProcessingLatencyInMillis = 0 + @podsAPIE2ELatencyInMillis = 0 end config_param :run_interval, :time, :default => 60 @@ -86,6 +88,7 @@ def enumerate(podList = nil) currentTime = Time.now batchTime = currentTime.utc.iso8601 serviceRecords = [] + @podInventoryE2EProcessingLatencyInMillis = 0 # Get services first so that we dont need to 
make a call for very chunk $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") @@ -105,11 +108,17 @@ def enumerate(podList = nil) serviceList = nil end + # to track e2e processing latency + @podsAPIE2ELatencyInMillis = 0 + startTime = (Time.now.to_f * 1000).to_i + podsAPIChunkStartTime = (Time.now.to_f * 1000).to_i # Initializing continuation token to nil continuationToken = nil $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}") continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}") $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") + podsAPIChunkEndTime = (Time.now.to_f * 1000).to_i + @podsAPIE2ELatencyInMillis = (podsAPIChunkEndTime - podsAPIStartTime) if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) # debug logs to track the payload size podInventorySizeInKB = (podInventory.to_s.length) / 1024 @@ -121,7 +130,10 @@ def enumerate(podList = nil) #If we receive a continuation token, make calls, process and flush data until we have processed all data while (!continuationToken.nil? && !continuationToken.empty?) + podsAPIChunkStartTime = (Time.now.to_f * 1000).to_i continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}&continue=#{continuationToken}") + podsAPIChunkEndTime = (Time.now.to_f * 1000).to_i + @podsAPIE2ELatencyInMillis = @podsAPIE2ELatencyInMillis + (podsAPIChunkEndTime - podsAPIChunkStartTime) if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) 
# debug logs to track the payload size podInventorySizeInKB = (podInventory.to_s.length) / 1024 @@ -132,6 +144,8 @@ def enumerate(podList = nil) end end + endTime = (Time.now.to_f * 1000).to_i + @podInventoryE2EProcessingLatencyInMillis = endTime - startTime # Setting these to nil so that we dont hold memory until GC kicks in podInventory = nil serviceRecords = nil @@ -147,6 +161,8 @@ def enumerate(podList = nil) if telemetryFlush == true telemetryProperties = {} telemetryProperties["Computer"] = @@hostName + telemetryProperties["PODS_CHUNK_SIZE"] = @PODS_CHUNK_SIZE + telemetryProperties["PODS_EMIT_STREAM_BATCH_SIZE"] = @PODS_EMIT_STREAM_BATCH_SIZE ApplicationInsightsUtility.sendCustomEvent("KubePodInventoryHeartBeatEvent", telemetryProperties) ApplicationInsightsUtility.sendMetricTelemetry("PodCount", @podCount, {}) telemetryProperties["ControllerData"] = @controllerData.to_json @@ -155,6 +171,8 @@ def enumerate(podList = nil) telemetryProperties["ClusterWideWindowsContainersCount"] = @winContainerCount ApplicationInsightsUtility.sendCustomEvent("WindowsContainerInventoryEvent", telemetryProperties) end + ApplicationInsightsUtility.sendMetricTelemetry("PodInventoryE2EProcessingLatencyInMillis", @podInventoryE2EProcessingLatencyInMillis, telemetryProperties) + ApplicationInsightsUtility.sendMetricTelemetry("PodsAPIE2ELatencyInMillis", @podsAPIE2ELatencyInMillis, telemetryProperties) @@podTelemetryTimeTracker = DateTime.now.to_time.to_i end rescue => errorStr From fb56ab069d2d7285bc23e1c7f523434981d9ea17 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 1 Dec 2020 13:02:57 -0800 Subject: [PATCH 19/45] service count telemetry --- source/plugins/ruby/in_kube_podinventory.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index af3506f0d..765238c16 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -30,6 
+30,7 @@ def initialize # 500 pod (10KB per pod) account to approximately 5MB @PODS_CHUNK_SIZE = "500" @podCount = 0 + @serviceCount = 0 @controllerSet = Set.new [] @winContainerCount = 0 @controllerData = {} @@ -82,6 +83,7 @@ def enumerate(podList = nil) podInventory = podList telemetryFlush = false @podCount = 0 + @serviceCount = 0 @controllerSet = Set.new [] @winContainerCount = 0 @controllerData = {} @@ -105,6 +107,8 @@ def enumerate(podList = nil) serviceInfo = nil # service inventory records much smaller and fixed size compared to serviceList serviceRecords = KubernetesApiClient.getKubeServicesInventoryRecords(serviceList, batchTime) + # updating for telemetry + @serviceCount += serviceRecords.length serviceList = nil end @@ -165,6 +169,7 @@ def enumerate(podList = nil) telemetryProperties["PODS_EMIT_STREAM_BATCH_SIZE"] = @PODS_EMIT_STREAM_BATCH_SIZE ApplicationInsightsUtility.sendCustomEvent("KubePodInventoryHeartBeatEvent", telemetryProperties) ApplicationInsightsUtility.sendMetricTelemetry("PodCount", @podCount, {}) + ApplicationInsightsUtility.sendMetricTelemetry("ServiceCount", @serviceCount, {}) telemetryProperties["ControllerData"] = @controllerData.to_json ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", @controllerSet.length, telemetryProperties) if @winContainerCount > 0 From e9541eafb88de8023afccba30548733abbc2de4c Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 1 Dec 2020 13:17:23 -0800 Subject: [PATCH 20/45] rename variables --- source/plugins/ruby/in_kube_podinventory.rb | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 765238c16..d94501bba 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -36,8 +36,8 @@ def initialize @controllerData = {} # 0 indicates no batch enabled for stream emit @PODS_EMIT_STREAM_BATCH_SIZE = 0 - 
@podInventoryE2EProcessingLatencyInMillis = 0 - @podsAPIE2ELatencyInMillis = 0 + @podInventoryE2EProcessingLatencyMs = 0 + @podsAPIE2ELatencyInMs = 0 end config_param :run_interval, :time, :default => 60 @@ -90,7 +90,7 @@ def enumerate(podList = nil) currentTime = Time.now batchTime = currentTime.utc.iso8601 serviceRecords = [] - @podInventoryE2EProcessingLatencyInMillis = 0 + @podInventoryE2EProcessingLatencyMs = 0 # Get services first so that we dont need to make a call for very chunk $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") @@ -113,8 +113,8 @@ def enumerate(podList = nil) end # to track e2e processing latency - @podsAPIE2ELatencyInMillis = 0 - startTime = (Time.now.to_f * 1000).to_i + @podsAPIE2ELatencyInMs = 0 + podInventoryStartTime = (Time.now.to_f * 1000).to_i podsAPIChunkStartTime = (Time.now.to_f * 1000).to_i # Initializing continuation token to nil continuationToken = nil @@ -122,7 +122,7 @@ def enumerate(podList = nil) continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}") $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") podsAPIChunkEndTime = (Time.now.to_f * 1000).to_i - @podsAPIE2ELatencyInMillis = (podsAPIChunkEndTime - podsAPIStartTime) + @podsAPIE2ELatencyInMs = (podsAPIChunkEndTime - podsAPIStartTime) if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) 
# debug logs to track the payload size podInventorySizeInKB = (podInventory.to_s.length) / 1024 @@ -137,7 +137,7 @@ def enumerate(podList = nil) podsAPIChunkStartTime = (Time.now.to_f * 1000).to_i continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}&continue=#{continuationToken}") podsAPIChunkEndTime = (Time.now.to_f * 1000).to_i - @podsAPIE2ELatencyInMillis = @podsAPIE2ELatencyInMillis + (podsAPIChunkEndTime - podsAPIChunkStartTime) + @podsAPIE2ELatencyInMs = @podsAPIE2ELatencyInMs + (podsAPIChunkEndTime - podsAPIChunkStartTime) if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) # debug logs to track the payload size podInventorySizeInKB = (podInventory.to_s.length) / 1024 @@ -148,8 +148,7 @@ def enumerate(podList = nil) end end - endTime = (Time.now.to_f * 1000).to_i - @podInventoryE2EProcessingLatencyInMillis = endTime - startTime + @podInventoryE2EProcessingLatencyMs = ((Time.now.to_f * 1000).to_i - podInventoryStartTime) # Setting these to nil so that we dont hold memory until GC kicks in podInventory = nil serviceRecords = nil @@ -176,8 +175,8 @@ def enumerate(podList = nil) telemetryProperties["ClusterWideWindowsContainersCount"] = @winContainerCount ApplicationInsightsUtility.sendCustomEvent("WindowsContainerInventoryEvent", telemetryProperties) end - ApplicationInsightsUtility.sendMetricTelemetry("PodInventoryE2EProcessingLatencyInMillis", @podInventoryE2EProcessingLatencyInMillis, telemetryProperties) - ApplicationInsightsUtility.sendMetricTelemetry("PodsAPIE2ELatencyInMillis", @podsAPIE2ELatencyInMillis, telemetryProperties) + ApplicationInsightsUtility.sendMetricTelemetry("PodInventoryE2EProcessingLatencyMs", @podInventoryE2EProcessingLatencyMs, telemetryProperties) + ApplicationInsightsUtility.sendMetricTelemetry("PodsAPIE2ELatencyInMs", @podsAPIE2ELatencyInMs, telemetryProperties) 
@@podTelemetryTimeTracker = DateTime.now.to_time.to_i end rescue => errorStr From 023a7cbb7c29f729f8add2c172223690b0cac4c7 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 1 Dec 2020 13:30:31 -0800 Subject: [PATCH 21/45] wip --- source/plugins/ruby/in_kube_podinventory.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index d94501bba..2e920e7c4 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -37,7 +37,7 @@ def initialize # 0 indicates no batch enabled for stream emit @PODS_EMIT_STREAM_BATCH_SIZE = 0 @podInventoryE2EProcessingLatencyMs = 0 - @podsAPIE2ELatencyInMs = 0 + @podsAPIE2ELatencyMs = 0 end config_param :run_interval, :time, :default => 60 @@ -113,7 +113,7 @@ def enumerate(podList = nil) end # to track e2e processing latency - @podsAPIE2ELatencyInMs = 0 + @podsAPIE2ELatencyMs = 0 podInventoryStartTime = (Time.now.to_f * 1000).to_i podsAPIChunkStartTime = (Time.now.to_f * 1000).to_i # Initializing continuation token to nil @@ -122,7 +122,7 @@ def enumerate(podList = nil) continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}") $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") podsAPIChunkEndTime = (Time.now.to_f * 1000).to_i - @podsAPIE2ELatencyInMs = (podsAPIChunkEndTime - podsAPIStartTime) + @podsAPIE2ELatencyMs = (podsAPIChunkEndTime - podsAPIChunkStartTime) if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) 
# debug logs to track the payload size podInventorySizeInKB = (podInventory.to_s.length) / 1024 @@ -137,7 +137,7 @@ def enumerate(podList = nil) podsAPIChunkStartTime = (Time.now.to_f * 1000).to_i continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}&continue=#{continuationToken}") podsAPIChunkEndTime = (Time.now.to_f * 1000).to_i - @podsAPIE2ELatencyInMs = @podsAPIE2ELatencyInMs + (podsAPIChunkEndTime - podsAPIChunkStartTime) + @podsAPIE2ELatencyMs = @podsAPIE2ELatencyMs + (podsAPIChunkEndTime - podsAPIChunkStartTime) if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) # debug logs to track the payload size podInventorySizeInKB = (podInventory.to_s.length) / 1024 @@ -176,7 +176,7 @@ def enumerate(podList = nil) ApplicationInsightsUtility.sendCustomEvent("WindowsContainerInventoryEvent", telemetryProperties) end ApplicationInsightsUtility.sendMetricTelemetry("PodInventoryE2EProcessingLatencyMs", @podInventoryE2EProcessingLatencyMs, telemetryProperties) - ApplicationInsightsUtility.sendMetricTelemetry("PodsAPIE2ELatencyInMs", @podsAPIE2ELatencyInMs, telemetryProperties) + ApplicationInsightsUtility.sendMetricTelemetry("PodsAPIE2ELatencyMs", @podsAPIE2ELatencyMs, telemetryProperties) @@podTelemetryTimeTracker = DateTime.now.to_time.to_i end rescue => errorStr From 26f07723843937f6b47a2a7e3ebae54158726b9a Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 1 Dec 2020 14:50:08 -0800 Subject: [PATCH 22/45] nodes inventory telemetry --- source/plugins/ruby/in_kube_nodes.rb | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index c77db2791..5caefb23e 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -36,6 +36,8 @@ def initialize @NODES_CHUNK_SIZE = "250" # 
0 indicates no batch enabled for stream emit @NODES_EMIT_STREAM_BATCH_SIZE = 0 + @nodeInventoryE2EProcessingLatencyMs = 0 + @nodesAPIE2ELatencyMs = 0 require_relative "constants" end @@ -82,13 +84,18 @@ def enumerate currentTime = Time.now batchTime = currentTime.utc.iso8601 + @nodesAPIE2ELatencyMs = 0 + @nodeInventoryE2EProcessingLatencyMs = 0 + nodeInventoryStartTime = (Time.now.to_f * 1000).to_i + nodesAPIChunkStartTime = (Time.now.to_f * 1000).to_i # Initializing continuation token to nil continuationToken = nil $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") resourceUri = KubernetesApiClient.getNodesResourceUri("nodes?limit=#{@NODES_CHUNK_SIZE}") continuationToken, nodeInventory = KubernetesApiClient.getResourcesAndContinuationToken(resourceUri) - $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + nodesAPIChunkEndTime = (Time.now.to_f * 1000).to_i + @nodesAPIE2ELatencyMs = (nodesAPIChunkEndTime - nodesAPIChunkStartTime) if (!nodeInventory.nil? && !nodeInventory.empty? && nodeInventory.key?("items") && !nodeInventory["items"].nil? && !nodeInventory["items"].empty?) # debug logs to track the payload size nodeInventorySizeInKB = (nodeInventory.to_s.length) / 1024 @@ -100,7 +107,10 @@ def enumerate #If we receive a continuation token, make calls, process and flush data until we have processed all data while (!continuationToken.nil? && !continuationToken.empty?) + nodesAPIChunkStartTime = (Time.now.to_f * 1000).to_i continuationToken, nodeInventory = KubernetesApiClient.getResourcesAndContinuationToken(resourceUri + "&continue=#{continuationToken}") + nodesAPIChunkEndTime = (Time.now.to_f * 1000).to_i + @nodesAPIE2ELatencyMs = @nodesAPIE2ELatencyMs + (nodesAPIChunkEndTime - nodesAPIChunkStartTime) if (!nodeInventory.nil? && !nodeInventory.empty? && nodeInventory.key?("items") && !nodeInventory["items"].nil? && !nodeInventory["items"].empty?) 
# debug logs to track the payload size nodeInventorySizeInKB = (nodeInventory.to_s.length) / 1024 @@ -111,6 +121,7 @@ def enumerate end end + @nodeInventoryE2EProcessingLatencyMs = ((Time.now.to_f * 1000).to_i - nodeInventoryStartTime) # Setting this to nil so that we dont hold memory until GC kicks in nodeInventory = nil rescue => errorStr @@ -234,7 +245,10 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) properties = getNodeTelemetryProps(item) properties["KubernetesProviderID"] = nodeInventoryRecord["KubernetesProviderID"] capacityInfo = item["status"]["capacity"] + ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties) + ApplicationInsightsUtility.sendMetricTelemetry("NodeInventoryE2EProcessingLatencyMs", @nodeInventoryE2EProcessingLatencyMs, properties) + ApplicationInsightsUtility.sendMetricTelemetry("NodesAPIE2ELatencyMs", @nodesAPIE2ELatencyMs, properties) begin if (!capacityInfo["nvidia.com/gpu"].nil?) && (!capacityInfo["nvidia.com/gpu"].empty?) 
properties["nvigpus"] = capacityInfo["nvidia.com/gpu"] @@ -428,6 +442,8 @@ def getNodeTelemetryProps(item) # using containerRuntimeVersion as DockerVersion as is for non docker runtimes properties["DockerVersion"] = containerRuntimeVersion end + telemetryProperties["NODES_CHUNK_SIZE"] = @NODES_CHUNK_SIZE + telemetryProperties["NODES_EMIT_STREAM_BATCH_SIZE"] = @NODES_EMIT_STREAM_BATCH_SIZE rescue => errorStr $log.warn "in_kube_nodes::getContainerNodeIngetNodeTelemetryPropsventoryRecord:Failed: #{errorStr}" end From 79f40f1bea726de0f5e337d98626da817c2e1d1f Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 1 Dec 2020 17:06:29 -0800 Subject: [PATCH 23/45] configmap changes --- .../scripts/tomlparser-health-config.rb | 64 +++++++++++++++++-- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/build/linux/installer/scripts/tomlparser-health-config.rb b/build/linux/installer/scripts/tomlparser-health-config.rb index 14c8bdb44..6dab36b7a 100644 --- a/build/linux/installer/scripts/tomlparser-health-config.rb +++ b/build/linux/installer/scripts/tomlparser-health-config.rb @@ -13,22 +13,31 @@ @configMapMountPath = "/etc/config/settings/agent-settings" @configSchemaVersion = "" @enable_health_model = false +@nodesChunkSize = 0 +@podsChunkSize = 0 +@eventsChunkSize = 0 +@deploymentsChunkSize = 0 +@hpaChunkSize = 0 + +def is_number?(value) + true if Integer(value) rescue false +end # Use parser to parse the configmap toml file to a ruby structure def parseConfigMap begin # Check to see if config map is created if (File.file?(@configMapMountPath)) - puts "config::configmap container-azm-ms-agentconfig for agent health settings mounted, parsing values" + puts "config::configmap container-azm-ms-agentconfig for agent settings mounted, parsing values" parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true) puts "config::Successfully parsed mounted config map" return parsedConfig else - puts "config::configmap container-azm-ms-agentconfig for 
agent health settings not mounted, using defaults" + puts "config::configmap container-azm-ms-agentconfig for agent settings not mounted, using defaults" return nil end rescue => errorStr - ConfigParseErrorLogger.logError("Exception while parsing config map for enabling health: #{errorStr}, using defaults, please check config map for errors") + ConfigParseErrorLogger.logError("Exception while parsing config map for agent settings : #{errorStr}, using defaults, please check config map for errors") return nil end end @@ -36,9 +45,39 @@ def parseConfigMap # Use the ruby structure created after config parsing to set the right values to be used as environment variables def populateSettingValuesFromConfigMap(parsedConfig) begin - if !parsedConfig.nil? && !parsedConfig[:agent_settings].nil? && !parsedConfig[:agent_settings][:health_model].nil? && !parsedConfig[:agent_settings][:health_model][:enabled].nil? + if !parsedConfig.nil? && !parsedConfig[:agent_settings].nil? + if !parsedConfig[:agent_settings][:health_model].nil? && !parsedConfig[:agent_settings][:health_model][:enabled].nil? @enable_health_model = parsedConfig[:agent_settings][:health_model][:enabled] puts "enable_health_model = #{@enable_health_model}" + end + chunk_config = parsedConfig[:agent_settings][:chunk_config] + if !chunk_config.nil? + nodesChunkSize = chunk_config[:NODES_CHUNK_SIZE] + if !nodesChunkSize.nil? && is_number?(nodesChunkSize) + @nodesChunkSize = nodesChunkSize.to_i + puts "NODES_CHUNK_SIZE = #{@nodesChunkSize}" + end + podsChunkSize = chunk_config[:PODS_CHUNK_SIZE] + if !podsChunkSize.nil? && is_number?(podsChunkSize) + @podsChunkSize = podsChunkSize.to_i + puts "PODS_CHUNK_SIZE = #{@podsChunkSize}" + end + eventsChunkSize = chunk_config[:EVENTS_CHUNK_SIZE] + if !eventsChunkSize.nil? 
&& is_number?(eventsChunkSize) + @eventsChunkSize = eventsChunkSize.to_i + puts "EVENTS_CHUNK_SIZE = #{@eventsChunkSize}" + end + deploymentsChunkSize = chunk_config[:DEPLOYMENTS_CHUNK_SIZE] + if !deploymentsChunkSize.nil? && is_number?(deploymentsChunkSize) + @deploymentsChunkSize = deploymentsChunkSize.to_i + puts "DEPLOYMENTS_CHUNK_SIZE = #{@deploymentsChunkSize}" + end + hpaChunkSize = chunk_config[:HPA_CHUNK_SIZE] + if !hpaChunkSize.nil? && is_number?(hpaChunkSize) + @hpaChunkSize = hpaChunkSize.to_i + puts "HPA_CHUNK_SIZE = #{@hpaChunkSize}" + end + end end rescue => errorStr puts "config::error:Exception while reading config settings for health_model enabled setting - #{errorStr}, using defaults" @@ -65,9 +104,24 @@ def populateSettingValuesFromConfigMap(parsedConfig) if !file.nil? file.write("export AZMON_CLUSTER_ENABLE_HEALTH_MODEL=#{@enable_health_model}\n") + if @nodesChunkSize > 0 + file.write("export NODES_CHUNK_SIZE=#{@nodesChunkSize}\n") + end + if @podsChunkSize > 0 + file.write("export PODS_CHUNK_SIZE=#{@podsChunkSize}\n") + end + if @eventsChunkSize > 0 + file.write("export EVENTS_CHUNK_SIZE=#{@eventsChunkSize}\n") + end + if @deploymentsChunkSize > 0 + file.write("export DEPLOYMENTS_CHUNK_SIZE=#{@deploymentsChunkSize}\n") + end + if @hpaChunkSize > 0 + file.write("export HPA_CHUNK_SIZE=#{@hpaChunkSize}\n") + end # Close file after writing all environment variables file.close else puts "Exception while opening file for writing config environment variables" puts "****************End Config Processing********************" -end \ No newline at end of file +end From 3545773035752e6b39c04a2095f56f3b833c2248 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 1 Dec 2020 19:12:36 -0800 Subject: [PATCH 24/45] add emit streams in configmap --- .../scripts/tomlparser-health-config.rb | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/build/linux/installer/scripts/tomlparser-health-config.rb 
b/build/linux/installer/scripts/tomlparser-health-config.rb index 6dab36b7a..68496d718 100644 --- a/build/linux/installer/scripts/tomlparser-health-config.rb +++ b/build/linux/installer/scripts/tomlparser-health-config.rb @@ -18,6 +18,8 @@ @eventsChunkSize = 0 @deploymentsChunkSize = 0 @hpaChunkSize = 0 +@podsEmitStreamBatchSize = 0 +@nodesEmitStreamBatchSize = 0 def is_number?(value) true if Integer(value) rescue false @@ -77,6 +79,16 @@ def populateSettingValuesFromConfigMap(parsedConfig) @hpaChunkSize = hpaChunkSize.to_i puts "HPA_CHUNK_SIZE = #{@hpaChunkSize}" end + podsEmitStreamBatchSize = chunk_config[:PODS_EMIT_STREAM_BATCH_SIZE] + if !podsEmitStreamBatchSize.nil? && is_number?(podsEmitStreamBatchSize) + @podsEmitStreamBatchSize = podsEmitStreamBatchSize.to_i + puts "PODS_EMIT_STREAM_BATCH_SIZE = #{@podsEmitStreamBatchSize}" + end + nodesEmitStreamBatchSize = chunk_config[:NODES_EMIT_STREAM_BATCH_SIZE] + if !nodesEmitStreamBatchSize.nil? && is_number?(nodesEmitStreamBatchSize) + @nodesEmitStreamBatchSize = nodesEmitStreamBatchSize.to_i + puts "NODES_EMIT_STREAM_BATCH_SIZE = #{@nodesEmitStreamBatchSize}" + end end end rescue => errorStr @@ -119,6 +131,12 @@ def populateSettingValuesFromConfigMap(parsedConfig) if @hpaChunkSize > 0 file.write("export HPA_CHUNK_SIZE=#{@hpaChunkSize}\n") end + if @podsEmitStreamBatchSize > 0 + file.write("export PODS_EMIT_STREAM_BATCH_SIZE=#{@podsEmitStreamBatchSize}\n") + end + if @nodesEmitStreamBatchSize > 0 + file.write("export NODES_EMIT_STREAM_BATCH_SIZE=#{@nodesEmitStreamBatchSize}\n") + end # Close file after writing all environment variables file.close else From 9b7587dbcb6d6b04494638a0eca8e032979c6c37 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 1 Dec 2020 21:40:04 -0800 Subject: [PATCH 25/45] yaml updates --- kubernetes/omsagent.yaml | 32 ++++++++++----------- source/plugins/ruby/in_kube_nodes.rb | 2 +- source/plugins/ruby/in_kube_podinventory.rb | 2 +- 3 files changed, 18 insertions(+), 18 
deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 21df718bb..6dc8c2f5c 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -340,7 +340,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-3" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-4" imagePullPolicy: IfNotPresent resources: limits: @@ -499,7 +499,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-3" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-4" imagePullPolicy: IfNotPresent resources: limits: @@ -511,10 +511,10 @@ spec: env: # azure devops pipeline uses AKS_RESOURCE_ID and AKS_REGION hence ensure to uncomment these # pod inventory plugin settings - - name: PODS_CHUNK_SIZE - value: "500" - - name: PODS_EMIT_STREAM_BATCH_SIZE - value: "200" + # - name: PODS_CHUNK_SIZE + # value: "500" + # - name: PODS_EMIT_STREAM_BATCH_SIZE + # value: "200" # - name: PODS_EMIT_STREAM # value: "true" # - name: MDM_PODS_INVENTORY_EMIT_STREAM @@ -527,10 +527,10 @@ spec: # value: "true" # node inventory plugin settings - - name: NODES_CHUNK_SIZE - value: "250" - - name: NODES_EMIT_STREAM_BATCH_SIZE - value: "100" + # - name: NODES_CHUNK_SIZE + # value: "250" + # - name: NODES_EMIT_STREAM_BATCH_SIZE + # value: "100" # - name: NODES_EMIT_STREAM # value: "true" # - name: NODES_PERF_EMIT_STREAM @@ -543,20 +543,20 @@ spec: # value: "true" # event inventory plugin settings - - name: EVENTS_CHUNK_SIZE - value: "4000" + # - name: EVENTS_CHUNK_SIZE + # value: "4000" # - name: EVENTS_EMIT_STREAM # value: "true" # kube state deployments - - name: DEPLOYMENTS_CHUNK_SIZE - value: "500" + # - name: DEPLOYMENTS_CHUNK_SIZE + # value: "500" # - name: DEPLOYMENTS_EMIT_STREAM # value: "true" # kube hpa - - name: 
HPA_CHUNK_SIZE - value: "2000" + # - name: HPA_CHUNK_SIZE + # value: "2000" # - name: HPA_EMIT_STREAM # value: "true" diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index 5caefb23e..d1fe43db7 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -35,7 +35,7 @@ def initialize # 250 Node items (15KB per node) account to approximately 4MB @NODES_CHUNK_SIZE = "250" # 0 indicates no batch enabled for stream emit - @NODES_EMIT_STREAM_BATCH_SIZE = 0 + @NODES_EMIT_STREAM_BATCH_SIZE = 100 @nodeInventoryE2EProcessingLatencyMs = 0 @nodesAPIE2ELatencyMs = 0 require_relative "constants" diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 2e920e7c4..fa39d0d9c 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -35,7 +35,7 @@ def initialize @winContainerCount = 0 @controllerData = {} # 0 indicates no batch enabled for stream emit - @PODS_EMIT_STREAM_BATCH_SIZE = 0 + @PODS_EMIT_STREAM_BATCH_SIZE = 200 @podInventoryE2EProcessingLatencyMs = 0 @podsAPIE2ELatencyMs = 0 end From 9b857b4253027b91ab239fee7d9a8f2be0f6285b Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Wed, 2 Dec 2020 07:30:20 -0800 Subject: [PATCH 26/45] fix copy and paste bug --- source/plugins/ruby/in_kube_nodes.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index d1fe43db7..d9eb6b09a 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -442,8 +442,8 @@ def getNodeTelemetryProps(item) # using containerRuntimeVersion as DockerVersion as is for non docker runtimes properties["DockerVersion"] = containerRuntimeVersion end - telemetryProperties["NODES_CHUNK_SIZE"] = @NODES_CHUNK_SIZE - telemetryProperties["NODES_EMIT_STREAM_BATCH_SIZE"] = @NODES_EMIT_STREAM_BATCH_SIZE + 
properties["NODES_CHUNK_SIZE"] = @NODES_CHUNK_SIZE + properties["NODES_EMIT_STREAM_BATCH_SIZE"] = @NODES_EMIT_STREAM_BATCH_SIZE rescue => errorStr $log.warn "in_kube_nodes::getContainerNodeIngetNodeTelemetryPropsventoryRecord:Failed: #{errorStr}" end From 5597360a0c78412a95fcf311f96262cfdff7c7b4 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Wed, 2 Dec 2020 13:39:40 -0800 Subject: [PATCH 27/45] add todo comments --- kubernetes/omsagent.yaml | 51 --------------------- source/plugins/ruby/in_kube_nodes.rb | 3 ++ source/plugins/ruby/in_kube_podinventory.rb | 2 + 3 files changed, 5 insertions(+), 51 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 6dc8c2f5c..bc001d9f4 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -509,57 +509,6 @@ spec: cpu: 150m memory: 250Mi env: - # azure devops pipeline uses AKS_RESOURCE_ID and AKS_REGION hence ensure to uncomment these - # pod inventory plugin settings - # - name: PODS_CHUNK_SIZE - # value: "500" - # - name: PODS_EMIT_STREAM_BATCH_SIZE - # value: "200" - # - name: PODS_EMIT_STREAM - # value: "true" - # - name: MDM_PODS_INVENTORY_EMIT_STREAM - # value: "true" - # - name: CONTAINER_PERF_EMIT_STREAM - # value: "true" - # - name: SERVICES_EMIT_STREAM - # value: "true" - # - name: GPU_PERF_EMIT_STREAM - # value: "true" - - # node inventory plugin settings - # - name: NODES_CHUNK_SIZE - # value: "250" - # - name: NODES_EMIT_STREAM_BATCH_SIZE - # value: "100" - # - name: NODES_EMIT_STREAM - # value: "true" - # - name: NODES_PERF_EMIT_STREAM - # value: "true" - # - name: GPU_NODES_PERF_EMIT_STREAM - # value: "true" - # - name: CONTAINER_NODE_INVENTORY_EMIT_STREAM - # value: "true" - # - name: MDM_KUBE_NODE_INVENTORY_EMIT_STREAM - # value: "true" - - # event inventory plugin settings - # - name: EVENTS_CHUNK_SIZE - # value: "4000" - # - name: EVENTS_EMIT_STREAM - # value: "true" - - # kube state deployments - # - name: DEPLOYMENTS_CHUNK_SIZE - # value: "500" - # - name: 
DEPLOYMENTS_EMIT_STREAM - # value: "true" - - # kube hpa - # - name: HPA_CHUNK_SIZE - # value: "2000" - # - name: HPA_EMIT_STREAM - # value: "true" - - name: AKS_RESOURCE_ID value: "VALUE_AKS_RESOURCE_ID_VALUE" - name: AKS_REGION diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index d9eb6b09a..5886b523c 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -346,6 +346,7 @@ def run_periodic @mutex.unlock end + # TODO - move this method to KubernetesClient or helper class def getNodeInventoryRecord(item, batchTime = Time.utc.iso8601) record = {} begin @@ -406,6 +407,7 @@ def getNodeInventoryRecord(item, batchTime = Time.utc.iso8601) return record end + # TODO - move this method to KubernetesClient or helper class def getContainerNodeInventoryRecord(item, batchTime = Time.utc.iso8601) containerNodeInventoryRecord = {} begin @@ -426,6 +428,7 @@ def getContainerNodeInventoryRecord(item, batchTime = Time.utc.iso8601) return containerNodeInventoryRecord end + # TODO - move this method to KubernetesClient or helper class def getNodeTelemetryProps(item) properties = {} begin diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index fa39d0d9c..215811353 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -395,6 +395,7 @@ def run_periodic @mutex.unlock end + # TODO - move this method to KubernetesClient or helper class def getPodInventoryRecords(item, serviceRecords, batchTime = Time.utc.iso8601) records = [] record = {} @@ -603,6 +604,7 @@ def getPodInventoryRecords(item, serviceRecords, batchTime = Time.utc.iso8601) return records end + # TODO - move this method to KubernetesClient or helper class def getServiceNameFromLabels(namespace, labels, serviceRecords) serviceName = "" begin From 8880e91fcd85df14850e6e41d901a909dac80277 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: 
Thu, 3 Dec 2020 22:02:52 -0800 Subject: [PATCH 28/45] fix node latency telemetry bug --- source/plugins/ruby/in_kube_nodes.rb | 12 +++++++++--- source/plugins/ruby/in_kube_podinventory.rb | 3 +-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index 5886b523c..603c71e97 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -64,7 +64,8 @@ def start @condition = ConditionVariable.new @mutex = Mutex.new @thread = Thread.new(&method(:run_periodic)) - @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i + @@nodeTelemetryTimeTracker = DateTime.now.to_time. + @@nodeInventoryLatencyTelemetryTimeTracker = DateTime.now.to_time.to_i end end @@ -122,6 +123,13 @@ def enumerate end @nodeInventoryE2EProcessingLatencyMs = ((Time.now.to_f * 1000).to_i - nodeInventoryStartTime) + timeDifference = (DateTime.now.to_time.to_i - @@nodeInventoryLatencyTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) + ApplicationInsightsUtility.sendMetricTelemetry("NodeInventoryE2EProcessingLatencyMs", @nodeInventoryE2EProcessingLatencyMs, {}) + ApplicationInsightsUtility.sendMetricTelemetry("NodesAPIE2ELatencyMs", @nodesAPIE2ELatencyMs, {}) + @@nodeInventoryLatencyTelemetryTimeTracker = DateTime.now.to_time.to_i + end # Setting this to nil so that we dont hold memory until GC kicks in nodeInventory = nil rescue => errorStr @@ -247,8 +255,6 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) capacityInfo = item["status"]["capacity"] ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties) - ApplicationInsightsUtility.sendMetricTelemetry("NodeInventoryE2EProcessingLatencyMs", @nodeInventoryE2EProcessingLatencyMs, properties) - ApplicationInsightsUtility.sendMetricTelemetry("NodesAPIE2ELatencyMs", 
@nodesAPIE2ELatencyMs, properties) begin if (!capacityInfo["nvidia.com/gpu"].nil?) && (!capacityInfo["nvidia.com/gpu"].empty?) properties["nvigpus"] = capacityInfo["nvidia.com/gpu"] diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 215811353..7d935b04e 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -91,7 +91,7 @@ def enumerate(podList = nil) batchTime = currentTime.utc.iso8601 serviceRecords = [] @podInventoryE2EProcessingLatencyMs = 0 - + podInventoryStartTime = (Time.now.to_f * 1000).to_i # Get services first so that we dont need to make a call for very chunk $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") serviceInfo = KubernetesApiClient.getKubeResourceInfo("services") @@ -114,7 +114,6 @@ def enumerate(podList = nil) # to track e2e processing latency @podsAPIE2ELatencyMs = 0 - podInventoryStartTime = (Time.now.to_f * 1000).to_i podsAPIChunkStartTime = (Time.now.to_f * 1000).to_i # Initializing continuation token to nil continuationToken = nil From 87f52d6839f09e7a280c80d47495c27fb06a317e Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Fri, 4 Dec 2020 05:59:13 -0800 Subject: [PATCH 29/45] update yaml with latest test image --- kubernetes/omsagent.yaml | 4 ++-- source/plugins/ruby/in_kube_nodes.rb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index bc001d9f4..290490b19 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -340,7 +340,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-4" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-7" imagePullPolicy: IfNotPresent resources: limits: @@ -499,7 +499,7 @@ spec: serviceAccountName: omsagent containers: - 
name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-4" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-7" imagePullPolicy: IfNotPresent resources: limits: diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index 603c71e97..92ffcacbe 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -64,7 +64,7 @@ def start @condition = ConditionVariable.new @mutex = Mutex.new @thread = Thread.new(&method(:run_periodic)) - @@nodeTelemetryTimeTracker = DateTime.now.to_time. + @@nodeTelemetryTimeTracker = DateTime.now.to_time @@nodeInventoryLatencyTelemetryTimeTracker = DateTime.now.to_time.to_i end end From c4651c94fa590997206827a2751d83d12fdaf99f Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Fri, 4 Dec 2020 06:40:32 -0800 Subject: [PATCH 30/45] fix bug --- kubernetes/omsagent.yaml | 4 ++-- source/plugins/ruby/in_kube_nodes.rb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 290490b19..e56efdabb 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -340,7 +340,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-7" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-8" imagePullPolicy: IfNotPresent resources: limits: @@ -499,7 +499,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-7" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-8" imagePullPolicy: IfNotPresent resources: limits: diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index 92ffcacbe..9050fa67c 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ 
b/source/plugins/ruby/in_kube_nodes.rb @@ -64,7 +64,7 @@ def start @condition = ConditionVariable.new @mutex = Mutex.new @thread = Thread.new(&method(:run_periodic)) - @@nodeTelemetryTimeTracker = DateTime.now.to_time + @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i @@nodeInventoryLatencyTelemetryTimeTracker = DateTime.now.to_time.to_i end end From 95144a638d017a3c79efe4cbb161764f29e1357a Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Fri, 4 Dec 2020 07:40:56 -0800 Subject: [PATCH 31/45] upping rs memory change --- kubernetes/omsagent.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index e56efdabb..cede6ca87 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -504,7 +504,7 @@ spec: resources: limits: cpu: 1 - memory: 750Mi + memory: 1Gi requests: cpu: 150m memory: 250Mi From ae2cf42140ac94c0860dedc70f7dfdcc18202ad7 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 8 Dec 2020 23:37:08 -0800 Subject: [PATCH 32/45] fix mdm bug with final emit stream --- source/plugins/ruby/in_kube_nodes.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index 9050fa67c..f88d80603 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -294,6 +294,8 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) if eventStream.count > 0 $log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}") router.emit_stream(@tag, eventStream) if eventStream + $log.info("in_kube_node::parse_and_emit_records: number of mdm node inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}") + router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream eventStream = nil end if containerNodeInventoryEventStream.count > 0 From 
cf8da5c665a6024f7c31e52a590a7428c52c641b Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Wed, 9 Dec 2020 10:17:29 -0800 Subject: [PATCH 33/45] update to latest image --- kubernetes/omsagent.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index cede6ca87..b827a72d5 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -340,7 +340,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-8" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-9" imagePullPolicy: IfNotPresent resources: limits: @@ -499,7 +499,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-8" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-9" imagePullPolicy: IfNotPresent resources: limits: From 11eda7c0e5ed0dc426d39c9ca615667a69e11f4c Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Wed, 9 Dec 2020 12:56:52 -0800 Subject: [PATCH 34/45] fix pr feedback --- .../installer/datafiles/base_container.data | 2 +- .../scripts/tomlparser-agent-config.rb | 145 ++++++++++++++++++ kubernetes/linux/main.sh | 12 +- 3 files changed, 152 insertions(+), 7 deletions(-) create mode 100644 build/linux/installer/scripts/tomlparser-agent-config.rb diff --git a/build/linux/installer/datafiles/base_container.data b/build/linux/installer/datafiles/base_container.data index ca2538b79..562a9d6f2 100644 --- a/build/linux/installer/datafiles/base_container.data +++ b/build/linux/installer/datafiles/base_container.data @@ -122,7 +122,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/tomlparser-mdm-metrics-config.rb; build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb; 755; root; root /opt/tomlparser-metric-collection-config.rb; 
build/linux/installer/scripts/tomlparser-metric-collection-config.rb; 755; root; root -/opt/tomlparser-health-config.rb; build/linux/installer/scripts/tomlparser-health-config.rb; 755; root; root +/opt/tomlparser-agent-config.rb; build/linux/installer/scripts/tomlparser-agent-config.rb; 755; root; root /opt/tomlparser.rb; build/common/installer/scripts/tomlparser.rb; 755; root; root /opt/td-agent-bit-conf-customizer.rb; build/common/installer/scripts/td-agent-bit-conf-customizer.rb; 755; root; root /opt/ConfigParseErrorLogger.rb; build/common/installer/scripts/ConfigParseErrorLogger.rb; 755; root; root diff --git a/build/linux/installer/scripts/tomlparser-agent-config.rb b/build/linux/installer/scripts/tomlparser-agent-config.rb new file mode 100644 index 000000000..68496d718 --- /dev/null +++ b/build/linux/installer/scripts/tomlparser-agent-config.rb @@ -0,0 +1,145 @@ +#!/usr/local/bin/ruby + +#this should be require relative in Linux and require in windows, since it is a gem install on windows +@os_type = ENV["OS_TYPE"] +if !@os_type.nil? && !@os_type.empty? 
&& @os_type.strip.casecmp("windows") == 0 + require "tomlrb" +else + require_relative "tomlrb" +end + +require_relative "ConfigParseErrorLogger" + +@configMapMountPath = "/etc/config/settings/agent-settings" +@configSchemaVersion = "" +@enable_health_model = false +@nodesChunkSize = 0 +@podsChunkSize = 0 +@eventsChunkSize = 0 +@deploymentsChunkSize = 0 +@hpaChunkSize = 0 +@podsEmitStreamBatchSize = 0 +@nodesEmitStreamBatchSize = 0 + +def is_number?(value) + true if Integer(value) rescue false +end + +# Use parser to parse the configmap toml file to a ruby structure +def parseConfigMap + begin + # Check to see if config map is created + if (File.file?(@configMapMountPath)) + puts "config::configmap container-azm-ms-agentconfig for agent settings mounted, parsing values" + parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true) + puts "config::Successfully parsed mounted config map" + return parsedConfig + else + puts "config::configmap container-azm-ms-agentconfig for agent settings not mounted, using defaults" + return nil + end + rescue => errorStr + ConfigParseErrorLogger.logError("Exception while parsing config map for agent settings : #{errorStr}, using defaults, please check config map for errors") + return nil + end +end + +# Use the ruby structure created after config parsing to set the right values to be used as environment variables +def populateSettingValuesFromConfigMap(parsedConfig) + begin + if !parsedConfig.nil? && !parsedConfig[:agent_settings].nil? + if !parsedConfig[:agent_settings][:health_model].nil? && !parsedConfig[:agent_settings][:health_model][:enabled].nil? + @enable_health_model = parsedConfig[:agent_settings][:health_model][:enabled] + puts "enable_health_model = #{@enable_health_model}" + end + chunk_config = parsedConfig[:agent_settings][:chunk_config] + if !chunk_config.nil? + nodesChunkSize = chunk_config[:NODES_CHUNK_SIZE] + if !nodesChunkSize.nil? 
&& is_number?(nodesChunkSize) + @nodesChunkSize = nodesChunkSize.to_i + puts "NODES_CHUNK_SIZE = #{@nodesChunkSize}" + end + podsChunkSize = chunk_config[:PODS_CHUNK_SIZE] + if !podsChunkSize.nil? && is_number?(podsChunkSize) + @podsChunkSize = podsChunkSize.to_i + puts "PODS_CHUNK_SIZE = #{@podsChunkSize}" + end + eventsChunkSize = chunk_config[:EVENTS_CHUNK_SIZE] + if !eventsChunkSize.nil? && is_number?(eventsChunkSize) + @eventsChunkSize = eventsChunkSize.to_i + puts "EVENTS_CHUNK_SIZE = #{@eventsChunkSize}" + end + deploymentsChunkSize = chunk_config[:DEPLOYMENTS_CHUNK_SIZE] + if !deploymentsChunkSize.nil? && is_number?(deploymentsChunkSize) + @deploymentsChunkSize = deploymentsChunkSize.to_i + puts "DEPLOYMENTS_CHUNK_SIZE = #{@deploymentsChunkSize}" + end + hpaChunkSize = chunk_config[:HPA_CHUNK_SIZE] + if !hpaChunkSize.nil? && is_number?(hpaChunkSize) + @hpaChunkSize = hpaChunkSize.to_i + puts "HPA_CHUNK_SIZE = #{@hpaChunkSize}" + end + podsEmitStreamBatchSize = chunk_config[:PODS_EMIT_STREAM_BATCH_SIZE] + if !podsEmitStreamBatchSize.nil? && is_number?(podsEmitStreamBatchSize) + @podsEmitStreamBatchSize = podsEmitStreamBatchSize.to_i + puts "PODS_EMIT_STREAM_BATCH_SIZE = #{@podsEmitStreamBatchSize}" + end + nodesEmitStreamBatchSize = chunk_config[:NODES_EMIT_STREAM_BATCH_SIZE] + if !nodesEmitStreamBatchSize.nil? && is_number?(nodesEmitStreamBatchSize) + @nodesEmitStreamBatchSize = nodesEmitStreamBatchSize.to_i + puts "NODES_EMIT_STREAM_BATCH_SIZE = #{@nodesEmitStreamBatchSize}" + end + end + end + rescue => errorStr + puts "config::error:Exception while reading config settings for health_model enabled setting - #{errorStr}, using defaults" + @enable_health_model = false + end +end + +@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] +puts "****************Start Config Processing********************" +if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? 
&& @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it + configMapSettings = parseConfigMap + if !configMapSettings.nil? + populateSettingValuesFromConfigMap(configMapSettings) + end +else + if (File.file?(@configMapMountPath)) + ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version") + end + @enable_health_model = false +end + +# Write the settings to file, so that they can be set as environment variables +file = File.open("health_config_env_var", "w") + +if !file.nil? + file.write("export AZMON_CLUSTER_ENABLE_HEALTH_MODEL=#{@enable_health_model}\n") + if @nodesChunkSize > 0 + file.write("export NODES_CHUNK_SIZE=#{@nodesChunkSize}\n") + end + if @podsChunkSize > 0 + file.write("export PODS_CHUNK_SIZE=#{@podsChunkSize}\n") + end + if @eventsChunkSize > 0 + file.write("export EVENTS_CHUNK_SIZE=#{@eventsChunkSize}\n") + end + if @deploymentsChunkSize > 0 + file.write("export DEPLOYMENTS_CHUNK_SIZE=#{@deploymentsChunkSize}\n") + end + if @hpaChunkSize > 0 + file.write("export HPA_CHUNK_SIZE=#{@hpaChunkSize}\n") + end + if @podsEmitStreamBatchSize > 0 + file.write("export PODS_EMIT_STREAM_BATCH_SIZE=#{@podsEmitStreamBatchSize}\n") + end + if @nodesEmitStreamBatchSize > 0 + file.write("export NODES_EMIT_STREAM_BATCH_SIZE=#{@nodesEmitStreamBatchSize}\n") + end + # Close file after writing all environment variables + file.close +else + puts "Exception while opening file for writing config environment variables" + puts "****************End Config Processing********************" +end diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh index a2ba6a1d1..63c9a2ba9 100644 --- a/kubernetes/linux/main.sh +++ b/kubernetes/linux/main.sh @@ -171,8 +171,8 @@ done source config_env_var -#Parse the configmap to set the right environment variables for health feature. 
-/opt/microsoft/omsagent/ruby/bin/ruby tomlparser-health-config.rb +#Parse the configmap to set the right environment variables for agent config. +/opt/microsoft/omsagent/ruby/bin/ruby tomlparser-agent-config.rb cat health_config_env_var | while read line; do #echo $line @@ -429,7 +429,7 @@ echo "export DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION" >> ~/.bashrc #region check to auto-activate oneagent, to route container logs, #Intent is to activate one agent routing for all managed clusters with region in the regionllist, unless overridden by configmap -# AZMON_CONTAINER_LOGS_ROUTE will have route (if any) specified in the config map +# AZMON_CONTAINER_LOGS_ROUTE will have route (if any) specified in the config map # AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE will have the final route that we compute & set, based on our region list logic echo "************start oneagent log routing checks************" # by default, use configmap route for safer side @@ -462,9 +462,9 @@ else echo "current region is not in oneagent regions..." fi -if [ "$isoneagentregion" = true ]; then +if [ "$isoneagentregion" = true ]; then #if configmap has a routing for logs, but current region is in the oneagent region list, take the configmap route - if [ ! -z $AZMON_CONTAINER_LOGS_ROUTE ]; then + if [ ! -z $AZMON_CONTAINER_LOGS_ROUTE ]; then AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$AZMON_CONTAINER_LOGS_ROUTE echo "oneagent region is true for current region:$currentregion and config map logs route is not empty. so using config map logs route as effective route:$AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE" else #there is no configmap route, so route thru oneagent @@ -511,7 +511,7 @@ if [ ! -e "/etc/config/kube.conf" ]; then echo "starting mdsd ..." 
 mdsd -l -e ${MDSD_LOG}/mdsd.err -w ${MDSD_LOG}/mdsd.warn -o ${MDSD_LOG}/mdsd.info -q ${MDSD_LOG}/mdsd.qos & - + touch /opt/AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2 fi fi From 2f3574d87bf8df8ba55dd96bf426aaefbb37dd17 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Wed, 9 Dec 2020 18:03:50 -0800 Subject: [PATCH 35/45] fix pr feedback --- .../scripts/tomlparser-agent-config.rb | 107 ++++++++----- .../scripts/tomlparser-health-config.rb | 145 ------------------ source/plugins/ruby/in_kube_nodes.rb | 1 - source/plugins/ruby/in_kube_podinventory.rb | 3 - 4 files changed, 67 insertions(+), 189 deletions(-) delete mode 100644 build/linux/installer/scripts/tomlparser-health-config.rb diff --git a/build/linux/installer/scripts/tomlparser-agent-config.rb b/build/linux/installer/scripts/tomlparser-agent-config.rb index 68496d718..841f4b081 100644 --- a/build/linux/installer/scripts/tomlparser-agent-config.rb +++ b/build/linux/installer/scripts/tomlparser-agent-config.rb @@ -13,13 +13,47 @@ @configMapMountPath = "/etc/config/settings/agent-settings" @configSchemaVersion = "" @enable_health_model = false -@nodesChunkSize = 0 -@podsChunkSize = 0 -@eventsChunkSize = 0 -@deploymentsChunkSize = 0 -@hpaChunkSize = 0 -@podsEmitStreamBatchSize = 0 -@nodesEmitStreamBatchSize = 0 + +# 250 Node items (15KB per node) account to approximately 4MB +@nodesChunkSize = 250 +# 500 pods (10KB per pod) account to approximately 5MB +@podsChunkSize = 500 +# 4000 events (1KB per event) account to approximately 4MB +@eventsChunkSize = 4000 +# roughly each deployment is 8k +# 500 deployments account to approximately 4MB +@deploymentsChunkSize = 500 +# roughly each HPA is 3k +# 2000 HPAs account to approximately 6-7MB +@hpaChunkSize = 2000 +# stream batch sizes to avoid large file writes +# too low will consume disk i/o +@podsEmitStreamBatchSize = 200 +@nodesEmitStreamBatchSize = 100 + +# higher the chunk size rs pod memory consumption higher and lower api latency +# similarly lower the value, 
helps on the memory consumption but incurs additional round trip latency +# these need to be tuned based on the workload +# nodes +@nodesChunkSizeMin = 100 +@nodesChunkSizeMax = 400 +# pods +@podsChunkSizeMin = 250 +@podsChunkSizeMax = 1500 +# events +@eventsChunkSizeMin = 2000 +@eventsChunkSizeMax = 10000 +# deployments +@deploymentsChunkSizeMin = 500 +@deploymentsChunkSizeMax = 1000 +# hpa +@hpaChunkSizeMin = 500 +@hpaChunkSizeMax = 1000 + +# emit stream sizes to prevent lower values which costs disk i/o +# max will be up to the chunk size +@podsEmitStreamBatchSizeMin = 50 +@nodesEmitStreamBatchSizeMin = 50 def is_number?(value) true if Integer(value) rescue false @@ -55,37 +89,44 @@ def populateSettingValuesFromConfigMap(parsedConfig) chunk_config = parsedConfig[:agent_settings][:chunk_config] if !chunk_config.nil? nodesChunkSize = chunk_config[:NODES_CHUNK_SIZE] - if !nodesChunkSize.nil? && is_number?(nodesChunkSize) + if !nodesChunkSize.nil? && is_number?(nodesChunkSize) && (@nodesChunkSizeMin..@nodesChunkSizeMax) === nodesChunkSize.to_i @nodesChunkSize = nodesChunkSize.to_i - puts "NODES_CHUNK_SIZE = #{@nodesChunkSize}" + puts "Using config map value: NODES_CHUNK_SIZE = #{@nodesChunkSize}" end + podsChunkSize = chunk_config[:PODS_CHUNK_SIZE] - if !podsChunkSize.nil? && is_number?(podsChunkSize) + if !podsChunkSize.nil? && is_number?(podsChunkSize) && (@podsChunkSizeMin..@podsChunkSizeMax) === podsChunkSize.to_i @podsChunkSize = podsChunkSize.to_i - puts "PODS_CHUNK_SIZE = #{@podsChunkSize}" + puts "Using config map value: PODS_CHUNK_SIZE = #{@podsChunkSize}" end + eventsChunkSize = chunk_config[:EVENTS_CHUNK_SIZE] - if !eventsChunkSize.nil? 
&& is_number?(eventsChunkSize) && (@eventsChunkSizeMin..@eventsChunkSizeMax) === eventsChunkSize.to_i @eventsChunkSize = eventsChunkSize.to_i - puts "EVENTS_CHUNK_SIZE = #{@eventsChunkSize}" + puts "Using config map value: EVENTS_CHUNK_SIZE = #{@eventsChunkSize}" end + deploymentsChunkSize = chunk_config[:DEPLOYMENTS_CHUNK_SIZE] - if !deploymentsChunkSize.nil? && is_number?(deploymentsChunkSize) + if !deploymentsChunkSize.nil? && is_number?(deploymentsChunkSize) && (@deploymentsChunkSizeMin..@deploymentsChunkSizeMax) === deploymentsChunkSize.to_i @deploymentsChunkSize = deploymentsChunkSize.to_i - puts "DEPLOYMENTS_CHUNK_SIZE = #{@deploymentsChunkSize}" + puts "Using config map value: DEPLOYMENTS_CHUNK_SIZE = #{@deploymentsChunkSize}" end + hpaChunkSize = chunk_config[:HPA_CHUNK_SIZE] - if !hpaChunkSize.nil? && is_number?(hpaChunkSize) + if !hpaChunkSize.nil? && is_number?(hpaChunkSize) && (@hpaChunkSizeMin..@hpaChunkSizeMax) === hpaChunkSize.to_i @hpaChunkSize = hpaChunkSize.to_i - puts "HPA_CHUNK_SIZE = #{@hpaChunkSize}" + puts "Using config map value: HPA_CHUNK_SIZE = #{@hpaChunkSize}" end + podsEmitStreamBatchSize = chunk_config[:PODS_EMIT_STREAM_BATCH_SIZE] - if !podsEmitStreamBatchSize.nil? && is_number?(podsEmitStreamBatchSize) + if !podsEmitStreamBatchSize.nil? && is_number?(podsEmitStreamBatchSize) && + podsEmitStreamBatchSize.to_i <= @podsChunkSize && podsEmitStreamBatchSize.to_i >= @podsEmitStreamBatchSizeMin @podsEmitStreamBatchSize = podsEmitStreamBatchSize.to_i puts "PODS_EMIT_STREAM_BATCH_SIZE = #{@podsEmitStreamBatchSize}" end nodesEmitStreamBatchSize = chunk_config[:NODES_EMIT_STREAM_BATCH_SIZE] - if !nodesEmitStreamBatchSize.nil? && is_number?(nodesEmitStreamBatchSize) + if !nodesEmitStreamBatchSize.nil? 
&& is_number?(nodesEmitStreamBatchSize) && + nodesEmitStreamBatchSize.to_i <= @nodesChunkSize && nodesEmitStreamBatchSize.to_i >= @nodesEmitStreamBatchSizeMin @nodesEmitStreamBatchSize = nodesEmitStreamBatchSize.to_i puts "NODES_EMIT_STREAM_BATCH_SIZE = #{@nodesEmitStreamBatchSize}" end @@ -116,27 +157,13 @@ def populateSettingValuesFromConfigMap(parsedConfig) if !file.nil? file.write("export AZMON_CLUSTER_ENABLE_HEALTH_MODEL=#{@enable_health_model}\n") - if @nodesChunkSize > 0 - file.write("export NODES_CHUNK_SIZE=#{@nodesChunkSize}\n") - end - if @podsChunkSize > 0 - file.write("export PODS_CHUNK_SIZE=#{@podsChunkSize}\n") - end - if @eventsChunkSize > 0 - file.write("export EVENTS_CHUNK_SIZE=#{@eventsChunkSize}\n") - end - if @deploymentsChunkSize > 0 - file.write("export DEPLOYMENTS_CHUNK_SIZE=#{@deploymentsChunkSize}\n") - end - if @hpaChunkSize > 0 - file.write("export HPA_CHUNK_SIZE=#{@hpaChunkSize}\n") - end - if @podsEmitStreamBatchSize > 0 - file.write("export PODS_EMIT_STREAM_BATCH_SIZE=#{@podsEmitStreamBatchSize}\n") - end - if @nodesEmitStreamBatchSize > 0 - file.write("export NODES_EMIT_STREAM_BATCH_SIZE=#{@nodesEmitStreamBatchSize}\n") - end + file.write("export NODES_CHUNK_SIZE=#{@nodesChunkSize}\n") + file.write("export PODS_CHUNK_SIZE=#{@podsChunkSize}\n") + file.write("export EVENTS_CHUNK_SIZE=#{@eventsChunkSize}\n") + file.write("export DEPLOYMENTS_CHUNK_SIZE=#{@deploymentsChunkSize}\n") + file.write("export HPA_CHUNK_SIZE=#{@hpaChunkSize}\n") + file.write("export PODS_EMIT_STREAM_BATCH_SIZE=#{@podsEmitStreamBatchSize}\n") + file.write("export NODES_EMIT_STREAM_BATCH_SIZE=#{@nodesEmitStreamBatchSize}\n") # Close file after writing all environment variables file.close else diff --git a/build/linux/installer/scripts/tomlparser-health-config.rb b/build/linux/installer/scripts/tomlparser-health-config.rb deleted file mode 100644 index 68496d718..000000000 --- a/build/linux/installer/scripts/tomlparser-health-config.rb +++ /dev/null @@ -1,145 +0,0 @@ 
-#!/usr/local/bin/ruby - -#this should be require relative in Linux and require in windows, since it is a gem install on windows -@os_type = ENV["OS_TYPE"] -if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0 - require "tomlrb" -else - require_relative "tomlrb" -end - -require_relative "ConfigParseErrorLogger" - -@configMapMountPath = "/etc/config/settings/agent-settings" -@configSchemaVersion = "" -@enable_health_model = false -@nodesChunkSize = 0 -@podsChunkSize = 0 -@eventsChunkSize = 0 -@deploymentsChunkSize = 0 -@hpaChunkSize = 0 -@podsEmitStreamBatchSize = 0 -@nodesEmitStreamBatchSize = 0 - -def is_number?(value) - true if Integer(value) rescue false -end - -# Use parser to parse the configmap toml file to a ruby structure -def parseConfigMap - begin - # Check to see if config map is created - if (File.file?(@configMapMountPath)) - puts "config::configmap container-azm-ms-agentconfig for agent settings mounted, parsing values" - parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true) - puts "config::Successfully parsed mounted config map" - return parsedConfig - else - puts "config::configmap container-azm-ms-agentconfig for agent settings not mounted, using defaults" - return nil - end - rescue => errorStr - ConfigParseErrorLogger.logError("Exception while parsing config map for agent settings : #{errorStr}, using defaults, please check config map for errors") - return nil - end -end - -# Use the ruby structure created after config parsing to set the right values to be used as environment variables -def populateSettingValuesFromConfigMap(parsedConfig) - begin - if !parsedConfig.nil? && !parsedConfig[:agent_settings].nil? - if !parsedConfig[:agent_settings][:health_model].nil? && !parsedConfig[:agent_settings][:health_model][:enabled].nil? 
- @enable_health_model = parsedConfig[:agent_settings][:health_model][:enabled] - puts "enable_health_model = #{@enable_health_model}" - end - chunk_config = parsedConfig[:agent_settings][:chunk_config] - if !chunk_config.nil? - nodesChunkSize = chunk_config[:NODES_CHUNK_SIZE] - if !nodesChunkSize.nil? && is_number?(nodesChunkSize) - @nodesChunkSize = nodesChunkSize.to_i - puts "NODES_CHUNK_SIZE = #{@nodesChunkSize}" - end - podsChunkSize = chunk_config[:PODS_CHUNK_SIZE] - if !podsChunkSize.nil? && is_number?(podsChunkSize) - @podsChunkSize = podsChunkSize.to_i - puts "PODS_CHUNK_SIZE = #{@podsChunkSize}" - end - eventsChunkSize = chunk_config[:EVENTS_CHUNK_SIZE] - if !eventsChunkSize.nil? && is_number?(eventsChunkSize) - @eventsChunkSize = eventsChunkSize.to_i - puts "EVENTS_CHUNK_SIZE = #{@eventsChunkSize}" - end - deploymentsChunkSize = chunk_config[:DEPLOYMENTS_CHUNK_SIZE] - if !deploymentsChunkSize.nil? && is_number?(deploymentsChunkSize) - @deploymentsChunkSize = deploymentsChunkSize.to_i - puts "DEPLOYMENTS_CHUNK_SIZE = #{@deploymentsChunkSize}" - end - hpaChunkSize = chunk_config[:HPA_CHUNK_SIZE] - if !hpaChunkSize.nil? && is_number?(hpaChunkSize) - @hpaChunkSize = hpaChunkSize.to_i - puts "HPA_CHUNK_SIZE = #{@hpaChunkSize}" - end - podsEmitStreamBatchSize = chunk_config[:PODS_EMIT_STREAM_BATCH_SIZE] - if !podsEmitStreamBatchSize.nil? && is_number?(podsEmitStreamBatchSize) - @podsEmitStreamBatchSize = podsEmitStreamBatchSize.to_i - puts "PODS_EMIT_STREAM_BATCH_SIZE = #{@podsEmitStreamBatchSize}" - end - nodesEmitStreamBatchSize = chunk_config[:NODES_EMIT_STREAM_BATCH_SIZE] - if !nodesEmitStreamBatchSize.nil? 
&& is_number?(nodesEmitStreamBatchSize) - @nodesEmitStreamBatchSize = nodesEmitStreamBatchSize.to_i - puts "NODES_EMIT_STREAM_BATCH_SIZE = #{@nodesEmitStreamBatchSize}" - end - end - end - rescue => errorStr - puts "config::error:Exception while reading config settings for health_model enabled setting - #{errorStr}, using defaults" - @enable_health_model = false - end -end - -@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] -puts "****************Start Config Processing********************" -if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it - configMapSettings = parseConfigMap - if !configMapSettings.nil? - populateSettingValuesFromConfigMap(configMapSettings) - end -else - if (File.file?(@configMapMountPath)) - ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version") - end - @enable_health_model = false -end - -# Write the settings to file, so that they can be set as environment variables -file = File.open("health_config_env_var", "w") - -if !file.nil? 
- file.write("export AZMON_CLUSTER_ENABLE_HEALTH_MODEL=#{@enable_health_model}\n") - if @nodesChunkSize > 0 - file.write("export NODES_CHUNK_SIZE=#{@nodesChunkSize}\n") - end - if @podsChunkSize > 0 - file.write("export PODS_CHUNK_SIZE=#{@podsChunkSize}\n") - end - if @eventsChunkSize > 0 - file.write("export EVENTS_CHUNK_SIZE=#{@eventsChunkSize}\n") - end - if @deploymentsChunkSize > 0 - file.write("export DEPLOYMENTS_CHUNK_SIZE=#{@deploymentsChunkSize}\n") - end - if @hpaChunkSize > 0 - file.write("export HPA_CHUNK_SIZE=#{@hpaChunkSize}\n") - end - if @podsEmitStreamBatchSize > 0 - file.write("export PODS_EMIT_STREAM_BATCH_SIZE=#{@podsEmitStreamBatchSize}\n") - end - if @nodesEmitStreamBatchSize > 0 - file.write("export NODES_EMIT_STREAM_BATCH_SIZE=#{@nodesEmitStreamBatchSize}\n") - end - # Close file after writing all environment variables - file.close -else - puts "Exception while opening file for writing config environment variables" - puts "****************End Config Processing********************" -end diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index f88d80603..04d71276d 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -34,7 +34,6 @@ def initialize require_relative "omslog" # 250 Node items (15KB per node) account to approximately 4MB @NODES_CHUNK_SIZE = "250" - # 0 indicates no batch enabled for stream emit @NODES_EMIT_STREAM_BATCH_SIZE = 100 @nodeInventoryE2EProcessingLatencyMs = 0 @nodesAPIE2ELatencyMs = 0 diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 7d935b04e..b5f6c4116 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -27,14 +27,12 @@ def initialize require_relative "omslog" require_relative "constants" - # 500 pod (10KB per pod) account to approximately 5MB @PODS_CHUNK_SIZE = "500" @podCount = 0 @serviceCount = 0 @controllerSet = Set.new 
[] @winContainerCount = 0 @controllerData = {} - # 0 indicates no batch enabled for stream emit @PODS_EMIT_STREAM_BATCH_SIZE = 200 @podInventoryE2EProcessingLatencyMs = 0 @podsAPIE2ELatencyMs = 0 @@ -59,7 +57,6 @@ def start @PODS_EMIT_STREAM_BATCH_SIZE = ENV["PODS_EMIT_STREAM_BATCH_SIZE"].to_i end $log.info("in_kube_podinventory::start : PODS_EMIT_STREAM_BATCH_SIZE @ #{@PODS_EMIT_STREAM_BATCH_SIZE}") - @finished = false @condition = ConditionVariable.new @mutex = Mutex.new From 6b589a9539be9a08176ee617158dc5cd68df80f3 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sat, 12 Dec 2020 18:50:12 -0800 Subject: [PATCH 36/45] rename health config to agent config --- build/linux/installer/scripts/tomlparser-agent-config.rb | 8 ++++---- kubernetes/linux/main.sh | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/build/linux/installer/scripts/tomlparser-agent-config.rb b/build/linux/installer/scripts/tomlparser-agent-config.rb index 841f4b081..3369201fc 100644 --- a/build/linux/installer/scripts/tomlparser-agent-config.rb +++ b/build/linux/installer/scripts/tomlparser-agent-config.rb @@ -122,18 +122,18 @@ def populateSettingValuesFromConfigMap(parsedConfig) if !podsEmitStreamBatchSize.nil? && is_number?(podsEmitStreamBatchSize) && podsEmitStreamBatchSize.to_i <= @podsChunkSize && podsEmitStreamBatchSize.to_i >= @podsEmitStreamBatchSizeMin @podsEmitStreamBatchSize = podsEmitStreamBatchSize.to_i - puts "PODS_EMIT_STREAM_BATCH_SIZE = #{@podsEmitStreamBatchSize}" + puts "Using config map value: PODS_EMIT_STREAM_BATCH_SIZE = #{@podsEmitStreamBatchSize}" end nodesEmitStreamBatchSize = chunk_config[:NODES_EMIT_STREAM_BATCH_SIZE] if !nodesEmitStreamBatchSize.nil? 
&& is_number?(nodesEmitStreamBatchSize) && nodesEmitStreamBatchSize.to_i <= @nodesChunkSize && nodesEmitStreamBatchSize.to_i >= @nodesEmitStreamBatchSizeMin @nodesEmitStreamBatchSize = nodesEmitStreamBatchSize.to_i - puts "NODES_EMIT_STREAM_BATCH_SIZE = #{@nodesEmitStreamBatchSize}" + puts "Using config map value: NODES_EMIT_STREAM_BATCH_SIZE = #{@nodesEmitStreamBatchSize}" end end end rescue => errorStr - puts "config::error:Exception while reading config settings for health_model enabled setting - #{errorStr}, using defaults" + puts "config::error:Exception while reading config settings for agent configuration setting - #{errorStr}, using defaults" @enable_health_model = false end end @@ -153,7 +153,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) end # Write the settings to file, so that they can be set as environment variables -file = File.open("health_config_env_var", "w") +file = File.open("agent_config_env_var", "w") if !file.nil? file.write("export AZMON_CLUSTER_ENABLE_HEALTH_MODEL=#{@enable_health_model}\n") diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh index 63c9a2ba9..ed16d3e32 100644 --- a/kubernetes/linux/main.sh +++ b/kubernetes/linux/main.sh @@ -174,11 +174,11 @@ source config_env_var #Parse the configmap to set the right environment variables for agent config. /opt/microsoft/omsagent/ruby/bin/ruby tomlparser-agent-config.rb -cat health_config_env_var | while read line; do +cat agent_config_env_var | while read line; do #echo $line echo $line >> ~/.bashrc done -source health_config_env_var +source agent_config_env_var #Parse the configmap to set the right environment variables for network policy manager (npm) integration. 
/opt/microsoft/omsagent/ruby/bin/ruby tomlparser-npm-config.rb From 53972c2831caf84f64ae729c7f0f6ee34fe32f72 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sun, 13 Dec 2020 08:37:34 -0800 Subject: [PATCH 37/45] fix max allowed hpa chunk size --- build/linux/installer/scripts/tomlparser-agent-config.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/linux/installer/scripts/tomlparser-agent-config.rb b/build/linux/installer/scripts/tomlparser-agent-config.rb index 3369201fc..d37382145 100644 --- a/build/linux/installer/scripts/tomlparser-agent-config.rb +++ b/build/linux/installer/scripts/tomlparser-agent-config.rb @@ -48,7 +48,7 @@ @deploymentsChunkSizeMax = 1000 # hpa @hpaChunkSizeMin = 500 -@hpaChunkSizeMax = 1000 +@hpaChunkSizeMax = 2000 # emit stream sizes to prevent lower values which costs disk i/o # max will be upto the chunk size From f8702ffc50829eafa82de70971109ea2715fc80d Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 14 Dec 2020 08:45:56 -0800 Subject: [PATCH 38/45] update to use 1k pod chunk since validated on 1.18+ --- build/linux/installer/scripts/tomlparser-agent-config.rb | 4 ++-- source/plugins/ruby/in_kube_podinventory.rb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/build/linux/installer/scripts/tomlparser-agent-config.rb b/build/linux/installer/scripts/tomlparser-agent-config.rb index d37382145..1c7e243b9 100644 --- a/build/linux/installer/scripts/tomlparser-agent-config.rb +++ b/build/linux/installer/scripts/tomlparser-agent-config.rb @@ -16,8 +16,8 @@ # 250 Node items (15KB per node) account to approximately 4MB @nodesChunkSize = 250 -# 500 pods (10KB per pod) account to approximately 5MB -@podsChunkSize = 500 +# 1000 pods (10KB per pod) account to approximately 10MB +@podsChunkSize = 1000 # 4000 events (1KB per event) account to approximately 4MB @eventsChunkSize = 4000 # roughly each deployment is 8k diff --git a/source/plugins/ruby/in_kube_podinventory.rb 
b/source/plugins/ruby/in_kube_podinventory.rb index b5f6c4116..5314cd92a 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -27,7 +27,7 @@ def initialize require_relative "omslog" require_relative "constants" - @PODS_CHUNK_SIZE = "500" + @PODS_CHUNK_SIZE = "1000" @podCount = 0 @serviceCount = 0 @controllerSet = Set.new [] From 531f76834c6dcc9d4685945f54a565ab752afd1d Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 14 Dec 2020 15:50:50 -0800 Subject: [PATCH 39/45] remove debug logs --- .../installer/scripts/tomlparser-agent-config.rb | 2 +- source/plugins/ruby/in_kube_events.rb | 8 ++------ source/plugins/ruby/in_kube_nodes.rb | 8 ++------ source/plugins/ruby/in_kube_podinventory.rb | 14 ++++---------- source/plugins/ruby/in_kubestate_deployments.rb | 8 ++------ 5 files changed, 11 insertions(+), 29 deletions(-) diff --git a/build/linux/installer/scripts/tomlparser-agent-config.rb b/build/linux/installer/scripts/tomlparser-agent-config.rb index 1c7e243b9..87c5194ed 100644 --- a/build/linux/installer/scripts/tomlparser-agent-config.rb +++ b/build/linux/installer/scripts/tomlparser-agent-config.rb @@ -27,7 +27,7 @@ # 2000 HPAs account to approximately 6-7MB @hpaChunkSize = 2000 # stream batch sizes to avoid large file writes -# to low will consume disk i/o +# too low will consume higher disk iops @podsEmitStreamBatchSize = 200 @nodesEmitStreamBatchSize = 100 diff --git a/source/plugins/ruby/in_kube_events.rb b/source/plugins/ruby/in_kube_events.rb index 6cea5e996..98d1bddbe 100644 --- a/source/plugins/ruby/in_kube_events.rb +++ b/source/plugins/ruby/in_kube_events.rb @@ -87,10 +87,8 @@ def enumerate end $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}") if (!eventList.nil? && !eventList.empty? && eventList.key?("items") && !eventList["items"].nil? && !eventList["items"].empty?) 
- # debug logs to track the payload size eventsCount = eventList["items"].length - eventsInventorySizeInKB = (eventList.to_s.length) / 1024 - $log.info "in_kube_events::enumerate:Received number of events in eventList is #{eventsCount} and size in KB #{eventsInventorySizeInKB} @ #{Time.now.utc.iso8601}" + $log.info "in_kube_events::enumerate:Received number of events in eventList is #{eventsCount} @ #{Time.now.utc.iso8601}" newEventQueryState = parse_and_emit_records(eventList, eventQueryState, newEventQueryState, batchTime) else $log.warn "in_kube_events::enumerate:Received empty eventList" @@ -100,10 +98,8 @@ def enumerate while (!continuationToken.nil? && !continuationToken.empty?) continuationToken, eventList = KubernetesApiClient.getResourcesAndContinuationToken("events?fieldSelector=type!=Normal&limit=#{@EVENTS_CHUNK_SIZE}&continue=#{continuationToken}") if (!eventList.nil? && !eventList.empty? && eventList.key?("items") && !eventList["items"].nil? && !eventList["items"].empty?) - # debug logs to track the payload size eventsCount = eventList["items"].length - eventsInventorySizeInKB = (eventList.to_s.length) / 1024 - $log.info "in_kube_events::enumerate:Received number of events in eventList is #{eventsCount} and size in KB #{eventsInventorySizeInKB} @ #{Time.now.utc.iso8601}" + $log.info "in_kube_events::enumerate:Received number of events in eventList is #{eventsCount} @ #{Time.now.utc.iso8601}" newEventQueryState = parse_and_emit_records(eventList, eventQueryState, newEventQueryState, batchTime) else $log.warn "in_kube_events::enumerate:Received empty eventList" diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index 04d71276d..5a8ed9c71 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -97,9 +97,7 @@ def enumerate nodesAPIChunkEndTime = (Time.now.to_f * 1000).to_i @nodesAPIE2ELatencyMs = (nodesAPIChunkEndTime - nodesAPIChunkStartTime) if (!nodeInventory.nil? 
&& !nodeInventory.empty? && nodeInventory.key?("items") && !nodeInventory["items"].nil? && !nodeInventory["items"].empty?) - # debug logs to track the payload size - nodeInventorySizeInKB = (nodeInventory.to_s.length) / 1024 - $log.info("in_kube_nodes::enumerate : number of node items :#{nodeInventory["items"].length} and size in KB: #{nodeInventorySizeInKB} from Kube API @ #{Time.now.utc.iso8601}") + $log.info("in_kube_nodes::enumerate : number of node items :#{nodeInventory["items"].length} from Kube API @ #{Time.now.utc.iso8601}") parse_and_emit_records(nodeInventory, batchTime) else $log.warn "in_kube_nodes::enumerate:Received empty nodeInventory" @@ -112,9 +110,7 @@ def enumerate nodesAPIChunkEndTime = (Time.now.to_f * 1000).to_i @nodesAPIE2ELatencyMs = @nodesAPIE2ELatencyMs + (nodesAPIChunkEndTime - nodesAPIChunkStartTime) if (!nodeInventory.nil? && !nodeInventory.empty? && nodeInventory.key?("items") && !nodeInventory["items"].nil? && !nodeInventory["items"].empty?) - # debug logs to track the payload size - nodeInventorySizeInKB = (nodeInventory.to_s.length) / 1024 - $log.info("in_kube_nodes::enumerate : number of node items :#{nodeInventory["items"].length} and size in KB: #{nodeInventorySizeInKB} from Kube API @ #{Time.now.utc.iso8601}") + $log.info("in_kube_nodes::enumerate : number of node items :#{nodeInventory["items"].length} from Kube API @ #{Time.now.utc.iso8601}") parse_and_emit_records(nodeInventory, batchTime) else $log.warn "in_kube_nodes::enumerate:Received empty nodeInventory" diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 5314cd92a..d6e6739f0 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -96,11 +96,9 @@ def enumerate(podList = nil) $log.info("in_kube_podinventory::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}") if !serviceInfo.nil? 
- # debug logs to track the payload size - serviceInfoResponseSizeInKB = (serviceInfo.body.length) / 1024 - $log.info("in_kube_podinventory::enumerate:Start:Parsing services data using yajl serviceInfo size in KB #{serviceInfoResponseSizeInKB} @ #{Time.now.utc.iso8601}") + $log.info("in_kube_podinventory::enumerate:Start:Parsing services data using yajl @ #{Time.now.utc.iso8601}") serviceList = Yajl::Parser.parse(StringIO.new(serviceInfo.body)) - $log.info("in_kube_podinventory::enumerate:End:Parsing services data using yajl serviceInfo size in KB #{serviceInfoResponseSizeInKB} @ #{Time.now.utc.iso8601}") + $log.info("in_kube_podinventory::enumerate:End:Parsing services data using yajl @ #{Time.now.utc.iso8601}") serviceInfo = nil # service inventory records much smaller and fixed size compared to serviceList serviceRecords = KubernetesApiClient.getKubeServicesInventoryRecords(serviceList, batchTime) @@ -120,9 +118,7 @@ def enumerate(podList = nil) podsAPIChunkEndTime = (Time.now.to_f * 1000).to_i @podsAPIE2ELatencyMs = (podsAPIChunkEndTime - podsAPIChunkStartTime) if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) 
- # debug logs to track the payload size - podInventorySizeInKB = (podInventory.to_s.length) / 1024 - $log.info("in_kube_podinventory::enumerate : number of pod items :#{podInventory["items"].length} and size in KB: #{podInventorySizeInKB} from Kube API @ #{Time.now.utc.iso8601}") + $log.info("in_kube_podinventory::enumerate : number of pod items :#{podInventory["items"].length} from Kube API @ #{Time.now.utc.iso8601}") parse_and_emit_records(podInventory, serviceRecords, continuationToken, batchTime) else $log.warn "in_kube_podinventory::enumerate:Received empty podInventory" @@ -135,9 +131,7 @@ def enumerate(podList = nil) podsAPIChunkEndTime = (Time.now.to_f * 1000).to_i @podsAPIE2ELatencyMs = @podsAPIE2ELatencyMs + (podsAPIChunkEndTime - podsAPIChunkStartTime) if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) - # debug logs to track the payload size - podInventorySizeInKB = (podInventory.to_s.length) / 1024 - $log.info("in_kube_podinventory::enumerate : number of pod items :#{podInventory["items"].length} and size in KB: #{podInventorySizeInKB} from Kube API @ #{Time.now.utc.iso8601}") + $log.info("in_kube_podinventory::enumerate : number of pod items :#{podInventory["items"].length} from Kube API @ #{Time.now.utc.iso8601}") parse_and_emit_records(podInventory, serviceRecords, continuationToken, batchTime) else $log.warn "in_kube_podinventory::enumerate:Received empty podInventory" diff --git a/source/plugins/ruby/in_kubestate_deployments.rb b/source/plugins/ruby/in_kubestate_deployments.rb index ffbe8ae4f..48ad87041 100644 --- a/source/plugins/ruby/in_kubestate_deployments.rb +++ b/source/plugins/ruby/in_kubestate_deployments.rb @@ -80,9 +80,7 @@ def enumerate continuationToken, deploymentList = KubernetesApiClient.getResourcesAndContinuationToken("deployments?limit=#{@DEPLOYMENTS_CHUNK_SIZE}", api_group: @DEPLOYMENTS_API_GROUP) 
$log.info("in_kubestate_deployments::enumerate : Done getting deployments from Kube API @ #{Time.now.utc.iso8601}") if (!deploymentList.nil? && !deploymentList.empty? && deploymentList.key?("items") && !deploymentList["items"].nil? && !deploymentList["items"].empty?) - # debug logs to track the payload size - deploymentsSizeInKB = (deploymentList.to_s.length) / 1024 - $log.info("in_kubestate_deployments::enumerate : number of deployment items :#{deploymentList["items"].length} and size in KB: #{deploymentsSizeInKB} from Kube API @ #{Time.now.utc.iso8601}") + $log.info("in_kubestate_deployments::enumerate : number of deployment items :#{deploymentList["items"].length} from Kube API @ #{Time.now.utc.iso8601}") parse_and_emit_records(deploymentList, batchTime) else $log.warn "in_kubestate_deployments::enumerate:Received empty deploymentList" @@ -92,9 +90,7 @@ def enumerate while (!continuationToken.nil? && !continuationToken.empty?) continuationToken, deploymentList = KubernetesApiClient.getResourcesAndContinuationToken("deployments?limit=#{@DEPLOYMENTS_CHUNK_SIZE}&continue=#{continuationToken}", api_group: @DEPLOYMENTS_API_GROUP) if (!deploymentList.nil? && !deploymentList.empty? && deploymentList.key?("items") && !deploymentList["items"].nil? && !deploymentList["items"].empty?) 
- # debug logs to track the payload size - deploymentsSizeInKB = (deploymentList.to_s.length) / 1024 - $log.info("in_kubestate_deployments::enumerate : number of deployment items :#{deploymentList["items"].length} and size in KB: #{deploymentsSizeInKB} from Kube API @ #{Time.now.utc.iso8601}") + $log.info("in_kubestate_deployments::enumerate : number of deployment items :#{deploymentList["items"].length} from Kube API @ #{Time.now.utc.iso8601}") parse_and_emit_records(deploymentList, batchTime) else $log.warn "in_kubestate_deployments::enumerate:Received empty deploymentList" From cff2ee4c497323f65a75c2c20e6f3be672ce2141 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 14 Dec 2020 18:15:30 -0800 Subject: [PATCH 40/45] minor updates --- source/plugins/ruby/KubernetesApiClient.rb | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index eb2aa3425..aca2142a0 100644 --- a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -596,7 +596,7 @@ def parseNodeLimitsFromNodeItem(node, metricCategory, metricNameToCollect, metri #Since we are getting all node data at the same time and kubernetes doesnt specify a timestamp for the capacity and allocation metrics, #if we are coming up with the time it should be same for all nodes #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z - if (!node["status"][metricCategory].nil?) + if (!node["status"][metricCategory].nil?) && (!node["status"][metricCategory][metricNameToCollect].nil?) # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect]) @@ -794,15 +794,13 @@ def getKubeServicesInventoryRecords(serviceList, batchTime = Time.utc.iso8601) if (!serviceList.nil? && !serviceList.empty?) 
servicesCount = serviceList["items"].length @Log.info("KubernetesApiClient::getKubeServicesInventoryRecords : number of services in serviceList #{servicesCount} @ #{Time.now.utc.iso8601}") - servicesSizeInKB = (serviceList["items"].to_s.length) / 1024 - @Log.info("KubernetesApiClient::getKubeServicesInventoryRecords : size of serviceList in KB #{servicesSizeInKB} @ #{Time.now.utc.iso8601}") serviceList["items"].each do |item| kubeServiceRecord = {} kubeServiceRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated kubeServiceRecord["ServiceName"] = item["metadata"]["name"] kubeServiceRecord["Namespace"] = item["metadata"]["namespace"] kubeServiceRecord["SelectorLabels"] = [item["spec"]["selector"]] - # add these before emit to avoid memory foot print + # added these before emit to avoid memory foot print # kubeServiceRecord["ClusterId"] = KubernetesApiClient.getClusterId # kubeServiceRecord["ClusterName"] = KubernetesApiClient.getClusterName kubeServiceRecord["ClusterIP"] = item["spec"]["clusterIP"] From 60d63911f15902a8881319330dcc9ece940bb774 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 15 Dec 2020 13:28:08 -0800 Subject: [PATCH 41/45] move defaults to common place --- source/plugins/ruby/in_kube_events.rb | 13 +++++++---- source/plugins/ruby/in_kube_nodes.rb | 21 +++++++++++++----- source/plugins/ruby/in_kube_podinventory.rb | 22 ++++++++++++++----- .../plugins/ruby/in_kubestate_deployments.rb | 15 ++++++++----- source/plugins/ruby/in_kubestate_hpa.rb | 15 ++++++++----- 5 files changed, 61 insertions(+), 25 deletions(-) diff --git a/source/plugins/ruby/in_kube_events.rb b/source/plugins/ruby/in_kube_events.rb index 98d1bddbe..ff2b73dd6 100644 --- a/source/plugins/ruby/in_kube_events.rb +++ b/source/plugins/ruby/in_kube_events.rb @@ -17,8 +17,9 @@ def initialize require_relative "omslog" require_relative "ApplicationInsightsUtility" - # 4000 events (1KB per event) account to approximately 4MB - 
@EVENTS_CHUNK_SIZE = 4000 + # refer tomlparser-agent-config for defaults + # this configurable via configmap + @EVENTS_CHUNK_SIZE = 0 # Initializing events count for telemetry @eventsCount = 0 @@ -36,8 +37,12 @@ def configure(conf) def start if @run_interval - if !ENV["EVENTS_CHUNK_SIZE"].nil? && !ENV["EVENTS_CHUNK_SIZE"].empty? - @EVENTS_CHUNK_SIZE = ENV["EVENTS_CHUNK_SIZE"] + if !ENV["EVENTS_CHUNK_SIZE"].nil? && !ENV["EVENTS_CHUNK_SIZE"].empty? && ENV["EVENTS_CHUNK_SIZE"].to_i > 0 + @EVENTS_CHUNK_SIZE = ENV["EVENTS_CHUNK_SIZE"].to_i + else + # this shouldnt happen and setting default just safe gauard + $log.warn("in_kube_events::start: setting to default value since got EVENTS_CHUNK_SIZE nil or empty") + @EVENTS_CHUNK_SIZE = 4000 end $log.info("in_kube_events::start : EVENTS_CHUNK_SIZE @ #{@EVENTS_CHUNK_SIZE}") diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index 5a8ed9c71..ce060d9bb 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -32,9 +32,10 @@ def initialize require_relative "ApplicationInsightsUtility" require_relative "oms_common" require_relative "omslog" - # 250 Node items (15KB per node) account to approximately 4MB - @NODES_CHUNK_SIZE = "250" - @NODES_EMIT_STREAM_BATCH_SIZE = 100 + # refer tomlparser-agent-config for the defaults + @NODES_CHUNK_SIZE = 0 + @NODES_EMIT_STREAM_BATCH_SIZE = 0 + @nodeInventoryE2EProcessingLatencyMs = 0 @nodesAPIE2ELatencyMs = 0 require_relative "constants" @@ -49,13 +50,21 @@ def configure(conf) def start if @run_interval - if !ENV["NODES_CHUNK_SIZE"].nil? && !ENV["NODES_CHUNK_SIZE"].empty? - @NODES_CHUNK_SIZE = ENV["NODES_CHUNK_SIZE"] + if !ENV["NODES_CHUNK_SIZE"].nil? && !ENV["NODES_CHUNK_SIZE"].empty? 
&& ENV["NODES_CHUNK_SIZE"].to_i > 0 + @NODES_CHUNK_SIZE = ENV["NODES_CHUNK_SIZE"].to_i + else + # this shouldnt happen adding safe gauard + $log.warn("in_kube_nodes::start: setting to default value since got NODES_CHUNK_SIZE nil or empty") + @NODES_CHUNK_SIZE = 250 end $log.info("in_kube_nodes::start : NODES_CHUNK_SIZE @ #{@NODES_CHUNK_SIZE}") - if !ENV["NODES_EMIT_STREAM_BATCH_SIZE"].nil? && !ENV["NODES_EMIT_STREAM_BATCH_SIZE"].empty? + if !ENV["NODES_EMIT_STREAM_BATCH_SIZE"].nil? && !ENV["NODES_EMIT_STREAM_BATCH_SIZE"].empty? && ENV["NODES_EMIT_STREAM_BATCH_SIZE"].to_i > 0 @NODES_EMIT_STREAM_BATCH_SIZE = ENV["NODES_EMIT_STREAM_BATCH_SIZE"].to_i + else + # this shouldnt happen and setting default just safe gauard in case + $log.warn("in_kube_nodes::start: setting to default value since got NODES_EMIT_STREAM_BATCH_SIZE nil or empty") + @NODES_EMIT_STREAM_BATCH_SIZE = 100 end $log.info("in_kube_nodes::start : NODES_EMIT_STREAM_BATCH_SIZE @ #{@NODES_EMIT_STREAM_BATCH_SIZE}") diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index d6e6739f0..ccd763bea 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -27,13 +27,16 @@ def initialize require_relative "omslog" require_relative "constants" - @PODS_CHUNK_SIZE = "1000" + # refer tomlparser-agent-config for updating defaults + # this configurable via configmap + @PODS_CHUNK_SIZE = 0 + @PODS_EMIT_STREAM_BATCH_SIZE = 0 + @podCount = 0 @serviceCount = 0 @controllerSet = Set.new [] @winContainerCount = 0 @controllerData = {} - @PODS_EMIT_STREAM_BATCH_SIZE = 200 @podInventoryE2EProcessingLatencyMs = 0 @podsAPIE2ELatencyMs = 0 end @@ -48,15 +51,24 @@ def configure(conf) def start if @run_interval - if !ENV["PODS_CHUNK_SIZE"].nil? && !ENV["PODS_CHUNK_SIZE"].empty? - @PODS_CHUNK_SIZE = ENV["PODS_CHUNK_SIZE"] + if !ENV["PODS_CHUNK_SIZE"].nil? && !ENV["PODS_CHUNK_SIZE"].empty? 
&& ENV["PODS_CHUNK_SIZE"].to_i > 0 + @PODS_CHUNK_SIZE = ENV["PODS_CHUNK_SIZE"].to_i + else + # this shouldnt happen and setting default as safe gauard in case + $log.warn("in_kube_podinventory::start: setting to default value since got PODS_CHUNK_SIZE nil or empty") + @PODS_CHUNK_SIZE = 1000 end $log.info("in_kube_podinventory::start : PODS_CHUNK_SIZE @ #{@PODS_CHUNK_SIZE}") - if !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].nil? && !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].empty? + if !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].nil? && !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].empty? && ENV["PODS_EMIT_STREAM_BATCH_SIZE"].to_i > 0 @PODS_EMIT_STREAM_BATCH_SIZE = ENV["PODS_EMIT_STREAM_BATCH_SIZE"].to_i + else + # this shouldnt happen and setting default as safe gauard in case + $log.warn("in_kube_podinventory::start: setting to default value since got PODS_EMIT_STREAM_BATCH_SIZE nil or empty") + @PODS_EMIT_STREAM_BATCH_SIZE = 200 end $log.info("in_kube_podinventory::start : PODS_EMIT_STREAM_BATCH_SIZE @ #{@PODS_EMIT_STREAM_BATCH_SIZE}") + @finished = false @condition = ConditionVariable.new @mutex = Mutex.new diff --git a/source/plugins/ruby/in_kubestate_deployments.rb b/source/plugins/ruby/in_kubestate_deployments.rb index 48ad87041..15e554f06 100644 --- a/source/plugins/ruby/in_kubestate_deployments.rb +++ b/source/plugins/ruby/in_kubestate_deployments.rb @@ -21,9 +21,10 @@ def initialize require_relative "ApplicationInsightsUtility" require_relative "constants" - # roughly each deployment is 8k - # 500 deployments account to approximately 4MB - @DEPLOYMENTS_CHUNK_SIZE = 500 + # refer tomlparser-agent-config for defaults + # this configurable via configmap + @DEPLOYMENTS_CHUNK_SIZE = 0 + @DEPLOYMENTS_API_GROUP = "apps" @@telemetryLastSentTime = DateTime.now.to_time.to_i @@ -43,8 +44,12 @@ def configure(conf) def start if @run_interval - if !ENV["DEPLOYMENTS_CHUNK_SIZE"].nil? && !ENV["DEPLOYMENTS_CHUNK_SIZE"].empty? 
- @DEPLOYMENTS_CHUNK_SIZE = ENV["DEPLOYMENTS_CHUNK_SIZE"] + if !ENV["DEPLOYMENTS_CHUNK_SIZE"].nil? && !ENV["DEPLOYMENTS_CHUNK_SIZE"].empty? && ENV["DEPLOYMENTS_CHUNK_SIZE"].to_i > 0 + @DEPLOYMENTS_CHUNK_SIZE = ENV["DEPLOYMENTS_CHUNK_SIZE"].to_i + else + # this shouldnt happen and setting default as safe gauard in case + $log.warn("in_kubestate_deployments::start: setting to default value since got DEPLOYMENTS_CHUNK_SIZE nil or empty") + @DEPLOYMENTS_CHUNK_SIZE = 500 end $log.info("in_kubestate_deployments::start : DEPLOYMENTS_CHUNK_SIZE @ #{@DEPLOYMENTS_CHUNK_SIZE}") diff --git a/source/plugins/ruby/in_kubestate_hpa.rb b/source/plugins/ruby/in_kubestate_hpa.rb index 736f17250..ea6f851bf 100644 --- a/source/plugins/ruby/in_kubestate_hpa.rb +++ b/source/plugins/ruby/in_kubestate_hpa.rb @@ -18,9 +18,10 @@ def initialize require_relative "ApplicationInsightsUtility" require_relative "constants" - # roughly each HPA is 3k - # 2000 HPAs account to approximately 6-7MB - @HPA_CHUNK_SIZE = 2000 + # refer tomlparser-agent-config for defaults + # this configurable via configmap + @HPA_CHUNK_SIZE = 0 + @HPA_API_GROUP = "autoscaling" # telemetry @@ -40,8 +41,12 @@ def configure(conf) def start if @run_interval - if !ENV["HPA_CHUNK_SIZE"].nil? && !ENV["HPA_CHUNK_SIZE"].empty? - @HPA_CHUNK_SIZE = ENV["HPA_CHUNK_SIZE"] + if !ENV["HPA_CHUNK_SIZE"].nil? && !ENV["HPA_CHUNK_SIZE"].empty? 
&& ENV["HPA_CHUNK_SIZE"].to_i > 0 + @HPA_CHUNK_SIZE = ENV["HPA_CHUNK_SIZE"].to_i + else + # this shouldnt happen and setting default as safe gauard in case + $log.warn("in_kubestate_hpa::start: setting to default value since got HPA_CHUNK_SIZE nil or empty") + @HPA_CHUNK_SIZE = 2000 end $log.info("in_kubestate_hpa::start : HPA_CHUNK_SIZE @ #{@HPA_CHUNK_SIZE}") From f88ae920f81fe4d8d6d801fcd981c681b495c86a Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 15 Dec 2020 15:10:25 -0800 Subject: [PATCH 42/45] chart updates --- .../templates/omsagent-rs-configmap.yaml | 32 +++++++++---------- charts/azuremonitor-containers/values.yaml | 9 ++++++ 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml b/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml index baeedf1be..fc7c471f8 100644 --- a/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml +++ b/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml @@ -95,7 +95,7 @@ data: type out_oms log_level debug - num_threads 5 + num_threads 2 buffer_chunk_limit 4m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer @@ -108,24 +108,24 @@ data: - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 4m - buffer_type file - buffer_path %STATE_DIR_WS%/state/out_oms_kubepv*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 5s - max_retry_wait 5m + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 4m + buffer_type file + buffer_path %STATE_DIR_WS%/state/out_oms_kubepv*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 5s + max_retry_wait 5m type out_oms log_level debug - num_threads 5 + num_threads 2 buffer_chunk_limit 4m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer @@ -155,7 
+155,7 @@ data: type out_oms log_level debug - num_threads 5 + num_threads 2 buffer_chunk_limit 4m buffer_type file buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer @@ -184,7 +184,7 @@ data: type out_oms log_level debug - num_threads 5 + num_threads 2 buffer_chunk_limit 4m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml index e8acda20e..907e315d1 100644 --- a/charts/azuremonitor-containers/values.yaml +++ b/charts/azuremonitor-containers/values.yaml @@ -81,6 +81,15 @@ omsagent: deployment: affinity: nodeAffinity: + # affinity to schedule on to ephemeral os node if its available + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: storageprofile + operator: NotIn + values: + - managed requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - labelSelector: From 0392e28b774e3e7c115fa1226702fdfb3df61738 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 15 Dec 2020 15:11:58 -0800 Subject: [PATCH 43/45] final oomfix agent --- kubernetes/omsagent.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 43591c365..f9515bb66 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -364,7 +364,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-9" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomfix12152020" imagePullPolicy: IfNotPresent resources: limits: @@ -523,7 +523,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomtest11282020-9" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomfix12152020" imagePullPolicy: IfNotPresent resources: limits: From 
6be2e13abaf0e8443c9826fd6a7c7bc671dc9cb8 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 15 Dec 2020 15:35:20 -0800 Subject: [PATCH 44/45] update to use prod image so that can be validated with build pipeline --- kubernetes/omsagent.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index f9515bb66..013e2a6c0 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -364,7 +364,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomfix12152020" + image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod11092020" imagePullPolicy: IfNotPresent resources: limits: @@ -523,7 +523,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:cioomfix12152020" + image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod11092020" imagePullPolicy: IfNotPresent resources: limits: From 1c25829041f75d7640271965eaa711ba889cffb1 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 15 Dec 2020 15:48:26 -0800 Subject: [PATCH 45/45] fix typo in comment --- source/plugins/ruby/in_kube_events.rb | 2 +- source/plugins/ruby/in_kube_nodes.rb | 4 ++-- source/plugins/ruby/in_kube_podinventory.rb | 4 ++-- source/plugins/ruby/in_kubestate_deployments.rb | 2 +- source/plugins/ruby/in_kubestate_hpa.rb | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/source/plugins/ruby/in_kube_events.rb b/source/plugins/ruby/in_kube_events.rb index ff2b73dd6..4f6017cc5 100644 --- a/source/plugins/ruby/in_kube_events.rb +++ b/source/plugins/ruby/in_kube_events.rb @@ -40,7 +40,7 @@ def start if !ENV["EVENTS_CHUNK_SIZE"].nil? && !ENV["EVENTS_CHUNK_SIZE"].empty? 
&& ENV["EVENTS_CHUNK_SIZE"].to_i > 0 @EVENTS_CHUNK_SIZE = ENV["EVENTS_CHUNK_SIZE"].to_i else - # this shouldnt happen and setting default just safe gauard + # this shouldnt happen just setting default here as safe guard $log.warn("in_kube_events::start: setting to default value since got EVENTS_CHUNK_SIZE nil or empty") @EVENTS_CHUNK_SIZE = 4000 end diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb index ce060d9bb..e7c5060a5 100644 --- a/source/plugins/ruby/in_kube_nodes.rb +++ b/source/plugins/ruby/in_kube_nodes.rb @@ -53,7 +53,7 @@ def start if !ENV["NODES_CHUNK_SIZE"].nil? && !ENV["NODES_CHUNK_SIZE"].empty? && ENV["NODES_CHUNK_SIZE"].to_i > 0 @NODES_CHUNK_SIZE = ENV["NODES_CHUNK_SIZE"].to_i else - # this shouldnt happen adding safe gauard + # this shouldnt happen just setting default here as safe guard $log.warn("in_kube_nodes::start: setting to default value since got NODES_CHUNK_SIZE nil or empty") @NODES_CHUNK_SIZE = 250 end @@ -62,7 +62,7 @@ def start if !ENV["NODES_EMIT_STREAM_BATCH_SIZE"].nil? && !ENV["NODES_EMIT_STREAM_BATCH_SIZE"].empty? && ENV["NODES_EMIT_STREAM_BATCH_SIZE"].to_i > 0 @NODES_EMIT_STREAM_BATCH_SIZE = ENV["NODES_EMIT_STREAM_BATCH_SIZE"].to_i else - # this shouldnt happen and setting default just safe gauard in case + # this shouldnt happen just setting default here as safe guard $log.warn("in_kube_nodes::start: setting to default value since got NODES_EMIT_STREAM_BATCH_SIZE nil or empty") @NODES_EMIT_STREAM_BATCH_SIZE = 100 end diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index ccd763bea..0cff2eefe 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -54,7 +54,7 @@ def start if !ENV["PODS_CHUNK_SIZE"].nil? && !ENV["PODS_CHUNK_SIZE"].empty? 
&& ENV["PODS_CHUNK_SIZE"].to_i > 0 @PODS_CHUNK_SIZE = ENV["PODS_CHUNK_SIZE"].to_i else - # this shouldnt happen and setting default as safe gauard in case + # this shouldnt happen just setting default here as safe guard $log.warn("in_kube_podinventory::start: setting to default value since got PODS_CHUNK_SIZE nil or empty") @PODS_CHUNK_SIZE = 1000 end @@ -63,7 +63,7 @@ def start if !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].nil? && !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].empty? && ENV["PODS_EMIT_STREAM_BATCH_SIZE"].to_i > 0 @PODS_EMIT_STREAM_BATCH_SIZE = ENV["PODS_EMIT_STREAM_BATCH_SIZE"].to_i else - # this shouldnt happen and setting default as safe gauard in case + # this shouldnt happen just setting default here as safe guard $log.warn("in_kube_podinventory::start: setting to default value since got PODS_EMIT_STREAM_BATCH_SIZE nil or empty") @PODS_EMIT_STREAM_BATCH_SIZE = 200 end diff --git a/source/plugins/ruby/in_kubestate_deployments.rb b/source/plugins/ruby/in_kubestate_deployments.rb index 15e554f06..27e4709a2 100644 --- a/source/plugins/ruby/in_kubestate_deployments.rb +++ b/source/plugins/ruby/in_kubestate_deployments.rb @@ -47,7 +47,7 @@ def start if !ENV["DEPLOYMENTS_CHUNK_SIZE"].nil? && !ENV["DEPLOYMENTS_CHUNK_SIZE"].empty? && ENV["DEPLOYMENTS_CHUNK_SIZE"].to_i > 0 @DEPLOYMENTS_CHUNK_SIZE = ENV["DEPLOYMENTS_CHUNK_SIZE"].to_i else - # this shouldnt happen and setting default as safe gauard in case + # this shouldnt happen just setting default here as safe guard $log.warn("in_kubestate_deployments::start: setting to default value since got DEPLOYMENTS_CHUNK_SIZE nil or empty") @DEPLOYMENTS_CHUNK_SIZE = 500 end diff --git a/source/plugins/ruby/in_kubestate_hpa.rb b/source/plugins/ruby/in_kubestate_hpa.rb index ea6f851bf..afecf8e3b 100644 --- a/source/plugins/ruby/in_kubestate_hpa.rb +++ b/source/plugins/ruby/in_kubestate_hpa.rb @@ -44,7 +44,7 @@ def start if !ENV["HPA_CHUNK_SIZE"].nil? && !ENV["HPA_CHUNK_SIZE"].empty? 
&& ENV["HPA_CHUNK_SIZE"].to_i > 0 @HPA_CHUNK_SIZE = ENV["HPA_CHUNK_SIZE"].to_i else - # this shouldnt happen and setting default as safe gauard in case + # this shouldnt happen just setting default here as safe guard $log.warn("in_kubestate_hpa::start: setting to default value since got HPA_CHUNK_SIZE nil or empty") @HPA_CHUNK_SIZE = 2000 end