From a10ff10b0aad76ca7aae788f1ec8a61b9ba8561f Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Mon, 4 Nov 2019 19:09:03 -0800
Subject: [PATCH 01/26] Fixes : 1) Disable health (for time being) - in DS & RS
 2) Disable MDM (for time being) - in DS & RS 3) Merge kubeperf into kubenode
 & kubepod 4) Made scheduling predictable for kubenode & kubepod 5) Enable
 containerlog enrichment fields (timeofcommand, containername &
 containerimage) as a configurable setting (default = true/ON) - Also add
 telemetry for it 6) Filter OUT type!=Normal events for k8s events 7)
 AppInsights telemetry async 8) Fix double calling bug in in_win_cadvisor_perf
 9) Add connect timeout (20secs) & read timeout (40 secs) for all cadvisor api
 calls & also for all kubernetes api server  calls 10) Fix batchTime for
 kubepods to be one before making api server call (rather than after making
 the call, which will make it fluctuate based on api server latency for the
 call)

---
 installer/conf/container.conf                 | 28 ++++----
 installer/conf/kube.conf                      | 71 ++++++++-----------
 installer/scripts/tomlparser.rb               | 12 ++++
 source/code/go/src/plugins/oms.go             | 51 ++++++++++---
 .../code/plugin/ApplicationInsightsUtility.rb | 47 ++++++++----
 .../code/plugin/CAdvisorMetricsAPIClient.rb   |  4 ++
 source/code/plugin/KubernetesApiClient.rb     | 11 +--
 source/code/plugin/in_kube_events.rb          |  2 +-
 source/code/plugin/in_kube_nodes.rb           | 53 ++++++++++++--
 source/code/plugin/in_kube_podinventory.rb    | 57 +++++++++++++--
 source/code/plugin/in_win_cadvisor_perf.rb    |  2 +-
 11 files changed, 241 insertions(+), 97 deletions(-)
diff --git a/installer/conf/container.conf b/installer/conf/container.conf
index f9540bde8..133d734ee 100755
--- a/installer/conf/container.conf
+++ b/installer/conf/container.conf
@@ -23,23 +23,23 @@
   log_level debug
 </source>
 
-<filter kubehealth.DaemonSet.Node**>
-  type filter_cadvisor_health_node
-  log_level debug
-</filter>
+#<filter kubehealth.DaemonSet.Node**>
+#  type filter_cadvisor_health_node
+#  log_level debug
+#</filter>
 
-<filter kubehealth.DaemonSet.Container**>
-  type filter_cadvisor_health_container
-  log_level debug
-</filter>
+#<filter kubehealth.DaemonSet.Container**>
+#  type filter_cadvisor_health_container
+#  log_level debug
+#</filter>
 
 #custom_metrics_mdm filter plugin
-<filter mdm.cadvisorperf**>
-  type filter_cadvisor2mdm
-  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
-  metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes
-  log_level info
-</filter>
+#<filter mdm.cadvisorperf**>
+#  type filter_cadvisor2mdm
+#  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
+#  metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes
+#  log_level info
+#</filter>
 
 <match oms.containerinsights.containerinventory**>
   type out_oms
diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf
index 40f4ac880..c85c77199 100644
--- a/installer/conf/kube.conf
+++ b/installer/conf/kube.conf
@@ -1,15 +1,15 @@
 # Fluentd config file for OMS Docker - cluster components (kubeAPI)
-<source>
-  type forward
-  port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
-  bind 0.0.0.0
-</source>
+#<source>
+#  type forward
+#  port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
+#  bind 0.0.0.0
+#</source>
 
 #Kubernetes pod inventory
 <source>
 	type kubepodinventory
 	tag oms.containerinsights.KubePodInventory
-	run_interval 60s
+	run_interval 60
   log_level debug
 </source>
 
@@ -21,13 +21,6 @@
     log_level debug
 </source>
 
-#Kubernetes logs
-<source>
-	type kubelogs
-	tag oms.api.KubeLogs
-	run_interval 60s
-</source>
-
 #Kubernetes services
 <source>
 	type kubeservices
@@ -40,25 +33,17 @@
 <source>
 	type kubenodeinventory
 	tag oms.containerinsights.KubeNodeInventory
-	run_interval 60s
-  log_level debug
-</source>
-
-#Kubernetes perf
-<source>
-	type kubeperf
-	tag oms.api.KubePerf
-	run_interval 60s
+	run_interval 60
   log_level debug
 </source>
 
 #Kubernetes health
-<source>
-  type kubehealth
- tag kubehealth.ReplicaSet
-  run_interval 60s
-  log_level debug
-</source>
+#<source>
+#  type kubehealth
+# tag kubehealth.ReplicaSet
+#  run_interval 60s
+#  log_level debug
+#</source>
 
 #cadvisor perf- Windows nodes
 <source>
@@ -68,23 +53,23 @@
   log_level debug
 </source>
 
-<filter mdm.kubepodinventory** mdm.kubenodeinventory**>
-  type filter_inventory2mdm
-  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
-  log_level info
-</filter>
+#<filter mdm.kubepodinventory** mdm.kubenodeinventory**>
+#  type filter_inventory2mdm
+#  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
+#  log_level info
+#</filter>
 
 #custom_metrics_mdm filter plugin for perf data from windows nodes
-<filter mdm.cadvisorperf**>
-  type filter_cadvisor2mdm
-  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
-  metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes
-  log_level info
-</filter>
-
-<filter kubehealth**>
-  type filter_health_model_builder
-</filter>
+#<filter mdm.cadvisorperf**>
+#  type filter_cadvisor2mdm
+#  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
+#  metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes
+#  log_level info
+#</filter>
+
+#<filter kubehealth**>
+#  type filter_health_model_builder
+#</filter>
 <match mdm.cadvisorperf**>
   type out_mdm
   log_level debug
diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb
index cd16cbf9b..194d5ad90 100644
--- a/installer/scripts/tomlparser.rb
+++ b/installer/scripts/tomlparser.rb
@@ -15,6 +15,7 @@
 @logTailPath = "/var/log/containers/*.log"
 @logExclusionRegexPattern = "(^((?!stdout|stderr).)*$)"
 @excludePath = "*.csv2" #some invalid path
+@enrichContainerLogs = true
 
 # Use parser to parse the configmap toml file to a ruby structure
 def parseConfigMap
@@ -117,6 +118,16 @@ def populateSettingValuesFromConfigMap(parsedConfig)
     rescue => errorStr
       ConfigParseErrorLogger.logError("Exception while reading config map settings for cluster level environment variable collection - #{errorStr}, using defaults, please check config map for errors")
     end
+
+    #Get container log enrichment setting
+    begin
+      if !parsedConfig[:log_collection_settings][:enrich_container_logs].nil? && !parsedConfig[:log_collection_settings][:enrich_container_logs][:enabled].nil?
+        @enrichContainerLogs = parsedConfig[:log_collection_settings][:enrich_container_logs][:enabled]
+        puts "config::Using config map setting for cluster level container log enrichment"
+      end
+    rescue => errorStr
+      ConfigParseErrorLogger.logError("Exception while reading config map settings for cluster level container log enrichment - #{errorStr}, using defaults, please check config map for errors")
+    end
   end
 end
 
@@ -156,6 +167,7 @@ def populateSettingValuesFromConfigMap(parsedConfig)
   file.write("export AZMON_STDERR_EXCLUDED_NAMESPACES=#{@stderrExcludeNamespaces}\n")
   file.write("export AZMON_CLUSTER_COLLECT_ENV_VAR=#{@collectClusterEnvVariables}\n")
   file.write("export AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH=#{@excludePath}\n")
+  file.write("export AZMON_CLUSTER_CONTAINER_LOG_ENRICH=#{@enrichContainerLogs}\n")
   # Close file after writing all environment variables
   file.close
   puts "Both stdout & stderr log collection are turned off for namespaces: '#{@excludePath}' "
diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go
index 5a323d7e0..5cf8297be 100644
--- a/source/code/go/src/plugins/oms.go
+++ b/source/code/go/src/plugins/oms.go
@@ -92,6 +92,8 @@ var (
 	ResourceName string
 	//KubeMonAgentEvents skip first flush
 	skipKubeMonEventsFlush bool
+	// enrich container logs (when true this will add the fields - timeofcommand, containername & containerimage)
+	enrichContainerLogs bool
 )
 
 var (
@@ -746,16 +748,29 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int {
 			stringMap["Name"] = val
 		}
 
-		dataItem := DataItem{
-			ID:                    stringMap["Id"],
-			LogEntry:              stringMap["LogEntry"],
-			LogEntrySource:        stringMap["LogEntrySource"],
-			LogEntryTimeStamp:     stringMap["LogEntryTimeStamp"],
-			LogEntryTimeOfCommand: start.Format(time.RFC3339),
-			SourceSystem:          stringMap["SourceSystem"],
-			Computer:              Computer,
-			Image:                 stringMap["Image"],
-			Name:                  stringMap["Name"],
+		if enrichContainerLogs == true {
+			dataItem := DataItem{
+				ID:                    stringMap["Id"],
+				LogEntry:              stringMap["LogEntry"],
+				LogEntrySource:        stringMap["LogEntrySource"],
+				LogEntryTimeStamp:     stringMap["LogEntryTimeStamp"],
+				LogEntryTimeOfCommand: start.Format(time.RFC3339),
+				SourceSystem:          stringMap["SourceSystem"],
+				Computer:              Computer,
+				Image:                 stringMap["Image"],
+				Name:                  stringMap["Name"],
+			}
+		} else { // dont collect timeofcommand field as its part of container log enrivhment
+			dataItem := DataItem{
+				ID:                    stringMap["Id"],
+				LogEntry:              stringMap["LogEntry"],
+				LogEntrySource:        stringMap["LogEntrySource"],
+				LogEntryTimeStamp:     stringMap["LogEntryTimeStamp"],
+				SourceSystem:          stringMap["SourceSystem"],
+				Computer:              Computer,
+				Image:                 stringMap["Image"],
+				Name:                  stringMap["Name"],
+			}
 		}
 
 		FlushedRecordsSize += float64(len(stringMap["LogEntry"]))
@@ -892,6 +907,15 @@ func InitializePlugin(pluginConfPath string, agentVersion string) {
 	// Initilizing this to true to skip the first kubemonagentevent flush since the errors are not populated at this time
 	skipKubeMonEventsFlush = true
 
+	enrichContainerLogs = os.Getenv("AZMON_CLUSTER_CONTAINER_LOG_ENRICH")
+		if (strings.Compare(enrichContainerLogs, "true") == 0) {
+			enrichContainerLogs = true
+			Log("ContainerLogEnrichment=true \n")
+		} else {
+			enrichContainerLogs = false
+			Log("ContainerLogEnrichment=false \n")
+		}
+
 	pluginConfig, err := ReadConfiguration(pluginConfPath)
 	if err != nil {
 		message := fmt.Sprintf("Error Reading plugin config path : %s \n", err.Error())
@@ -989,7 +1013,12 @@ func InitializePlugin(pluginConfPath string, agentVersion string) {
 	if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "daemonset") == 0 {
 		populateExcludedStdoutNamespaces()
 		populateExcludedStderrNamespaces()
-		go updateContainerImageNameMaps()
+		if enrichContainerLogs == true {
+			Log("ContainerLogEnrichment=true; starting goroutine to update containerimagenamemaps \n")
+			go updateContainerImageNameMaps()
+		} else {
+			Log("ContainerLogEnrichment=false \n")
+		}
 
 		// Flush config error records every hour
 		go flushKubeMonAgentEventRecords()
diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb
index 85b424e69..967335f27 100644
--- a/source/code/plugin/ApplicationInsightsUtility.rb
+++ b/source/code/plugin/ApplicationInsightsUtility.rb
@@ -73,16 +73,37 @@ def initializeUtility()
           @@Tc = ApplicationInsights::TelemetryClient.new
         elsif !encodedAppInsightsKey.nil?
           decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey)
+          
           #override ai endpoint if its available otherwise use default.
           if appInsightsEndpoint && !appInsightsEndpoint.nil? && !appInsightsEndpoint.empty?
             $log.info("AppInsightsUtility: Telemetry client uses overrided endpoint url : #{appInsightsEndpoint}")
-            telemetrySynchronousSender = ApplicationInsights::Channel::SynchronousSender.new appInsightsEndpoint
-            telemetrySynchronousQueue = ApplicationInsights::Channel::SynchronousQueue.new(telemetrySynchronousSender)
-            telemetryChannel = ApplicationInsights::Channel::TelemetryChannel.new nil, telemetrySynchronousQueue
+            #telemetrySynchronousSender = ApplicationInsights::Channel::SynchronousSender.new appInsightsEndpoint
+            #telemetrySynchronousQueue = ApplicationInsights::Channel::SynchronousQueue.new(telemetrySynchronousSender)
+            #telemetryChannel = ApplicationInsights::Channel::TelemetryChannel.new nil, telemetrySynchronousQueue
+            sender = ApplicationInsights::Channel::AsynchronousSender.new appInsightsEndpoint
+            queue = ApplicationInsights::Channel::AsynchronousQueue.new sender
+            channel = ApplicationInsights::Channel::TelemetryChannel.new nil, queue
             @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey, telemetryChannel
           else
-            @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey
+            sender = ApplicationInsights::Channel::AsynchronousSender.new
+            queue = ApplicationInsights::Channel::AsynchronousQueue.new sender
+            channel = ApplicationInsights::Channel::TelemetryChannel.new nil, queue
+            @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey, channel
           end
+          # The below are default recommended values. If you change these, ensure you test telemetry flow fully
+
+          # flush telemetry if we have 10 or more telemetry items in our queue
+          @@Tc.channel.queue.max_queue_length = 10
+
+          # send telemetry to the service in batches of 5
+          @@Tc.channel.sender.send_buffer_size = 5
+
+          # the background worker thread will be active for 5 seconds before it shuts down. if
+          # during this time items are picked up from the queue, the timer is reset.
+          @@Tc.channel.sender.send_time = 5
+
+          # the background worker thread will poll the queue every 0.5 seconds for new items
+          @@Tc.channel.sender.send_interval = 0.5
         end
       rescue => errorStr
         $log.warn("Exception in AppInsightsUtility: initilizeUtility - error: #{errorStr}")
@@ -102,8 +123,8 @@ def sendHeartBeatEvent(pluginName)
         eventName = pluginName + @@HeartBeat
         if !(@@Tc.nil?)
           @@Tc.track_event eventName, :properties => @@CustomProperties
-          @@Tc.flush
-          $log.info("AppInsights Heartbeat Telemetry sent successfully")
+          #@@Tc.flush
+          $log.info("AppInsights Heartbeat Telemetry put successfully into the queue")
         end
       rescue => errorStr
         $log.warn("Exception in AppInsightsUtility: sendHeartBeatEvent - error: #{errorStr}")
@@ -116,8 +137,8 @@ def sendLastProcessedContainerInventoryCountMetric(pluginName, properties)
           @@Tc.track_metric "LastProcessedContainerInventoryCount", properties["ContainerCount"],
                             :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT,
                             :properties => @@CustomProperties
-          @@Tc.flush
-          $log.info("AppInsights Container Count Telemetry sent successfully")
+          #@@Tc.flush
+          $log.info("AppInsights Container Count Telemetry sput successfully into the queue")
         end
       rescue => errorStr
         $log.warn("Exception in AppInsightsUtility: sendCustomMetric - error: #{errorStr}")
@@ -138,7 +159,7 @@ def sendCustomEvent(eventName, properties)
         end
         if !(@@Tc.nil?)
           @@Tc.track_event eventName, :properties => telemetryProps
-          @@Tc.flush
+          #@@Tc.flush
           $log.info("AppInsights Custom Event #{eventName} sent successfully")
         end
       rescue => errorStr
@@ -162,8 +183,8 @@ def sendExceptionTelemetry(errorStr, properties = nil)
         end
         if !(@@Tc.nil?)
           @@Tc.track_exception errorStr, :properties => telemetryProps
-          @@Tc.flush
-          $log.info("AppInsights Exception Telemetry sent successfully")
+          #@@Tc.flush
+          $log.info("AppInsights Exception Telemetry put successfully into the queue")
         end
       rescue => errorStr
         $log.warn("Exception in AppInsightsUtility: sendExceptionTelemetry - error: #{errorStr}")
@@ -209,8 +230,8 @@ def sendMetricTelemetry(metricName, metricValue, properties)
           @@Tc.track_metric metricName, metricValue,
                             :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT,
                             :properties => telemetryProps
-          @@Tc.flush
-          $log.info("AppInsights metric Telemetry #{metricName} sent successfully")
+          #@@Tc.flush
+          $log.info("AppInsights metric Telemetry #{metricName} put successfully into the queue")
         end
       rescue => errorStr
         $log.warn("Exception in AppInsightsUtility: sendMetricTelemetry - error: #{errorStr}")
diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb
index 09499b4cf..87ddcd024 100644
--- a/source/code/plugin/CAdvisorMetricsAPIClient.rb
+++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb
@@ -21,6 +21,7 @@ class CAdvisorMetricsAPIClient
   @clusterLogTailExcludPath = ENV["AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH"]
   @clusterLogTailPath = ENV["AZMON_LOG_TAIL_PATH"]
   @clusterAgentSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"]
+  @clusterContainerLogEnrich = ENV["AZMON_CLUSTER_CONTAINER_LOG_ENRICH"]
 
   @dsPromInterval = ENV["TELEMETRY_DS_PROM_INTERVAL"]
   @dsPromFieldPassCount = ENV["TELEMETRY_DS_PROM_FIELDPASS_LENGTH"]
@@ -66,6 +67,8 @@ def getSummaryStatsFromCAdvisor(winNode)
           uri = URI.parse(cAdvisorUri)
           http = Net::HTTP.new(uri.host, uri.port)
           http.use_ssl = false
+          http.open_timeout = 20
+          http.read_timeout = 40
 
           cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri)
           response = http.request(cAdvisorApiRequest)
@@ -219,6 +222,7 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met
                       telemetryProps["clusterlogtailexcludepath"] = @clusterLogTailExcludPath
                       telemetryProps["clusterLogTailPath"] = @clusterLogTailPath
                       telemetryProps["clusterAgentSchemaVersion"] = @clusterAgentSchemaVersion
+                      telemetryProps["clusterCLEnrich"] = @clusterContainerLogEnrich
                     end
                     #telemetry about prometheus metric collections settings for daemonset
                     if (File.file?(@promConfigMountPath))
diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb
index be1a51791..8808f9d62 100644
--- a/source/code/plugin/KubernetesApiClient.rb
+++ b/source/code/plugin/KubernetesApiClient.rb
@@ -41,6 +41,9 @@ def getKubeResourceInfo(resource, api_version: nil)
           uri = URI.parse(resourceUri)
           http = Net::HTTP.new(uri.host, uri.port)
           http.use_ssl = true
+          http.open_timeout = 20
+          http.read_timeout = 40
+          
           if !File.exist?(@@CaFile)
             raise "#{@@CaFile} doesnt exist"
           else
@@ -335,7 +338,7 @@ def getContainerLogsSinceTime(namespace, pod, container, since, showTimeStamp)
       return containerLogs
     end
 
-    def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn)
+    def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601 )
       metricItems = []
       begin
         clusterId = getClusterId
@@ -370,7 +373,7 @@ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName
             nodeName = pod["spec"]["nodeName"]
             podContainers.each do |container|
               containerName = container["name"]
-              metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z
+              #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z
               if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?)
                 metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect])
 
@@ -430,14 +433,14 @@ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName
       return metricItems
     end #getContainerResourceRequestAndLimits
 
-    def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn)
+    def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601)
       metricItems = []
       begin
         metricInfo = metricJSON
         clusterId = getClusterId
         #Since we are getting all node data at the same time and kubernetes doesnt specify a timestamp for the capacity and allocation metrics,
         #if we are coming up with the time it should be same for all nodes
-        metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z
+        #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z
         metricInfo["items"].each do |node|
           if (!node["status"][metricCategory].nil?)
 
diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb
index e1fdc5df6..7723875ee 100644
--- a/source/code/plugin/in_kube_events.rb
+++ b/source/code/plugin/in_kube_events.rb
@@ -50,7 +50,7 @@ def enumerate(eventList = nil)
 
       events = eventList
       $log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}")
-      eventInfo = KubernetesApiClient.getKubeResourceInfo("events")
+      eventInfo = KubernetesApiClient.getKubeResourceInfo("events?fieldSelector=type!=Normal")
       $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}")
 
       if !eventInfo.nil?
diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb
index 0a0fd9d2e..83042e4e6 100644
--- a/source/code/plugin/in_kube_nodes.rb
+++ b/source/code/plugin/in_kube_nodes.rb
@@ -9,6 +9,7 @@ class Kube_nodeInventory_Input < Input
     @@MDMKubeNodeInventoryTag = "mdm.kubenodeinventory"
     @@promConfigMountPath = "/etc/config/settings/prometheus-data-collection-settings"
     @@AzStackCloudFileName = "/etc/kubernetes/host/azurestackcloud.json"
+    @@kubeperfTag = "oms.api.KubePerf"
 
     @@rsPromInterval = ENV["TELEMETRY_RS_PROM_INTERVAL"]
     @@rsPromFieldPassCount = ENV["TELEMETRY_RS_PROM_FIELDPASS_LENGTH"]
@@ -22,6 +23,7 @@ def initialize
       super
       require "yaml"
       require "json"
+      require "time"
 
       require_relative "KubernetesApiClient"
       require_relative "ApplicationInsightsUtility"
@@ -29,7 +31,7 @@ def initialize
       require_relative "omslog"
     end
 
-    config_param :run_interval, :time, :default => "1m"
+    config_param :run_interval, :time, :default => "60"
     config_param :tag, :string, :default => "oms.containerinsights.KubeNodeInventory"
 
     def configure(conf)
@@ -191,6 +193,32 @@ def enumerate
             $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
           end
         end
+        #:optimize:kubeperf merge
+        begin
+          #if(!nodeInventory.empty?)
+            nodeMetricDataItems = []
+            #allocatable metrics @ node level
+            nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "cpu", "cpuAllocatableNanoCores", batchTime))
+            nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "memory", "memoryAllocatableBytes", batchTime))
+            #capacity metrics @ node level
+            nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores", batchTime))
+            nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes", batchTime))
+
+            eventStream2 = MultiEventStream.new
+
+            nodeMetricDataItems.each do |record|
+              record['DataType'] = "LINUX_PERF_BLOB"
+              record['IPName'] = "LogManagement"
+              eventStream2.add(emitTime, record) if record
+              #router.emit(@tag, time, record) if record 
+            end 
+          #end
+          router.emit_stream(@@kubeperfTag, eventStream2) if eventStream2
+        rescue => errorStr
+          $log.warn "Failed in enumerate for KubePerf from node inventory : #{errorStr}"
+          $log.debug_backtrace(errorStr.backtrace)
+        end
+        #:optimize:end kubeperf merge
       rescue => errorStr
         $log.warn "Failed to retrieve node inventory: #{errorStr}"
         $log.debug_backtrace(errorStr.backtrace)
@@ -201,14 +229,31 @@ def enumerate
     def run_periodic
       @mutex.lock
       done = @finished
+      #@lastTimeRan = Time.now
+      @nextTimeToRun = Time.now
+      @waitTimeout = @run_interval
       until done
-        @condition.wait(@mutex, @run_interval)
+         #@nextTimeToRun = @lastTimeRan + @run_interval
+         @nextTimeToRun = @nextTimeToRun + @run_interval
+         @now = Time.now
+         if @nextTimeToRun <= @now
+           @waitTimeout = 1
+           @nextTimeToRun = @now
+         else
+           @waitTimeout = @nextTimeToRun - @now
+         end
+         #@lastTimeRan = @now
+         #@lastTimeRan = @nextTimeToRun
+         @condition.wait(@mutex, @waitTimeout)
         done = @finished
         @mutex.unlock
         if !done
           begin
-            $log.info("in_kube_nodes::run_periodic @ #{Time.now.utc.iso8601}")
-            enumerate
+             #$log.info("in_kube_podinventory::run_periodic @ #{Time.now.utc.iso8601}")
+             $log.info("in_kube_nodes::starttime #{Time.now.utc.iso8601}")
+             enumerate
+             #sleep (rand() * 50).to_i
+             $log.info("in_kube_nodes::endtime #{Time.now.utc.iso8601}")
           rescue => errorStr
             $log.warn "in_kube_nodes::run_periodic: enumerate Failed to retrieve node inventory: #{errorStr}"
             ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb
index 766831a66..72792fe23 100644
--- a/source/code/plugin/in_kube_podinventory.rb
+++ b/source/code/plugin/in_kube_podinventory.rb
@@ -13,6 +13,7 @@ def initialize
       require "yaml"
       require "json"
       require "set"
+      require "time"
 
       require_relative "KubernetesApiClient"
       require_relative "ApplicationInsightsUtility"
@@ -20,7 +21,7 @@ def initialize
       require_relative "omslog"
     end
 
-    config_param :run_interval, :time, :default => "1m"
+    config_param :run_interval, :time, :default => 60
     config_param :tag, :string, :default => "oms.containerinsights.KubePodInventory"
 
     def configure(conf)
@@ -59,11 +60,12 @@ def enumerate(podList = nil)
 
       begin
         if (!podInventory.empty? && podInventory.key?("items") && !podInventory["items"].empty?)
+          batchTime = currentTime.utc.iso8601
           #get pod inventory & services
           $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}")
           serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo("services").body)
           $log.info("in_kube_podinventory::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}")
-          parse_and_emit_records(podInventory, serviceList)
+          parse_and_emit_records(podInventory, serviceList, batchTime )
         else
           $log.warn "Received empty podInventory"
         end
@@ -186,10 +188,10 @@ def getContainerEnvironmentVariables(pod, clusterCollectEnvironmentVar)
       end
     end
 
-    def parse_and_emit_records(podInventory, serviceList)
+    def parse_and_emit_records(podInventory, serviceList, batchTime = currentTime.utc.iso8601)
       currentTime = Time.now
       emitTime = currentTime.to_f
-      batchTime = currentTime.utc.iso8601
+      #batchTime = currentTime.utc.iso8601
       eventStream = MultiEventStream.new
       controllerSet = Set.new []
       controllerData = {}
@@ -427,6 +429,32 @@ def parse_and_emit_records(podInventory, serviceList)
 
         router.emit_stream(@tag, eventStream) if eventStream
         router.emit_stream(@@MDMKubePodInventoryTag, eventStream) if eventStream
+        #:optimize:kubeperf merge
+        begin
+          #if(!podInventory.empty?) 
+          containerMetricDataItems = []
+          #hostName = (OMS::Common.get_hostname)
+          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "cpu","cpuRequestNanoCores", batchTime))
+          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "memory","memoryRequestBytes", batchTime))
+          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "cpu","cpuLimitNanoCores", batchTime))
+          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "memory","memoryLimitBytes", batchTime))
+
+          eventStream2 = MultiEventStream.new
+
+          containerMetricDataItems.each do |record|
+            record['DataType'] = "LINUX_PERF_BLOB"
+            record['IPName'] = "LogManagement"
+            eventStream2.add(emitTime, record) if record
+            #router.emit(@tag, time, record) if record  
+          end
+          #end
+          router.emit_stream(@@kubeperfTag, eventStream2) if eventStream2
+          
+        rescue => errorStr
+          $log.warn "Failed in parse_and_emit_record for KubePerf from pod inventory : #{errorStr}"
+          $log.debug_backtrace(errorStr.backtrace)
+        end
+        #:optimize:end kubeperf merge
         if telemetryFlush == true
           telemetryProperties = {}
           telemetryProperties["Computer"] = @@hostName
@@ -454,14 +482,31 @@ def parse_and_emit_records(podInventory, serviceList)
     def run_periodic
       @mutex.lock
       done = @finished
+      #@lastTimeRan = Time.now
+      @nextTimeToRun = Time.now
+      @waitTimeout = @run_interval
       until done
-        @condition.wait(@mutex, @run_interval)
+        #@nextTimeToRun = @lastTimeRan + @run_interval
+        @nextTimeToRun = @nextTimeToRun + @run_interval
+        @now = Time.now
+        if @nextTimeToRun <= @now
+          @waitTimeout = 1
+          @nextTimeToRun = @now
+        else
+          @waitTimeout = @nextTimeToRun - @now
+        end
+        #@lastTimeRan = @now
+        #@lastTimeRan = @nextTimeToRun
+        @condition.wait(@mutex, @waitTimeout)
         done = @finished
         @mutex.unlock
         if !done
           begin
-            $log.info("in_kube_podinventory::run_periodic @ #{Time.now.utc.iso8601}")
+            #$log.info("in_kube_podinventory::run_periodic @ #{Time.now.utc.iso8601}")
+            $log.info("in_kube_podinventory::starttime #{Time.now.utc.iso8601}")
             enumerate
+            #sleep (rand() * 50).to_i
+            $log.info("in_kube_podinventory::endtime #{Time.now.utc.iso8601}")
           rescue => errorStr
             $log.warn "in_kube_podinventory::run_periodic: enumerate Failed to retrieve pod inventory: #{errorStr}"
             ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
diff --git a/source/code/plugin/in_win_cadvisor_perf.rb b/source/code/plugin/in_win_cadvisor_perf.rb
index 2e5f839e6..0114a47ec 100644
--- a/source/code/plugin/in_win_cadvisor_perf.rb
+++ b/source/code/plugin/in_win_cadvisor_perf.rb
@@ -60,7 +60,7 @@ def enumerate()
           $log.info "in_win_cadvisor_perf: Getting windows nodes"
           nodes = KubernetesApiClient.getWindowsNodes()
           if !nodes.nil?
-            @@winNodes = KubernetesApiClient.getWindowsNodes()
+            @@winNodes = nodes
           end
           $log.info "in_win_cadvisor_perf : Successuly got windows nodes after 5 minute interval"
           @@winNodeQueryTimeTracker = DateTime.now.to_time.to_i

From 5012d9e4415d2d786520d488a584ffe6c770ef75 Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Mon, 4 Nov 2019 19:27:57 -0800
Subject: [PATCH 02/26] fix setting issue for the new enrichcontainerlog
 setting

---
 source/code/go/src/plugins/oms.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go
index 5cf8297be..a05df7f28 100644
--- a/source/code/go/src/plugins/oms.go
+++ b/source/code/go/src/plugins/oms.go
@@ -907,8 +907,8 @@ func InitializePlugin(pluginConfPath string, agentVersion string) {
 	// Initilizing this to true to skip the first kubemonagentevent flush since the errors are not populated at this time
 	skipKubeMonEventsFlush = true
 
-	enrichContainerLogs = os.Getenv("AZMON_CLUSTER_CONTAINER_LOG_ENRICH")
-		if (strings.Compare(enrichContainerLogs, "true") == 0) {
+	enrichContainerLogsSetting := os.Getenv("AZMON_CLUSTER_CONTAINER_LOG_ENRICH")
+		if (strings.Compare(enrichContainerLogsSetting, "true") == 0) {
 			enrichContainerLogs = true
 			Log("ContainerLogEnrichment=true \n")
 		} else {

From e198ebb909fe2626a2f7f294a7c081b9c5f1045e Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Mon, 4 Nov 2019 19:33:20 -0800
Subject: [PATCH 03/26] fix compilation issue

---
 source/code/go/src/plugins/oms.go | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go
index a05df7f28..79f7b32bb 100644
--- a/source/code/go/src/plugins/oms.go
+++ b/source/code/go/src/plugins/oms.go
@@ -748,8 +748,9 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int {
 			stringMap["Name"] = val
 		}
 
+		var dataItem
 		if enrichContainerLogs == true {
-			dataItem := DataItem{
+			dataItem = DataItem{
 				ID:                    stringMap["Id"],
 				LogEntry:              stringMap["LogEntry"],
 				LogEntrySource:        stringMap["LogEntrySource"],
@@ -761,7 +762,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int {
 				Name:                  stringMap["Name"],
 			}
 		} else { // dont collect timeofcommand field as its part of container log enrivhment
-			dataItem := DataItem{
+			dataItem = DataItem{
 				ID:                    stringMap["Id"],
 				LogEntry:              stringMap["LogEntry"],
 				LogEntrySource:        stringMap["LogEntrySource"],

From 55764048c1b30f7f87ca71ec9c9eb05ecc4f9f02 Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Mon, 4 Nov 2019 19:37:09 -0800
Subject: [PATCH 04/26] fix another compilation issue

---
 source/code/go/src/plugins/oms.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go
index 79f7b32bb..834726c93 100644
--- a/source/code/go/src/plugins/oms.go
+++ b/source/code/go/src/plugins/oms.go
@@ -748,7 +748,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int {
 			stringMap["Name"] = val
 		}
 
-		var dataItem
+		var dataItem DataItem
 		if enrichContainerLogs == true {
 			dataItem = DataItem{
 				ID:                    stringMap["Id"],

From b22c69510427929d4fd1f19d3363dae6a4a98f8f Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Mon, 4 Nov 2019 22:31:01 -0800
Subject: [PATCH 05/26] fix emit issues

---
 installer/datafiles/base_container.data    |  1 -
 source/code/plugin/in_kube_nodes.rb        | 49 +++++++++++-----------
 source/code/plugin/in_kube_podinventory.rb |  8 ++--
 3 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data
index 4ebc4f338..a7c834258 100644
--- a/installer/datafiles/base_container.data
+++ b/installer/datafiles/base_container.data
@@ -148,7 +148,6 @@ MAINTAINER:              'Microsoft Corporation'
 /opt/microsoft/omsagent/plugin/health/health_monitor_record.rb;                                     source/code/plugin/health/health_monitor_record.rb; 644; root; root
 /opt/microsoft/omsagent/plugin/health/health_monitor_state.rb;                                      source/code/plugin/health/health_monitor_state.rb; 644; root; root
 /opt/microsoft/omsagent/plugin/health/health_monitor_telemetry.rb;                                  source/code/plugin/health/health_monitor_telemetry.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb;                                    source/code/plugin/health/health_monitor_helpers.rb; 644; root; root
 /opt/microsoft/omsagent/plugin/health/health_monitor_utils.rb;                                      source/code/plugin/health/health_monitor_utils.rb; 644; root; root
 /opt/microsoft/omsagent/plugin/health/health_signal_reducer.rb;                                     source/code/plugin/health/health_signal_reducer.rb; 644; root; root
 /opt/microsoft/omsagent/plugin/health/monitor_factory.rb;                                           source/code/plugin/health/monitor_factory.rb; 644; root; root
diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb
index 83042e4e6..7b3cc2e41 100644
--- a/source/code/plugin/in_kube_nodes.rb
+++ b/source/code/plugin/in_kube_nodes.rb
@@ -192,33 +192,34 @@ def enumerate
           if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0)
             $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
           end
-        end
-        #:optimize:kubeperf merge
-        begin
-          #if(!nodeInventory.empty?)
-            nodeMetricDataItems = []
-            #allocatable metrics @ node level
-            nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "cpu", "cpuAllocatableNanoCores", batchTime))
-            nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "memory", "memoryAllocatableBytes", batchTime))
-            #capacity metrics @ node level
-            nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores", batchTime))
-            nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes", batchTime))
+            #:optimize:kubeperf merge
+            begin
+              #if(!nodeInventory.empty?)
+                nodeMetricDataItems = []
+                #allocatable metrics @ node level
+                nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "cpu", "cpuAllocatableNanoCores", batchTime))
+                nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "memory", "memoryAllocatableBytes", batchTime))
+                #capacity metrics @ node level
+                nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores", batchTime))
+                nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes", batchTime))
 
-            eventStream2 = MultiEventStream.new
+                kubePerfEventStream = MultiEventStream.new
 
-            nodeMetricDataItems.each do |record|
-              record['DataType'] = "LINUX_PERF_BLOB"
-              record['IPName'] = "LogManagement"
-              eventStream2.add(emitTime, record) if record
-              #router.emit(@tag, time, record) if record 
-            end 
-          #end
-          router.emit_stream(@@kubeperfTag, eventStream2) if eventStream2
-        rescue => errorStr
-          $log.warn "Failed in enumerate for KubePerf from node inventory : #{errorStr}"
-          $log.debug_backtrace(errorStr.backtrace)
+                nodeMetricDataItems.each do |record|
+                  record['DataType'] = "LINUX_PERF_BLOB"
+                  record['IPName'] = "LogManagement"
+                  kubePerfEventStream.add(emitTime, record) if record
+                  #router.emit(@tag, time, record) if record 
+                end 
+              #end
+              router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
+            rescue => errorStr
+              $log.warn "Failed in enumerate for KubePerf from node inventory : #{errorStr}"
+              $log.debug_backtrace(errorStr.backtrace)
+            end
+            #:optimize:end kubeperf merge
         end
-        #:optimize:end kubeperf merge
+      
       rescue => errorStr
         $log.warn "Failed to retrieve node inventory: #{errorStr}"
         $log.debug_backtrace(errorStr.backtrace)
diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb
index 72792fe23..ba1907226 100644
--- a/source/code/plugin/in_kube_podinventory.rb
+++ b/source/code/plugin/in_kube_podinventory.rb
@@ -188,7 +188,7 @@ def getContainerEnvironmentVariables(pod, clusterCollectEnvironmentVar)
       end
     end
 
-    def parse_and_emit_records(podInventory, serviceList, batchTime = currentTime.utc.iso8601)
+    def parse_and_emit_records(podInventory, serviceList,  batchTime = Time.utc.iso8601)
       currentTime = Time.now
       emitTime = currentTime.to_f
       #batchTime = currentTime.utc.iso8601
@@ -439,16 +439,16 @@ def parse_and_emit_records(podInventory, serviceList, batchTime = currentTime.ut
           containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "cpu","cpuLimitNanoCores", batchTime))
           containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "memory","memoryLimitBytes", batchTime))
 
-          eventStream2 = MultiEventStream.new
+          kubePerfEventStream = MultiEventStream.new
 
           containerMetricDataItems.each do |record|
             record['DataType'] = "LINUX_PERF_BLOB"
             record['IPName'] = "LogManagement"
-            eventStream2.add(emitTime, record) if record
+            kubePerfEventStream.add(emitTime, record) if record
             #router.emit(@tag, time, record) if record  
           end
           #end
-          router.emit_stream(@@kubeperfTag, eventStream2) if eventStream2
+          router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
           
         rescue => errorStr
           $log.warn "Failed in parse_and_emit_record for KubePerf from pod inventory : #{errorStr}"

From 881cd1b582a8071b1972f2dc10123e127b201e0d Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Mon, 4 Nov 2019 23:02:37 -0800
Subject: [PATCH 06/26] fix a nil issue

---
 source/code/plugin/in_kube_podinventory.rb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb
index ba1907226..a25febf50 100644
--- a/source/code/plugin/in_kube_podinventory.rb
+++ b/source/code/plugin/in_kube_podinventory.rb
@@ -50,6 +50,7 @@ def shutdown
 
     def enumerate(podList = nil)
       podInventory = podList
+      currentTime = Time.now
       $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}")
       podInfo = KubernetesApiClient.getKubeResourceInfo("pods")
       $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}")

From a537b8525621903259093c665ddbb847213484a0 Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Mon, 4 Nov 2019 23:32:22 -0800
Subject: [PATCH 07/26] fix mising tag

---
 source/code/plugin/in_kube_podinventory.rb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb
index a25febf50..3f7f3f85a 100644
--- a/source/code/plugin/in_kube_podinventory.rb
+++ b/source/code/plugin/in_kube_podinventory.rb
@@ -7,6 +7,7 @@ class Kube_PodInventory_Input < Input
 
     @@MDMKubePodInventoryTag = "mdm.kubepodinventory"
     @@hostName = (OMS::Common.get_hostname)
+    @@kubeperfTag = "oms.api.KubePerf"
 
     def initialize
       super

From 5b9e581f69aa9da718b988b7e4d4d4e3e9054474 Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Tue, 12 Nov 2019 17:15:40 -0800
Subject: [PATCH 08/26] * Fix all input plugins for scheduling issue * Merge
 kubeservices with kubepodinventory (reduce RS to API server by one more) *
 Remove Kubelogs (not used) * Fix liveness probe * Disable enrichment by
 default for container logs * Move to yajl json parser across the board for
 docker provier code * Remove unused files

---
 installer/conf/container.conf                 |  28 +--
 installer/conf/kube.conf                      | 127 +++++-------
 installer/datafiles/base_container.data       |   1 -
 installer/scripts/livenessprobe.sh            |   2 +-
 installer/scripts/tomlparser.rb               |   2 +-
 .../code/plugin/ApplicationInsightsUtility.rb |   2 +-
 .../code/plugin/CAdvisorMetricsAPIClient.rb   |   2 +-
 source/code/plugin/ContainerInventoryState.rb |   2 +-
 source/code/plugin/DockerApiClient.rb         |   2 +-
 source/code/plugin/KubernetesApiClient.rb     |   2 +-
 source/code/plugin/filter_cadvisor2mdm.rb     |   2 +-
 .../filter_cadvisor_health_container.rb       |   2 +-
 .../plugin/filter_cadvisor_health_node.rb     |   2 +-
 source/code/plugin/filter_docker_log.rb       |   1 +
 .../plugin/filter_health_model_builder.rb     |   2 +-
 source/code/plugin/filter_inventory2mdm.rb    |   2 +-
 .../code/plugin/health/aggregate_monitor.rb   |   2 +-
 .../plugin/health/cluster_health_state.rb     |   1 +
 ...h_container_cpu_memory_record_formatter.rb |   2 +
 .../plugin/health/health_hierarchy_builder.rb |   2 +-
 .../health/health_model_definition_parser.rb  |   2 +-
 .../plugin/health/health_monitor_optimizer.rb |   1 +
 .../plugin/health/health_monitor_provider.rb  |   1 +
 .../plugin/health/health_monitor_state.rb     |   1 +
 .../plugin/health/health_monitor_utils.rb     |   1 +
 source/code/plugin/health/unit_monitor.rb     |   2 +-
 source/code/plugin/in_cadvisor_perf.rb        |  20 +-
 source/code/plugin/in_containerinventory.rb   |  20 +-
 .../code/plugin/in_containerlog_sudo_tail.rb  | 189 ------------------
 source/code/plugin/in_kube_events.rb          |  20 +-
 source/code/plugin/in_kube_health.rb          |  20 +-
 source/code/plugin/in_kube_logs.rb            | 181 -----------------
 source/code/plugin/in_kube_nodes.rb           |  17 +-
 source/code/plugin/in_kube_perf.rb            | 120 -----------
 source/code/plugin/in_kube_podinventory.rb    |  50 ++++-
 source/code/plugin/in_kube_services.rb        | 110 ----------
 source/code/plugin/in_win_cadvisor_perf.rb    |  20 +-
 .../channel/contracts/json_serializable.rb    |   2 +-
 .../channel/sender_base.rb                    |   2 +-
 source/code/plugin/out_mdm.rb                 |   2 +-
 40 files changed, 220 insertions(+), 749 deletions(-)
 delete mode 100644 source/code/plugin/in_containerlog_sudo_tail.rb
 delete mode 100644 source/code/plugin/in_kube_logs.rb
 delete mode 100644 source/code/plugin/in_kube_perf.rb
 delete mode 100644 source/code/plugin/in_kube_services.rb

diff --git a/installer/conf/container.conf b/installer/conf/container.conf
index 133d734ee..e1877a576 100755
--- a/installer/conf/container.conf
+++ b/installer/conf/container.conf
@@ -11,7 +11,7 @@
 <source>
   type containerinventory
   tag oms.containerinsights.containerinventory
-  run_interval 60s
+  run_interval 60
   log_level debug
 </source>
 
@@ -19,7 +19,7 @@
 <source>
   type cadvisorperf
   tag oms.api.cadvisorperf
-  run_interval 60s
+  run_interval 60
   log_level debug
 </source>
 
@@ -45,14 +45,14 @@
   type out_oms
   log_level debug
   num_threads 5
-  buffer_chunk_limit 20m
+  #buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_containerinventory*.buffer
-  buffer_queue_limit 20
+  #buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
+  flush_interval 10s
   retry_limit 10
-  retry_wait 30s
+  retry_wait 10s
   max_retry_wait 9m
 </match>
 
@@ -60,14 +60,14 @@
   type out_oms
   log_level debug
   num_threads 5
-  buffer_chunk_limit 20m
+  #buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_cadvisorperf*.buffer
-  buffer_queue_limit 20
+  #buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
+  flush_interval 10s
   retry_limit 10
-  retry_wait 30s
+  retry_wait 10s
   max_retry_wait 9m
 </match>
 
@@ -96,14 +96,14 @@
   type out_mdm
   log_level debug
   num_threads 5
-  buffer_chunk_limit 20m
+  #buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer
-  buffer_queue_limit 20
+  #buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
+  flush_interval 10s
   retry_limit 10
-  retry_wait 30s
+  retry_wait 10s
   max_retry_wait 9m
   retry_mdm_post_wait_minutes 60
 </match>
diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf
index c85c77199..eca68d6be 100644
--- a/installer/conf/kube.conf
+++ b/installer/conf/kube.conf
@@ -17,18 +17,10 @@
 <source>
 	type kubeevents
 	tag oms.containerinsights.KubeEvents
-	run_interval 60s
+	run_interval 60
     log_level debug
 </source>
 
-#Kubernetes services
-<source>
-	type kubeservices
-	tag oms.containerinsights.KubeServices
-	run_interval 60s
-  log_level debug
-</source>
-
 #Kubernetes Nodes
 <source>
 	type kubenodeinventory
@@ -40,8 +32,8 @@
 #Kubernetes health
 #<source>
 #  type kubehealth
-# tag kubehealth.ReplicaSet
-#  run_interval 60s
+#  tag kubehealth.ReplicaSet
+#  run_interval 60
 #  log_level debug
 #</source>
 
@@ -49,7 +41,7 @@
 <source>
 	type wincadvisorperf
 	tag oms.api.wincadvisorperf
-	run_interval 60s
+	run_interval 60
   log_level debug
 </source>
 
@@ -73,15 +65,15 @@
 <match mdm.cadvisorperf**>
   type out_mdm
   log_level debug
-  num_threads 5
-  buffer_chunk_limit 20m
+  num_threads 3
+  #buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer
-  buffer_queue_limit 20
+  #buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
+  flush_interval 10s
   retry_limit 10
-  retry_wait 30s
+  retry_wait 10s
   max_retry_wait 9m
   retry_mdm_post_wait_minutes 60
 </match>
@@ -89,102 +81,89 @@
 <match oms.containerinsights.KubePodInventory**>
   type out_oms
   log_level debug
-  num_threads 5
-  buffer_chunk_limit 20m
+  num_threads 10
+  #buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer
-  buffer_queue_limit 20
+  #buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
+  flush_interval 5s
   retry_limit 10
-  retry_wait 30s
+  retry_wait 10s
   max_retry_wait 9m
 </match>
 
 <match oms.containerinsights.KubeEvents**>
 	type out_oms
 	log_level debug
-	num_threads 5
-	buffer_chunk_limit 5m
+	num_threads 3
+	#buffer_chunk_limit 20m
 	buffer_type file
 	buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer
-	buffer_queue_limit 10
+	#buffer_queue_limit 20
 	buffer_queue_full_action drop_oldest_chunk
-	flush_interval 20s
+	flush_interval 10s
 	retry_limit 10
-	retry_wait 30s
+	retry_wait 10s
 	max_retry_wait 9m
 </match>
 
-<match oms.api.KubeLogs**>
-	type out_oms_api
-	log_level debug
-  	buffer_chunk_limit 10m
-	buffer_type file
-	buffer_path %STATE_DIR_WS%/out_oms_api_kubernetes_logs*.buffer
-	buffer_queue_limit 10
-	flush_interval 20s
-	retry_limit 10
-	retry_wait 30s
-</match>
-
-
-
 <match oms.containerinsights.KubeServices**>
   type out_oms
   log_level debug
-  num_threads 5
-  buffer_chunk_limit 20m
+  num_threads 2
+  #buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer
-  buffer_queue_limit 20
+  #buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
+  flush_interval 10s
   retry_limit 10
-  retry_wait 30s
+  retry_wait 10s
   max_retry_wait 9m
 </match>
 
 <match oms.containerinsights.KubeNodeInventory**>
   type out_oms
   log_level debug
-  num_threads 5
-  buffer_chunk_limit 20m
+  num_threads 3
+  #buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer
-  buffer_queue_limit 20
+  #buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
+  flush_interval 10s
   retry_limit 10
-  retry_wait 30s
+  retry_wait 10s
   max_retry_wait 9m
 </match>
 
 <match oms.containerinsights.ContainerNodeInventory**>
   type out_oms
   log_level debug
-  buffer_chunk_limit 20m
+  num_threads 2
+  #buffer_chunk_limit 40m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer
-  buffer_queue_limit 20
-  flush_interval 20s
+  #buffer_queue_limit 40
+  flush_interval 10s
   retry_limit 10
-  retry_wait 15s
+  retry_wait 10s
   max_retry_wait 9m
 </match>
 
 <match oms.api.KubePerf**>
   type out_oms
   log_level debug
-  num_threads 5
-  buffer_chunk_limit 20m
+  num_threads 10
+  #buffer_chunk_limit 40m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer
-  buffer_queue_limit 20
+  #buffer_queue_limit 40
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
+  flush_interval 5s
   retry_limit 10
-  retry_wait 30s
+  retry_wait 10s
   max_retry_wait 9m
 </match>
 
@@ -192,14 +171,14 @@
   type out_mdm
   log_level debug
   num_threads 5
-  buffer_chunk_limit 20m
+  #buffer_chunk_limit 40m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_mdm_*.buffer
-  buffer_queue_limit 20
+  #buffer_queue_limit 40
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
+  flush_interval 5s
   retry_limit 10
-  retry_wait 30s
+  retry_wait 10s
   max_retry_wait 9m
   retry_mdm_post_wait_minutes 60
 </match>
@@ -207,29 +186,29 @@
 <match oms.api.wincadvisorperf**>
   type out_oms
   log_level debug
-  num_threads 5
-  buffer_chunk_limit 20m
+  num_threads 3
+  #buffer_chunk_limit 40m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_api_wincadvisorperf*.buffer
-  buffer_queue_limit 20
+  #buffer_queue_limit 40
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
+  flush_interval 10s
   retry_limit 10
-  retry_wait 30s
+  retry_wait 10s
   max_retry_wait 9m
 </match>
 
 <match kubehealth.Signals**>
   type out_oms
   log_level debug
-  num_threads 5
-  buffer_chunk_limit 20m
+  num_threads 3
+  #buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_kubehealth*.buffer
-  buffer_queue_limit 20
+  #buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
+  flush_interval 10s
   retry_limit 10
-  retry_wait 30s
+  retry_wait 10s
   max_retry_wait 9m
 </match>
\ No newline at end of file
diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data
index a7c834258..0264871dd 100644
--- a/installer/datafiles/base_container.data
+++ b/installer/datafiles/base_container.data
@@ -26,7 +26,6 @@ MAINTAINER:              'Microsoft Corporation'
 
 /opt/microsoft/omsagent/plugin/in_kube_podinventory.rb;			source/code/plugin/in_kube_podinventory.rb;			644; root; root
 /opt/microsoft/omsagent/plugin/in_kube_events.rb;			    source/code/plugin/in_kube_events.rb;				644; root; root
-/opt/microsoft/omsagent/plugin/in_kube_logs.rb;                 source/code/plugin/in_kube_logs.rb;                 644; root; root
 /opt/microsoft/omsagent/plugin/KubernetesApiClient.rb;			source/code/plugin/KubernetesApiClient.rb;			644; root; root
 
 /etc/opt/microsoft/docker-cimprov/container.conf;			    installer/conf/container.conf;                      644; root; root
diff --git a/installer/scripts/livenessprobe.sh b/installer/scripts/livenessprobe.sh
index cb7e8a0ba..e957b4bdf 100644
--- a/installer/scripts/livenessprobe.sh
+++ b/installer/scripts/livenessprobe.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 #test to exit non zero value
-(ps -ef | grep omsagent | grep -v "grep") && (ps -ef | grep td-agent-bit | grep -v "grep")
+(ps -ef | grep omsagent- | grep -v "grep") && (ps -ef | grep td-agent-bit | grep -v "grep")
 if [ $? -eq 0 ] && [ ! -s "inotifyoutput.txt" ]
 then
   # inotifyoutput file is empty and the grep commands for omsagent and td-agent-bit succeeded
diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb
index 194d5ad90..ba67d023a 100644
--- a/installer/scripts/tomlparser.rb
+++ b/installer/scripts/tomlparser.rb
@@ -15,7 +15,7 @@
 @logTailPath = "/var/log/containers/*.log"
 @logExclusionRegexPattern = "(^((?!stdout|stderr).)*$)"
 @excludePath = "*.csv2" #some invalid path
-@enrichContainerLogs = true
+@enrichContainerLogs = false
 
 # Use parser to parse the configmap toml file to a ruby structure
 def parseConfigMap
diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb
index 967335f27..6bb518bac 100644
--- a/source/code/plugin/ApplicationInsightsUtility.rb
+++ b/source/code/plugin/ApplicationInsightsUtility.rb
@@ -6,7 +6,7 @@ class ApplicationInsightsUtility
   require_relative "omslog"
   require_relative "DockerApiClient"
   require_relative "oms_common"
-  require "json"
+  require 'yajl/json_gem'
   require "base64"
 
   @@HeartBeat = "HeartBeatEvent"
diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb
index 87ddcd024..cb10992d5 100644
--- a/source/code/plugin/CAdvisorMetricsAPIClient.rb
+++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb
@@ -2,7 +2,7 @@
 # frozen_string_literal: true
 
 class CAdvisorMetricsAPIClient
-  require "json"
+  require 'yajl/json_gem'
   require "logger"
   require "net/http"
   require "net/https"
diff --git a/source/code/plugin/ContainerInventoryState.rb b/source/code/plugin/ContainerInventoryState.rb
index 7e5ca18e8..170fa65e3 100644
--- a/source/code/plugin/ContainerInventoryState.rb
+++ b/source/code/plugin/ContainerInventoryState.rb
@@ -2,7 +2,7 @@
 # frozen_string_literal: true
 
 class ContainerInventoryState
-    require 'json'
+    require 'yajl/json_gem'
     require_relative 'omslog'
     @@InventoryDirectory = "/var/opt/microsoft/docker-cimprov/state/ContainerInventory/"
 
diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb
index ee2742dd4..fb2148ec9 100644
--- a/source/code/plugin/DockerApiClient.rb
+++ b/source/code/plugin/DockerApiClient.rb
@@ -3,7 +3,7 @@
 
 class DockerApiClient
   require "socket"
-  require "json"
+  require 'yajl/json_gem'
   require "timeout"
   require_relative "omslog"
   require_relative "DockerApiRestHelper"
diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb
index 8808f9d62..a125361a1 100644
--- a/source/code/plugin/KubernetesApiClient.rb
+++ b/source/code/plugin/KubernetesApiClient.rb
@@ -2,7 +2,7 @@
 # frozen_string_literal: true
 
 class KubernetesApiClient
-  require "json"
+  require 'yajl/json_gem'
   require "logger"
   require "net/http"
   require "net/https"
diff --git a/source/code/plugin/filter_cadvisor2mdm.rb b/source/code/plugin/filter_cadvisor2mdm.rb
index a6e643e45..f14a1369b 100644
--- a/source/code/plugin/filter_cadvisor2mdm.rb
+++ b/source/code/plugin/filter_cadvisor2mdm.rb
@@ -4,7 +4,7 @@
 
 module Fluent
     require 'logger'
-    require 'json'
+    require 'yajl/json_gem'
     require_relative 'oms_common'
     require_relative 'CustomMetricsUtils'
 
diff --git a/source/code/plugin/filter_cadvisor_health_container.rb b/source/code/plugin/filter_cadvisor_health_container.rb
index 2eccd125f..93d50e20f 100644
--- a/source/code/plugin/filter_cadvisor_health_container.rb
+++ b/source/code/plugin/filter_cadvisor_health_container.rb
@@ -3,7 +3,7 @@
 
 module Fluent
     require 'logger'
-    require 'json'
+    require 'yajl/json_gem'
     require_relative 'oms_common'
     require_relative "ApplicationInsightsUtility"
     Dir[File.join(__dir__, './health', '*.rb')].each { |file| require file }
diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb
index d2f735cd1..c6280db60 100644
--- a/source/code/plugin/filter_cadvisor_health_node.rb
+++ b/source/code/plugin/filter_cadvisor_health_node.rb
@@ -3,7 +3,7 @@
 
 module Fluent
     require 'logger'
-    require 'json'
+    require 'yajl/json_gem'
     require_relative 'oms_common'
     require_relative "ApplicationInsightsUtility"
     require_relative "KubernetesApiClient"
diff --git a/source/code/plugin/filter_docker_log.rb b/source/code/plugin/filter_docker_log.rb
index 7ffd333e3..b80f4c204 100644
--- a/source/code/plugin/filter_docker_log.rb
+++ b/source/code/plugin/filter_docker_log.rb
@@ -5,6 +5,7 @@
 module Fluent
 	require 'logger'
 	require 'socket'
+	require 'yajl/json_gem'
 
 	class DockerLogFilter < Filter
 		Plugin.register_filter('filter_docker_log', self)
diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb
index afb514a73..ab9ecf666 100644
--- a/source/code/plugin/filter_health_model_builder.rb
+++ b/source/code/plugin/filter_health_model_builder.rb
@@ -4,7 +4,7 @@
 
 module Fluent
     require 'logger'
-    require 'json'
+    require 'yajl/json_gem'
     Dir[File.join(__dir__, './health', '*.rb')].each { |file| require file }
 
 
diff --git a/source/code/plugin/filter_inventory2mdm.rb b/source/code/plugin/filter_inventory2mdm.rb
index 30f6f911a..422b4b54a 100644
--- a/source/code/plugin/filter_inventory2mdm.rb
+++ b/source/code/plugin/filter_inventory2mdm.rb
@@ -4,7 +4,7 @@
 
 module Fluent
     require 'logger'
-    require 'json'
+    require 'yajl/json_gem'
     require_relative 'oms_common'
     require_relative 'CustomMetricsUtils'
 
diff --git a/source/code/plugin/health/aggregate_monitor.rb b/source/code/plugin/health/aggregate_monitor.rb
index 794f716ce..00ee9aecd 100644
--- a/source/code/plugin/health/aggregate_monitor.rb
+++ b/source/code/plugin/health/aggregate_monitor.rb
@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 require_relative 'health_model_constants'
-require 'json'
+require 'yajl/json_gem'
 
 module HealthModel
   class AggregateMonitor
diff --git a/source/code/plugin/health/cluster_health_state.rb b/source/code/plugin/health/cluster_health_state.rb
index 3b56dd243..4057991cd 100644
--- a/source/code/plugin/health/cluster_health_state.rb
+++ b/source/code/plugin/health/cluster_health_state.rb
@@ -1,6 +1,7 @@
 require "net/http"
 require "net/https"
 require "uri"
+require 'yajl/json_gem'
 
 module HealthModel
     class ClusterHealthState
diff --git a/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb b/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb
index 5c7db82d9..4e5ad11b8 100644
--- a/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb
+++ b/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb
@@ -1,3 +1,5 @@
+require 'yajl/json_gem'
+
 module HealthModel
     class HealthContainerCpuMemoryRecordFormatter
 
diff --git a/source/code/plugin/health/health_hierarchy_builder.rb b/source/code/plugin/health/health_hierarchy_builder.rb
index 2da0050db..4ed838c04 100644
--- a/source/code/plugin/health/health_hierarchy_builder.rb
+++ b/source/code/plugin/health/health_hierarchy_builder.rb
@@ -1,4 +1,4 @@
-require 'json'
+require 'yajl/json_gem'
 module HealthModel
     class HealthHierarchyBuilder
 
diff --git a/source/code/plugin/health/health_model_definition_parser.rb b/source/code/plugin/health/health_model_definition_parser.rb
index f6c7a781d..d38bc1615 100644
--- a/source/code/plugin/health/health_model_definition_parser.rb
+++ b/source/code/plugin/health/health_model_definition_parser.rb
@@ -2,7 +2,7 @@
     Class to parse the health model definition. The definition expresses the relationship between monitors, how to roll up to an aggregate monitor,
     and what labels to "pass on" to the parent monitor
 =end
-require 'json'
+require 'yajl/json_gem'
 
 module HealthModel
     class HealthModelDefinitionParser
diff --git a/source/code/plugin/health/health_monitor_optimizer.rb b/source/code/plugin/health/health_monitor_optimizer.rb
index b33c8a986..f8146cd1d 100644
--- a/source/code/plugin/health/health_monitor_optimizer.rb
+++ b/source/code/plugin/health/health_monitor_optimizer.rb
@@ -1,3 +1,4 @@
+require 'yajl/json_gem'
 module HealthModel
     class HealthMonitorOptimizer
         #ctor
diff --git a/source/code/plugin/health/health_monitor_provider.rb b/source/code/plugin/health/health_monitor_provider.rb
index e75824268..b1f2c8474 100644
--- a/source/code/plugin/health/health_monitor_provider.rb
+++ b/source/code/plugin/health/health_monitor_provider.rb
@@ -1,4 +1,5 @@
 require_relative 'health_model_constants'
+require 'yajl/json_gem'
 
 module HealthModel
     class HealthMonitorProvider
diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb
index 8e2294cc9..90bfee128 100644
--- a/source/code/plugin/health/health_monitor_state.rb
+++ b/source/code/plugin/health/health_monitor_state.rb
@@ -1,4 +1,5 @@
 require_relative 'health_model_constants'
+require 'yajl/json_gem'
 
 module HealthModel
 
diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb
index e21fdc83d..7e0b9c64c 100644
--- a/source/code/plugin/health/health_monitor_utils.rb
+++ b/source/code/plugin/health/health_monitor_utils.rb
@@ -1,6 +1,7 @@
 require 'logger'
 require 'digest'
 require_relative 'health_model_constants'
+require 'yajl/json_gem'
 
 module HealthModel
     # static class that provides a bunch of utility methods
diff --git a/source/code/plugin/health/unit_monitor.rb b/source/code/plugin/health/unit_monitor.rb
index 9af599321..66f9a5b8c 100644
--- a/source/code/plugin/health/unit_monitor.rb
+++ b/source/code/plugin/health/unit_monitor.rb
@@ -1,5 +1,5 @@
 require_relative 'health_model_constants'
-require 'json'
+require 'yajl/json_gem'
 
 module HealthModel
     class UnitMonitor
diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb
index 810fb512f..9845671cc 100644
--- a/source/code/plugin/in_cadvisor_perf.rb
+++ b/source/code/plugin/in_cadvisor_perf.rb
@@ -9,14 +9,15 @@ class CAdvisor_Perf_Input < Input
     def initialize
       super
       require "yaml"
-      require "json"
+      require 'yajl/json_gem'
+      require "time"
 
       require_relative "CAdvisorMetricsAPIClient"
       require_relative "oms_common"
       require_relative "omslog"
     end
 
-    config_param :run_interval, :time, :default => "1m"
+    config_param :run_interval, :time, :default => 60
     config_param :tag, :string, :default => "oms.api.cadvisorperf"
     config_param :mdmtag, :string, :default => "mdm.cadvisorperf"
     config_param :nodehealthtag, :string, :default => "kubehealth.DaemonSet.Node"
@@ -74,14 +75,25 @@ def enumerate()
     def run_periodic
       @mutex.lock
       done = @finished
+      @nextTimeToRun = Time.now
+      @waitTimeout = @run_interval
       until done
-        @condition.wait(@mutex, @run_interval)
+        @nextTimeToRun = @nextTimeToRun + @run_interval
+        @now = Time.now
+        if @nextTimeToRun <= @now
+          @waitTimeout = 1
+          @nextTimeToRun = @now
+        else
+          @waitTimeout = @nextTimeToRun - @now
+        end
+        @condition.wait(@mutex, @waitTimeout)
         done = @finished
         @mutex.unlock
         if !done
           begin
-            $log.info("in_cadvisor_perf::run_periodic @ #{Time.now.utc.iso8601}")
+            $log.info("in_cadvisor_perf::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}")
             enumerate
+            $log.info("in_cadvisor_perf::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}")
           rescue => errorStr
             $log.warn "in_cadvisor_perf::run_periodic: enumerate Failed to retrieve cadvisor perf metrics: #{errorStr}"
           end
diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb
index ccf61ab2e..d107047b4 100644
--- a/source/code/plugin/in_containerinventory.rb
+++ b/source/code/plugin/in_containerinventory.rb
@@ -13,14 +13,15 @@ class Container_Inventory_Input < Input
 
     def initialize
       super
-      require "json"
+      require 'yajl/json_gem'
+      require "time"
       require_relative "DockerApiClient"
       require_relative "ContainerInventoryState"
       require_relative "ApplicationInsightsUtility"
       require_relative "omslog"
     end
 
-    config_param :run_interval, :time, :default => "1m"
+    config_param :run_interval, :time, :default => 60
     config_param :tag, :string, :default => "oms.containerinsights.containerinventory"
 
     def configure(conf)
@@ -259,14 +260,25 @@ def enumerate
     def run_periodic
       @mutex.lock
       done = @finished
+      @nextTimeToRun = Time.now
+      @waitTimeout = @run_interval
       until done
-        @condition.wait(@mutex, @run_interval)
+        @nextTimeToRun = @nextTimeToRun + @run_interval
+        @now = Time.now
+        if @nextTimeToRun <= @now
+          @waitTimeout = 1
+          @nextTimeToRun = @now
+        else
+          @waitTimeout = @nextTimeToRun - @now
+        end
+        @condition.wait(@mutex, @waitTimeout)
         done = @finished
         @mutex.unlock
         if !done
           begin
-            $log.info("in_container_inventory::run_periodic @ #{Time.now.utc.iso8601}")
+            $log.info("in_container_inventory::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}")
             enumerate
+            $log.info("in_container_inventory::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}")
           rescue => errorStr
             $log.warn "in_container_inventory::run_periodic: Failed in enumerate container inventory: #{errorStr}"
           end
diff --git a/source/code/plugin/in_containerlog_sudo_tail.rb b/source/code/plugin/in_containerlog_sudo_tail.rb
deleted file mode 100644
index 8faa260d0..000000000
--- a/source/code/plugin/in_containerlog_sudo_tail.rb
+++ /dev/null
@@ -1,189 +0,0 @@
-
-require 'yajl'
-require 'fluent/input'
-require 'fluent/event'
-require 'fluent/config/error'
-require 'fluent/parser'
-require 'open3'
-require 'json'
-require_relative 'omslog'
-require_relative 'KubernetesApiClient'
-
-module Fluent
-  class ContainerLogSudoTail < Input
-    Plugin.register_input('containerlog_sudo_tail', self)
-
-    def initialize
-      super
-      @command = nil
-      @paths = []
-      #Using this to construct the file path for all every container json log file.	
-      #Example container log file path -> /var/lib/docker/containers/{ContainerID}/{ContainerID}-json.log	
-      #We have read permission on this file but don't have execute permission on the below mentioned path. Hence wildcard character searches to find the container ID's doesn't work.	
-      @containerLogFilePath = "/var/lib/docker/containers/"
-      #This folder contains a list of all the containers running/stopped and we're using it to get all the container ID's which will be needed for the log file path below
-      #TODO : Use generic path from docker REST endpoint and find a way to mount the correct folder in the omsagent.yaml	    
-      @containerIDFilePath = "/var/opt/microsoft/docker-cimprov/state/ContainerInventory/*"
-      @@systemPodsNamespace = 'kube-system'
-      @@getSystemPodsTimeIntervalSecs = 300 #refresh system container list every 5 minutes
-      @@lastSystemPodsGetTime = nil;
-      @@systemContainerIDList = Hash.new
-      @@disableKubeSystemLogCollection = ENV['DISABLE_KUBE_SYSTEM_LOG_COLLECTION']
-      if !@@disableKubeSystemLogCollection.nil? && !@@disableKubeSystemLogCollection.empty? && @@disableKubeSystemLogCollection.casecmp('true') == 0
-        @@disableKubeSystemLogCollection = 'true'
-        $log.info("in_container_sudo_tail : System container log collection is disabled")
-      else
-        @@disableKubeSystemLogCollection = 'false'
-        $log.info("in_container_sudo_tail : System container log collection is enabled")
-      end
-    end
-
-    attr_accessor :command
-
-    #The format used to map the program output to the incoming event.
-    config_param :format, :string, default: 'none'
-
-    #Tag of the event.
-    config_param :tag, :string, default: nil
-
-    #Fluentd will record the position it last read into this file.
-    config_param :pos_file, :string, default: nil
-
-    #The interval time between periodic program runs.
-    config_param :run_interval, :time, default: nil
-
-    BASE_DIR = File.dirname(File.expand_path('..', __FILE__))
-    RUBY_DIR = BASE_DIR + '/ruby/bin/ruby '
-    TAILSCRIPT = BASE_DIR + '/plugin/containerlogtailfilereader.rb '
-
-    def configure(conf)
-      super
-      unless @pos_file
-        raise ConfigError, "'pos_file' is required to keep track of file"
-      end 
-
-      unless @tag 
-        raise ConfigError, "'tag' is required on sudo tail"
-      end
-
-      unless @run_interval
-        raise ConfigError, "'run_interval' is required for periodic tailing"      
-      end
- 
-      @parser = Plugin.new_parser(conf['format'])
-      @parser.configure(conf)
-    end
-
-    def start
-      @finished = false
-      @thread = Thread.new(&method(:run_periodic))
-    end
-
-    def shutdown
-      @finished = true 
-      @thread.join
-    end
-
-    def receive_data(line)
-      es = MultiEventStream.new
-      begin
-        line.chomp!  # remove \n
-        @parser.parse(line) { |time, record|
-          if time && record
-            es.add(time, record)
-          else
-            $log.warn "pattern doesn't match: #{line.inspect}"
-          end
-          unless es.empty?
-            tag=@tag
-            router.emit_stream(tag, es)
-          end
-        }
-      rescue => e
-        $log.warn line.dump, error: e.to_s
-        $log.debug_backtrace(e.backtrace)
-      end
-    end
-
-    def receive_log(line)
-      $log.warn "#{line}" if line.start_with?('WARN')
-      $log.error "#{line}" if line.start_with?('ERROR')
-      $log.info "#{line}" if line.start_with?('INFO')
-    end
- 
-    def readable_path(path)
-      if system("sudo test -r #{path}")
-        OMS::Log.info_once("Following tail of #{path}")
-        return path
-      else
-        OMS::Log.warn_once("#{path} is not readable. Cannot tail the file.")
-	return ""
-      end
-    end
-
-    def set_system_command
-      timeNow = DateTime.now
-      cName = "Unkown"
-      tempContainerInfo = {}
-      paths = ""
-      
-      #if we are on agent & system containers log collection is disabled, get system containerIDs to exclude logs from containers in system containers namespace from being tailed 
-      if !KubernetesApiClient.isNodeMaster && @@disableKubeSystemLogCollection.casecmp('true') == 0 
-        if @@lastSystemPodsGetTime.nil? || ((timeNow - @@lastSystemPodsGetTime)*24*60*60).to_i >= @@getSystemPodsTimeIntervalSecs
-          $log.info("in_container_sudo_tail : System Container list last refreshed at #{@@lastSystemPodsGetTime} - refreshing now at #{timeNow}")
-          sysContainers = KubernetesApiClient.getContainerIDs(@@systemPodsNamespace)
-          #BugBug - https://msecg.visualstudio.com/OMS/_workitems/edit/215107 - we get 200 with empty payloaf from time to time
-          if (!sysContainers.nil? && !sysContainers.empty?)
-            @@systemContainerIDList = sysContainers
-          else
-            $log.info("in_container_sudo_tail : System Container ID List is empty!!!! Continuing to use currently cached list.")
-          end
-          @@lastSystemPodsGetTime = timeNow
-          $log.info("in_container_sudo_tail : System Container ID List: #{@@systemContainerIDList}")
-        end
-      end
-      
-      Dir.glob(@containerIDFilePath).select { |p|
-        cName = p.split('/').last;
-        if !@@systemContainerIDList.key?("docker://" + cName)
-	        p = @containerLogFilePath + cName + "/" + cName + "-json.log"
-          paths += readable_path(p) + " "
-        else
-          $log.info("in_container_sudo_tail : Excluding system container with ID #{cName} from tailng for log collection")
-        end
-      }
-      if !system("sudo test -r #{@pos_file}")
-	      system("sudo touch #{@pos_file}")
-      end
-      @command = "sudo " << RUBY_DIR << TAILSCRIPT << paths <<  " -p #{@pos_file}"
-    end
-
-    def run_periodic
-      until @finished
-        begin
-          sleep @run_interval
-          #if we are on master & system containers log collection is disabled, collect nothing (i.e NO COntainer log collection for ANY container)
-          #we will be not collection omsagent log as well in this case, but its insignificant & okay!
-          if !KubernetesApiClient.isNodeMaster || @@disableKubeSystemLogCollection.casecmp('true') != 0 
-            set_system_command
-            Open3.popen3(@command) {|writeio, readio, errio, wait_thread|
-              writeio.close
-              while line = readio.gets
-                receive_data(line)
-              end
-              while line = errio.gets
-                receive_log(line)
-              end
-              
-              wait_thread.value #wait until child process terminates
-            }
-          end
-        rescue
-          $log.error "containerlog_sudo_tail failed to run or shutdown child proces", error => $!.to_s, :error_class => $!.class.to_s
-          $log.warn_backtrace $!.backtrace
-        end
-      end
-    end
-  end
-
-end
diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb
index 7723875ee..87007e60b 100644
--- a/source/code/plugin/in_kube_events.rb
+++ b/source/code/plugin/in_kube_events.rb
@@ -9,7 +9,8 @@ class Kube_Event_Input < Input
 
     def initialize
       super
-      require "json"
+      require 'yajl/json_gem'
+      require "time"
 
       require_relative "KubernetesApiClient"
       require_relative "oms_common"
@@ -17,7 +18,7 @@ def initialize
       require_relative "ApplicationInsightsUtility"
     end
 
-    config_param :run_interval, :time, :default => "1m"
+    config_param :run_interval, :time, :default => 60
     config_param :tag, :string, :default => "oms.containerinsights.KubeEvents"
 
     def configure(conf)
@@ -108,14 +109,25 @@ def enumerate(eventList = nil)
     def run_periodic
       @mutex.lock
       done = @finished
+      @nextTimeToRun = Time.now
+      @waitTimeout = @run_interval
       until done
-        @condition.wait(@mutex, @run_interval)
+        @nextTimeToRun = @nextTimeToRun + @run_interval
+        @now = Time.now
+        if @nextTimeToRun <= @now
+          @waitTimeout = 1
+          @nextTimeToRun = @now
+        else
+          @waitTimeout = @nextTimeToRun - @now
+        end
+        @condition.wait(@mutex, @waitTimeout)
         done = @finished
         @mutex.unlock
         if !done
           begin
-            $log.info("in_kube_events::run_periodic @ #{Time.now.utc.iso8601}")
+            $log.info("in_kube_events::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}")
             enumerate
+            $log.info("in_kube_events::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}")
           rescue => errorStr
             $log.warn "in_kube_events::run_periodic: enumerate Failed to retrieve kube events: #{errorStr}"
             ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb
index 9a1b8f9a9..d491f4efd 100644
--- a/source/code/plugin/in_kube_health.rb
+++ b/source/code/plugin/in_kube_health.rb
@@ -21,7 +21,8 @@ def initialize
       begin
         super
         require "yaml"
-        require "json"
+        require 'yajl/json_gem'
+        require "time"
 
         @@cluster_id = KubernetesApiClient.getClusterId
         @resources = HealthKubernetesResources.instance
@@ -32,7 +33,7 @@ def initialize
     end
 
     include HealthModel
-    config_param :run_interval, :time, :default => "1m"
+    config_param :run_interval, :time, :default => 60
     config_param :tag, :string, :default => "kubehealth.ReplicaSet"
 
     def configure(conf)
@@ -302,14 +303,25 @@ def initialize_inventory
     def run_periodic
       @mutex.lock
       done = @finished
+      @nextTimeToRun = Time.now
+      @waitTimeout = @run_interval
       until done
-        @condition.wait(@mutex, @run_interval)
+        @nextTimeToRun = @nextTimeToRun + @run_interval
+        @now = Time.now
+        if @nextTimeToRun <= @now
+          @waitTimeout = 1
+          @nextTimeToRun = @now
+        else
+          @waitTimeout = @nextTimeToRun - @now
+        end
+        @condition.wait(@mutex, @waitTimeout)
         done = @finished
         @mutex.unlock
         if !done
           begin
-            @@hmlog.info("in_kube_health::run_periodic @ #{Time.now.utc.iso8601}")
+            @@hmlog.info("in_kube_health::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}")
             enumerate
+            @@hmlog.info("in_kube_health::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}")
           rescue => errorStr
             @@hmlog.warn "in_kube_health::run_periodic: enumerate Failed for kubeapi sourced data health: #{errorStr}"
             ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
diff --git a/source/code/plugin/in_kube_logs.rb b/source/code/plugin/in_kube_logs.rb
deleted file mode 100644
index 119473819..000000000
--- a/source/code/plugin/in_kube_logs.rb
+++ /dev/null
@@ -1,181 +0,0 @@
-#!/usr/local/bin/ruby
-# frozen_string_literal: true
-
-module Fluent
-
-    class Kube_Logs_Input < Input
-        Plugin.register_input('kubelogs', self)
-
-        @@KubeLogsStateFile = "/var/opt/microsoft/docker-cimprov/state/KubeLogQueryState.yaml"
-
-        def initialize
-            super
-            require 'yaml'
-            require 'date'
-            require 'time'
-            require 'json'
-
-            require_relative 'KubernetesApiClient'
-            require_relative 'oms_common'
-            require_relative 'omslog'
-        end
-
-        config_param :run_interval, :time, :default => '1m'
-        config_param :tag, :string, :default => "oms.api.KubeLogs"
-
-        def configure (conf)
-            super
-        end
-
-        def start
-            if @run_interval
-                @finished = false
-                @condition = ConditionVariable.new
-                @mutex = Mutex.new
-                @thread = Thread.new(&method(:run_periodic))
-            end
-        end
-
-        def shutdown
-            if @run_interval
-                @mutex.synchronize {
-                    @finished = true
-                    @condition.signal
-                }
-                @thread.join
-            end
-        end
-
-        def enumerate(podList = nil)
-
-            namespace = ENV['OMS_KUBERNETES_LOGS_NAMESPACE']
-            if namespace.nil? || namespace.empty?
-                return
-            end
-
-            time = Time.now.to_f
-                if podList.nil?
-                    pods = KubernetesApiClient.getPods(namespace)
-                else
-                    pods = podList
-                end
-                logQueryState = getLogQueryState
-                newLogQueryState = {}
-
-                pods.each do |pod|
-                    record = {}
-                    begin
-                        pod['status']['containerStatuses'].each do |container|
-
-                            # if container['state']['running']
-                            #     puts container['name'] + ' is running'
-                            # end
-
-                            timeStamp = DateTime.now
-
-                            containerId = pod['metadata']['namespace'] + "_" + pod['metadata']['name'] + "_" + container['name']
-                            if !logQueryState.empty? && logQueryState[containerId]
-                                timeStamp = DateTime.parse(logQueryState[containerId])
-                            end
-
-                            # Try to get logs for the container
-                            begin
-                              $log.debug "Getting logs for #{container['name']}"
-                              logs = KubernetesApiClient.getContainerLogsSinceTime(pod['metadata']['namespace'], pod['metadata']['name'], container['name'], timeStamp.rfc3339(9), true)
-                              $log.debug "got something back"
-
-                              # By default we don't change the timestamp (if no logs were returned or if there was a (hopefully transient) error in retrieval
-                              newLogQueryState[containerId] = timeStamp.rfc3339(9)
-
-                              if !logs || logs.empty?
-                                  $log.info "no logs returned"
-                              else
-                                  $log.debug "response size is #{logs.length}"
-                                  lines = logs.split("\n")
-                                  index = -1
-
-                                  # skip duplicates
-                                  for i in 0...lines.count
-                                      dateTime = DateTime.parse(lines[i].split(" ").first)
-                                      if (dateTime.to_time - timeStamp.to_time) > 0.0
-                                          index = i
-                                          break
-                                      end
-                                  end
-
-                                  if index >= 0
-                                      $log.debug "starting from line #{index}"
-                                      for i in index...lines.count
-                                          record['Namespace'] = pod['metadata']['namespace']
-                                          record['Pod'] = pod['metadata']['name']
-                                          record['Container'] = container['name']
-                                          record['Message'] = lines[i][(lines[i].index(' ') + 1)..(lines[i].length - 1)]
-                                          record['TimeGenerated'] = lines[i].split(" ").first
-                                          record['Node'] = pod['spec']['nodeName']
-                                          record['Computer'] = OMS::Common.get_hostname
-                                          record['ClusterName'] = KubernetesApiClient.getClusterName
-                                          router.emit(@tag, time, record) if record
-                                      end
-                                      newLogQueryState[containerId] = lines.last.split(" ").first
-                                  else
-                                      newLogQueryState[containerId] = DateTime.now.rfc3339(9)
-                                  end
-                              end
-                            rescue => logException
-                              $log.warn "Failed to retrieve logs for container: #{logException}"
-                              $log.debug_backtrace(logException.backtrace)
-                            end
-                        end
-                        # Update log query state only if logging was succesfful.
-                        # TODO: May have a few duplicate lines in case of
-                        writeLogQueryState(newLogQueryState)
-                    rescue  => errorStr
-                        $log.warn "Exception raised in enumerate: #{errorStr}"
-                        $log.debug_backtrace(errorStr.backtrace)
-                    end
-                end
-        end
-
-        def run_periodic
-            @mutex.lock
-            done = @finished
-            until done
-                @condition.wait(@mutex, @run_interval)
-                done = @finished
-                @mutex.unlock
-                if !done
-                    $log.debug "calling enumerate for KubeLogs"
-                    enumerate
-                    $log.debug "done with enumerate for KubeLogs"
-                end
-                @mutex.lock
-            end
-            @mutex.unlock
-        end
-
-        def getLogQueryState
-            logQueryState = {}
-            begin
-                if File.file?(@@KubeLogsStateFile)
-                    logQueryState = YAML.load_file(@@KubeLogsStateFile, {})
-                end
-            rescue  => errorStr
-                $log.warn "Failed to load query state #{errorStr}"
-                $log.debug_backtrace(errorStr.backtrace)
-            end
-            return logQueryState
-        end
-
-        def writeLogQueryState(logQueryState)
-            begin
-                File.write(@@KubeLogsStateFile, logQueryState.to_yaml)
-            rescue  => errorStr
-                $log.warn "Failed to write query state #{errorStr.to_s}"
-                $log.debug_backtrace(errorStr.backtrace)
-            end
-        end
-
-    end # Kube_Log_Input
-
-end # module
-
diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb
index 7b3cc2e41..3ef28dfaf 100644
--- a/source/code/plugin/in_kube_nodes.rb
+++ b/source/code/plugin/in_kube_nodes.rb
@@ -22,7 +22,7 @@ class Kube_nodeInventory_Input < Input
     def initialize
       super
       require "yaml"
-      require "json"
+      require 'yajl/json_gem'
       require "time"
 
       require_relative "KubernetesApiClient"
@@ -31,7 +31,7 @@ def initialize
       require_relative "omslog"
     end
 
-    config_param :run_interval, :time, :default => "60"
+    config_param :run_interval, :time, :default => 60
     config_param :tag, :string, :default => "oms.containerinsights.KubeNodeInventory"
 
     def configure(conf)
@@ -214,8 +214,9 @@ def enumerate
               #end
               router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
             rescue => errorStr
-              $log.warn "Failed in enumerate for KubePerf from node inventory : #{errorStr}"
+              $log.warn "Failed in enumerate for KubePerf from in_kube_nodes : #{errorStr}"
               $log.debug_backtrace(errorStr.backtrace)
+              ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
             end
             #:optimize:end kubeperf merge
         end
@@ -230,11 +231,9 @@ def enumerate
     def run_periodic
       @mutex.lock
       done = @finished
-      #@lastTimeRan = Time.now
       @nextTimeToRun = Time.now
       @waitTimeout = @run_interval
       until done
-         #@nextTimeToRun = @lastTimeRan + @run_interval
          @nextTimeToRun = @nextTimeToRun + @run_interval
          @now = Time.now
          if @nextTimeToRun <= @now
@@ -243,18 +242,14 @@ def run_periodic
          else
            @waitTimeout = @nextTimeToRun - @now
          end
-         #@lastTimeRan = @now
-         #@lastTimeRan = @nextTimeToRun
          @condition.wait(@mutex, @waitTimeout)
         done = @finished
         @mutex.unlock
         if !done
           begin
-             #$log.info("in_kube_podinventory::run_periodic @ #{Time.now.utc.iso8601}")
-             $log.info("in_kube_nodes::starttime #{Time.now.utc.iso8601}")
+             $log.info("in_kube_nodes::run_periodic.enumerate.start #{Time.now.utc.iso8601}")
              enumerate
-             #sleep (rand() * 50).to_i
-             $log.info("in_kube_nodes::endtime #{Time.now.utc.iso8601}")
+             $log.info("in_kube_nodes::run_periodic.enumerate.end #{Time.now.utc.iso8601}")
           rescue => errorStr
             $log.warn "in_kube_nodes::run_periodic: enumerate Failed to retrieve node inventory: #{errorStr}"
             ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
diff --git a/source/code/plugin/in_kube_perf.rb b/source/code/plugin/in_kube_perf.rb
deleted file mode 100644
index 8b571139d..000000000
--- a/source/code/plugin/in_kube_perf.rb
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/usr/local/bin/ruby
-# frozen_string_literal: true
-
-module Fluent
-    
-      class Kube_Perf_Input < Input
-        Plugin.register_input('kubeperf', self)
-    
-        def initialize
-          super
-          require 'yaml'
-          require 'json'
-    
-          require_relative 'KubernetesApiClient'
-          require_relative 'oms_common'
-          require_relative 'omslog'
-        end
-    
-        config_param :run_interval, :time, :default => '1m'
-        config_param :tag, :string, :default => "oms.api.KubePerf"
-    
-        def configure (conf)
-          super
-        end
-    
-        def start
-          if @run_interval
-            @finished = false
-            @condition = ConditionVariable.new
-            @mutex = Mutex.new
-            @thread = Thread.new(&method(:run_periodic))
-          end
-        end
-    
-        def shutdown
-          if @run_interval
-            @mutex.synchronize {
-              @finished = true
-              @condition.signal
-            }
-            @thread.join
-          end
-        end
-    
-        def enumerate()
-          time = Time.now.to_f
-          begin
-              eventStream = MultiEventStream.new
-              
-                $log.info("in_kube_perf::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}")
-                #get resource requests & resource limits per container as perf data 
-                podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo('pods').body)
-                $log.info("in_kube_perf::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}")
-                if(!podInventory.empty?) 
-                  containerMetricDataItems = []
-                  hostName = (OMS::Common.get_hostname)
-                  containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "cpu","cpuRequestNanoCores"))
-                  containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "memory","memoryRequestBytes"))
-                  containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "cpu","cpuLimitNanoCores"))
-                  containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "memory","memoryLimitBytes"))
-      
-                  containerMetricDataItems.each do |record|
-                    record['DataType'] = "LINUX_PERF_BLOB"
-                    record['IPName'] = "LogManagement"
-                    eventStream.add(time, record) if record
-                    #router.emit(@tag, time, record) if record  
-                  end
-                end
-
-                #get allocatable limits per node as perf data
-                #<TODO> Node capacity is different from node allocatable. Allocatable is what is avaialble for allocating pods.
-                # In theory Capacity = Allocatable + kube-reserved + system-reserved + eviction-threshold
-                # For more details refer to https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/#node-allocatable
-                $log.info("in_kube_perf::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}")
-                nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo('nodes').body)
-                $log.info("in_kube_perf::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}")
-                if(!nodeInventory.empty?)
-                  nodeMetricDataItems = []
-                  #allocatable metrics @ node level
-                  nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "cpu", "cpuAllocatableNanoCores"))
-                  nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "memory", "memoryAllocatableBytes"))
-                  #capacity metrics @ node level
-                  nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores"))
-                  nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes"))
-
-                  nodeMetricDataItems.each do |record|
-                    record['DataType'] = "LINUX_PERF_BLOB"
-                    record['IPName'] = "LogManagement"
-                    eventStream.add(time, record) if record
-                    #router.emit(@tag, time, record) if record 
-                  end 
-                end
-              router.emit_stream(@tag, eventStream) if eventStream          
-              rescue  => errorStr
-              $log.warn "Failed to retrieve metric data: #{errorStr}"
-              $log.debug_backtrace(errorStr.backtrace)
-          end
-        end
-    
-        def run_periodic
-          @mutex.lock
-          done = @finished
-          until done
-            @condition.wait(@mutex, @run_interval)
-            done = @finished
-            @mutex.unlock
-            if !done
-              begin
-                $log.info("in_kube_perf::run_periodic @ #{Time.now.utc.iso8601}")
-                enumerate
-              rescue => errorStr
-                $log.warn "in_kube_perf::run_periodic: enumerate Failed to retrieve kube perf metrics: #{errorStr}"
-              end
-            end
-            @mutex.lock
-          end
-          @mutex.unlock
-        end
-      end # Kube_Perf_Input
-end # module
diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb
index 3f7f3f85a..e912ea6ef 100644
--- a/source/code/plugin/in_kube_podinventory.rb
+++ b/source/code/plugin/in_kube_podinventory.rb
@@ -8,11 +8,12 @@ class Kube_PodInventory_Input < Input
     @@MDMKubePodInventoryTag = "mdm.kubepodinventory"
     @@hostName = (OMS::Common.get_hostname)
     @@kubeperfTag = "oms.api.KubePerf"
+    @@kubeservicesTag = "oms.containerinsights.KubeServices"
 
     def initialize
       super
       require "yaml"
-      require "json"
+      require 'yajl/json_gem'
       require "set"
       require "time"
 
@@ -71,6 +72,8 @@ def enumerate(podList = nil)
         else
           $log.warn "Received empty podInventory"
         end
+        podInfo = nil
+        podInventory = nil
       rescue => errorStr
         $log.warn "Failed in enumerate pod inventory: #{errorStr}"
         $log.debug_backtrace(errorStr.backtrace)
@@ -453,10 +456,43 @@ def parse_and_emit_records(podInventory, serviceList,  batchTime = Time.utc.iso8
           router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
           
         rescue => errorStr
-          $log.warn "Failed in parse_and_emit_record for KubePerf from pod inventory : #{errorStr}"
+          $log.warn "Failed in parse_and_emit_record for KubePerf from in_kube_podinventory : #{errorStr}"
           $log.debug_backtrace(errorStr.backtrace)
+          ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
         end
         #:optimize:end kubeperf merge
+
+        #:optimize:start kubeservices merge
+        begin
+          if (!serviceList.nil? && !serviceList.empty?)
+            kubeServicesEventStream = MultiEventStream.new
+            serviceList["items"].each do |items|
+              kubeServiceRecord = {}
+              kubeServiceRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
+              kubeServiceRecord["ServiceName"] = items["metadata"]["name"]
+              kubeServiceRecord["Namespace"] = items["metadata"]["namespace"]
+              kubeServiceRecord["SelectorLabels"] = [items["spec"]["selector"]]
+              kubeServiceRecord["ClusterId"] = KubernetesApiClient.getClusterId
+              kubeServiceRecord["ClusterName"] = KubernetesApiClient.getClusterName
+              kubeServiceRecord["ClusterIP"] = items["spec"]["clusterIP"]
+              kubeServiceRecord["ServiceType"] = items["spec"]["type"]
+              #<TODO> : Add ports and status fields
+              kubeServicewrapper = {
+                "DataType" => "KUBE_SERVICES_BLOB",
+                "IPName" => "ContainerInsights",
+                "DataItems" => [kubeServiceRecord.each { |k, v| kubeServiceRecord[k] = v }],
+              }
+              kubeServicesEventStream.add(emitTime, kubeServicewrapper) if kubeServicewrapper
+            end
+            router.emit_stream(@@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream
+          end
+        rescue => errorStr
+          $log.warn "Failed in parse_and_emit_record for KubeServices from in_kube_podinventory : #{errorStr}"
+          $log.debug_backtrace(errorStr.backtrace)
+          ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+        end
+        #:optimize:end kubeservices merge
+
         if telemetryFlush == true
           telemetryProperties = {}
           telemetryProperties["Computer"] = @@hostName
@@ -484,11 +520,9 @@ def parse_and_emit_records(podInventory, serviceList,  batchTime = Time.utc.iso8
     def run_periodic
       @mutex.lock
       done = @finished
-      #@lastTimeRan = Time.now
       @nextTimeToRun = Time.now
       @waitTimeout = @run_interval
       until done
-        #@nextTimeToRun = @lastTimeRan + @run_interval
         @nextTimeToRun = @nextTimeToRun + @run_interval
         @now = Time.now
         if @nextTimeToRun <= @now
@@ -497,18 +531,14 @@ def run_periodic
         else
           @waitTimeout = @nextTimeToRun - @now
         end
-        #@lastTimeRan = @now
-        #@lastTimeRan = @nextTimeToRun
         @condition.wait(@mutex, @waitTimeout)
         done = @finished
         @mutex.unlock
         if !done
           begin
-            #$log.info("in_kube_podinventory::run_periodic @ #{Time.now.utc.iso8601}")
-            $log.info("in_kube_podinventory::starttime #{Time.now.utc.iso8601}")
+            $log.info("in_kube_podinventory::run_periodic.enumerate.start #{Time.now.utc.iso8601}")
             enumerate
-            #sleep (rand() * 50).to_i
-            $log.info("in_kube_podinventory::endtime #{Time.now.utc.iso8601}")
+            $log.info("in_kube_podinventory::run_periodic.enumerate.end #{Time.now.utc.iso8601}")
           rescue => errorStr
             $log.warn "in_kube_podinventory::run_periodic: enumerate Failed to retrieve pod inventory: #{errorStr}"
             ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb
deleted file mode 100644
index 7cd703620..000000000
--- a/source/code/plugin/in_kube_services.rb
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/usr/local/bin/ruby
-# frozen_string_literal: true
-
-module Fluent
-  class Kube_Services_Input < Input
-    Plugin.register_input("kubeservices", self)
-
-    def initialize
-      super
-      require "yaml"
-      require "json"
-
-      require_relative "KubernetesApiClient"
-      require_relative "oms_common"
-      require_relative "omslog"
-      require_relative "ApplicationInsightsUtility"
-    end
-
-    config_param :run_interval, :time, :default => "1m"
-    config_param :tag, :string, :default => "oms.containerinsights.KubeServices"
-
-    def configure(conf)
-      super
-    end
-
-    def start
-      if @run_interval
-        @finished = false
-        @condition = ConditionVariable.new
-        @mutex = Mutex.new
-        @thread = Thread.new(&method(:run_periodic))
-      end
-    end
-
-    def shutdown
-      if @run_interval
-        @mutex.synchronize {
-          @finished = true
-          @condition.signal
-        }
-        @thread.join
-      end
-    end
-
-    def enumerate
-      currentTime = Time.now
-      emitTime = currentTime.to_f
-      batchTime = currentTime.utc.iso8601
-
-      serviceList = nil
-      
-      $log.info("in_kube_services::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}")
-      serviceInfo = KubernetesApiClient.getKubeResourceInfo("services")
-      $log.info("in_kube_services::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}")
-
-      if !serviceInfo.nil?
-        serviceList = JSON.parse(serviceInfo.body)
-      end
-
-      begin
-        if (!serviceList.nil? && !serviceList.empty?)
-          eventStream = MultiEventStream.new
-          serviceList["items"].each do |items|
-            record = {}
-            record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
-            record["ServiceName"] = items["metadata"]["name"]
-            record["Namespace"] = items["metadata"]["namespace"]
-            record["SelectorLabels"] = [items["spec"]["selector"]]
-            record["ClusterId"] = KubernetesApiClient.getClusterId
-            record["ClusterName"] = KubernetesApiClient.getClusterName
-            record["ClusterIP"] = items["spec"]["clusterIP"]
-            record["ServiceType"] = items["spec"]["type"]
-            #<TODO> : Add ports and status fields
-            wrapper = {
-              "DataType" => "KUBE_SERVICES_BLOB",
-              "IPName" => "ContainerInsights",
-              "DataItems" => [record.each { |k, v| record[k] = v }],
-            }
-            eventStream.add(emitTime, wrapper) if wrapper
-          end
-          router.emit_stream(@tag, eventStream) if eventStream
-        end
-      rescue => errorStr
-        $log.debug_backtrace(errorStr.backtrace)
-        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
-      end
-    end
-
-    def run_periodic
-      @mutex.lock
-      done = @finished
-      until done
-        @condition.wait(@mutex, @run_interval)
-        done = @finished
-        @mutex.unlock
-        if !done
-          begin
-            $log.info("in_kube_services::run_periodic @ #{Time.now.utc.iso8601}")
-            enumerate
-          rescue => errorStr
-            $log.warn "in_kube_services::run_periodic: enumerate Failed to kube services: #{errorStr}"
-            ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
-          end
-        end
-        @mutex.lock
-      end
-      @mutex.unlock
-    end
-  end # Kube_Services_Input
-end # module
diff --git a/source/code/plugin/in_win_cadvisor_perf.rb b/source/code/plugin/in_win_cadvisor_perf.rb
index 0114a47ec..54183bbc0 100644
--- a/source/code/plugin/in_win_cadvisor_perf.rb
+++ b/source/code/plugin/in_win_cadvisor_perf.rb
@@ -10,7 +10,8 @@ class Win_CAdvisor_Perf_Input < Input
     def initialize
       super
       require "yaml"
-      require "json"
+      require 'yajl/json_gem'
+      require "time"
 
       require_relative "CAdvisorMetricsAPIClient"
       require_relative "KubernetesApiClient"
@@ -18,7 +19,7 @@ def initialize
       require_relative "omslog"
     end
 
-    config_param :run_interval, :time, :default => "1m"
+    config_param :run_interval, :time, :default => 60
     config_param :tag, :string, :default => "oms.api.wincadvisorperf"
     config_param :mdmtag, :string, :default => "mdm.cadvisorperf"
 
@@ -100,14 +101,25 @@ def enumerate()
     def run_periodic
       @mutex.lock
       done = @finished
+      @nextTimeToRun = Time.now
+      @waitTimeout = @run_interval
       until done
-        @condition.wait(@mutex, @run_interval)
+        @nextTimeToRun = @nextTimeToRun + @run_interval
+        @now = Time.now
+        if @nextTimeToRun <= @now
+          @waitTimeout = 1
+          @nextTimeToRun = @now
+        else
+          @waitTimeout = @nextTimeToRun - @now
+        end
+        @condition.wait(@mutex, @waitTimeout)
         done = @finished
         @mutex.unlock
         if !done
           begin
-            $log.info("in_win_cadvisor_perf::run_periodic @ #{Time.now.utc.iso8601}")
+            $log.info("in_win_cadvisor_perf::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}")
             enumerate
+            $log.info("in_win_cadvisor_perf::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}")
           rescue => errorStr
             $log.warn "in_win_cadvisor_perf::run_periodic: enumerate Failed to retrieve cadvisor perf metrics for windows nodes: #{errorStr}"
           end
diff --git a/source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb b/source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb
index 8f4677044..60838e215 100644
--- a/source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb
+++ b/source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb
@@ -1,4 +1,4 @@
-﻿require 'json'
+﻿require 'yajl/json_gem'
 
 module ApplicationInsights
   module Channel
diff --git a/source/code/plugin/lib/application_insights/channel/sender_base.rb b/source/code/plugin/lib/application_insights/channel/sender_base.rb
index 2431bf748..004b4722f 100644
--- a/source/code/plugin/lib/application_insights/channel/sender_base.rb
+++ b/source/code/plugin/lib/application_insights/channel/sender_base.rb
@@ -1,4 +1,4 @@
-require 'json'
+require 'yajl/json_gem'
 require 'net/http'
 require 'openssl'
 require 'stringio'
diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb
index b8d10090d..0a4e601b2 100644
--- a/source/code/plugin/out_mdm.rb
+++ b/source/code/plugin/out_mdm.rb
@@ -12,7 +12,7 @@ def initialize
       require "net/http"
       require "net/https"
       require "uri"
-      require "json"
+      require 'yajl/json_gem'
       require_relative "KubernetesApiClient"
       require_relative "ApplicationInsightsUtility"
 

From c6a1b0889631e1dee1fb911a54eb4a13d41b9fa1 Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Tue, 12 Nov 2019 18:20:47 -0800
Subject: [PATCH 09/26] fix removed files

---
 installer/datafiles/base_container.data | 2 --
 1 file changed, 2 deletions(-)

diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data
index 0264871dd..766adbed8 100644
--- a/installer/datafiles/base_container.data
+++ b/installer/datafiles/base_container.data
@@ -31,10 +31,8 @@ MAINTAINER:              'Microsoft Corporation'
 /etc/opt/microsoft/docker-cimprov/container.conf;			    installer/conf/container.conf;                      644; root; root
 
 /opt/microsoft/omsagent/plugin/CAdvisorMetricsAPIClient.rb;     source/code/plugin/CAdvisorMetricsAPIClient.rb;     644; root; root
-/opt/microsoft/omsagent/plugin/in_kube_perf.rb;			        source/code/plugin/in_kube_perf.rb;			    	644; root; root
 /opt/microsoft/omsagent/plugin/in_cadvisor_perf.rb;			        source/code/plugin/in_cadvisor_perf.rb;			    	644; root; root
 /opt/microsoft/omsagent/plugin/in_win_cadvisor_perf.rb;			    source/code/plugin/in_win_cadvisor_perf.rb;			    644; root; root
-/opt/microsoft/omsagent/plugin/in_kube_services.rb;			        source/code/plugin/in_kube_services.rb;			    	644; root; root
 /opt/microsoft/omsagent/plugin/in_kube_nodes.rb;			        source/code/plugin/in_kube_nodes.rb;			    	644; root; root
 /opt/microsoft/omsagent/plugin/filter_inventory2mdm.rb;			     source/code/plugin/filter_inventory2mdm.rb;			    	644; root; root
 /opt/microsoft/omsagent/plugin/CustomMetricsUtils.rb;			     source/code/plugin/CustomMetricsUtils.rb;			    	644; root; root

From 4ee422269158d77d768cdf951febed0dcad28b1c Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Wed, 13 Nov 2019 12:13:58 -0800
Subject: [PATCH 10/26] fix timeofcommand and remove a duplicate entry for a
 health file.

---
 installer/datafiles/base_container.data | 1 -
 source/code/go/src/plugins/oms.go       | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data
index 766adbed8..60de5af18 100644
--- a/installer/datafiles/base_container.data
+++ b/installer/datafiles/base_container.data
@@ -140,7 +140,6 @@ MAINTAINER:              'Microsoft Corporation'
 /opt/microsoft/omsagent/plugin/health/health_model_definition_parser.rb;                            source/code/plugin/health/health_model_definition_parser.rb; 644; root; root
 /opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb;                                    source/code/plugin/health/health_monitor_helpers.rb; 644; root; root
 /opt/microsoft/omsagent/plugin/health/health_monitor_optimizer.rb;                                  source/code/plugin/health/health_monitor_optimizer.rb; 644; root; root
-/opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb;                                    source/code/plugin/health/health_monitor_helpers.rb; 644; root; root
 /opt/microsoft/omsagent/plugin/health/health_monitor_provider.rb;                                   source/code/plugin/health/health_monitor_provider.rb; 644; root; root
 /opt/microsoft/omsagent/plugin/health/health_monitor_record.rb;                                     source/code/plugin/health/health_monitor_record.rb; 644; root; root
 /opt/microsoft/omsagent/plugin/health/health_monitor_state.rb;                                      source/code/plugin/health/health_monitor_state.rb; 644; root; root
diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go
index 834726c93..d89c1c41a 100644
--- a/source/code/go/src/plugins/oms.go
+++ b/source/code/go/src/plugins/oms.go
@@ -767,6 +767,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int {
 				LogEntry:              stringMap["LogEntry"],
 				LogEntrySource:        stringMap["LogEntrySource"],
 				LogEntryTimeStamp:     stringMap["LogEntryTimeStamp"],
+				LogEntryTimeOfCommand: "",
 				SourceSystem:          stringMap["SourceSystem"],
 				Computer:              Computer,
 				Image:                 stringMap["Image"],

From e5aabf024f97eead9e963b4a5135e865a2501ad7 Mon Sep 17 00:00:00 2001
From: rashmichandrashekar <rashmy@microsoft.com>
Date: Thu, 14 Nov 2019 10:42:27 -0800
Subject: [PATCH 11/26] Rashmi/http leak fixes (#301)

* changes for http connection close

* close socket in ensure

* adding nil check
---
 .../code/plugin/CAdvisorMetricsAPIClient.rb   | 13 +++++--------
 source/code/plugin/DockerApiClient.rb         |  9 +++++++--
 source/code/plugin/KubernetesApiClient.rb     | 19 ++++++-------------
 3 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb
index cb10992d5..7b13fb736 100644
--- a/source/code/plugin/CAdvisorMetricsAPIClient.rb
+++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb
@@ -65,14 +65,11 @@ def getSummaryStatsFromCAdvisor(winNode)
         cAdvisorUri = getCAdvisorUri(winNode)
         if !cAdvisorUri.nil?
           uri = URI.parse(cAdvisorUri)
-          http = Net::HTTP.new(uri.host, uri.port)
-          http.use_ssl = false
-          http.open_timeout = 20
-          http.read_timeout = 40
-
-          cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri)
-          response = http.request(cAdvisorApiRequest)
-          @Log.info "Got response code #{response.code} from #{uri.request_uri}"
+          Net::HTTP.start(uri.host, uri.port, :use_ssl => false, :open_timeout => 20, :read_timeout => 40 ) do |http|
+            cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri)
+            response = http.request(cAdvisorApiRequest)
+            @Log.info "Got response code #{response.code} from #{uri.request_uri}"
+          end
         end
       rescue => error
         @Log.warn("CAdvisor api request failed: #{error}")
diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb
index fb2148ec9..cc0d0e722 100644
--- a/source/code/plugin/DockerApiClient.rb
+++ b/source/code/plugin/DockerApiClient.rb
@@ -3,7 +3,7 @@
 
 class DockerApiClient
   require "socket"
-  require 'yajl/json_gem'
+  require "yajl/json_gem"
   require "timeout"
   require_relative "omslog"
   require_relative "DockerApiRestHelper"
@@ -40,7 +40,6 @@ def getResponse(request, isMultiJson, isVersion)
           end
           break if (isVersion) ? (responseChunk.length < @@ChunkSize) : (responseChunk.end_with? "0\r\n\r\n")
         end
-        socket.close
         return (isTimeOut) ? nil : parseResponse(dockerResponse, isMultiJson)
       rescue => errorStr
         $log.warn("Socket call failed for request: #{request} error: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}")
@@ -49,6 +48,12 @@ def getResponse(request, isMultiJson, isVersion)
           ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
         end
         return nil
+      ensure
+        ## REMOVE LOG BEFORE MERGE
+        $log.warn "Closing docker socket connection"
+        if !socket.nil?
+          socket.close
+        end
       end
     end
 
diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb
index 0cff50752..040373afe 100644
--- a/source/code/plugin/KubernetesApiClient.rb
+++ b/source/code/plugin/KubernetesApiClient.rb
@@ -40,23 +40,16 @@ def getKubeResourceInfo(resource, api_group: nil)
         resourceUri = getResourceUri(resource, api_group)
         if !resourceUri.nil?
           uri = URI.parse(resourceUri)
-          http = Net::HTTP.new(uri.host, uri.port)
-          http.use_ssl = true
-          http.open_timeout = 20
-          http.read_timeout = 40
-          
           if !File.exist?(@@CaFile)
             raise "#{@@CaFile} doesnt exist"
           else
-            http.ca_file = @@CaFile if File.exist?(@@CaFile)
+            Net::HTTP.start(uri.host, uri.port, :use_ssl => true, :ca_file => @@CaFile, :verify_mode => OpenSSL::SSL::VERIFY_PEER ) do |http|
+              kubeApiRequest = Net::HTTP::Get.new(uri.request_uri)
+              kubeApiRequest["Authorization"] = "Bearer " + getTokenStr
+              @Log.info "KubernetesAPIClient::getKubeResourceInfo : Making request to #{uri.request_uri} @ #{Time.now.utc.iso8601}"
+              response = http.request(kubeApiRequest)
+              @Log.info "KubernetesAPIClient::getKubeResourceInfo : Got response of #{response.code} for #{uri.request_uri} @ #{Time.now.utc.iso8601}"
           end
-          http.verify_mode = OpenSSL::SSL::VERIFY_PEER
-
-          kubeApiRequest = Net::HTTP::Get.new(uri.request_uri)
-          kubeApiRequest["Authorization"] = "Bearer " + getTokenStr
-          @Log.info "KubernetesAPIClient::getKubeResourceInfo : Making request to #{uri.request_uri} @ #{Time.now.utc.iso8601}"
-          response = http.request(kubeApiRequest)
-          @Log.info "KubernetesAPIClient::getKubeResourceInfo : Got response of #{response.code} for #{uri.request_uri} @ #{Time.now.utc.iso8601}"
         end
       rescue => error
         @Log.warn("kubernetes api request failed: #{error} for #{resource} @ #{Time.now.utc.iso8601}")

From 11849ad2f38ef0e0f937aa33da1c464150b6dfc4 Mon Sep 17 00:00:00 2001
From: rashmichandrashekar <rashmy@microsoft.com>
Date: Thu, 14 Nov 2019 14:15:48 -0800
Subject: [PATCH 12/26] Rashmi/http leak fixes (#303)

* changes for http connection close

* close socket in ensure

* adding nil check

* adding missing end
---
 source/code/plugin/KubernetesApiClient.rb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb
index 040373afe..8adf3f6b7 100644
--- a/source/code/plugin/KubernetesApiClient.rb
+++ b/source/code/plugin/KubernetesApiClient.rb
@@ -49,6 +49,7 @@ def getKubeResourceInfo(resource, api_group: nil)
               @Log.info "KubernetesAPIClient::getKubeResourceInfo : Making request to #{uri.request_uri} @ #{Time.now.utc.iso8601}"
               response = http.request(kubeApiRequest)
               @Log.info "KubernetesAPIClient::getKubeResourceInfo : Got response of #{response.code} for #{uri.request_uri} @ #{Time.now.utc.iso8601}"
+            end
           end
         end
       rescue => error

From 02ee6ebd3f5e554c04f5493af452032d149cb2f8 Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Fri, 15 Nov 2019 17:11:21 -0800
Subject: [PATCH 13/26] use yajl for events & nodes parsing.

---
 installer/conf/kube.conf             | 94 ++++++++++++++--------------
 source/code/plugin/in_kube_events.rb |  3 +-
 source/code/plugin/in_kube_nodes.rb  |  3 +-
 3 files changed, 51 insertions(+), 49 deletions(-)

diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf
index eca68d6be..f8924f643 100644
--- a/installer/conf/kube.conf
+++ b/installer/conf/kube.conf
@@ -65,15 +65,15 @@
 <match mdm.cadvisorperf**>
   type out_mdm
   log_level debug
-  num_threads 3
-  #buffer_chunk_limit 20m
+  num_threads 5
+  buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer
-  #buffer_queue_limit 20
+  buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 10s
+  flush_interval 20s
   retry_limit 10
-  retry_wait 10s
+  retry_wait 30s
   max_retry_wait 9m
   retry_mdm_post_wait_minutes 60
 </match>
@@ -81,30 +81,30 @@
 <match oms.containerinsights.KubePodInventory**>
   type out_oms
   log_level debug
-  num_threads 10
-  #buffer_chunk_limit 20m
+  num_threads 5
+  buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer
-  #buffer_queue_limit 20
+  buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 5s
+  flush_interval 20s
   retry_limit 10
-  retry_wait 10s
+  retry_wait 30s
   max_retry_wait 9m
 </match>
 
 <match oms.containerinsights.KubeEvents**>
 	type out_oms
 	log_level debug
-	num_threads 3
-	#buffer_chunk_limit 20m
+	num_threads 5
+	buffer_chunk_limit 20m
 	buffer_type file
 	buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer
-	#buffer_queue_limit 20
+	buffer_queue_limit 20
 	buffer_queue_full_action drop_oldest_chunk
-	flush_interval 10s
+	flush_interval 20s
 	retry_limit 10
-	retry_wait 10s
+	retry_wait 30s
 	max_retry_wait 9m
 </match>
 
@@ -112,58 +112,58 @@
   type out_oms
   log_level debug
   num_threads 2
-  #buffer_chunk_limit 20m
+  buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer
-  #buffer_queue_limit 20
+  buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 10s
+  flush_interval 20s
   retry_limit 10
-  retry_wait 10s
+  retry_wait 30s
   max_retry_wait 9m
 </match>
 
 <match oms.containerinsights.KubeNodeInventory**>
   type out_oms
   log_level debug
-  num_threads 3
-  #buffer_chunk_limit 20m
+  num_threads 5
+  buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer
-  #buffer_queue_limit 20
+  buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 10s
+  flush_interval 20s
   retry_limit 10
-  retry_wait 10s
+  retry_wait 30s
   max_retry_wait 9m
 </match>
 
 <match oms.containerinsights.ContainerNodeInventory**>
   type out_oms
   log_level debug
-  num_threads 2
-  #buffer_chunk_limit 40m
+  num_threads 3
+  buffer_chunk_limit 40m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer
-  #buffer_queue_limit 40
-  flush_interval 10s
+  buffer_queue_limit 40
+  flush_interval 20s
   retry_limit 10
-  retry_wait 10s
+  retry_wait 30s
   max_retry_wait 9m
 </match>
 
 <match oms.api.KubePerf**>
   type out_oms
   log_level debug
-  num_threads 10
-  #buffer_chunk_limit 40m
+  num_threads 5
+  buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer
-  #buffer_queue_limit 40
+  buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 5s
+  flush_interval 20s
   retry_limit 10
-  retry_wait 10s
+  retry_wait 30s
   max_retry_wait 9m
 </match>
 
@@ -171,14 +171,14 @@
   type out_mdm
   log_level debug
   num_threads 5
-  #buffer_chunk_limit 40m
+  buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_mdm_*.buffer
-  #buffer_queue_limit 40
+  buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 5s
+  flush_interval 20s
   retry_limit 10
-  retry_wait 10s
+  retry_wait 30s
   max_retry_wait 9m
   retry_mdm_post_wait_minutes 60
 </match>
@@ -186,15 +186,15 @@
 <match oms.api.wincadvisorperf**>
   type out_oms
   log_level debug
-  num_threads 3
-  #buffer_chunk_limit 40m
+  num_threads 5
+  buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_api_wincadvisorperf*.buffer
-  #buffer_queue_limit 40
+  buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 10s
+  flush_interval 20s
   retry_limit 10
-  retry_wait 10s
+  retry_wait 30s
   max_retry_wait 9m
 </match>
 
@@ -202,13 +202,13 @@
   type out_oms
   log_level debug
   num_threads 3
-  #buffer_chunk_limit 20m
+  buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_kubehealth*.buffer
-  #buffer_queue_limit 20
+  buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 10s
+  flush_interval 20s
   retry_limit 10
-  retry_wait 10s
+  retry_wait 30s
   max_retry_wait 9m
 </match>
\ No newline at end of file
diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb
index 87007e60b..82fb88b70 100644
--- a/source/code/plugin/in_kube_events.rb
+++ b/source/code/plugin/in_kube_events.rb
@@ -10,6 +10,7 @@ class Kube_Event_Input < Input
     def initialize
       super
       require 'yajl/json_gem'
+      require 'yajl'
       require "time"
 
       require_relative "KubernetesApiClient"
@@ -55,7 +56,7 @@ def enumerate(eventList = nil)
       $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}")
 
       if !eventInfo.nil?
-        events = JSON.parse(eventInfo.body)
+        events = Yajl::Parser.parse(StringIO.new(eventInfo.body))
       end
 
       eventQueryState = getEventQueryState
diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb
index 3ef28dfaf..aa60badfc 100644
--- a/source/code/plugin/in_kube_nodes.rb
+++ b/source/code/plugin/in_kube_nodes.rb
@@ -23,6 +23,7 @@ def initialize
       super
       require "yaml"
       require 'yajl/json_gem'
+      require 'yajl'
       require "time"
 
       require_relative "KubernetesApiClient"
@@ -71,7 +72,7 @@ def enumerate
       $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}")
 
       if !nodeInfo.nil?
-        nodeInventory = JSON.parse(nodeInfo.body)
+        nodeInventory = Yajl::Parser.parse(StringIO.new(nodeInfo.body))
       end
 
       begin

From 1220dd010e15174266efa2f499c4547273157a6e Mon Sep 17 00:00:00 2001
From: rashmichandrashekar <rashmy@microsoft.com>
Date: Fri, 15 Nov 2019 17:44:56 -0800
Subject: [PATCH 14/26] Rashmi/http leak fixes (#304)

* changes for http connection close

* close socket in ensure

* adding nil check

* Update MDM region list to include francecentral, japaneast and australiaeast

* Update MDM region list to include francecentral, japaneast and australiaeast

* adding missing end

* Send telemetry when there is error in calculation of state in percentage aggregation, and send state as unknown (#300)

* changes for chunking

* telemetry changes

* some fixes

* bug fix

* changing to have morgan changes only

* add new line
---
 installer/conf/container.conf              |   0
 source/code/plugin/in_kube_podinventory.rb | 164 +++++++++++++--------
 2 files changed, 101 insertions(+), 63 deletions(-)
 mode change 100755 => 100644 installer/conf/container.conf

diff --git a/installer/conf/container.conf b/installer/conf/container.conf
old mode 100755
new mode 100644
diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb
index e912ea6ef..cc1ec35f8 100644
--- a/source/code/plugin/in_kube_podinventory.rb
+++ b/source/code/plugin/in_kube_podinventory.rb
@@ -13,7 +13,8 @@ class Kube_PodInventory_Input < Input
     def initialize
       super
       require "yaml"
-      require 'yajl/json_gem'
+      require "yajl/json_gem"
+      require "yajl"
       require "set"
       require "time"
 
@@ -21,6 +22,12 @@ def initialize
       require_relative "ApplicationInsightsUtility"
       require_relative "oms_common"
       require_relative "omslog"
+
+      @PODS_CHUNK_SIZE = "1500"
+      @podCount = 0
+      @controllerSet = Set.new []
+      @winContainerCount = 0
+      @controllerData = {}
     end
 
     config_param :run_interval, :time, :default => 60
@@ -50,37 +57,89 @@ def shutdown
       end
     end
 
-    def enumerate(podList = nil)
-      podInventory = podList
-      currentTime = Time.now
-      $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}")
-      podInfo = KubernetesApiClient.getKubeResourceInfo("pods")
-      $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}")
-
-      if !podInfo.nil?
-        podInventory = JSON.parse(podInfo.body)
-      end
-
+    def processPodChunks(podInventory, serviceList, batchTime)
       begin
         if (!podInventory.empty? && podInventory.key?("items") && !podInventory["items"].empty?)
-          batchTime = currentTime.utc.iso8601
-          #get pod inventory & services
-          $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}")
-          serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo("services").body)
-          $log.info("in_kube_podinventory::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}")
-          parse_and_emit_records(podInventory, serviceList, batchTime )
+          parse_and_emit_records(podInventory, serviceList, batchTime)
         else
-          $log.warn "Received empty podInventory"
+          $log.warn "in_kube_podinventory::processPodChunks:Received empty podInventory"
         end
         podInfo = nil
         podInventory = nil
       rescue => errorStr
-        $log.warn "Failed in enumerate pod inventory: #{errorStr}"
+        $log.warn "in_kube_podinventory::processPodChunks:Failed in process pod chunks: #{errorStr}"
         $log.debug_backtrace(errorStr.backtrace)
         ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
       end
     end
 
+    def parsePodsJsonAndProcess(podInfo, serviceList, batchTime)
+      if !podInfo.nil?
+        $log.info("in_kube_podinventory::parsePodsJsonAndProcess:Start:Parsing chunked data using yajl @ #{Time.now.utc.iso8601}")
+        podInventory = Yajl::Parser.parse(StringIO.new(podInfo.body))
+        $log.info("in_kube_podinventory::parsePodsJsonAndProcess:End:Parsing chunked data using yajl @ #{Time.now.utc.iso8601}")
+      end
+      if (!podInventory.nil? && !podInventory["metadata"].nil?)
+        continuationToken = podInventory["metadata"]["continue"]
+      end
+      processPodChunks(podInventory, serviceList, batchTime)
+      return continuationToken
+    end
+
+    def enumerate(podList = nil)
+      podInventory = podList
+      telemetryFlush = false
+      @podCount = 0
+      @controllerSet = Set.new []
+      @winContainerCount = 0
+      @controllerData = {}
+      currentTime = Time.now
+      batchTime = currentTime.utc.iso8601
+
+      # Get services first so that we dont need to make a call for very chunk
+      $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}")
+      serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo("services").body)
+      $log.info("in_kube_podinventory::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}")
+
+      # Initializing continuation token to nil
+      continuationToken = nil
+      $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}")
+      podInfo = KubernetesApiClient.getKubeResourceInfo("pods?limit=#{@PODS_CHUNK_SIZE}")
+      $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}")
+
+      continuationToken = parsePodsJsonAndProcess(podInfo, serviceList, batchTime)
+
+      #If we receive a continuation token, make calls, process and flush data until we have processed all data
+      while (!continuationToken.nil? && !continuationToken.empty?)
+        $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API using continuation token @ #{Time.now.utc.iso8601}")
+        podInfo = KubernetesApiClient.getKubeResourceInfo("pods?limit=#{@PODS_CHUNK_SIZE}&continue=#{continuationToken}")
+        $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API using continuation token @ #{Time.now.utc.iso8601}")
+        continuationToken = parsePodsJsonAndProcess(podInfo, serviceList, batchTime)
+      end
+
+      # Adding telemetry to send pod telemetry every 5 minutes
+      timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs
+      timeDifferenceInMinutes = timeDifference / 60
+      if (timeDifferenceInMinutes >= 5)
+        telemetryFlush = true
+      end
+
+      # Flush AppInsights telemetry once all the processing is done
+      if telemetryFlush == true
+        telemetryProperties = {}
+        telemetryProperties["Computer"] = @@hostName
+        ApplicationInsightsUtility.sendCustomEvent("KubePodInventoryHeartBeatEvent", telemetryProperties)
+        ApplicationInsightsUtility.sendMetricTelemetry("PodCount", @podCount, {})
+        telemetryProperties["ControllerData"] = @controllerData.to_json
+        ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", @controllerSet.length, telemetryProperties)
+        if @winContainerCount > 0
+          telemetryProperties["ClusterWideWindowsContainersCount"] = @winContainerCount
+          ApplicationInsightsUtility.sendCustomEvent("WindowsContainerInventoryEvent", telemetryProperties)
+        end
+        @@podTelemetryTimeTracker = DateTime.now.to_time.to_i
+      end
+    end
+
     def populateWindowsContainerInventoryRecord(container, record, containerEnvVariableHash, batchTime)
       begin
         containerInventoryRecord = {}
@@ -193,15 +252,12 @@ def getContainerEnvironmentVariables(pod, clusterCollectEnvironmentVar)
       end
     end
 
-    def parse_and_emit_records(podInventory, serviceList,  batchTime = Time.utc.iso8601)
+    def parse_and_emit_records(podInventory, serviceList, batchTime = Time.utc.iso8601)
       currentTime = Time.now
       emitTime = currentTime.to_f
       #batchTime = currentTime.utc.iso8601
       eventStream = MultiEventStream.new
-      controllerSet = Set.new []
-      controllerData = {}
-      telemetryFlush = false
-      winContainerCount = 0
+
       begin #begin block start
         # Getting windows nodes from kubeapi
         winNodes = KubernetesApiClient.getWindowsNodesArray
@@ -284,24 +340,17 @@ def parse_and_emit_records(podInventory, serviceList,  batchTime = Time.utc.iso8
           record["ClusterId"] = KubernetesApiClient.getClusterId
           record["ClusterName"] = KubernetesApiClient.getClusterName
           record["ServiceName"] = getServiceNameFromLabels(items["metadata"]["namespace"], items["metadata"]["labels"], serviceList)
-          # Adding telemetry to send pod telemetry every 5 minutes
-          timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs
-          timeDifferenceInMinutes = timeDifference / 60
-          if (timeDifferenceInMinutes >= 5)
-            telemetryFlush = true
-          end
+
           if !items["metadata"]["ownerReferences"].nil?
             record["ControllerKind"] = items["metadata"]["ownerReferences"][0]["kind"]
             record["ControllerName"] = items["metadata"]["ownerReferences"][0]["name"]
-            if telemetryFlush == true
-              controllerSet.add(record["ControllerKind"] + record["ControllerName"])
-              #Adding controller kind to telemetry ro information about customer workload
-              if (controllerData[record["ControllerKind"]].nil?)
-                controllerData[record["ControllerKind"]] = 1
-              else
-                controllerValue = controllerData[record["ControllerKind"]]
-                controllerData[record["ControllerKind"]] += 1
-              end
+            @controllerSet.add(record["ControllerKind"] + record["ControllerName"])
+            #Adding controller kind to telemetry ro information about customer workload
+            if (@controllerData[record["ControllerKind"]].nil?)
+              @controllerData[record["ControllerKind"]] = 1
+            else
+              controllerValue = @controllerData[record["ControllerKind"]]
+              @controllerData[record["ControllerKind"]] += 1
             end
           end
           podRestartCount = 0
@@ -419,7 +468,7 @@ def parse_and_emit_records(podInventory, serviceList,  batchTime = Time.utc.iso8
             end
           end
           # Send container inventory records for containers on windows nodes
-          winContainerCount += containerInventoryRecords.length
+          @winContainerCount += containerInventoryRecords.length
           containerInventoryRecords.each do |cirecord|
             if !cirecord.nil?
               ciwrapper = {
@@ -436,25 +485,24 @@ def parse_and_emit_records(podInventory, serviceList,  batchTime = Time.utc.iso8
         router.emit_stream(@@MDMKubePodInventoryTag, eventStream) if eventStream
         #:optimize:kubeperf merge
         begin
-          #if(!podInventory.empty?) 
+          #if(!podInventory.empty?)
           containerMetricDataItems = []
           #hostName = (OMS::Common.get_hostname)
-          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "cpu","cpuRequestNanoCores", batchTime))
-          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "memory","memoryRequestBytes", batchTime))
-          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "cpu","cpuLimitNanoCores", batchTime))
-          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "memory","memoryLimitBytes", batchTime))
+          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "cpu", "cpuRequestNanoCores", batchTime))
+          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "memory", "memoryRequestBytes", batchTime))
+          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "cpu", "cpuLimitNanoCores", batchTime))
+          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "memory", "memoryLimitBytes", batchTime))
 
           kubePerfEventStream = MultiEventStream.new
 
           containerMetricDataItems.each do |record|
-            record['DataType'] = "LINUX_PERF_BLOB"
-            record['IPName'] = "LogManagement"
+            record["DataType"] = "LINUX_PERF_BLOB"
+            record["IPName"] = "LogManagement"
             kubePerfEventStream.add(emitTime, record) if record
-            #router.emit(@tag, time, record) if record  
+            #router.emit(@tag, time, record) if record
           end
           #end
           router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
-          
         rescue => errorStr
           $log.warn "Failed in parse_and_emit_record for KubePerf from in_kube_podinventory : #{errorStr}"
           $log.debug_backtrace(errorStr.backtrace)
@@ -493,19 +541,9 @@ def parse_and_emit_records(podInventory, serviceList,  batchTime = Time.utc.iso8
         end
         #:optimize:end kubeservices merge
 
-        if telemetryFlush == true
-          telemetryProperties = {}
-          telemetryProperties["Computer"] = @@hostName
-          ApplicationInsightsUtility.sendCustomEvent("KubePodInventoryHeartBeatEvent", telemetryProperties)
-          ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory["items"].length, {})
-          telemetryProperties["ControllerData"] = controllerData.to_json
-          ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length, telemetryProperties)
-          if winContainerCount > 0
-            telemetryProperties["ClusterWideWindowsContainersCount"] = winContainerCount
-            ApplicationInsightsUtility.sendCustomEvent("WindowsContainerInventoryEvent", telemetryProperties)
-          end
-          @@podTelemetryTimeTracker = DateTime.now.to_time.to_i
-        end
+        #Updating value for AppInsights telemetry
+        @podCount += podInventory["items"].length
+
         @@istestvar = ENV["ISTEST"]
         if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0)
           $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")

From 5dc523f96a91f43585f7642895d157c19017c85a Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Mon, 18 Nov 2019 15:32:33 -0800
Subject: [PATCH 15/26] use polltime for metrics and disable out_forward for
 health

---
 installer/conf/container.conf                 | 34 ++++++-------
 .../code/plugin/CAdvisorMetricsAPIClient.rb   | 51 ++++++++++---------
 source/code/plugin/in_cadvisor_perf.rb        |  6 ++-
 source/code/plugin/in_win_cadvisor_perf.rb    |  2 +-
 4 files changed, 48 insertions(+), 45 deletions(-)

diff --git a/installer/conf/container.conf b/installer/conf/container.conf
index e1877a576..be6f83746 100644
--- a/installer/conf/container.conf
+++ b/installer/conf/container.conf
@@ -72,25 +72,25 @@
 </match>
 
 
-<match kubehealth.DaemonSet**>
-  @type health_forward
-  send_timeout 60s
-  recover_wait 10s
-  hard_timeout 60s
-  heartbeat_type tcp
-  skip_network_error_at_init true
-  expire_dns_cache 600s
+#<match kubehealth.DaemonSet**>
+#  @type health_forward
+#  send_timeout 60s
+#  recover_wait 10s
+#  hard_timeout 60s
+#  heartbeat_type tcp
+#  skip_network_error_at_init true
+#  expire_dns_cache 600s
 
-  <server>
-    host "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_HOST']}"
-    port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
-  </server>
+#  <server>
+#    host "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_HOST']}"
+#    port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
+#  </server>
 
-  <secondary>
-    @type file
-    path %STATE_DIR_WS%/fluent_forward_failed.buffer
-  </secondary>
-</match>
+#  <secondary>
+#    @type file
+#    path %STATE_DIR_WS%/fluent_forward_failed.buffer
+#  </secondary>
+#</match>
 
 <match mdm.cadvisorperf**>
   type out_mdm
diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb
index 7b13fb736..be61b8b8f 100644
--- a/source/code/plugin/CAdvisorMetricsAPIClient.rb
+++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb
@@ -8,6 +8,7 @@ class CAdvisorMetricsAPIClient
   require "net/https"
   require "uri"
   require "date"
+  require "time"
 
   require_relative "oms_common"
   require_relative "KubernetesApiClient"
@@ -103,7 +104,7 @@ def getCAdvisorUri(winNode)
       end
     end
 
-    def getMetrics(winNode = nil)
+    def getMetrics(winNode: nil, metricTime: Time.now.utc.iso8601 )
       metricDataItems = []
       begin
         cAdvisorStats = getSummaryStatsFromCAdvisor(winNode)
@@ -122,27 +123,27 @@ def getMetrics(winNode = nil)
           operatingSystem = "Linux"
         end
         if !metricInfo.nil?
-          metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "workingSetBytes", "memoryWorkingSetBytes"))
-          metricDataItems.concat(getContainerStartTimeMetricItems(metricInfo, hostName, "restartTimeEpoch"))
+          metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "workingSetBytes", "memoryWorkingSetBytes", metricTime))
+          metricDataItems.concat(getContainerStartTimeMetricItems(metricInfo, hostName, "restartTimeEpoch", metricTime))
 
           if operatingSystem == "Linux"
-            metricDataItems.concat(getContainerCpuMetricItems(metricInfo, hostName, "usageNanoCores", "cpuUsageNanoCores"))
-            metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "rssBytes", "memoryRssBytes"))
-            metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "rssBytes", "memoryRssBytes"))
+            metricDataItems.concat(getContainerCpuMetricItems(metricInfo, hostName, "usageNanoCores", "cpuUsageNanoCores", metricTime))
+            metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "rssBytes", "memoryRssBytes", metricTime))
+            metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "rssBytes", "memoryRssBytes", metricTime))
           elsif operatingSystem == "Windows"
-            containerCpuUsageNanoSecondsRate = getContainerCpuMetricItemRate(metricInfo, hostName, "usageCoreNanoSeconds", "cpuUsageNanoCores")
+            containerCpuUsageNanoSecondsRate = getContainerCpuMetricItemRate(metricInfo, hostName, "usageCoreNanoSeconds", "cpuUsageNanoCores", metricTime)
             if containerCpuUsageNanoSecondsRate && !containerCpuUsageNanoSecondsRate.empty? && !containerCpuUsageNanoSecondsRate.nil?
               metricDataItems.concat(containerCpuUsageNanoSecondsRate)
             end
           end
 
-          cpuUsageNanoSecondsRate = getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", "cpuUsageNanoCores", operatingSystem)
+          cpuUsageNanoSecondsRate = getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", "cpuUsageNanoCores", operatingSystem, metricTime)
           if cpuUsageNanoSecondsRate && !cpuUsageNanoSecondsRate.empty? && !cpuUsageNanoSecondsRate.nil?
             metricDataItems.push(cpuUsageNanoSecondsRate)
           end
-          metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "workingSetBytes", "memoryWorkingSetBytes"))
+          metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "workingSetBytes", "memoryWorkingSetBytes", metricTime))
 
-          metricDataItems.push(getNodeLastRebootTimeMetric(metricInfo, hostName, "restartTimeEpoch"))
+          metricDataItems.push(getNodeLastRebootTimeMetric(metricInfo, hostName, "restartTimeEpoch", metricTime))
 
           # Disabling networkRxRate and networkTxRate since we dont use it as of now.
           #metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "rxBytes", "networkRxBytes"))
@@ -165,7 +166,7 @@ def getMetrics(winNode = nil)
       return metricDataItems
     end
 
-    def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn)
+    def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn, metricPollTime)
       metricItems = []
       clusterId = KubernetesApiClient.getClusterId
       timeDifference = (DateTime.now.to_time.to_i - @@telemetryCpuMetricTimeTracker).abs
@@ -182,7 +183,7 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met
               #cpu metric
               containerName = container["name"]
               metricValue = container["cpu"][cpuMetricNameToCollect]
-              metricTime = container["cpu"]["time"]
+              metricTime = metricPollTime #container["cpu"]["time"]
               metricItem = {}
               metricItem["DataItems"] = []
 
@@ -273,7 +274,7 @@ def resetWinContainerIdCache
     end
 
     # usageNanoCores doesnt exist for windows nodes. Hence need to compute this from usageCoreNanoSeconds
-    def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn)
+    def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn, metricPollTime)
       metricItems = []
       clusterId = KubernetesApiClient.getClusterId
       timeDifference = (DateTime.now.to_time.to_i - @@telemetryCpuMetricTimeTracker).abs
@@ -293,7 +294,7 @@ def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect,
               containerCount += 1
               containerName = container["name"]
               metricValue = container["cpu"][cpuMetricNameToCollect]
-              metricTime = container["cpu"]["time"]
+              metricTime = metricPollTime #container["cpu"]["time"]
               metricItem = {}
               metricItem["DataItems"] = []
 
@@ -367,7 +368,7 @@ def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect,
       return metricItems
     end
 
-    def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollect, metricNametoReturn)
+    def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollect, metricNametoReturn, metricPollTime)
       metricItems = []
       clusterId = KubernetesApiClient.getClusterId
       timeDifference = (DateTime.now.to_time.to_i - @@telemetryMemoryMetricTimeTracker).abs
@@ -382,7 +383,7 @@ def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollec
             pod["containers"].each do |container|
               containerName = container["name"]
               metricValue = container["memory"][memoryMetricNameToCollect]
-              metricTime = container["memory"]["time"]
+              metricTime = metricPollTime #container["memory"]["time"]
 
               metricItem = {}
               metricItem["DataItems"] = []
@@ -432,7 +433,7 @@ def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollec
       return metricItems
     end
 
-    def getNodeMetricItem(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn)
+    def getNodeMetricItem(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn, metricPollTime)
       metricItem = {}
       clusterId = KubernetesApiClient.getClusterId
       begin
@@ -442,7 +443,7 @@ def getNodeMetricItem(metricJSON, hostName, metricCategory, metricNameToCollect,
 
         if !node[metricCategory].nil?
           metricValue = node[metricCategory][metricNameToCollect]
-          metricTime = node[metricCategory]["time"]
+          metricTime = metricPollTime #node[metricCategory]["time"]
 
           metricItem["DataItems"] = []
 
@@ -468,7 +469,7 @@ def getNodeMetricItem(metricJSON, hostName, metricCategory, metricNameToCollect,
       return metricItem
     end
 
-    def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn, operatingSystem)
+    def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn, operatingSystem, metricPollTime)
       metricItem = {}
       clusterId = KubernetesApiClient.getClusterId
       begin
@@ -478,7 +479,7 @@ def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToColl
 
         if !node[metricCategory].nil?
           metricValue = node[metricCategory][metricNameToCollect]
-          metricTime = node[metricCategory]["time"]
+          metricTime = metricPollTime #node[metricCategory]["time"]
 
           #   if !(metricNameToCollect == "rxBytes" || metricNameToCollect == "txBytes" || metricNameToCollect == "usageCoreNanoSeconds")
           #     @Log.warn("getNodeMetricItemRate : rateMetric is supported only for rxBytes, txBytes & usageCoreNanoSeconds and not for #{metricNameToCollect}")
@@ -585,7 +586,7 @@ def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToColl
       return metricItem
     end
 
-    def getNodeLastRebootTimeMetric(metricJSON, hostName, metricNametoReturn)
+    def getNodeLastRebootTimeMetric(metricJSON, hostName, metricNametoReturn, metricPollTime)
       metricItem = {}
       clusterId = KubernetesApiClient.getClusterId
 
@@ -595,7 +596,7 @@ def getNodeLastRebootTimeMetric(metricJSON, hostName, metricNametoReturn)
         nodeName = node["nodeName"]
 
         metricValue = node["startTime"]
-        metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z
+        metricTime = metricPollTime #Time.now.utc.iso8601 #2018-01-30T19:36:14Z
 
         metricItem["DataItems"] = []
 
@@ -621,10 +622,10 @@ def getNodeLastRebootTimeMetric(metricJSON, hostName, metricNametoReturn)
       return metricItem
     end
 
-    def getContainerStartTimeMetricItems(metricJSON, hostName, metricNametoReturn)
+    def getContainerStartTimeMetricItems(metricJSON, hostName, metricNametoReturn, metricPollTime)
       metricItems = []
       clusterId = KubernetesApiClient.getClusterId
-      currentTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z
+      #currentTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z
       begin
         metricInfo = metricJSON
         metricInfo["pods"].each do |pod|
@@ -633,7 +634,7 @@ def getContainerStartTimeMetricItems(metricJSON, hostName, metricNametoReturn)
             pod["containers"].each do |container|
               containerName = container["name"]
               metricValue = container["startTime"]
-              metricTime = currentTime
+              metricTime = metricPollTime #currentTime
 
               metricItem = {}
               metricItem["DataItems"] = []
diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb
index 9845671cc..96aa66aa1 100644
--- a/source/code/plugin/in_cadvisor_perf.rb
+++ b/source/code/plugin/in_cadvisor_perf.rb
@@ -47,10 +47,12 @@ def shutdown
     end
 
     def enumerate()
-      time = Time.now.to_f
+      currentTime = Time.now
+      time = currentTime.to_f
+      batchTime = currentTime.utc.iso8601
       begin
         eventStream = MultiEventStream.new
-        metricData = CAdvisorMetricsAPIClient.getMetrics()
+        metricData = CAdvisorMetricsAPIClient.getMetrics(winNode: nil, metricTime: batchTime )
         metricData.each do |record|
           record["DataType"] = "LINUX_PERF_BLOB"
           record["IPName"] = "LogManagement"
diff --git a/source/code/plugin/in_win_cadvisor_perf.rb b/source/code/plugin/in_win_cadvisor_perf.rb
index 54183bbc0..695a686cf 100644
--- a/source/code/plugin/in_win_cadvisor_perf.rb
+++ b/source/code/plugin/in_win_cadvisor_perf.rb
@@ -67,7 +67,7 @@ def enumerate()
           @@winNodeQueryTimeTracker = DateTime.now.to_time.to_i
         end
         @@winNodes.each do |winNode|
-          metricData = CAdvisorMetricsAPIClient.getMetrics(winNode)
+          metricData = CAdvisorMetricsAPIClient.getMetrics(winNode: winNode, metricTime: Time.now.utc.iso8601)
           metricData.each do |record|
             if !record.empty?
               record["DataType"] = "LINUX_PERF_BLOB"

From 1d1695b1eb60af7197447433c1c13282e4920ee9 Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Tue, 19 Nov 2019 13:35:56 -0800
Subject: [PATCH 16/26] enable mdm & health

---
 installer/conf/container.conf | 62 +++++++++++++++++------------------
 installer/conf/kube.conf      | 54 +++++++++++++++---------------
 2 files changed, 58 insertions(+), 58 deletions(-)

diff --git a/installer/conf/container.conf b/installer/conf/container.conf
index be6f83746..33e743250 100644
--- a/installer/conf/container.conf
+++ b/installer/conf/container.conf
@@ -23,23 +23,23 @@
   log_level debug
 </source>
 
-#<filter kubehealth.DaemonSet.Node**>
-#  type filter_cadvisor_health_node
-#  log_level debug
-#</filter>
+<filter kubehealth.DaemonSet.Node**>
+  type filter_cadvisor_health_node
+  log_level debug
+</filter>
 
-#<filter kubehealth.DaemonSet.Container**>
-#  type filter_cadvisor_health_container
-#  log_level debug
-#</filter>
+<filter kubehealth.DaemonSet.Container**>
+  type filter_cadvisor_health_container
+  log_level debug
+</filter>
 
 #custom_metrics_mdm filter plugin
-#<filter mdm.cadvisorperf**>
-#  type filter_cadvisor2mdm
-#  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
-#  metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes
-#  log_level info
-#</filter>
+<filter mdm.cadvisorperf**>
+  type filter_cadvisor2mdm
+  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
+  metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes
+  log_level info
+</filter>
 
 <match oms.containerinsights.containerinventory**>
   type out_oms
@@ -72,25 +72,25 @@
 </match>
 
 
-#<match kubehealth.DaemonSet**>
-#  @type health_forward
-#  send_timeout 60s
-#  recover_wait 10s
-#  hard_timeout 60s
-#  heartbeat_type tcp
-#  skip_network_error_at_init true
-#  expire_dns_cache 600s
+<match kubehealth.DaemonSet**>
+  @type health_forward
+  send_timeout 60s
+  recover_wait 10s
+  hard_timeout 60s
+  heartbeat_type tcp
+  skip_network_error_at_init true
+  expire_dns_cache 600s
 
-#  <server>
-#    host "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_HOST']}"
-#    port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
-#  </server>
+  <server>
+    host "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_HOST']}"
+    port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
+  </server>
 
-#  <secondary>
-#    @type file
-#    path %STATE_DIR_WS%/fluent_forward_failed.buffer
-#  </secondary>
-#</match>
+  <secondary>
+    @type file
+    path %STATE_DIR_WS%/fluent_forward_failed.buffer
+  </secondary>
+</match>
 
 <match mdm.cadvisorperf**>
   type out_mdm
diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf
index f8924f643..2a434fda1 100644
--- a/installer/conf/kube.conf
+++ b/installer/conf/kube.conf
@@ -1,9 +1,9 @@
 # Fluentd config file for OMS Docker - cluster components (kubeAPI)
-#<source>
-#  type forward
-#  port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
-#  bind 0.0.0.0
-#</source>
+<source>
+  type forward
+  port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
+  bind 0.0.0.0
+</source>
 
 #Kubernetes pod inventory
 <source>
@@ -18,7 +18,7 @@
 	type kubeevents
 	tag oms.containerinsights.KubeEvents
 	run_interval 60
-    log_level debug
+  log_level debug
 </source>
 
 #Kubernetes Nodes
@@ -30,12 +30,12 @@
 </source>
 
 #Kubernetes health
-#<source>
-#  type kubehealth
-#  tag kubehealth.ReplicaSet
-#  run_interval 60
-#  log_level debug
-#</source>
+<source>
+  type kubehealth
+  tag kubehealth.ReplicaSet
+  run_interval 60
+  log_level debug
+</source>
 
 #cadvisor perf- Windows nodes
 <source>
@@ -45,23 +45,23 @@
   log_level debug
 </source>
 
-#<filter mdm.kubepodinventory** mdm.kubenodeinventory**>
-#  type filter_inventory2mdm
-#  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
-#  log_level info
-#</filter>
+<filter mdm.kubepodinventory** mdm.kubenodeinventory**>
+  type filter_inventory2mdm
+  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
+  log_level info
+</filter>
 
 #custom_metrics_mdm filter plugin for perf data from windows nodes
-#<filter mdm.cadvisorperf**>
-#  type filter_cadvisor2mdm
-#  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
-#  metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes
-#  log_level info
-#</filter>
-
-#<filter kubehealth**>
-#  type filter_health_model_builder
-#</filter>
+<filter mdm.cadvisorperf**>
+  type filter_cadvisor2mdm
+  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
+  metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes
+  log_level info
+</filter>
+
+<filter kubehealth**>
+  type filter_health_model_builder
+</filter>
 <match mdm.cadvisorperf**>
   type out_mdm
   log_level debug

From 5cad5530081ca6671c1cd219a81bdeedc7637299 Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Thu, 21 Nov 2019 18:58:22 -0800
Subject: [PATCH 17/26] few optimizations

---
 .../code/plugin/health/aggregate_monitor.rb   |  2 +-
 .../health_container_cpu_memory_aggregator.rb |  6 +-
 source/code/plugin/in_kube_health.rb          | 68 ++++++++++---------
 3 files changed, 40 insertions(+), 36 deletions(-)

diff --git a/source/code/plugin/health/aggregate_monitor.rb b/source/code/plugin/health/aggregate_monitor.rb
index 00ee9aecd..c0c13c525 100644
--- a/source/code/plugin/health/aggregate_monitor.rb
+++ b/source/code/plugin/health/aggregate_monitor.rb
@@ -184,7 +184,7 @@ def sort_filter_member_monitors(monitor_set)
             member_monitors.push(member_monitor)
         }
 
-	filtered = member_monitors.select{|monitor| monitor.state != MonitorState::NONE}
+        filtered = member_monitors.keep_if{|monitor| monitor.state != MonitorState::NONE}
         sorted = filtered.sort_by{ |monitor| [@@sort_key_order[monitor.state]] }
 
         return sorted
diff --git a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb
index 29ac91bde..e93c66c14 100644
--- a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb
+++ b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb
@@ -64,8 +64,8 @@ def initialize(resources, provider)
         def dedupe_records(container_records)
             cpu_deduped_instances = {}
             memory_deduped_instances = {}
-            container_records = container_records.select{|record| record['CounterName'] == @@memory_counter_name || record['CounterName'] == @@cpu_counter_name}
-
+            container_records = container_records.keep_if{|record| record['CounterName'] == @@memory_counter_name || record['CounterName'] == @@cpu_counter_name}
+           
             container_records.each do |record|
                 begin
                     instance_name = record["InstanceName"]
@@ -98,7 +98,7 @@ def dedupe_records(container_records)
 
         def aggregate(container_records)
             #filter and select only cpuUsageNanoCores and memoryRssBytes
-            container_records = container_records.select{|record| record['CounterName'] == @@memory_counter_name || record['CounterName'] == @@cpu_counter_name}
+            container_records = container_records.keep_if{|record| record['CounterName'] == @@memory_counter_name || record['CounterName'] == @@cpu_counter_name}
             # poduid lookup has poduid/cname --> workload_name, namespace, cpu_limit, memory limit mapping
             # from the container records, extract the poduid/cname, get the values from poduid_lookup, and aggregate based on namespace_workload_cname
             container_records.each do |record|
diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb
index 40ed7a3c9..410eda540 100644
--- a/source/code/plugin/in_kube_health.rb
+++ b/source/code/plugin/in_kube_health.rb
@@ -22,12 +22,14 @@ def initialize
         super
         require "yaml"
         require 'yajl/json_gem'
+        require "yajl"
         require "time"
 
         @@cluster_id = KubernetesApiClient.getClusterId
         @resources = HealthKubernetesResources.instance
         @provider = HealthMonitorProvider.new(@@cluster_id, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path)
         @@ApiGroupApps = "apps"
+        @@KubeInfraNamespace = "kube-system"
       rescue => e
         ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"})
       end
@@ -84,10 +86,11 @@ def enumerate
         #HealthMonitorUtils.refresh_kubernetes_api_data(@@hmlog, nil)
         # we do this so that if the call fails, we get a response code/header etc.
         node_inventory_response = KubernetesApiClient.getKubeResourceInfo("nodes")
-        node_inventory = JSON.parse(node_inventory_response.body)
-        pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods")
-        pod_inventory = JSON.parse(pod_inventory_response.body)
-        replicaset_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("replicasets", api_group: @@ApiGroupApps).body)
+        node_inventory = Yajl::Parser.parse(StringIO.new(node_inventory_response.body))
+        pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods?fieldSelector=metadata.namespace%3D#{@@KubeInfraNamespace}")
+        pod_inventory = Yajl::Parser.parse(StringIO.new(pod_inventory_response.body))
+        replicaset_inventory_response = KubernetesApiClient.getKubeResourceInfo("replicasets?fieldSelector=metadata.namespace%3D#{@@KubeInfraNamespace}", api_group: @@ApiGroupApps)
+        replicaset_inventory = Yajl::Parser.parse(StringIO.new(replicaset_inventory_response.body))
 
         @resources.node_inventory = node_inventory
         @resources.pod_inventory = pod_inventory
@@ -109,8 +112,8 @@ def enumerate
           health_monitor_records.push(record) if record
           pods_ready_hash = HealthMonitorUtils.get_pods_ready_hash(@resources)
 
-          system_pods = pods_ready_hash.select { |k, v| v["namespace"] == "kube-system" }
-          workload_pods = pods_ready_hash.select { |k, v| v["namespace"] != "kube-system" }
+          system_pods = pods_ready_hash.keep_if { |k, v| v["namespace"] == @@KubeInfraNamespace }
+          workload_pods = Hash.new # { |k, v| v["namespace"] != @@KubeInfraNamespace }
 
           system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, MonitorId::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID)
           system_pods_ready_percentage_records.each do |record|
@@ -226,28 +229,28 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id)
       hmlog = HealthMonitorUtils.get_log_handle
 
       records = []
-      pods_hash.keys.each do |key|
-        workload_name = key
-        total_pods = pods_hash[workload_name]["totalPods"]
-        pods_ready = pods_hash[workload_name]["podsReady"]
-        namespace = pods_hash[workload_name]["namespace"]
-        workload_kind = pods_hash[workload_name]["kind"]
-        percent = pods_ready / total_pods * 100
-        timestamp = Time.now.utc.iso8601
-
-        state = HealthMonitorUtils.compute_percentage_state(percent, monitor_config)
-        health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "workload_name" => workload_name, "namespace" => namespace, "workload_kind" => workload_kind}}
-        monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(config_monitor_id, [@@cluster_id, namespace, workload_name])
-        health_record = {}
-        time_now = Time.now.utc.iso8601
-        health_record[HealthMonitorRecordFields::MONITOR_ID] = config_monitor_id
-        health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id
-        health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record
-        health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now
-        health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now
-        health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id
-        records.push(health_record)
-      end
+        pods_hash.keys.each do |key|
+          workload_name = key
+          total_pods = pods_hash[workload_name]["totalPods"]
+          pods_ready = pods_hash[workload_name]["podsReady"]
+          namespace = pods_hash[workload_name]["namespace"]
+          workload_kind = pods_hash[workload_name]["kind"]
+          percent = pods_ready / total_pods * 100
+          timestamp = Time.now.utc.iso8601
+
+          state = HealthMonitorUtils.compute_percentage_state(percent, monitor_config)
+          health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "workload_name" => workload_name, "namespace" => namespace, "workload_kind" => workload_kind}}
+          monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(config_monitor_id, [@@cluster_id, namespace, workload_name])
+          health_record = {}
+          time_now = Time.now.utc.iso8601
+          health_record[HealthMonitorRecordFields::MONITOR_ID] = config_monitor_id
+          health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id
+          health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record
+          health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now
+          health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now
+          health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id
+          records.push(health_record)
+        end
       #@@hmlog.info "Successfully processed pods_ready_percentage for #{config_monitor_id} #{records.size}"
       return records
     end
@@ -297,10 +300,11 @@ def process_node_condition_monitor(node_inventory)
     def initialize_inventory
         #this is required because there are other components, like the container cpu memory aggregator, that depends on the mapping being initialized
         node_inventory_response = KubernetesApiClient.getKubeResourceInfo("nodes")
-        node_inventory = JSON.parse(node_inventory_response.body)
-        pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods")
-        pod_inventory = JSON.parse(pod_inventory_response.body)
-        replicaset_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("replicasets", api_group: @@ApiGroupApps).body)
+        node_inventory = Yajl::Parser.parse(StringIO.new(node_inventory_response.body))
+        pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods?fieldSelector=metadata.namespace%3D#{@@KubeInfraNamespace}")
+        pod_inventory = Yajl::Parser.parse(StringIO.new(pod_inventory_response.body))
+        replicaset_inventory_response = KubernetesApiClient.getKubeResourceInfo("replicasets?fieldSelector=metadata.namespace%3D#{@@KubeInfraNamespace}", api_group: @@ApiGroupApps)
+        replicaset_inventory = Yajl::Parser.parse(StringIO.new(replicaset_inventory_response.body))
 
         @resources.node_inventory = node_inventory
         @resources.pod_inventory = pod_inventory

From 8a219187d0665eee8f90f441ed9193e26d7c54a8 Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Fri, 22 Nov 2019 15:07:27 -0800
Subject: [PATCH 18/26] do not remove time of command make kube.conf same as
 scale tested config

---
 installer/conf/kube.conf          | 429 +++++++++++++++---------------
 source/code/go/src/plugins/oms.go |   2 +-
 2 files changed, 217 insertions(+), 214 deletions(-)

diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf
index 2a434fda1..f5dc21567 100644
--- a/installer/conf/kube.conf
+++ b/installer/conf/kube.conf
@@ -1,214 +1,217 @@
 # Fluentd config file for OMS Docker - cluster components (kubeAPI)
-<source>
-  type forward
-  port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
-  bind 0.0.0.0
-</source>
-
-#Kubernetes pod inventory
-<source>
-	type kubepodinventory
-	tag oms.containerinsights.KubePodInventory
-	run_interval 60
-  log_level debug
-</source>
-
-#Kubernetes events
-<source>
-	type kubeevents
-	tag oms.containerinsights.KubeEvents
-	run_interval 60
-  log_level debug
-</source>
-
-#Kubernetes Nodes
-<source>
-	type kubenodeinventory
-	tag oms.containerinsights.KubeNodeInventory
-	run_interval 60
-  log_level debug
-</source>
-
-#Kubernetes health
-<source>
-  type kubehealth
-  tag kubehealth.ReplicaSet
-  run_interval 60
-  log_level debug
-</source>
-
-#cadvisor perf- Windows nodes
-<source>
-	type wincadvisorperf
-	tag oms.api.wincadvisorperf
-	run_interval 60
-  log_level debug
-</source>
-
-<filter mdm.kubepodinventory** mdm.kubenodeinventory**>
-  type filter_inventory2mdm
-  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
-  log_level info
-</filter>
-
-#custom_metrics_mdm filter plugin for perf data from windows nodes
-<filter mdm.cadvisorperf**>
-  type filter_cadvisor2mdm
-  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
-  metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes
-  log_level info
-</filter>
-
-<filter kubehealth**>
-  type filter_health_model_builder
-</filter>
-<match mdm.cadvisorperf**>
-  type out_mdm
-  log_level debug
-  num_threads 5
-  buffer_chunk_limit 20m
-  buffer_type file
-  buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer
-  buffer_queue_limit 20
-  buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
-  retry_limit 10
-  retry_wait 30s
-  max_retry_wait 9m
-  retry_mdm_post_wait_minutes 60
-</match>
-
-<match oms.containerinsights.KubePodInventory**>
-  type out_oms
-  log_level debug
-  num_threads 5
-  buffer_chunk_limit 20m
-  buffer_type file
-  buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer
-  buffer_queue_limit 20
-  buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
-  retry_limit 10
-  retry_wait 30s
-  max_retry_wait 9m
-</match>
-
-<match oms.containerinsights.KubeEvents**>
-	type out_oms
-	log_level debug
-	num_threads 5
-	buffer_chunk_limit 20m
-	buffer_type file
-	buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer
-	buffer_queue_limit 20
-	buffer_queue_full_action drop_oldest_chunk
-	flush_interval 20s
-	retry_limit 10
-	retry_wait 30s
-	max_retry_wait 9m
-</match>
-
-<match oms.containerinsights.KubeServices**>
-  type out_oms
-  log_level debug
-  num_threads 2
-  buffer_chunk_limit 20m
-  buffer_type file
-  buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer
-  buffer_queue_limit 20
-  buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
-  retry_limit 10
-  retry_wait 30s
-  max_retry_wait 9m
-</match>
-
-<match oms.containerinsights.KubeNodeInventory**>
-  type out_oms
-  log_level debug
-  num_threads 5
-  buffer_chunk_limit 20m
-  buffer_type file
-  buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer
-  buffer_queue_limit 20
-  buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
-  retry_limit 10
-  retry_wait 30s
-  max_retry_wait 9m
-</match>
-
-<match oms.containerinsights.ContainerNodeInventory**>
-  type out_oms
-  log_level debug
-  num_threads 3
-  buffer_chunk_limit 40m
-  buffer_type file
-  buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer
-  buffer_queue_limit 40
-  flush_interval 20s
-  retry_limit 10
-  retry_wait 30s
-  max_retry_wait 9m
-</match>
-
-<match oms.api.KubePerf**>
-  type out_oms
-  log_level debug
-  num_threads 5
-  buffer_chunk_limit 20m
-  buffer_type file
-  buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer
-  buffer_queue_limit 20
-  buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
-  retry_limit 10
-  retry_wait 30s
-  max_retry_wait 9m
-</match>
-
-<match mdm.kubepodinventory** mdm.kubenodeinventory** >
-  type out_mdm
-  log_level debug
-  num_threads 5
-  buffer_chunk_limit 20m
-  buffer_type file
-  buffer_path %STATE_DIR_WS%/out_mdm_*.buffer
-  buffer_queue_limit 20
-  buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
-  retry_limit 10
-  retry_wait 30s
-  max_retry_wait 9m
-  retry_mdm_post_wait_minutes 60
-</match>
-
-<match oms.api.wincadvisorperf**>
-  type out_oms
-  log_level debug
-  num_threads 5
-  buffer_chunk_limit 20m
-  buffer_type file
-  buffer_path %STATE_DIR_WS%/out_oms_api_wincadvisorperf*.buffer
-  buffer_queue_limit 20
-  buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
-  retry_limit 10
-  retry_wait 30s
-  max_retry_wait 9m
-</match>
-
-<match kubehealth.Signals**>
-  type out_oms
-  log_level debug
-  num_threads 3
-  buffer_chunk_limit 20m
-  buffer_type file
-  buffer_path %STATE_DIR_WS%/out_oms_kubehealth*.buffer
-  buffer_queue_limit 20
-  buffer_queue_full_action drop_oldest_chunk
-  flush_interval 20s
-  retry_limit 10
-  retry_wait 30s
-  max_retry_wait 9m
-</match>
\ No newline at end of file
+    #fluent forward plugin
+    <source>
+     type forward
+     port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
+     bind 0.0.0.0
+    </source>
+
+    #Kubernetes pod inventory
+    <source>
+     type kubepodinventory
+     tag oms.containerinsights.KubePodInventory
+     run_interval 60
+     log_level debug
+    </source>
+
+    #Kubernetes events
+    <source>
+     type kubeevents
+     tag oms.containerinsights.KubeEvents
+     run_interval 60
+     log_level debug
+     </source>
+
+    #Kubernetes Nodes
+    <source>
+     type kubenodeinventory
+     tag oms.containerinsights.KubeNodeInventory
+     run_interval 60
+     log_level debug
+    </source>
+
+    #Kubernetes health
+    <source>
+     type kubehealth
+     tag kubehealth.ReplicaSet
+     run_interval 60
+     log_level debug
+    </source>
+
+    #cadvisor perf- Windows nodes
+    <source>
+     type wincadvisorperf
+     tag oms.api.wincadvisorperf
+     run_interval 60
+     log_level debug
+    </source>
+
+    <filter mdm.kubepodinventory** mdm.kubenodeinventory**>
+     type filter_inventory2mdm
+     custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
+     log_level info
+    </filter>
+
+    #custom_metrics_mdm filter plugin for perf data from windows nodes
+    <filter mdm.cadvisorperf**>
+     type filter_cadvisor2mdm
+     custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral
+     metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes
+     log_level info
+    </filter>
+
+    #health model aggregation filter
+    <filter kubehealth**>
+     type filter_health_model_builder
+    </filter>
+
+    <match oms.containerinsights.KubePodInventory**>
+     type out_oms
+     log_level debug
+     num_threads 5
+     buffer_chunk_limit 20m
+     buffer_type file
+     buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer
+     buffer_queue_limit 20
+     buffer_queue_full_action drop_oldest_chunk
+     flush_interval 20s
+     retry_limit 10
+     retry_wait 30s
+     max_retry_wait 9m
+    </match>
+
+    <match oms.containerinsights.KubeEvents**>
+     type out_oms
+     log_level debug
+     num_threads 5
+     buffer_chunk_limit 5m
+     buffer_type file
+     buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer
+     buffer_queue_limit 10
+     buffer_queue_full_action drop_oldest_chunk
+     flush_interval 20s
+     retry_limit 10
+     retry_wait 30s
+     max_retry_wait 9m
+    </match>
+
+    <match oms.containerinsights.KubeServices**>
+     type out_oms
+     log_level debug
+     num_threads 2
+     buffer_chunk_limit 20m
+     buffer_type file
+     buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer
+     buffer_queue_limit 20
+     buffer_queue_full_action drop_oldest_chunk
+     flush_interval 20s
+     retry_limit 10
+     retry_wait 30s
+     max_retry_wait 9m
+    </match>
+
+    <match oms.containerinsights.KubeNodeInventory**>
+     type out_oms
+     log_level debug
+     num_threads 5
+     buffer_chunk_limit 20m
+     buffer_type file
+     buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer
+     buffer_queue_limit 20
+     buffer_queue_full_action drop_oldest_chunk
+     flush_interval 20s
+     retry_limit 10
+     retry_wait 30s
+     max_retry_wait 9m
+    </match>
+
+    <match oms.containerinsights.ContainerNodeInventory**>
+     type out_oms
+     log_level debug
+     num_threads 3
+     buffer_chunk_limit 20m
+     buffer_type file
+     buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer
+     buffer_queue_limit 20
+     flush_interval 20s
+     retry_limit 10
+     retry_wait 30s
+     max_retry_wait 9m
+    </match>
+
+    <match oms.api.KubePerf**>	
+     type out_oms
+     log_level debug
+     num_threads 5
+     buffer_chunk_limit 20m
+     buffer_type file
+     buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer
+     buffer_queue_limit 20
+     buffer_queue_full_action drop_oldest_chunk
+     flush_interval 20s
+     retry_limit 10
+     retry_wait 30s
+     max_retry_wait 9m
+    </match>
+
+    <match mdm.kubepodinventory** mdm.kubenodeinventory** >
+     type out_mdm
+     log_level debug
+     num_threads 5
+     buffer_chunk_limit 20m
+     buffer_type file
+     buffer_path %STATE_DIR_WS%/out_mdm_*.buffer
+     buffer_queue_limit 20
+     buffer_queue_full_action drop_oldest_chunk
+     flush_interval 20s
+     retry_limit 10
+     retry_wait 30s
+     max_retry_wait 9m
+     retry_mdm_post_wait_minutes 60
+    </match>
+
+    <match oms.api.wincadvisorperf**>
+     type out_oms
+     log_level debug
+     num_threads 5
+     buffer_chunk_limit 20m
+     buffer_type file
+     buffer_path %STATE_DIR_WS%/out_oms_api_wincadvisorperf*.buffer
+     buffer_queue_limit 20
+     buffer_queue_full_action drop_oldest_chunk
+     flush_interval 20s
+     retry_limit 10
+     retry_wait 20s
+     max_retry_wait 9m
+    </match>
+
+    <match mdm.cadvisorperf**>
+     type out_mdm
+     log_level debug
+     num_threads 5
+     buffer_chunk_limit 20m
+     buffer_type file
+     buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer
+     buffer_queue_limit 20
+     buffer_queue_full_action drop_oldest_chunk
+     flush_interval 20s
+     retry_limit 10
+     retry_wait 30s
+     max_retry_wait 9m
+     retry_mdm_post_wait_minutes 60
+    </match>
+
+    <match kubehealth.Signals**>
+     type out_oms
+     log_level debug
+     num_threads 5
+     buffer_chunk_limit 20m
+     buffer_type file
+     buffer_path %STATE_DIR_WS%/out_oms_kubehealth*.buffer
+     buffer_queue_limit 20
+     buffer_queue_full_action drop_oldest_chunk
+     flush_interval 20s
+     retry_limit 10
+     retry_wait 30s
+     max_retry_wait 9m
+    </match>
\ No newline at end of file
diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go
index d89c1c41a..d786c3704 100644
--- a/source/code/go/src/plugins/oms.go
+++ b/source/code/go/src/plugins/oms.go
@@ -767,7 +767,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int {
 				LogEntry:              stringMap["LogEntry"],
 				LogEntrySource:        stringMap["LogEntrySource"],
 				LogEntryTimeStamp:     stringMap["LogEntryTimeStamp"],
-				LogEntryTimeOfCommand: "",
+				LogEntryTimeOfCommand: start.Format(time.RFC3339),
 				SourceSystem:          stringMap["SourceSystem"],
 				Computer:              Computer,
 				Image:                 stringMap["Image"],

From bb4a0132f565a1e379e8dd5172e856d88fd60171 Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Fri, 22 Nov 2019 15:12:35 -0800
Subject: [PATCH 19/26] remove comments from container.conf

---
 installer/conf/container.conf | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/installer/conf/container.conf b/installer/conf/container.conf
index 33e743250..45fed3f6c 100644
--- a/installer/conf/container.conf
+++ b/installer/conf/container.conf
@@ -45,10 +45,8 @@
   type out_oms
   log_level debug
   num_threads 5
-  #buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_containerinventory*.buffer
-  #buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
   flush_interval 10s
   retry_limit 10
@@ -60,10 +58,8 @@
   type out_oms
   log_level debug
   num_threads 5
-  #buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_cadvisorperf*.buffer
-  #buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
   flush_interval 10s
   retry_limit 10
@@ -96,10 +92,8 @@
   type out_mdm
   log_level debug
   num_threads 5
-  #buffer_chunk_limit 20m
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer
-  #buffer_queue_limit 20
   buffer_queue_full_action drop_oldest_chunk
   flush_interval 10s
   retry_limit 10

From 0c5e68572522f653355275ff4d4f8f6afd07abac Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Fri, 22 Nov 2019 15:35:03 -0800
Subject: [PATCH 20/26] remove flush comment for ai telemetry

---
 source/code/plugin/ApplicationInsightsUtility.rb | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb
index 6bb518bac..1e25078f5 100644
--- a/source/code/plugin/ApplicationInsightsUtility.rb
+++ b/source/code/plugin/ApplicationInsightsUtility.rb
@@ -123,7 +123,6 @@ def sendHeartBeatEvent(pluginName)
         eventName = pluginName + @@HeartBeat
         if !(@@Tc.nil?)
           @@Tc.track_event eventName, :properties => @@CustomProperties
-          #@@Tc.flush
           $log.info("AppInsights Heartbeat Telemetry put successfully into the queue")
         end
       rescue => errorStr
@@ -137,7 +136,6 @@ def sendLastProcessedContainerInventoryCountMetric(pluginName, properties)
           @@Tc.track_metric "LastProcessedContainerInventoryCount", properties["ContainerCount"],
                             :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT,
                             :properties => @@CustomProperties
-          #@@Tc.flush
           $log.info("AppInsights Container Count Telemetry sput successfully into the queue")
         end
       rescue => errorStr
@@ -159,7 +157,6 @@ def sendCustomEvent(eventName, properties)
         end
         if !(@@Tc.nil?)
           @@Tc.track_event eventName, :properties => telemetryProps
-          #@@Tc.flush
           $log.info("AppInsights Custom Event #{eventName} sent successfully")
         end
       rescue => errorStr
@@ -183,7 +180,6 @@ def sendExceptionTelemetry(errorStr, properties = nil)
         end
         if !(@@Tc.nil?)
           @@Tc.track_exception errorStr, :properties => telemetryProps
-          #@@Tc.flush
           $log.info("AppInsights Exception Telemetry put successfully into the queue")
         end
       rescue => errorStr
@@ -230,7 +226,6 @@ def sendMetricTelemetry(metricName, metricValue, properties)
           @@Tc.track_metric metricName, metricValue,
                             :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT,
                             :properties => telemetryProps
-          #@@Tc.flush
           $log.info("AppInsights metric Telemetry #{metricName} put successfully into the queue")
         end
       rescue => errorStr

From 5462ad5cc7082e46999044685a8b7f14b8899f15 Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Fri, 22 Nov 2019 15:55:43 -0800
Subject: [PATCH 21/26] remove commented code lines

---
 source/code/plugin/DockerApiClient.rb      | 2 --
 source/code/plugin/in_kube_health.rb       | 2 +-
 source/code/plugin/in_kube_nodes.rb        | 1 -
 source/code/plugin/in_kube_podinventory.rb | 1 -
 4 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb
index cc0d0e722..f2828b357 100644
--- a/source/code/plugin/DockerApiClient.rb
+++ b/source/code/plugin/DockerApiClient.rb
@@ -49,8 +49,6 @@ def getResponse(request, isMultiJson, isVersion)
         end
         return nil
       ensure
-        ## REMOVE LOG BEFORE MERGE
-        $log.warn "Closing docker socket connection"
         if !socket.nil?
           socket.close
         end
diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb
index 410eda540..0eebf395b 100644
--- a/source/code/plugin/in_kube_health.rb
+++ b/source/code/plugin/in_kube_health.rb
@@ -113,7 +113,7 @@ def enumerate
           pods_ready_hash = HealthMonitorUtils.get_pods_ready_hash(@resources)
 
           system_pods = pods_ready_hash.keep_if { |k, v| v["namespace"] == @@KubeInfraNamespace }
-          workload_pods = Hash.new # { |k, v| v["namespace"] != @@KubeInfraNamespace }
+          workload_pods = Hash.new # pods_ready_hash.select{ |k, v| v["namespace"] != @@KubeInfraNamespace }
 
           system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, MonitorId::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID)
           system_pods_ready_percentage_records.each do |record|
diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb
index aa60badfc..92fece728 100644
--- a/source/code/plugin/in_kube_nodes.rb
+++ b/source/code/plugin/in_kube_nodes.rb
@@ -210,7 +210,6 @@ def enumerate
                   record['DataType'] = "LINUX_PERF_BLOB"
                   record['IPName'] = "LogManagement"
                   kubePerfEventStream.add(emitTime, record) if record
-                  #router.emit(@tag, time, record) if record 
                 end 
               #end
               router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb
index cc1ec35f8..c9ae75a03 100644
--- a/source/code/plugin/in_kube_podinventory.rb
+++ b/source/code/plugin/in_kube_podinventory.rb
@@ -499,7 +499,6 @@ def parse_and_emit_records(podInventory, serviceList, batchTime = Time.utc.iso86
             record["DataType"] = "LINUX_PERF_BLOB"
             record["IPName"] = "LogManagement"
             kubePerfEventStream.add(emitTime, record) if record
-            #router.emit(@tag, time, record) if record
           end
           #end
           router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream

From d8c3a02e160c97dc105b650035929dc3034b18ca Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Tue, 26 Nov 2019 14:49:23 -0800
Subject: [PATCH 22/26] fix config

---
 installer/conf/container.conf                 | 41 +++++++-----
 installer/conf/kube.conf                      | 64 ++++++++++---------
 .../code/plugin/ApplicationInsightsUtility.rb |  8 +--
 3 files changed, 63 insertions(+), 50 deletions(-)

diff --git a/installer/conf/container.conf b/installer/conf/container.conf
index 0967ad6b9..40f903194 100644
--- a/installer/conf/container.conf
+++ b/installer/conf/container.conf
@@ -34,12 +34,12 @@
 </filter>
 
 #custom_metrics_mdm filter plugin
-<filter mdm.cadvisorperf**>
-  type filter_cadvisor2mdm
-  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
-  metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes
-  log_level info
-</filter>
+#<filter mdm.cadvisorperf**>
+#  type filter_cadvisor2mdm
+#  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
+#  metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes
+#  log_level info
+#</filter>
 
 <match oms.containerinsights.containerinventory**>
   type out_oms
@@ -48,10 +48,11 @@
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_containerinventory*.buffer
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 10s
+  buffer_chunk_limit 4m
+  flush_interval 20s
   retry_limit 10
-  retry_wait 10s
-  max_retry_wait 9m
+  retry_wait 5s
+  max_retry_wait 5m
 </match>
 
 <match oms.api.cadvisorperf**>
@@ -61,10 +62,11 @@
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_oms_cadvisorperf*.buffer
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 10s
+  buffer_chunk_limit 4m
+  flush_interval 20s
   retry_limit 10
-  retry_wait 10s
-  max_retry_wait 9m
+  retry_wait 5s
+  max_retry_wait 5m
 </match>
 
 
@@ -76,6 +78,14 @@
   heartbeat_type tcp
   skip_network_error_at_init true
   expire_dns_cache 600s
+  buffer_queue_full_action drop_oldest_chunk
+  buffer_type file 
+  buffer_path %STATE_DIR_WS%/out_health_forward*.buffer
+  buffer_chunk_limit 3m
+  flush_interval 20s
+  retry_limit 10
+  retry_wait 5s
+  max_retry_wait 5m
 
   <server>
     host "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_HOST']}"
@@ -95,9 +105,10 @@
   buffer_type file
   buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer
   buffer_queue_full_action drop_oldest_chunk
-  flush_interval 10s
+  buffer_chunk_limit 4m
+  flush_interval 20s
   retry_limit 10
-  retry_wait 10s
-  max_retry_wait 9m
+  retry_wait 5s
+  max_retry_wait 5m
   retry_mdm_post_wait_minutes 60
 </match>
diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf
index e03ec8727..5f5837e5a 100644
--- a/installer/conf/kube.conf
+++ b/installer/conf/kube.conf
@@ -4,6 +4,8 @@
      type forward
      port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
      bind 0.0.0.0
+     chunk_size_limit 4m
+     skip_invalid_event true
     </source>
 
     #Kubernetes pod inventory
@@ -69,104 +71,104 @@
      type out_oms
      log_level debug
      num_threads 5
-     buffer_chunk_limit 20m
+     buffer_chunk_limit 4m
      buffer_type file
      buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer
      buffer_queue_limit 20
      buffer_queue_full_action drop_oldest_chunk
      flush_interval 20s
      retry_limit 10
-     retry_wait 30s
-     max_retry_wait 9m
+     retry_wait 5s
+     max_retry_wait 5m
     </match>
 
     <match oms.containerinsights.KubeEvents**>
      type out_oms
      log_level debug
      num_threads 5
-     buffer_chunk_limit 5m
+     buffer_chunk_limit 4m
      buffer_type file
      buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer
-     buffer_queue_limit 10
+     buffer_queue_limit 20
      buffer_queue_full_action drop_oldest_chunk
      flush_interval 20s
      retry_limit 10
-     retry_wait 30s
-     max_retry_wait 9m
+     retry_wait 5s
+     max_retry_wait 5m
     </match>
 
     <match oms.containerinsights.KubeServices**>
      type out_oms
      log_level debug
      num_threads 2
-     buffer_chunk_limit 20m
+     buffer_chunk_limit 4m
      buffer_type file
      buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer
      buffer_queue_limit 20
      buffer_queue_full_action drop_oldest_chunk
      flush_interval 20s
      retry_limit 10
-     retry_wait 30s
-     max_retry_wait 9m
+     retry_wait 5s
+     max_retry_wait 5m
     </match>
 
     <match oms.containerinsights.KubeNodeInventory**>
      type out_oms
      log_level debug
      num_threads 5
-     buffer_chunk_limit 20m
+     buffer_chunk_limit 4m
      buffer_type file
      buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer
      buffer_queue_limit 20
      buffer_queue_full_action drop_oldest_chunk
      flush_interval 20s
      retry_limit 10
-     retry_wait 30s
-     max_retry_wait 9m
+     retry_wait 5s
+     max_retry_wait 5m
     </match>
 
     <match oms.containerinsights.ContainerNodeInventory**>
      type out_oms
      log_level debug
      num_threads 3
-     buffer_chunk_limit 20m
+     buffer_chunk_limit 4m
      buffer_type file
      buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer
      buffer_queue_limit 20
      flush_interval 20s
      retry_limit 10
-     retry_wait 30s
-     max_retry_wait 9m
+     retry_wait 5s
+     max_retry_wait 5m
     </match>
 
     <match oms.api.KubePerf**>	
      type out_oms
      log_level debug
      num_threads 5
-     buffer_chunk_limit 20m
+     buffer_chunk_limit 4m
      buffer_type file
      buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer
      buffer_queue_limit 20
      buffer_queue_full_action drop_oldest_chunk
      flush_interval 20s
      retry_limit 10
-     retry_wait 30s
-     max_retry_wait 9m
+     retry_wait 5s
+     max_retry_wait 5m
     </match>
 
     <match mdm.kubepodinventory** mdm.kubenodeinventory** >
      type out_mdm
      log_level debug
      num_threads 5
-     buffer_chunk_limit 20m
+     buffer_chunk_limit 4m
      buffer_type file
      buffer_path %STATE_DIR_WS%/out_mdm_*.buffer
      buffer_queue_limit 20
      buffer_queue_full_action drop_oldest_chunk
      flush_interval 20s
      retry_limit 10
-     retry_wait 30s
-     max_retry_wait 9m
+     retry_wait 5s
+     max_retry_wait 5m
      retry_mdm_post_wait_minutes 60
     </match>
 
@@ -174,30 +176,30 @@
      type out_oms
      log_level debug
      num_threads 5
-     buffer_chunk_limit 20m
+     buffer_chunk_limit 4m
      buffer_type file
      buffer_path %STATE_DIR_WS%/out_oms_api_wincadvisorperf*.buffer
      buffer_queue_limit 20
      buffer_queue_full_action drop_oldest_chunk
      flush_interval 20s
      retry_limit 10
-     retry_wait 20s
-     max_retry_wait 9m
+     retry_wait 5s
+     max_retry_wait 5m
     </match>
 
     <match mdm.cadvisorperf**>
      type out_mdm
      log_level debug
      num_threads 5
-     buffer_chunk_limit 20m
+     buffer_chunk_limit 4m
      buffer_type file
      buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer
      buffer_queue_limit 20
      buffer_queue_full_action drop_oldest_chunk
      flush_interval 20s
      retry_limit 10
-     retry_wait 30s
-     max_retry_wait 9m
+     retry_wait 5s
+     max_retry_wait 5m
      retry_mdm_post_wait_minutes 60
     </match>
 
@@ -205,13 +207,13 @@
      type out_oms
      log_level debug
      num_threads 5
-     buffer_chunk_limit 20m
+     buffer_chunk_limit 4m
      buffer_type file
      buffer_path %STATE_DIR_WS%/out_oms_kubehealth*.buffer
      buffer_queue_limit 20
      buffer_queue_full_action drop_oldest_chunk
      flush_interval 20s
      retry_limit 10
-     retry_wait 30s
-     max_retry_wait 9m
+     retry_wait 5s
+     max_retry_wait 5m
     </match>
\ No newline at end of file
diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb
index 1e25078f5..f7bd806a0 100644
--- a/source/code/plugin/ApplicationInsightsUtility.rb
+++ b/source/code/plugin/ApplicationInsightsUtility.rb
@@ -93,17 +93,17 @@ def initializeUtility()
           # The below are default recommended values. If you change these, ensure you test telemetry flow fully
 
           # flush telemetry if we have 10 or more telemetry items in our queue
-          @@Tc.channel.queue.max_queue_length = 10
+          #@@Tc.channel.queue.max_queue_length = 10
 
           # send telemetry to the service in batches of 5
-          @@Tc.channel.sender.send_buffer_size = 5
+          #@@Tc.channel.sender.send_buffer_size = 5
 
           # the background worker thread will be active for 5 seconds before it shuts down. if
           # during this time items are picked up from the queue, the timer is reset.
-          @@Tc.channel.sender.send_time = 5
+          #@@Tc.channel.sender.send_time = 5
 
           # the background worker thread will poll the queue every 0.5 seconds for new items
-          @@Tc.channel.sender.send_interval = 0.5
+          #@@Tc.channel.sender.send_interval = 0.5
         end
       rescue => errorStr
         $log.warn("Exception in AppInsightsUtility: initilizeUtility - error: #{errorStr}")

From e86c1111162275ed8290010a285c596d10572c58 Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Tue, 26 Nov 2019 15:06:45 -0800
Subject: [PATCH 23/26] remove timeofcommand when enrichment==false

---
 source/code/go/src/plugins/oms.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go
index d786c3704..834726c93 100644
--- a/source/code/go/src/plugins/oms.go
+++ b/source/code/go/src/plugins/oms.go
@@ -767,7 +767,6 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int {
 				LogEntry:              stringMap["LogEntry"],
 				LogEntrySource:        stringMap["LogEntrySource"],
 				LogEntryTimeStamp:     stringMap["LogEntryTimeStamp"],
-				LogEntryTimeOfCommand: start.Format(time.RFC3339),
 				SourceSystem:          stringMap["SourceSystem"],
 				Computer:              Computer,
 				Image:                 stringMap["Image"],

From 318ad10ad6eb32c15acc4124d3d7d6c744dc92dc Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Tue, 26 Nov 2019 15:46:18 -0800
Subject: [PATCH 24/26] fix config

---
 installer/conf/kube.conf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf
index 5f5837e5a..207780442 100644
--- a/installer/conf/kube.conf
+++ b/installer/conf/kube.conf
@@ -5,7 +5,6 @@
      port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}"
      bind 0.0.0.0
      chunk_size_limit 4m
-     skip_invalid_event true
     </source>
 
     #Kubernetes pod inventory

From 0283b2ebfaa1569a7ef08c97ef84b3aa99d3df8d Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Tue, 3 Dec 2019 10:24:13 -0800
Subject: [PATCH 25/26] enable mdm filter

---
 installer/conf/container.conf | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/installer/conf/container.conf b/installer/conf/container.conf
index 40f903194..93c250fbb 100644
--- a/installer/conf/container.conf
+++ b/installer/conf/container.conf
@@ -34,12 +34,12 @@
 </filter>
 
 #custom_metrics_mdm filter plugin
-#<filter mdm.cadvisorperf**>
-#  type filter_cadvisor2mdm
-#  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
-#  metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes
-#  log_level info
-#</filter>
+<filter mdm.cadvisorperf**>
+  type filter_cadvisor2mdm
+  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
+  metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes
+  log_level info
+</filter>
 
 <match oms.containerinsights.containerinventory**>
   type out_oms

From d4983c843defc58f5a546e1f8208ac0a67f85620 Mon Sep 17 00:00:00 2001
From: rashmichandrashekar <rashmy@microsoft.com>
Date: Tue, 3 Dec 2019 11:30:38 -0800
Subject: [PATCH 26/26] Rashmi/api chunk (#307)

* changes

* changes

* refactor changes

* changes

* changes

* changes

* changes

* node changes

* changes

* changes

* changes

* changes

* adding open and read timeouts for api client

* removing comments

* updating chunk size
---
 source/code/plugin/KubernetesApiClient.rb  |  30 +-
 source/code/plugin/in_kube_events.rb       | 133 +++++----
 source/code/plugin/in_kube_nodes.rb        | 328 +++++++++++----------
 source/code/plugin/in_kube_podinventory.rb | 136 ++++-----
 4 files changed, 347 insertions(+), 280 deletions(-)

diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb
index 8adf3f6b7..43c4c20d3 100644
--- a/source/code/plugin/KubernetesApiClient.rb
+++ b/source/code/plugin/KubernetesApiClient.rb
@@ -2,7 +2,7 @@
 # frozen_string_literal: true
 
 class KubernetesApiClient
-  require 'yajl/json_gem'
+  require "yajl/json_gem"
   require "logger"
   require "net/http"
   require "net/https"
@@ -43,7 +43,7 @@ def getKubeResourceInfo(resource, api_group: nil)
           if !File.exist?(@@CaFile)
             raise "#{@@CaFile} doesnt exist"
           else
-            Net::HTTP.start(uri.host, uri.port, :use_ssl => true, :ca_file => @@CaFile, :verify_mode => OpenSSL::SSL::VERIFY_PEER ) do |http|
+            Net::HTTP.start(uri.host, uri.port, :use_ssl => true, :ca_file => @@CaFile, :verify_mode => OpenSSL::SSL::VERIFY_PEER, :open_timeout => 20, :read_timeout => 40) do |http|
               kubeApiRequest = Net::HTTP::Get.new(uri.request_uri)
               kubeApiRequest["Authorization"] = "Bearer " + getTokenStr
               @Log.info "KubernetesAPIClient::getKubeResourceInfo : Making request to #{uri.request_uri} @ #{Time.now.utc.iso8601}"
@@ -333,7 +333,7 @@ def getContainerLogsSinceTime(namespace, pod, container, since, showTimeStamp)
       return containerLogs
     end
 
-    def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601 )
+    def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601)
       metricItems = []
       begin
         clusterId = getClusterId
@@ -546,5 +546,29 @@ def getMetricNumericValue(metricName, metricVal)
       end
       return metricValue
     end # getMetricNumericValue
+
+    def getResourcesAndContinuationToken(uri)
+      continuationToken = nil
+      resourceInventory = nil
+      begin
+        @Log.info "KubernetesApiClient::getResourcesAndContinuationToken : Getting resources from Kube API using url: #{uri} @ #{Time.now.utc.iso8601}"
+        resourceInfo = getKubeResourceInfo(uri)
+        @Log.info "KubernetesApiClient::getResourcesAndContinuationToken : Done getting resources from Kube API using url: #{uri} @ #{Time.now.utc.iso8601}"
+        if !resourceInfo.nil?
+          @Log.info "KubernetesApiClient::getResourcesAndContinuationToken:Start:Parsing data for #{uri} using yajl @ #{Time.now.utc.iso8601}"
+          resourceInventory = Yajl::Parser.parse(StringIO.new(resourceInfo.body))
+          @Log.info "KubernetesApiClient::getResourcesAndContinuationToken:End:Parsing data for #{uri} using yajl @ #{Time.now.utc.iso8601}"
+          resourceInfo = nil
+        end
+        if (!resourceInventory.nil? && !resourceInventory["metadata"].nil?)
+          continuationToken = resourceInventory["metadata"]["continue"]
+        end
+      rescue => errorStr
+        @Log.warn "KubernetesApiClient::getResourcesAndContinuationToken:Failed in get resources for #{uri} and continuation token: #{errorStr}"
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+        resourceInventory = nil
+      end
+      return continuationToken, resourceInventory
+    end #getResourcesAndContinuationToken
   end
 end
diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb
index 82fb88b70..6116cb62d 100644
--- a/source/code/plugin/in_kube_events.rb
+++ b/source/code/plugin/in_kube_events.rb
@@ -9,14 +9,17 @@ class Kube_Event_Input < Input
 
     def initialize
       super
-      require 'yajl/json_gem'
-      require 'yajl'
+      require "yajl/json_gem"
+      require "yajl"
       require "time"
 
       require_relative "KubernetesApiClient"
       require_relative "oms_common"
       require_relative "omslog"
       require_relative "ApplicationInsightsUtility"
+      
+      # 30000 events account to approximately 5MB
+      @EVENTS_CHUNK_SIZE = 30000
     end
 
     config_param :run_interval, :time, :default => 60
@@ -45,66 +48,90 @@ def shutdown
       end
     end
 
-    def enumerate(eventList = nil)
-      currentTime = Time.now
-      emitTime = currentTime.to_f
-      batchTime = currentTime.utc.iso8601
+    def enumerate
+      begin
+        eventList = nil
+        currentTime = Time.now
+        batchTime = currentTime.utc.iso8601
+        eventQueryState = getEventQueryState
+        newEventQueryState = []
+
+        # Initializing continuation token to nil
+        continuationToken = nil
+        $log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}")
+        continuationToken, eventList = KubernetesApiClient.getResourcesAndContinuationToken("events?fieldSelector=type!=Normal&limit=#{@EVENTS_CHUNK_SIZE}")
+        $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}")
+        if (!eventList.nil? && !eventList.empty? && eventList.key?("items") && !eventList["items"].nil? && !eventList["items"].empty?)
+          newEventQueryState = parse_and_emit_records(eventList, eventQueryState, newEventQueryState, batchTime)
+        else
+          $log.warn "in_kube_events::enumerate:Received empty eventList"
+        end
 
-      events = eventList
-      $log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}")
-      eventInfo = KubernetesApiClient.getKubeResourceInfo("events?fieldSelector=type!=Normal")
-      $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}")
+        #If we receive a continuation token, make calls, process and flush data until we have processed all data
+        while (!continuationToken.nil? && !continuationToken.empty?)
+          continuationToken, eventList = KubernetesApiClient.getResourcesAndContinuationToken("events?fieldSelector=type!=Normal&limit=#{@EVENTS_CHUNK_SIZE}&continue=#{continuationToken}")
+          if (!eventList.nil? && !eventList.empty? && eventList.key?("items") && !eventList["items"].nil? && !eventList["items"].empty?)
+            newEventQueryState = parse_and_emit_records(eventList, eventQueryState, newEventQueryState, batchTime)
+          else
+            $log.warn "in_kube_events::enumerate:Received empty eventList"
+          end
+        end
 
-      if !eventInfo.nil?
-        events = Yajl::Parser.parse(StringIO.new(eventInfo.body))
+        # Setting this to nil so that we dont hold memory until GC kicks in
+        eventList = nil
+        writeEventQueryState(newEventQueryState)
+      rescue => errorStr
+        $log.warn "in_kube_events::enumerate:Failed in enumerate: #{errorStr}"
+        $log.debug_backtrace(errorStr.backtrace)
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
       end
+    end # end enumerate
 
-      eventQueryState = getEventQueryState
-      newEventQueryState = []
+    def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTime = Time.utc.iso8601)
+      currentTime = Time.now
+      emitTime = currentTime.to_f
       begin
-        if (!events.nil? && !events.empty? && !events["items"].nil?)
-          eventStream = MultiEventStream.new
-          events["items"].each do |items|
-            record = {}
-            #<BUGBUG> - Not sure if ingestion has the below mapping for this custom type. Fix it as part of fixed type conversion
-            record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
-            eventId = items["metadata"]["uid"] + "/" + items["count"].to_s
-            newEventQueryState.push(eventId)
-            if !eventQueryState.empty? && eventQueryState.include?(eventId)
-              next
-                end
-            record["ObjectKind"] = items["involvedObject"]["kind"]
-            record["Namespace"] = items["involvedObject"]["namespace"]
-            record["Name"] = items["involvedObject"]["name"]
-            record["Reason"] = items["reason"]
-            record["Message"] = items["message"]
-            record["Type"] = items["type"]
-            record["TimeGenerated"] = items["metadata"]["creationTimestamp"]
-            record["SourceComponent"] = items["source"]["component"]
-            record["FirstSeen"] = items["firstTimestamp"]
-            record["LastSeen"] = items["lastTimestamp"]
-            record["Count"] = items["count"]
-            if items["source"].key?("host")
-              record["Computer"] = items["source"]["host"]
-            else
-              record["Computer"] = (OMS::Common.get_hostname)
-            end
-                record['ClusterName'] = KubernetesApiClient.getClusterName
-            record["ClusterId"] = KubernetesApiClient.getClusterId
-            wrapper = {
-              "DataType" => "KUBE_EVENTS_BLOB",
-              "IPName" => "ContainerInsights",
-              "DataItems" => [record.each { |k, v| record[k] = v }],
-            }
-            eventStream.add(emitTime, wrapper) if wrapper
+        eventStream = MultiEventStream.new
+        events["items"].each do |items|
+          record = {}
+          #<BUGBUG> - Not sure if ingestion has the below mapping for this custom type. Fix it as part of fixed type conversion
+          record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
+          eventId = items["metadata"]["uid"] + "/" + items["count"].to_s
+          newEventQueryState.push(eventId)
+          if !eventQueryState.empty? && eventQueryState.include?(eventId)
+            next
           end
-          router.emit_stream(@tag, eventStream) if eventStream
-            end
-        writeEventQueryState(newEventQueryState)
+          record["ObjectKind"] = items["involvedObject"]["kind"]
+          record["Namespace"] = items["involvedObject"]["namespace"]
+          record["Name"] = items["involvedObject"]["name"]
+          record["Reason"] = items["reason"]
+          record["Message"] = items["message"]
+          record["Type"] = items["type"]
+          record["TimeGenerated"] = items["metadata"]["creationTimestamp"]
+          record["SourceComponent"] = items["source"]["component"]
+          record["FirstSeen"] = items["firstTimestamp"]
+          record["LastSeen"] = items["lastTimestamp"]
+          record["Count"] = items["count"]
+          if items["source"].key?("host")
+            record["Computer"] = items["source"]["host"]
+          else
+            record["Computer"] = (OMS::Common.get_hostname)
+          end
+          record["ClusterName"] = KubernetesApiClient.getClusterName
+          record["ClusterId"] = KubernetesApiClient.getClusterId
+          wrapper = {
+            "DataType" => "KUBE_EVENTS_BLOB",
+            "IPName" => "ContainerInsights",
+            "DataItems" => [record.each { |k, v| record[k] = v }],
+          }
+          eventStream.add(emitTime, wrapper) if wrapper
+        end
+        router.emit_stream(@tag, eventStream) if eventStream
       rescue => errorStr
         $log.debug_backtrace(errorStr.backtrace)
         ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
-          end
+      end
+      return newEventQueryState
     end
 
     def run_periodic
diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb
index 92fece728..fa0994f43 100644
--- a/source/code/plugin/in_kube_nodes.rb
+++ b/source/code/plugin/in_kube_nodes.rb
@@ -22,14 +22,15 @@ class Kube_nodeInventory_Input < Input
     def initialize
       super
       require "yaml"
-      require 'yajl/json_gem'
-      require 'yajl'
+      require "yajl/json_gem"
+      require "yajl"
       require "time"
 
       require_relative "KubernetesApiClient"
       require_relative "ApplicationInsightsUtility"
       require_relative "oms_common"
       require_relative "omslog"
+      @NODES_CHUNK_SIZE = "400"
     end
 
     config_param :run_interval, :time, :default => 60
@@ -60,172 +61,193 @@ def shutdown
     end
 
     def enumerate
-      currentTime = Time.now
-      emitTime = currentTime.to_f
-      batchTime = currentTime.utc.iso8601
-      telemetrySent = false
+      begin
+        nodeInventory = nil
+        currentTime = Time.now
+        batchTime = currentTime.utc.iso8601
 
-      nodeInventory = nil
+        # Initializing continuation token to nil
+        continuationToken = nil
+        $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}")
+        continuationToken, nodeInventory = KubernetesApiClient.getResourcesAndContinuationToken("nodes?limit=#{@NODES_CHUNK_SIZE}")
+        $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}")
+        if (!nodeInventory.nil? && !nodeInventory.empty? && nodeInventory.key?("items") && !nodeInventory["items"].nil? && !nodeInventory["items"].empty?)
+          parse_and_emit_records(nodeInventory, batchTime)
+        else
+          $log.warn "in_kube_nodes::enumerate:Received empty nodeInventory"
+        end
 
-      $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}")
-      nodeInfo = KubernetesApiClient.getKubeResourceInfo("nodes")
-      $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}")
+        #If we receive a continuation token, make calls, process and flush data until we have processed all data
+        while (!continuationToken.nil? && !continuationToken.empty?)
+          continuationToken, nodeInventory = KubernetesApiClient.getResourcesAndContinuationToken("nodes?limit=#{@NODES_CHUNK_SIZE}&continue=#{continuationToken}")
+          if (!nodeInventory.nil? && !nodeInventory.empty? && nodeInventory.key?("items") && !nodeInventory["items"].nil? && !nodeInventory["items"].empty?)
+            parse_and_emit_records(nodeInventory, batchTime)
+          else
+            $log.warn "in_kube_nodes::enumerate:Received empty nodeInventory"
+          end
+        end
 
-      if !nodeInfo.nil?
-        nodeInventory = Yajl::Parser.parse(StringIO.new(nodeInfo.body))
+        # Setting this to nil so that we dont hold memory until GC kicks in
+        nodeInventory = nil
+      rescue => errorStr
+        $log.warn "in_kube_nodes::enumerate:Failed in enumerate: #{errorStr}"
+        $log.debug_backtrace(errorStr.backtrace)
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
       end
+    end # end enumerate
 
+    def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601)
       begin
-        if (!nodeInventory.nil? && !nodeInventory.empty?)
-          eventStream = MultiEventStream.new
-          containerNodeInventoryEventStream = MultiEventStream.new
-          if !nodeInventory["items"].nil?
-            #get node inventory
-            nodeInventory["items"].each do |items|
-              record = {}
-              # Sending records for ContainerNodeInventory
-              containerNodeInventoryRecord = {}
-              containerNodeInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
-              containerNodeInventoryRecord["Computer"] = items["metadata"]["name"]
+        currentTime = Time.now
+        emitTime = currentTime.to_f
+        telemetrySent = false
+        eventStream = MultiEventStream.new
+        containerNodeInventoryEventStream = MultiEventStream.new
+        #get node inventory
+        nodeInventory["items"].each do |items|
+          record = {}
+          # Sending records for ContainerNodeInventory
+          containerNodeInventoryRecord = {}
+          containerNodeInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
+          containerNodeInventoryRecord["Computer"] = items["metadata"]["name"]
 
-              record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
-              record["Computer"] = items["metadata"]["name"]
-              record["ClusterName"] = KubernetesApiClient.getClusterName
-              record["ClusterId"] = KubernetesApiClient.getClusterId
-              record["CreationTimeStamp"] = items["metadata"]["creationTimestamp"]
-              record["Labels"] = [items["metadata"]["labels"]]
-              record["Status"] = ""
+          record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
+          record["Computer"] = items["metadata"]["name"]
+          record["ClusterName"] = KubernetesApiClient.getClusterName
+          record["ClusterId"] = KubernetesApiClient.getClusterId
+          record["CreationTimeStamp"] = items["metadata"]["creationTimestamp"]
+          record["Labels"] = [items["metadata"]["labels"]]
+          record["Status"] = ""
 
-              if !items["spec"]["providerID"].nil? && !items["spec"]["providerID"].empty?
-                if File.file?(@@AzStackCloudFileName) # existence of this file indicates agent running on azstack
-                  record["KubernetesProviderID"] = "azurestack"
-                else
-                  record["KubernetesProviderID"] = items["spec"]["providerID"]
-                end
-              else
-                record["KubernetesProviderID"] = "onprem"
-              end
+          if !items["spec"]["providerID"].nil? && !items["spec"]["providerID"].empty?
+            if File.file?(@@AzStackCloudFileName) # existence of this file indicates agent running on azstack
+              record["KubernetesProviderID"] = "azurestack"
+            else
+              record["KubernetesProviderID"] = items["spec"]["providerID"]
+            end
+          else
+            record["KubernetesProviderID"] = "onprem"
+          end
 
-              # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions.
-              # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we
-              # populate the KubeNodeInventory Status field. A possible value for this field could be "Ready OutofDisk"
-              # implying that the node is ready for hosting pods, however its out of disk.
+          # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions.
+          # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we
+          # populate the KubeNodeInventory Status field. A possible value for this field could be "Ready OutofDisk"
+          # implying that the node is ready for hosting pods, however its out of disk.
 
-              if items["status"].key?("conditions") && !items["status"]["conditions"].empty?
-                allNodeConditions = ""
-                items["status"]["conditions"].each do |condition|
-                  if condition["status"] == "True"
-                    if !allNodeConditions.empty?
-                      allNodeConditions = allNodeConditions + "," + condition["type"]
-                    else
-                      allNodeConditions = condition["type"]
-                    end
-                  end
-                  #collect last transition to/from ready (no matter ready is true/false)
-                  if condition["type"] == "Ready" && !condition["lastTransitionTime"].nil?
-                    record["LastTransitionTimeReady"] = condition["lastTransitionTime"]
-                  end
-                end
+          if items["status"].key?("conditions") && !items["status"]["conditions"].empty?
+            allNodeConditions = ""
+            items["status"]["conditions"].each do |condition|
+              if condition["status"] == "True"
                 if !allNodeConditions.empty?
-                  record["Status"] = allNodeConditions
+                  allNodeConditions = allNodeConditions + "," + condition["type"]
+                else
+                  allNodeConditions = condition["type"]
                 end
               end
+              #collect last transition to/from ready (no matter ready is true/false)
+              if condition["type"] == "Ready" && !condition["lastTransitionTime"].nil?
+                record["LastTransitionTimeReady"] = condition["lastTransitionTime"]
+              end
+            end
+            if !allNodeConditions.empty?
+              record["Status"] = allNodeConditions
+            end
+          end
 
-              nodeInfo = items["status"]["nodeInfo"]
-              record["KubeletVersion"] = nodeInfo["kubeletVersion"]
-              record["KubeProxyVersion"] = nodeInfo["kubeProxyVersion"]
-              containerNodeInventoryRecord["OperatingSystem"] = nodeInfo["osImage"]
-              dockerVersion = nodeInfo["containerRuntimeVersion"]
-              dockerVersion.slice! "docker://"
-              containerNodeInventoryRecord["DockerVersion"] = dockerVersion
-              # ContainerNodeInventory data for docker version and operating system.
-              containerNodeInventoryWrapper = {
-                "DataType" => "CONTAINER_NODE_INVENTORY_BLOB",
-                "IPName" => "ContainerInsights",
-                "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }],
-              }
-              containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper
+          nodeInfo = items["status"]["nodeInfo"]
+          record["KubeletVersion"] = nodeInfo["kubeletVersion"]
+          record["KubeProxyVersion"] = nodeInfo["kubeProxyVersion"]
+          containerNodeInventoryRecord["OperatingSystem"] = nodeInfo["osImage"]
+          dockerVersion = nodeInfo["containerRuntimeVersion"]
+          dockerVersion.slice! "docker://"
+          containerNodeInventoryRecord["DockerVersion"] = dockerVersion
+          # ContainerNodeInventory data for docker version and operating system.
+          containerNodeInventoryWrapper = {
+            "DataType" => "CONTAINER_NODE_INVENTORY_BLOB",
+            "IPName" => "ContainerInsights",
+            "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }],
+          }
+          containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper
 
-              wrapper = {
-                "DataType" => "KUBE_NODE_INVENTORY_BLOB",
-                "IPName" => "ContainerInsights",
-                "DataItems" => [record.each { |k, v| record[k] = v }],
-              }
-              eventStream.add(emitTime, wrapper) if wrapper
-              # Adding telemetry to send node telemetry every 5 minutes
-              timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs
-              timeDifferenceInMinutes = timeDifference / 60
-              if (timeDifferenceInMinutes >= 10)
-                properties = {}
-                properties["Computer"] = record["Computer"]
-                properties["KubeletVersion"] = record["KubeletVersion"]
-                properties["OperatingSystem"] = nodeInfo["operatingSystem"]
-                properties["DockerVersion"] = dockerVersion
-                properties["KubernetesProviderID"] = record["KubernetesProviderID"]
-                properties["KernelVersion"] = nodeInfo["kernelVersion"]
-                properties["OSImage"] = nodeInfo["osImage"]
+          wrapper = {
+            "DataType" => "KUBE_NODE_INVENTORY_BLOB",
+            "IPName" => "ContainerInsights",
+            "DataItems" => [record.each { |k, v| record[k] = v }],
+          }
+          eventStream.add(emitTime, wrapper) if wrapper
+          # Adding telemetry to send node telemetry every 10 minutes
+          timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs
+          timeDifferenceInMinutes = timeDifference / 60
+          if (timeDifferenceInMinutes >= 10)
+            properties = {}
+            properties["Computer"] = record["Computer"]
+            properties["KubeletVersion"] = record["KubeletVersion"]
+            properties["OperatingSystem"] = nodeInfo["operatingSystem"]
+            properties["DockerVersion"] = dockerVersion
+            properties["KubernetesProviderID"] = record["KubernetesProviderID"]
+            properties["KernelVersion"] = nodeInfo["kernelVersion"]
+            properties["OSImage"] = nodeInfo["osImage"]
 
-                capacityInfo = items["status"]["capacity"]
-                ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties)
+            capacityInfo = items["status"]["capacity"]
+            ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties)
 
-                #telemetry about prometheus metric collections settings for replicaset
-                if (File.file?(@@promConfigMountPath))
-                  properties["rsPromInt"] = @@rsPromInterval
-                  properties["rsPromFPC"] = @@rsPromFieldPassCount
-                  properties["rsPromFDC"] = @@rsPromFieldDropCount
-                  properties["rsPromServ"] = @@rsPromK8sServiceCount
-                  properties["rsPromUrl"] = @@rsPromUrlCount
-                  properties["rsPromMonPods"] = @@rsPromMonitorPods
-                  properties["rsPromMonPodsNs"] = @@rsPromMonitorPodsNamespaceLength
-                end
-                ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties)
-                telemetrySent = true
-              end
+            #telemetry about prometheus metric collections settings for replicaset
+            if (File.file?(@@promConfigMountPath))
+              properties["rsPromInt"] = @@rsPromInterval
+              properties["rsPromFPC"] = @@rsPromFieldPassCount
+              properties["rsPromFDC"] = @@rsPromFieldDropCount
+              properties["rsPromServ"] = @@rsPromK8sServiceCount
+              properties["rsPromUrl"] = @@rsPromUrlCount
+              properties["rsPromMonPods"] = @@rsPromMonitorPods
+              properties["rsPromMonPodsNs"] = @@rsPromMonitorPodsNamespaceLength
             end
+            ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties)
+            telemetrySent = true
           end
-          router.emit_stream(@tag, eventStream) if eventStream
-          router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream
-          router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream
-          if telemetrySent == true
-            @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i
-          end
-          @@istestvar = ENV["ISTEST"]
-          if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0)
-            $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
-          end
-            #:optimize:kubeperf merge
-            begin
-              #if(!nodeInventory.empty?)
-                nodeMetricDataItems = []
-                #allocatable metrics @ node level
-                nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "cpu", "cpuAllocatableNanoCores", batchTime))
-                nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "memory", "memoryAllocatableBytes", batchTime))
-                #capacity metrics @ node level
-                nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores", batchTime))
-                nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes", batchTime))
+        end
+        router.emit_stream(@tag, eventStream) if eventStream
+        router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream
+        router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream
+        if telemetrySent == true
+          @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i
+        end
+        @@istestvar = ENV["ISTEST"]
+        if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0)
+          $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
+        end
+        #:optimize:kubeperf merge
+        begin
+          #if(!nodeInventory.empty?)
+          nodeMetricDataItems = []
+          #allocatable metrics @ node level
+          nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "cpu", "cpuAllocatableNanoCores", batchTime))
+          nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "memory", "memoryAllocatableBytes", batchTime))
+          #capacity metrics @ node level
+          nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores", batchTime))
+          nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes", batchTime))
 
-                kubePerfEventStream = MultiEventStream.new
+          kubePerfEventStream = MultiEventStream.new
 
-                nodeMetricDataItems.each do |record|
-                  record['DataType'] = "LINUX_PERF_BLOB"
-                  record['IPName'] = "LogManagement"
-                  kubePerfEventStream.add(emitTime, record) if record
-                end 
-              #end
-              router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
-            rescue => errorStr
-              $log.warn "Failed in enumerate for KubePerf from in_kube_nodes : #{errorStr}"
-              $log.debug_backtrace(errorStr.backtrace)
-              ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
-            end
-            #:optimize:end kubeperf merge
+          nodeMetricDataItems.each do |record|
+            record["DataType"] = "LINUX_PERF_BLOB"
+            record["IPName"] = "LogManagement"
+            kubePerfEventStream.add(emitTime, record) if record
+          end
+          #end
+          router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
+        rescue => errorStr
+          $log.warn "Failed in enumerate for KubePerf from in_kube_nodes : #{errorStr}"
+          $log.debug_backtrace(errorStr.backtrace)
+          ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
         end
-      
+        #:optimize:end kubeperf merge
+
       rescue => errorStr
         $log.warn "Failed to retrieve node inventory: #{errorStr}"
         $log.debug_backtrace(errorStr.backtrace)
         ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
       end
+      $log.warn "in_kube_nodes::parse_and_emit_records:End #{Time.now.utc.iso8601}"
     end
 
     def run_periodic
@@ -234,22 +256,22 @@ def run_periodic
       @nextTimeToRun = Time.now
       @waitTimeout = @run_interval
       until done
-         @nextTimeToRun = @nextTimeToRun + @run_interval
-         @now = Time.now
-         if @nextTimeToRun <= @now
-           @waitTimeout = 1
-           @nextTimeToRun = @now
-         else
-           @waitTimeout = @nextTimeToRun - @now
-         end
-         @condition.wait(@mutex, @waitTimeout)
+        @nextTimeToRun = @nextTimeToRun + @run_interval
+        @now = Time.now
+        if @nextTimeToRun <= @now
+          @waitTimeout = 1
+          @nextTimeToRun = @now
+        else
+          @waitTimeout = @nextTimeToRun - @now
+        end
+        @condition.wait(@mutex, @waitTimeout)
         done = @finished
         @mutex.unlock
         if !done
           begin
-             $log.info("in_kube_nodes::run_periodic.enumerate.start #{Time.now.utc.iso8601}")
-             enumerate
-             $log.info("in_kube_nodes::run_periodic.enumerate.end #{Time.now.utc.iso8601}")
+            $log.info("in_kube_nodes::run_periodic.enumerate.start #{Time.now.utc.iso8601}")
+            enumerate
+            $log.info("in_kube_nodes::run_periodic.enumerate.end #{Time.now.utc.iso8601}")
           rescue => errorStr
             $log.warn "in_kube_nodes::run_periodic: enumerate Failed to retrieve node inventory: #{errorStr}"
             ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb
index c9ae75a03..28b20bfc0 100644
--- a/source/code/plugin/in_kube_podinventory.rb
+++ b/source/code/plugin/in_kube_podinventory.rb
@@ -57,86 +57,80 @@ def shutdown
       end
     end
 
-    def processPodChunks(podInventory, serviceList, batchTime)
+    def enumerate(podList = nil)
       begin
-        if (!podInventory.empty? && podInventory.key?("items") && !podInventory["items"].empty?)
+        podInventory = podList
+        telemetryFlush = false
+        @podCount = 0
+        @controllerSet = Set.new []
+        @winContainerCount = 0
+        @controllerData = {}
+        currentTime = Time.now
+        batchTime = currentTime.utc.iso8601
+
+        # Get services first so that we dont need to make a call for very chunk
+        $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}")
+        serviceInfo = KubernetesApiClient.getKubeResourceInfo("services")
+        # serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo("services").body)
+        $log.info("in_kube_podinventory::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}")
+
+        if !serviceInfo.nil?
+          $log.info("in_kube_podinventory::enumerate:Start:Parsing services data using yajl @ #{Time.now.utc.iso8601}")
+          serviceList = Yajl::Parser.parse(StringIO.new(serviceInfo.body))
+          $log.info("in_kube_podinventory::enumerate:End:Parsing services data using yajl @ #{Time.now.utc.iso8601}")
+          serviceInfo = nil
+        end
+
+        # Initializing continuation token to nil
+        continuationToken = nil
+        $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}")
+        continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}")
+        $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}")
+        if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?)
           parse_and_emit_records(podInventory, serviceList, batchTime)
         else
-          $log.warn "in_kube_podinventory::processPodChunks:Received empty podInventory"
+          $log.warn "in_kube_podinventory::enumerate:Received empty podInventory"
         end
-        podInfo = nil
-        podInventory = nil
-      rescue => errorStr
-        $log.warn "in_kube_podinventory::processPodChunks:Failed in process pod chunks: #{errorStr}"
-        $log.debug_backtrace(errorStr.backtrace)
-        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
-      end
-    end
 
-    def parsePodsJsonAndProcess(podInfo, serviceList, batchTime)
-      if !podInfo.nil?
-        $log.info("in_kube_podinventory::parsePodsJsonAndProcess:Start:Parsing chunked data using yajl @ #{Time.now.utc.iso8601}")
-        podInventory = Yajl::Parser.parse(StringIO.new(podInfo.body))
-        $log.info("in_kube_podinventory::parsePodsJsonAndProcess:End:Parsing chunked data using yajl @ #{Time.now.utc.iso8601}")
-      end
-      if (!podInventory.nil? && !podInventory["metadata"].nil?)
-        continuationToken = podInventory["metadata"]["continue"]
-      end
-      processPodChunks(podInventory, serviceList, batchTime)
-      return continuationToken
-    end
+        #If we receive a continuation token, make calls, process and flush data until we have processed all data
+        while (!continuationToken.nil? && !continuationToken.empty?)
+          continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}&continue=#{continuationToken}")
+          if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?)
+            parse_and_emit_records(podInventory, serviceList, batchTime)
+          else
+            $log.warn "in_kube_podinventory::enumerate:Received empty podInventory"
+          end
+        end
 
-    def enumerate(podList = nil)
-      podInventory = podList
-      telemetryFlush = false
-      @podCount = 0
-      @controllerSet = Set.new []
-      @winContainerCount = 0
-      @controllerData = {}
-      currentTime = Time.now
-      batchTime = currentTime.utc.iso8601
-
-      # Get services first so that we dont need to make a call for very chunk
-      $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}")
-      serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo("services").body)
-      $log.info("in_kube_podinventory::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}")
-
-      # Initializing continuation token to nil
-      continuationToken = nil
-      $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}")
-      podInfo = KubernetesApiClient.getKubeResourceInfo("pods?limit=#{@PODS_CHUNK_SIZE}")
-      $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}")
-
-      continuationToken = parsePodsJsonAndProcess(podInfo, serviceList, batchTime)
-
-      #If we receive a continuation token, make calls, process and flush data until we have processed all data
-      while (!continuationToken.nil? && !continuationToken.empty?)
-        $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API using continuation token @ #{Time.now.utc.iso8601}")
-        podInfo = KubernetesApiClient.getKubeResourceInfo("pods?limit=#{@PODS_CHUNK_SIZE}&continue=#{continuationToken}")
-        $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API using continuation token @ #{Time.now.utc.iso8601}")
-        continuationToken = parsePodsJsonAndProcess(podInfo, serviceList, batchTime)
-      end
+        # Setting these to nil so that we dont hold memory until GC kicks in
+        podInventory = nil
+        serviceList = nil
 
-      # Adding telemetry to send pod telemetry every 5 minutes
-      timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs
-      timeDifferenceInMinutes = timeDifference / 60
-      if (timeDifferenceInMinutes >= 5)
-        telemetryFlush = true
-      end
+        # Adding telemetry to send pod telemetry every 5 minutes
+        timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs
+        timeDifferenceInMinutes = timeDifference / 60
+        if (timeDifferenceInMinutes >= 5)
+          telemetryFlush = true
+        end
 
-      # Flush AppInsights telemetry once all the processing is done
-      if telemetryFlush == true
-        telemetryProperties = {}
-        telemetryProperties["Computer"] = @@hostName
-        ApplicationInsightsUtility.sendCustomEvent("KubePodInventoryHeartBeatEvent", telemetryProperties)
-        ApplicationInsightsUtility.sendMetricTelemetry("PodCount", @podCount, {})
-        telemetryProperties["ControllerData"] = @controllerData.to_json
-        ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", @controllerSet.length, telemetryProperties)
-        if @winContainerCount > 0
-          telemetryProperties["ClusterWideWindowsContainersCount"] = @winContainerCount
-          ApplicationInsightsUtility.sendCustomEvent("WindowsContainerInventoryEvent", telemetryProperties)
+        # Flush AppInsights telemetry once all the processing is done
+        if telemetryFlush == true
+          telemetryProperties = {}
+          telemetryProperties["Computer"] = @@hostName
+          ApplicationInsightsUtility.sendCustomEvent("KubePodInventoryHeartBeatEvent", telemetryProperties)
+          ApplicationInsightsUtility.sendMetricTelemetry("PodCount", @podCount, {})
+          telemetryProperties["ControllerData"] = @controllerData.to_json
+          ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", @controllerSet.length, telemetryProperties)
+          if @winContainerCount > 0
+            telemetryProperties["ClusterWideWindowsContainersCount"] = @winContainerCount
+            ApplicationInsightsUtility.sendCustomEvent("WindowsContainerInventoryEvent", telemetryProperties)
+          end
+          @@podTelemetryTimeTracker = DateTime.now.to_time.to_i
         end
-        @@podTelemetryTimeTracker = DateTime.now.to_time.to_i
+      rescue => errorStr
+        $log.warn "in_kube_podinventory::enumerate:Failed in enumerate: #{errorStr}"
+        $log.debug_backtrace(errorStr.backtrace)
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
       end
     end