From 3c5b46d3ca41ee6df5092845de647e1b32cb6fb6 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Wed, 1 Aug 2018 16:54:33 -0700 Subject: [PATCH 001/160] Updatng release history --- README.md | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 18e50ebe3..a822f6f97 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,35 @@ -# Docker Monitoring Agent for OMI Server +# AKS Container Health monitoring -### Code of Conduct +## Code of Conduct This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. + +## Release History + +### 7/31/2018 - Version microsoft/oms:ciprod07312018 +- Changes for node lost scenario (roll-up pod & container statuses as Unknown) +- Discover unscheduled pods +- KubeNodeInventory - delimit multiple true node conditions for node status +- UTF Encoding support for container logs +- Container environment variable truncated to 200K +- Handle json parsing errors for OMI provider for docker +- Test mode enablement for ACS-engine testing +- Latest OMS agent (1.6.0-163) +- Latest OMI (1.4.2.5) + + +### 6/7/2018 - Version microsoft/oms:ciprod06072018 +- Remove node-0 dependency +- Remove passing WSID & Key as environment variables and pass them as kubernetes secret (for non-AKS; we already pass them as secret for AKS) +- Please note that if you are manually deploying thru yaml you need to - +- Provide workspaceid & key as base64 encoded strings with in double quotes (.yaml has comments to do so as well) +- Provide cluster name twice (for each container – daemonset & replicaset) + +### 5/8/2018 - Version microsoft/oms:ciprod05082018 +- Kubernetes RBAC enablement +- Latest released omsagent (1.6.0-42) +- Bug 
fix so that we do not collect kube-system namespace container logs when kube api calls fail occasionally (Bug #215107) +- .yaml changes (for RBAC) From d31f5889ec2f9ff6981efc72f2166b0430bffae9 Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 1 Aug 2018 16:52:40 -0700 Subject: [PATCH 002/160] fixing the plugin logs for emit stream --- source/code/plugin/in_cadvisor_perf.rb | 4 ++-- source/code/plugin/in_kube_nodes.rb | 7 ++++--- source/code/plugin/in_kube_podinventory.rb | 3 ++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index 01f2fa9f4..2e28650f6 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -55,10 +55,10 @@ def enumerate() end router.emit_stream(@tag, eventStream) if eventStream - if (ENV['ISTEST'] == true && eventStream.count > 0) + @@istestvar = ENV['ISTEST'] + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) $log.info("in_cadvisor_perf::emit-stream : Success @ #{Time.now.utc.iso8601}") end - rescue => errorStr $log.warn "Failed to retrieve cadvisor metric data: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 473978cbc..6cbad0897 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -99,9 +99,10 @@ def enumerate eventStream.add(emitTime, wrapper) if wrapper end router.emit_stream(@tag, eventStream) if eventStream - if (ENV['ISTEST'] == true && eventStream.count > 0) - $log.info("in_kube_nodeinventory::emit-stream : Success @ #{Time.now.utc.iso8601}") - end + @@istestvar = ENV['ISTEST'] + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp('true') == 0 && eventStream.count > 0) + $log.info("in_kube_nodeinventory::emit-stream : Success @ #{Time.now.utc.iso8601}") + end end rescue => errorStr $log.warn "Failed to retrieve node inventory: #{errorStr}" diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index a96a0b207..656d1aa48 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -190,7 +190,8 @@ def parse_and_emit_records(podInventory, serviceList) end end #podInventory block end router.emit_stream(@tag, eventStream) if eventStream - if (ENV['ISTEST'] == true && eventStream.count > 0) + @@istestvar = ENV['ISTEST'] + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) $log.info("in_kube_podinventory::emit-stream : Success @ #{Time.now.utc.iso8601}") end rescue => errorStr From 11fd5f6d4e3dd0b4fe57c8f4a551d1da4e8fa41f Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 5 Aug 2018 00:37:52 -0700 Subject: [PATCH 003/160] updating log message --- source/code/plugin/in_cadvisor_perf.rb | 2 +- source/code/plugin/in_kube_nodes.rb | 2 +- source/code/plugin/in_kube_podinventory.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index 2e28650f6..5b551f74e 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -57,7 +57,7 @@ def enumerate() router.emit_stream(@tag, eventStream) if eventStream @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp('true') == 0 && eventStream.count > 0) - $log.info("in_cadvisor_perf::emit-stream : Success @ #{Time.now.utc.iso8601}") + $log.info("cAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") end rescue => errorStr $log.warn "Failed to retrieve cadvisor metric data: #{errorStr}" diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 6cbad0897..edbbdd37f 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -101,7 +101,7 @@ def enumerate router.emit_stream(@tag, eventStream) if eventStream @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) - $log.info("in_kube_nodeinventory::emit-stream : Success @ #{Time.now.utc.iso8601}") + $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end end rescue => errorStr diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 656d1aa48..f478705f6 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -192,7 +192,7 @@ def parse_and_emit_records(podInventory, serviceList) router.emit_stream(@tag, eventStream) if eventStream @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp('true') == 0 && eventStream.count > 0) - $log.info("in_kube_podinventory::emit-stream : Success @ #{Time.now.utc.iso8601}") + $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end rescue => errorStr $log.warn "Failed in parse_and_emit_record pod inventory: #{errorStr}" From 87a9cf8ddb77f789a805b433ca4ff92556f7d8a0 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 16 Aug 2018 11:58:10 -0700 Subject: [PATCH 004/160] Remove Log Processing from fluentd configuration --- installer/conf/container.conf | 32 -- .../code/plugin/containerlogtailfilereader.rb | 396 ------------------ source/code/plugin/filter_container_log.rb | 42 -- 3 files changed, 470 deletions(-) delete mode 100644 source/code/plugin/containerlogtailfilereader.rb delete mode 100644 source/code/plugin/filter_container_log.rb diff --git a/installer/conf/container.conf b/installer/conf/container.conf index a20fdbe5a..9eaed9b47 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -50,18 +50,6 @@ ] -# Container log -# Example line which matches the format: -# {"log"=>"Test 9th January\n", "stream"=>"stdout", "time"=>"2018-01-09T23:14:39.273429353Z", "ContainerID"=>"ee1ec26aa974af81b21fff24cef8ec78bf7ac1558b5de6f1eb1a5b28ecd6d559", "Image"=>"ubuntu", "Name"=>"determined_wilson", "SourceSystem"=>"Containers"} -# NOTE: The LogEntryTimeStamp is just being appended in the begining of the LogEntry field. 
This is the actual time the log was generated and the TimeGenerated field in Kusto is different - - type containerlog_sudo_tail - pos_file /var/opt/microsoft/docker-cimprov/state/ContainerLogFile.pos.log - tag oms.container.log - format /\"log\"=>\"(?.*)", \"stream\"=>\"(?.*)", \"time\"=>\"(?.*)", \"ContainerID\"=>\"(?.*)", \"Image\"=>\"(?.*)", \"Name\"=>\"(?.*)", \"SourceSystem\"=>\"(?.*)"}/ - run_interval 60s - - # Container host inventory type omi @@ -95,11 +83,6 @@ type filter_container -# Seperate filter for container log - - type filter_container_log - - type out_oms_api log_level debug @@ -152,21 +135,6 @@ max_retry_wait 9m - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_log*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 15s - max_retry_wait 9m - - type out_oms log_level info diff --git a/source/code/plugin/containerlogtailfilereader.rb b/source/code/plugin/containerlogtailfilereader.rb deleted file mode 100644 index 2d55b1d73..000000000 --- a/source/code/plugin/containerlogtailfilereader.rb +++ /dev/null @@ -1,396 +0,0 @@ - -require 'optparse' -require 'json' -require 'logger' -require_relative 'omslog' -require 'fluent/filter' - -module ContainerLogTailscript - - class ContainerLogNewTail - def initialize(paths) - @paths = paths - @tails = {} - @pos_file = $options[:pos_file] - @read_from_head = $options[:read_from_head] - @pf = nil - @pf_file = nil - - @log = Logger.new(STDERR) - @log.formatter = proc do |severity, time, progname, msg| - "#{severity} #{msg}\n" - end - end - - attr_reader :paths - - def start - start_watchers(@paths) unless @paths.empty? 
- end - - def shutdown - @pf_file.close if @pf_file - end - - def setup_watcher(path, pe) - tw = TailWatcher.new(path, pe, @read_from_head, @log, &method(:receive_lines)) - tw.on_notify - tw - end - - def start_watchers(paths) - if @pos_file - @pf_file = File.open(@pos_file, File::RDWR|File::CREAT) - @pf_file.sync = true - @pf = PositionFile.parse(@pf_file) - end - paths.each { |path| - pe = nil - if @pf - pe = @pf[path] #pe is FilePositionEntry instance - if pe.read_inode.zero? - begin - pe.update(File::Stat.new(path).ino, 0) - rescue Errno::ENOENT - @log.warn "#{path} not found. Continuing without tailing it." - end - end - end - - @tails[path] = setup_watcher(path, pe) - } - end - - def receive_lines(lines, tail_watcher) - unless lines.empty? - puts lines - end - return true - end - - class TailWatcher - def initialize(path, pe, read_from_head, log, &receive_lines) - @path = path - @pe = pe || MemoryPositionEntry.new - @read_from_head = read_from_head - @log = log - @receive_lines = receive_lines - @rotate_handler = RotateHandler.new(path, log, &method(:on_rotate)) - @io_handler = nil - @containerIDFilePath = "/var/opt/microsoft/docker-cimprov/state/ContainerInventory/" - end - - attr_reader :path - - def wrap_receive_lines(lines) - newLines = [] - containerID = @path.split('/').last.chomp('-json.log') - containerInspectInformation = @containerIDFilePath + containerID - tempContainerInfo = {} - begin - File.open(containerInspectInformation) { |f| tempContainerInfo = JSON.parse(f.readline)} - lines.each { |line| - unless line.empty? 
- newLine = {} - newLine = JSON.parse(line) - newLine["ContainerID"] = containerID - newLine["Image"] = tempContainerInfo["Image"] - newLine["Name"] = tempContainerInfo["ElementName"] - newLine["SourceSystem"] = "Containers" - newLines.push(newLine) - end - } - rescue Exception => e - #File doesn't exist or error in reading the data - @log.error "Caught exception when opening file -> #{e}" - end - @receive_lines.call(newLines, self) - end - - def on_notify - @rotate_handler.on_notify if @rotate_handler - return unless @io_handler - @io_handler.on_notify - end - - def on_rotate(io) - if io - # first time - stat = io.stat - fsize = stat.size - inode = stat.ino - - last_inode = @pe.read_inode - if @read_from_head - pos = 0 - @pe.update(inode, pos) - elsif inode == last_inode - # rotated file has the same inode number as the pos_file. - # seek to the saved position - pos = @pe.read_pos - elsif last_inode != 0 - # read data from the head of the rotated file. - pos = 0 - @pe.update(inode, pos) - else - # this is the first MemoryPositionEntry for the first time fluentd started. - # seeks to the end of the file to know where to start tailing - pos = fsize - @pe.update(inode, pos) - end - io.seek(pos) - @io_handler = IOHandler.new(io, @pe, @log, &method(:wrap_receive_lines)) - else - @io_handler = NullIOHandler.new - end - end - - class IOHandler - def initialize(io, pe, log, &receive_lines) - @log = log - @io = io - @pe = pe - @log = log - @read_lines_limit = 100 - @receive_lines = receive_lines - @buffer = ''.force_encoding('ASCII-8BIT') - @iobuf = ''.force_encoding('ASCII-8BIT') - @lines = [] - end - - attr_reader :io - - def on_notify - begin - read_more = false - if @lines.empty? - begin - while true - if @buffer.empty? 
- @io.readpartial(512, @buffer) - else - @buffer << @io.readpartial(512, @iobuf) - end - while line = @buffer.slice!(/.*?\n/m) - @lines << line - end - if @lines.size >= @read_lines_limit - # not to use too much memory in case the file is very large - read_more = true - break - end - end - rescue EOFError - end - end - - unless @lines.empty? - if @receive_lines.call(@lines) - @pe.update_pos(@io.pos - @buffer.bytesize) - @lines.clear - else - read_more = false - end - end - end while read_more - - rescue - @log.error "#{$!.to_s}" - close - end - - def close - @io.close unless @io.closed? - end - end - - class NullIOHandler - def initialize - end - - def io - end - - def on_notify - end - - def close - end - end - - class RotateHandler - def initialize(path, log, &on_rotate) - @path = path - @inode = nil - @fsize = -1 # first - @on_rotate = on_rotate - @log = log - end - - def on_notify - begin - stat = File.stat(@path) #returns a File::Stat object for the file named @path - inode = stat.ino - fsize = stat.size - rescue Errno::ENOENT - # moved or deleted - inode = nil - fsize = 0 - end - - begin - if @inode != inode || fsize < @fsize - # rotated or truncated - begin - io = File.open(@path) - rescue Errno::ENOENT - end - @on_rotate.call(io) - end - @inode = inode - @fsize = fsize - end - - rescue - @log.error "#{$!.to_s}" - end - end - end - - - class PositionFile - UNWATCHED_POSITION = 0xffffffffffffffff - - def initialize(file, map, last_pos) - @file = file - @map = map - @last_pos = last_pos - end - - def [](path) - if m = @map[path] - return m - end - - @file.pos = @last_pos - @file.write path - @file.write "\t" - seek = @file.pos - @file.write "0000000000000000\t0000000000000000\n" - @last_pos = @file.pos - - @map[path] = FilePositionEntry.new(@file, seek) - end - - def self.parse(file) - compact(file) - - map = {} - file.pos = 0 - file.each_line {|line| - m = /^([^\t]+)\t([0-9a-fA-F]+)\t([0-9a-fA-F]+)/.match(line) - next unless m - path = m[1] - seek = file.pos 
- line.bytesize + path.bytesize + 1 - map[path] = FilePositionEntry.new(file, seek) - } - new(file, map, file.pos) - end - - # Clean up unwatched file entries - def self.compact(file) - file.pos = 0 - existent_entries = file.each_line.map { |line| - m = /^([^\t]+)\t([0-9a-fA-F]+)\t([0-9a-fA-F]+)/.match(line) - next unless m - path = m[1] - pos = m[2].to_i(16) - ino = m[3].to_i(16) - # 32bit inode converted to 64bit at this phase - pos == UNWATCHED_POSITION ? nil : ("%s\t%016x\t%016x\n" % [path, pos, ino]) - }.compact - - file.pos = 0 - file.truncate(0) - file.write(existent_entries.join) - end - end - - # pos inode - # ffffffffffffffff\tffffffffffffffff\n - class FilePositionEntry - POS_SIZE = 16 - INO_OFFSET = 17 - INO_SIZE = 16 - LN_OFFSET = 33 - SIZE = 34 - - def initialize(file, seek) - @file = file - @seek = seek - end - - def update(ino, pos) - @file.pos = @seek - @file.write "%016x\t%016x" % [pos, ino] - end - - def update_pos(pos) - @file.pos = @seek - @file.write "%016x" % pos - end - - def read_inode - @file.pos = @seek + INO_OFFSET - raw = @file.read(INO_SIZE) - raw ? raw.to_i(16) : 0 - end - - def read_pos - @file.pos = @seek - raw = @file.read(POS_SIZE) - raw ? raw.to_i(16) : 0 - end - end - - class MemoryPositionEntry - def initialize - @pos = 0 - @inode = 0 - end - - def update(ino, pos) - @inode = ino - @pos = pos - end - - def update_pos(pos) - @pos = pos - end - - def read_pos - @pos - end - - def read_inode - @inode - end - end - end -end - -if __FILE__ == $0 - $options = {:read_from_head => false} - OptionParser.new do |opts| - opts.on("-p", "--posfile [POSFILE]") do |p| - $options[:pos_file] = p - end - opts.on("-h", "--[no-]readfromhead") do |h| - $options[:read_from_head] = h - end - end.parse! 
- a = ContainerLogTailscript::ContainerLogNewTail.new(ARGV) - a.start - a.shutdown -end - diff --git a/source/code/plugin/filter_container_log.rb b/source/code/plugin/filter_container_log.rb deleted file mode 100644 index 21e146a35..000000000 --- a/source/code/plugin/filter_container_log.rb +++ /dev/null @@ -1,42 +0,0 @@ -# frozen_string_literal: true - -require 'fluent/filter' - -module Fluent - require 'logger' - class PassThruFilter < Filter - Fluent::Plugin.register_filter('filter_container_log', self) - - def configure(conf) - super - end - - def start - super - @hostname = OMS::Common.get_hostname or "Unknown host" - end - - def shutdown - super - end - - def filter(tag, time, record) - begin - #Try to force utf-8 encoding on the string so that all characters can flow through to - #$log.info "before : #{record['LogEntry']}" - record['LogEntry'].force_encoding('UTF-8') - rescue - $log.error "Failed to convert record['LogEntry'] : '#{record['LogEntry']}' to UTF-8 using force_encoding." 
- $log.error "Current string encoding for record['LogEntry'] is #{record['LogEntry'].encoding}" - end - - record['Computer'] = @hostname - wrapper = { - "DataType"=>"CONTAINER_LOG_BLOB", - "IPName"=>"Containers", - "DataItems"=>[record.each{|k,v| record[k]=v}] - } - wrapper - end - end -end From 308be41fe87202ee6e289cc9c952a24910eed133 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 16 Aug 2018 12:01:14 -0700 Subject: [PATCH 005/160] Remove plugin references from base_container.data --- installer/datafiles/base_container.data | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index c49a8d1d0..ec0728c01 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -23,14 +23,11 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/filter_docker_log.rb; source/code/plugin/filter_docker_log.rb; 644; root; root /opt/microsoft/omsagent/plugin/filter_container.rb; source/code/plugin/filter_container.rb; 644; root; root -/opt/microsoft/omsagent/plugin/filter_container_log.rb; source/code/plugin/filter_container_log.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_podinventory.rb; source/code/plugin/in_kube_podinventory.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_events.rb; source/code/plugin/in_kube_events.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_logs.rb; source/code/plugin/in_kube_logs.rb; 644; root; root /opt/microsoft/omsagent/plugin/KubernetesApiClient.rb; source/code/plugin/KubernetesApiClient.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_containerlog_sudo_tail.rb; source/code/plugin/in_containerlog_sudo_tail.rb; 644; root; root -/opt/microsoft/omsagent/plugin/containerlogtailfilereader.rb; source/code/plugin/containerlogtailfilereader.rb; 744; root; root /etc/opt/microsoft/docker-cimprov/container.conf; installer/conf/container.conf; 644; root; root @@ -88,15 +85,6 
@@ WriteInstallInfo() { } WriteInstallInfo -#Setup sudo permission for containerlogtailfilereader -if [ -z $(cat /etc/sudoers.d/omsagent | grep /containerlogtailfilereader.rb) ] -then - chmod +w /etc/sudoers.d/omsagent - echo "#run containerlogtailfilereader.rb for docker-provider" >> /etc/sudoers.d/omsagent - echo "omsagent ALL=(ALL) NOPASSWD: /opt/microsoft/omsagent/ruby/bin/ruby /opt/microsoft/omsagent/plugin/containerlogtailfilereader.rb *" >> /etc/sudoers.d/omsagent - chmod 440 /etc/sudoers.d/omsagent -fi - # Get the state file in place with proper permissions touch /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt chmod 644 /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt From bcd1a3ff040eb25218cfffd5028394f7594075c7 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Fri, 14 Sep 2018 10:46:55 -0700 Subject: [PATCH 006/160] Dilipr/fluent bit log processing (#126) * Build out_oms.so and include in docker-cimprov package * Adding fluent-bit-config file to base container * PR Feedback * Adding out_oms.conf to base_container.data * PR Feedback * Making the critical section as small as possible * PR Feedback * Fixing the newline bug for Computer, and changing containerId to Id --- build/Makefile | 829 ++++++++++++------------ installer/conf/out_oms.conf | 6 + installer/conf/td-agent-bit.conf | 35 + installer/datafiles/base_container.data | 7 +- source/code/go/src/plugins/Makefile | 20 + source/code/go/src/plugins/glide.lock | 209 ++++++ source/code/go/src/plugins/glide.yaml | 15 + source/code/go/src/plugins/oms.go | 359 ++++++++++ source/code/go/src/plugins/out_oms.go | 57 ++ source/code/go/src/plugins/utils.go | 67 ++ 10 files changed, 1194 insertions(+), 410 deletions(-) create mode 100644 installer/conf/out_oms.conf create mode 100644 installer/conf/td-agent-bit.conf create mode 100644 source/code/go/src/plugins/Makefile create mode 100644 source/code/go/src/plugins/glide.lock create mode 100644 
source/code/go/src/plugins/glide.yaml create mode 100644 source/code/go/src/plugins/oms.go create mode 100644 source/code/go/src/plugins/out_oms.go create mode 100644 source/code/go/src/plugins/utils.go diff --git a/build/Makefile b/build/Makefile index 9586c3b23..b5312cfe3 100644 --- a/build/Makefile +++ b/build/Makefile @@ -1,409 +1,420 @@ -# -*- mode: Makefile; -*- -# Copyright (c) Microsoft Corporation - -BASE_DIR := $(subst /build,,$(PWD)) -OMI_ROOT := $(shell cd ../../omi/Unix; pwd -P) -SCXPAL_DIR := $(shell cd ../../pal; pwd -P) - -PF_POSIX := 1 -include $(SCXPAL_DIR)/build/config.mak -include $(BASE_DIR)/build/config.mak -include $(SCXPAL_DIR)/build/Makefile.pal - -ifndef ENABLE_DEBUG -$(error "ENABLE_DEBUG is not set. Please re-run configure") -endif - -# Include the version file -include ../../docker.version - -ifndef CONTAINER_BUILDVERSION_STATUS -$(error "Is docker.version missing? Please re-run configure") -endif - -SOURCE_DIR := $(BASE_DIR)/source/code -TEST_DIR := $(BASE_DIR)/test/code - -PROVIDER_DIR := $(SOURCE_DIR)/providers -PROVIDER_TEST_DIR := $(TEST_DIR)/providers -PAL_INCLUDE_DIR := $(SCXPAL_DIR)/source/code/include -PAL_TESTUTILS_DIR := $(SCXPAL_DIR)/test/code/testutils - -INTERMEDIATE_DIR := $(BASE_DIR)/intermediate/$(BUILD_CONFIGURATION) -INTERMEDIATE_TESTFILES := $(INTERMEDIATE_DIR)/testfiles -TARGET_DIR := $(BASE_DIR)/target/$(BUILD_CONFIGURATION) -PROVIDER_LIBRARY := $(INTERMEDIATE_DIR)/libcontainer.so - -INSTALLER_TMPDIR := $(INTERMEDIATE_DIR)/installer_tmp - -# Include files - -INCLUDE_DEFINES := $(INTERMEDIATE_DIR)/defines.h - -# Compiler flags - -OMI_INCLUDE_FLAGS := -I$(OMI_ROOT)/output/include -PROVIDER_INCLUDE_FLAGS := -I$(PAL_INCLUDE_DIR) -I$(INTERMEDIATE_DIR) - -PROVIDER_TEST_INCLUDE_FLAGS := -Wmissing-include-dirs -Wno-non-virtual-dtor -I$(SCXPAL_DIR)/source/code/include -I$(INTERMEDIATE_DIR) -I$(SCXPAL_DIR)/test/ext/include -I$(OMI_ROOT)/output/include -I$(OMI_ROOT) -I$(OMI_ROOT)/common -I$(SCXPAL_DIR)/test/code/include 
$(PROVIDER_INCLUDE_FLAGS) -I$(PROVIDER_DIR) - -ifeq ($(ENABLE_DEBUG),1) -PROV_DEBUG_FLAGS := -g -endif - -COMPILE_FLAGS := $(PROV_DEBUG_FLAGS) -D_REENTRANT -fstack-protector-all -Wall -fno-nonansi-builtins -Woverloaded-virtual -Wformat -Wformat-security -Wcast-align -Wswitch-enum -Wshadow -Wwrite-strings -Wredundant-decls -Wcast-qual -fPIC -PROVIDER_COMPILE_FLAGS := $(COMPILE_FLAGS) - -LINK_LIBRARIES := -Wl,-rpath=/opt/omi/lib -L$(OMI_ROOT)/output/lib -lmicxx -L$(SCXPAL_TARGET_DIR) -lscxcore -lUtil -lscxassertabort -lrt -luuid -PROVIDER_TEST_LINK_LIBRARIES := -lbase -lpal -L$(SCXPAL_TARGET_DIR) -lscxcore $(SCXPAL_DIR)/test/ext/lib/linux/$(ARCH)/cppunit/libcppunit.a -lpthread -lrt -luuid - -SHARED_FLAGS := -shared - -# Support for installbuilder - -STAGING_DIR := $(INTERMEDIATE_DIR)/staging - -ifeq ($(ULINUX),1) - # For consistency, the architecture should be i686 (for x86) and x86_64 (for x64) - DOCKER_ARCH := $(shell echo $(PF_ARCH) | sed -e 's/x86$$/i686/' -e 's/x64$$/x86_64/') - OUTPUT_PACKAGE_PREFIX=docker-cimprov-$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH)-$(CONTAINER_BUILDVERSION_BUILDNR).universal.$(DOCKER_ARCH) -else - PF_DISTRO_LC := $(shell echo $(PF_DISTRO) | tr A-Z a-z) - OUTPUT_PACKAGE_PREFIX=docker-cimprov-$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH)-$(CONTAINER_BUILDVERSION_BUILDNR).$(PF_DISTRO_LC).$(PF_MAJOR).$(PF_ARCH) -endif - -ifeq ("$(wildcard /usr/bin/dpkg-deb)","") - DPKG_LOCATION="--DPKG_LOCATION=$(SCXPAL_DIR)/installer/InstallBuilder/tools/bin/dpkg-deb-$(PF_ARCH)" -else - DPKG_LOCATION= -endif - -# Support for src_to_obj handling - -INCLUDES = $(OMI_INCLUDE_FLAGS) $(PROVIDER_INCLUDE_FLAGS) -CFLAGS = $(COMPILE_FLAGS) -CXXFLAGS = $(COMPILE_FLAGS) - -#-------------------------------------------------------------------------------- -# Build targets - -ifeq ($(ULINUX),1) -all : $(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS 
$(PROVIDER_LIBRARY) KIT_STATUS kit -else -all : $(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS $(PROVIDER_LIBRARY) -endif - -clean : - $(RMDIR) $(BASE_DIR)/build/cppunit_result.* $(BASE_DIR)/build/scxtestrunner.log $(BASE_DIR)/installer/intermediate $(BASE_DIR)/intermediate $(BASE_DIR)/target $(PROVIDER_TEST_DIR)/providertestutils.cpp - -find $(BASE_DIR) -name \*~ -exec rm {} \; - -$(RM) $(TEST_DIR)/providers/TestScriptPath.h - -distclean : clean - $(RM) $(BASE_DIR)/build/config.mak - -make -C $(OMI_ROOT) distclean - -make -C $(SCXPAL_DIR)/build distclean - -$(RMDIR) $(OMI_ROOT)/output* - -$(RM) $(SCXPAL_DIR)/build/config.mak - -$(RM) $(SCXPAL_DIR)/build/Makefile.config_cache - -PROVIDER_STATUS: - @echo "========================= Performing Building provider" - -KIT_STATUS: - @echo "========================= Performing Building provider tests" - -#-------------------------------------------------------------------------------- -# OMI build -# -# Build the OMI distribution -# -# Technically, we should go to build OMI all the time. But I'd rather not spend -# the time doing it here EVERY TIME, when we never normally change OMI. This is -# a good tradeoff (build if not built, otherwise assume all is well). -# -# Doing a 'make clean' in OMI directory will force us to rebuild. - -$(OMI_ROOT)/output : $(OMI_ROOT)/output/lib/libmicxx.so - -$(OMI_ROOT)/output/lib/libmicxx.so : - @echo "========================= Performing Building OMI" - make -C $(OMI_ROOT) -ifeq ($(PERFORM_OMI_MAKEINSTALL),1) - make -C $(OMI_ROOT) install -endif - -#-------------------------------------------------------------------------------- -# PAL build -# -# Build the PAL (Platform Abstraction Layer) -# -# Doing a 'make clean' in PAL directory will force us to rebuild. 
- -$(SCXPAL_INTERMEDIATE_DIR) : - @echo "========================= Performing Building PAL" - make -C $(SCXPAL_DIR)/build - -#================================================================================ -# File depends.h (compiler dependencies) -#================================================================================ - -$(INCLUDE_DEFINES) : $(BASE_DIR)/build/config.mak - -$(MKPATH) $(@D) - @$(ECHO) "Creating $@" - @$(call pf_fwrite,"/*-------------------------------------------------------------------------------", $@) - @$(call pf_fappend," Copyright (C) 2007-2015 Microsoft Corp. ", $@) - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"*/ ", $@) - @$(call pf_fappend,"/** ", $@) - @$(call pf_fappend," \file ", $@) - @$(call pf_fappend," ", $@) - @$(call pf_fappend," \brief Auto generated file containing build definitions ", $@) - @$(call pf_fappend," ", $@) - @$(call pf_fappend," \author Automated Build System ", $@) - @$(call pf_fappend," ", $@) - @$(call pf_fappend," DO NOT EDIT THIS FILE! ", $@) - @$(call pf_fappend," DO NOT CHECK IN THIS FILE! 
", $@) - @$(call pf_fappend,"*/ ", $@) - @$(call pf_fappend,"/*----------------------------------------------------------------------------*/", $@) - @$(call pf_fappend,"#ifndef DEFINES_H ", $@) - @$(call pf_fappend,"#define DEFINES_H ", $@) - @$(call pf_fappend," ", $@) -ifneq ($(PF_DISTRO),) - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"#ifndef PF_DISTRO_$(PF_DISTRO) ", $@) - @$(call pf_fappend,"#define PF_DISTRO_$(PF_DISTRO) ", $@) - @$(call pf_fappend,"#endif ", $@) -endif -ifneq ($(PF_MAJOR),) - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"#ifndef PF_MAJOR ", $@) - @$(call pf_fappend,"#define PF_MAJOR $(PF_MAJOR) ", $@) - @$(call pf_fappend,"#endif ", $@) -endif -ifneq ($(PF_MINOR),) - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"#ifndef PF_MINOR ", $@) - @$(call pf_fappend,"#define PF_MINOR $(PF_MINOR) ", $@) - @$(call pf_fappend,"#endif ", $@) -endif -ifneq ($(ARCH),) - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"#ifndef $(ARCH) ", $@) - @$(call pf_fappend,"#define $(ARCH) ", $@) - @$(call pf_fappend,"#endif ", $@) -endif -ifeq ($(BUILD_TYPE),Debug) - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"#ifndef _DEBUG ", $@) - @$(call pf_fappend,"#define _DEBUG ", $@) - @$(call pf_fappend,"#endif ", $@) -else - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"#ifndef NDEBUG ", $@) - @$(call pf_fappend,"#define NDEBUG ", $@) - @$(call pf_fappend,"#endif ", $@) -endif - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"#endif /* DEFINES_H */ ", $@) - @$(call pf_fappend,"/*----------------------------E-N-D---O-F---F-I-L-E---------------------------*/", $@) - -#================================================================================ -# Internal functions -#================================================================================ - -# Convert a list of src files with absolute paths under BASE_DIR to corresponding -# object files under intermediate directory -# src_to_obj(list_of_cppfiles) -src_to_obj = $(patsubst $(BASE_DIR)%, 
$(INTERMEDIATE_DIR)%, $(patsubst %.c, %.o, $(patsubst %.cpp, %.o, $(1)))) - -# No default rules, please -.SUFFIX: - -# Rule for compiling cpp files in source tree, ouptut in mirrored intermediate dir -$(INTERMEDIATE_DIR)/%.o : $(BASE_DIR)/%.cpp $(INCLUDE_DEFINES) - $(MKPATH) $(@D) - $(CXX) -c $(CXXFLAGS) $(INCLUDES) -I$( $(TEST_DIR)/providers/TestScriptPath.h - -test : TEST_STATUS $(SCXPAL_INTERMEDIATE_DIR) $(INTERMEDIATE_DIR)/testrunner - @echo "========================= Performing container testrun execution" - $(MKPATH) $(INTERMEDIATE_TESTFILES) - $(COPY) $(TEST_DIR)/scripts/createEnv.sh $(TEST_DIR)/scripts/testrun_wrapper $(INTERMEDIATE_TESTFILES) - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(OMI_ROOT)/output/lib; cd $(INTERMEDIATE_TESTFILES); ./createEnv.sh - cd $(INTERMEDIATE_TESTFILES); ./testrun_wrapper $(INTERMEDIATE_DIR) - -#-------------------------------------------------------------------------------- -# Build the distribution kit -# -# Build the packages via installbuilder -# -# While the "formal build" only builds ULINUX, we may build something else for DEV purposes. -# Assume we ALWAYS build DPKG, but only build RPM if --enable-ulinux is speified in configure. 
- -kit : CONTAINERLIB_FILENAME = libcontainer.so -kit : $(OMI_ROOT)/output $(PROVIDER_LIBRARY) - -ifeq ($(ULINUX),1) - - @echo "========================= Performing Building RPM and DPKG packages" - $(MKPATH) $(INSTALLER_TMPDIR) - sudo $(RMDIR) $(STAGING_DIR) - $(MKPATH) $(INTERMEDIATE_DIR) - python $(SCXPAL_DIR)/installer/InstallBuilder/installbuilder.py \ - --BASE_DIR=$(BASE_DIR) \ - --TARGET_DIR=$(INTERMEDIATE_DIR) \ - --INTERMEDIATE_DIR=$(INSTALLER_TMPDIR) \ - --STAGING_DIR=$(STAGING_DIR) \ - --BUILD_TYPE=$(BUILD_TYPE) \ - --BUILD_CONFIGURATION=$(BUILD_CONFIGURATION) \ - --PFARCH=$(PF_ARCH) \ - --PFDISTRO=$(PF_DISTRO) \ - --PFMAJOR=$(PF_MAJOR) \ - --PFMINOR=$(PF_MINOR) \ - --VERSION=$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH) \ - --RELEASE=$(CONTAINER_BUILDVERSION_BUILDNR) \ - --CONTAINER_BUILD_LIBRARY=$(CONTAINERLIB_FILENAME) \ - --OUTPUTFILE=$(OUTPUT_PACKAGE_PREFIX) \ - --DATAFILE_PATH=$(BASE_DIR)/installer/datafiles \ - base_container.data linux.data linux_rpm.data - - sudo $(RMDIR) $(STAGING_DIR) - $(MKPATH) $(INTERMEDIATE_DIR) - python $(SCXPAL_DIR)/installer/InstallBuilder/installbuilder.py \ - --BASE_DIR=$(BASE_DIR) \ - --TARGET_DIR=$(INTERMEDIATE_DIR) \ - --INTERMEDIATE_DIR=$(INSTALLER_TMPDIR) \ - --STAGING_DIR=$(STAGING_DIR) \ - --BUILD_TYPE=$(BUILD_TYPE) \ - --BUILD_CONFIGURATION=$(BUILD_CONFIGURATION) \ - --PFARCH=$(PF_ARCH) \ - --PFDISTRO=$(PF_DISTRO) \ - --PFMAJOR=$(PF_MAJOR) \ - --PFMINOR=$(PF_MINOR) \ - --VERSION=$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH) \ - --RELEASE=$(CONTAINER_BUILDVERSION_BUILDNR) \ - --CONTAINER_BUILD_LIBRARY=$(CONTAINERLIB_FILENAME) \ - $(DPKG_LOCATION) \ - --OUTPUTFILE=$(OUTPUT_PACKAGE_PREFIX) \ - --DATAFILE_PATH=$(BASE_DIR)/installer/datafiles \ - base_container.data linux.data linux_dpkg.data - - # Strip the package extension from the package filename - sed -re 's/.rpm$$|.deb$$//' $(INTERMEDIATE_DIR)/package_filename 
> $(INTERMEDIATE_DIR)/package_file.tmp; mv $(INTERMEDIATE_DIR)/package_file.tmp $(INTERMEDIATE_DIR)/package_filename - - # Build the tar file containing both .rpm and .deb packages - cd $(INTERMEDIATE_DIR); tar cvf $(OUTPUT_PACKAGE_PREFIX).tar $(OUTPUT_PACKAGE_PREFIX).rpm $(OUTPUT_PACKAGE_PREFIX).deb - - ../installer/bundle/create_bundle.sh $(PF)_$(PF_DISTRO) $(INTERMEDIATE_DIR) $(OUTPUT_PACKAGE_PREFIX) - # Copy the shell bundle to the target directory - $(MKPATH) $(TARGET_DIR) - cd $(INTERMEDIATE_DIR); $(COPY) `cat $(INTERMEDIATE_DIR)/package_filename`.sh $(TARGET_DIR) - -else - - @echo "========================= Performing Building RPM and DPKG packages" - sudo $(RMDIR) $(STAGING_DIR) - $(MKPATH) $(INTERMEDIATE_DIR) - python $(SCXPAL_DIR)/installer/InstallBuilder/installbuilder.py \ - --BASE_DIR=$(BASE_DIR) \ - --TARGET_DIR=$(INTERMEDIATE_DIR) \ - --INTERMEDIATE_DIR=$(INSTALLER_TMPDIR) \ - --STAGING_DIR=$(STAGING_DIR) \ - --BUILD_TYPE=$(BUILD_TYPE) \ - --BUILD_CONFIGURATION=$(BUILD_CONFIGURATION) \ - --PFARCH=$(PF_ARCH) \ - --PFDISTRO=$(PF_DISTRO) \ - --PFMAJOR=$(PF_MAJOR) \ - --PFMINOR=$(PF_MINOR) \ - --VERSION=$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH) \ - --RELEASE=$(CONTAINER_BUILDVERSION_BUILDNR) \ - --CONTAINER_BUILD_LIBRARY=$(CONTAINERLIB_FILENAME) \ - $(DPKG_LOCATION) \ - --OUTPUTFILE=$(OUTPUT_PACKAGE_PREFIX) \ - --DATAFILE_PATH=$(BASE_DIR)/installer/datafiles \ - base_container.data linux.data linux_dpkg.data - -endif +# -*- mode: Makefile; -*- +# Copyright (c) Microsoft Corporation + +BASE_DIR := $(subst /build,,$(PWD)) +OMI_ROOT := $(shell cd ../../omi/Unix; pwd -P) +SCXPAL_DIR := $(shell cd ../../pal; pwd -P) + +PF_POSIX := 1 +include $(SCXPAL_DIR)/build/config.mak +include $(BASE_DIR)/build/config.mak +include $(SCXPAL_DIR)/build/Makefile.pal + +ifndef ENABLE_DEBUG +$(error "ENABLE_DEBUG is not set. 
Please re-run configure") +endif + +# Include the version file +include ../../docker.version + +ifndef CONTAINER_BUILDVERSION_STATUS +$(error "Is docker.version missing? Please re-run configure") +endif + +SOURCE_DIR := $(BASE_DIR)/source/code +TEST_DIR := $(BASE_DIR)/test/code + +PROVIDER_DIR := $(SOURCE_DIR)/providers +PROVIDER_TEST_DIR := $(TEST_DIR)/providers +PAL_INCLUDE_DIR := $(SCXPAL_DIR)/source/code/include +PAL_TESTUTILS_DIR := $(SCXPAL_DIR)/test/code/testutils + +INTERMEDIATE_DIR := $(BASE_DIR)/intermediate/$(BUILD_CONFIGURATION) +INTERMEDIATE_TESTFILES := $(INTERMEDIATE_DIR)/testfiles +TARGET_DIR := $(BASE_DIR)/target/$(BUILD_CONFIGURATION) +PROVIDER_LIBRARY := $(INTERMEDIATE_DIR)/libcontainer.so + +INSTALLER_TMPDIR := $(INTERMEDIATE_DIR)/installer_tmp + +# GO Source dir for custom fluent bit plugin +GO_SOURCE_DIR := $(SOURCE_DIR)/go/src/plugins + +# Include files + +INCLUDE_DEFINES := $(INTERMEDIATE_DIR)/defines.h + +# Compiler flags + +OMI_INCLUDE_FLAGS := -I$(OMI_ROOT)/output/include +PROVIDER_INCLUDE_FLAGS := -I$(PAL_INCLUDE_DIR) -I$(INTERMEDIATE_DIR) + +PROVIDER_TEST_INCLUDE_FLAGS := -Wmissing-include-dirs -Wno-non-virtual-dtor -I$(SCXPAL_DIR)/source/code/include -I$(INTERMEDIATE_DIR) -I$(SCXPAL_DIR)/test/ext/include -I$(OMI_ROOT)/output/include -I$(OMI_ROOT) -I$(OMI_ROOT)/common -I$(SCXPAL_DIR)/test/code/include $(PROVIDER_INCLUDE_FLAGS) -I$(PROVIDER_DIR) + +ifeq ($(ENABLE_DEBUG),1) +PROV_DEBUG_FLAGS := -g +endif + +COMPILE_FLAGS := $(PROV_DEBUG_FLAGS) -D_REENTRANT -fstack-protector-all -Wall -fno-nonansi-builtins -Woverloaded-virtual -Wformat -Wformat-security -Wcast-align -Wswitch-enum -Wshadow -Wwrite-strings -Wredundant-decls -Wcast-qual -fPIC +PROVIDER_COMPILE_FLAGS := $(COMPILE_FLAGS) + +LINK_LIBRARIES := -Wl,-rpath=/opt/omi/lib -L$(OMI_ROOT)/output/lib -lmicxx -L$(SCXPAL_TARGET_DIR) -lscxcore -lUtil -lscxassertabort -lrt -luuid +PROVIDER_TEST_LINK_LIBRARIES := -lbase -lpal -L$(SCXPAL_TARGET_DIR) -lscxcore 
$(SCXPAL_DIR)/test/ext/lib/linux/$(ARCH)/cppunit/libcppunit.a -lpthread -lrt -luuid + +SHARED_FLAGS := -shared + +# Support for installbuilder + +STAGING_DIR := $(INTERMEDIATE_DIR)/staging + +ifeq ($(ULINUX),1) + # For consistency, the architecture should be i686 (for x86) and x86_64 (for x64) + DOCKER_ARCH := $(shell echo $(PF_ARCH) | sed -e 's/x86$$/i686/' -e 's/x64$$/x86_64/') + OUTPUT_PACKAGE_PREFIX=docker-cimprov-$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH)-$(CONTAINER_BUILDVERSION_BUILDNR).universal.$(DOCKER_ARCH) +else + PF_DISTRO_LC := $(shell echo $(PF_DISTRO) | tr A-Z a-z) + OUTPUT_PACKAGE_PREFIX=docker-cimprov-$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH)-$(CONTAINER_BUILDVERSION_BUILDNR).$(PF_DISTRO_LC).$(PF_MAJOR).$(PF_ARCH) +endif + +ifeq ("$(wildcard /usr/bin/dpkg-deb)","") + DPKG_LOCATION="--DPKG_LOCATION=$(SCXPAL_DIR)/installer/InstallBuilder/tools/bin/dpkg-deb-$(PF_ARCH)" +else + DPKG_LOCATION= +endif + +# Support for src_to_obj handling + +INCLUDES = $(OMI_INCLUDE_FLAGS) $(PROVIDER_INCLUDE_FLAGS) +CFLAGS = $(COMPILE_FLAGS) +CXXFLAGS = $(COMPILE_FLAGS) + +#-------------------------------------------------------------------------------- +# Build targets + +ifeq ($(ULINUX),1) +all : $(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS $(PROVIDER_LIBRARY) KIT_STATUS kit fluentbitplugin +else +all : $(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS $(PROVIDER_LIBRARY) fluentbitplugin +endif + +clean : + $(RMDIR) $(BASE_DIR)/build/cppunit_result.* $(BASE_DIR)/build/scxtestrunner.log $(BASE_DIR)/installer/intermediate $(BASE_DIR)/intermediate $(BASE_DIR)/target $(PROVIDER_TEST_DIR)/providertestutils.cpp + -find $(BASE_DIR) -name \*~ -exec rm {} \; + -$(RM) $(TEST_DIR)/providers/TestScriptPath.h + +distclean : clean + $(RM) $(BASE_DIR)/build/config.mak + -make -C $(OMI_ROOT) distclean + -make -C $(SCXPAL_DIR)/build distclean + 
-$(RMDIR) $(OMI_ROOT)/output* + -$(RM) $(SCXPAL_DIR)/build/config.mak + -$(RM) $(SCXPAL_DIR)/build/Makefile.config_cache + +PROVIDER_STATUS: + @echo "========================= Performing Building provider" + +KIT_STATUS: + @echo "========================= Performing Building provider tests" + +#-------------------------------------------------------------------------------- +# OMI build +# +# Build the OMI distribution +# +# Technically, we should go to build OMI all the time. But I'd rather not spend +# the time doing it here EVERY TIME, when we never normally change OMI. This is +# a good tradeoff (build if not built, otherwise assume all is well). +# +# Doing a 'make clean' in OMI directory will force us to rebuild. + +$(OMI_ROOT)/output : $(OMI_ROOT)/output/lib/libmicxx.so + +$(OMI_ROOT)/output/lib/libmicxx.so : + @echo "========================= Performing Building OMI" + make -C $(OMI_ROOT) +ifeq ($(PERFORM_OMI_MAKEINSTALL),1) + make -C $(OMI_ROOT) install +endif + +#--------------------------------------------------------------------------------- +# fluentbit go plugin build. This is required to send container logs to ODS endpoint +# +fluentbitplugin : + @echo "========================= Building fluentbit out_oms go plugin for logs" + make -C $(GO_SOURCE_DIR) fbplugin + $(COPY) $(GO_SOURCE_DIR)/out_oms.so $(INTERMEDIATE_DIR) + +#-------------------------------------------------------------------------------- +# PAL build +# +# Build the PAL (Platform Abstraction Layer) +# +# Doing a 'make clean' in PAL directory will force us to rebuild. 
+ +$(SCXPAL_INTERMEDIATE_DIR) : + @echo "========================= Performing Building PAL" + make -C $(SCXPAL_DIR)/build + +#================================================================================ +# File depends.h (compiler dependencies) +#================================================================================ + +$(INCLUDE_DEFINES) : $(BASE_DIR)/build/config.mak + -$(MKPATH) $(@D) + @$(ECHO) "Creating $@" + @$(call pf_fwrite,"/*-------------------------------------------------------------------------------", $@) + @$(call pf_fappend," Copyright (C) 2007-2015 Microsoft Corp. ", $@) + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"*/ ", $@) + @$(call pf_fappend,"/** ", $@) + @$(call pf_fappend," \file ", $@) + @$(call pf_fappend," ", $@) + @$(call pf_fappend," \brief Auto generated file containing build definitions ", $@) + @$(call pf_fappend," ", $@) + @$(call pf_fappend," \author Automated Build System ", $@) + @$(call pf_fappend," ", $@) + @$(call pf_fappend," DO NOT EDIT THIS FILE! ", $@) + @$(call pf_fappend," DO NOT CHECK IN THIS FILE! 
", $@) + @$(call pf_fappend,"*/ ", $@) + @$(call pf_fappend,"/*----------------------------------------------------------------------------*/", $@) + @$(call pf_fappend,"#ifndef DEFINES_H ", $@) + @$(call pf_fappend,"#define DEFINES_H ", $@) + @$(call pf_fappend," ", $@) +ifneq ($(PF_DISTRO),) + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"#ifndef PF_DISTRO_$(PF_DISTRO) ", $@) + @$(call pf_fappend,"#define PF_DISTRO_$(PF_DISTRO) ", $@) + @$(call pf_fappend,"#endif ", $@) +endif +ifneq ($(PF_MAJOR),) + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"#ifndef PF_MAJOR ", $@) + @$(call pf_fappend,"#define PF_MAJOR $(PF_MAJOR) ", $@) + @$(call pf_fappend,"#endif ", $@) +endif +ifneq ($(PF_MINOR),) + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"#ifndef PF_MINOR ", $@) + @$(call pf_fappend,"#define PF_MINOR $(PF_MINOR) ", $@) + @$(call pf_fappend,"#endif ", $@) +endif +ifneq ($(ARCH),) + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"#ifndef $(ARCH) ", $@) + @$(call pf_fappend,"#define $(ARCH) ", $@) + @$(call pf_fappend,"#endif ", $@) +endif +ifeq ($(BUILD_TYPE),Debug) + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"#ifndef _DEBUG ", $@) + @$(call pf_fappend,"#define _DEBUG ", $@) + @$(call pf_fappend,"#endif ", $@) +else + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"#ifndef NDEBUG ", $@) + @$(call pf_fappend,"#define NDEBUG ", $@) + @$(call pf_fappend,"#endif ", $@) +endif + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"#endif /* DEFINES_H */ ", $@) + @$(call pf_fappend,"/*----------------------------E-N-D---O-F---F-I-L-E---------------------------*/", $@) + +#================================================================================ +# Internal functions +#================================================================================ + +# Convert a list of src files with absolute paths under BASE_DIR to corresponding +# object files under intermediate directory +# src_to_obj(list_of_cppfiles) +src_to_obj = $(patsubst $(BASE_DIR)%, 
$(INTERMEDIATE_DIR)%, $(patsubst %.c, %.o, $(patsubst %.cpp, %.o, $(1)))) + +# No default rules, please +.SUFFIX: + +# Rule for compiling cpp files in source tree, ouptut in mirrored intermediate dir +$(INTERMEDIATE_DIR)/%.o : $(BASE_DIR)/%.cpp $(INCLUDE_DEFINES) + $(MKPATH) $(@D) + $(CXX) -c $(CXXFLAGS) $(INCLUDES) -I$( $(TEST_DIR)/providers/TestScriptPath.h + +test : TEST_STATUS $(SCXPAL_INTERMEDIATE_DIR) $(INTERMEDIATE_DIR)/testrunner + @echo "========================= Performing container testrun execution" + $(MKPATH) $(INTERMEDIATE_TESTFILES) + $(COPY) $(TEST_DIR)/scripts/createEnv.sh $(TEST_DIR)/scripts/testrun_wrapper $(INTERMEDIATE_TESTFILES) + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(OMI_ROOT)/output/lib; cd $(INTERMEDIATE_TESTFILES); ./createEnv.sh + cd $(INTERMEDIATE_TESTFILES); ./testrun_wrapper $(INTERMEDIATE_DIR) + +#-------------------------------------------------------------------------------- +# Build the distribution kit +# +# Build the packages via installbuilder +# +# While the "formal build" only builds ULINUX, we may build something else for DEV purposes. +# Assume we ALWAYS build DPKG, but only build RPM if --enable-ulinux is speified in configure. 
+ +kit : CONTAINERLIB_FILENAME = libcontainer.so +kit : $(OMI_ROOT)/output $(PROVIDER_LIBRARY) fluentbitplugin + +ifeq ($(ULINUX),1) + + @echo "========================= Performing Building RPM and DPKG packages" + $(MKPATH) $(INSTALLER_TMPDIR) + sudo $(RMDIR) $(STAGING_DIR) + $(MKPATH) $(INTERMEDIATE_DIR) + python $(SCXPAL_DIR)/installer/InstallBuilder/installbuilder.py \ + --BASE_DIR=$(BASE_DIR) \ + --TARGET_DIR=$(INTERMEDIATE_DIR) \ + --INTERMEDIATE_DIR=$(INSTALLER_TMPDIR) \ + --STAGING_DIR=$(STAGING_DIR) \ + --BUILD_TYPE=$(BUILD_TYPE) \ + --BUILD_CONFIGURATION=$(BUILD_CONFIGURATION) \ + --PFARCH=$(PF_ARCH) \ + --PFDISTRO=$(PF_DISTRO) \ + --PFMAJOR=$(PF_MAJOR) \ + --PFMINOR=$(PF_MINOR) \ + --VERSION=$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH) \ + --RELEASE=$(CONTAINER_BUILDVERSION_BUILDNR) \ + --CONTAINER_BUILD_LIBRARY=$(CONTAINERLIB_FILENAME) \ + --OUTPUTFILE=$(OUTPUT_PACKAGE_PREFIX) \ + --DATAFILE_PATH=$(BASE_DIR)/installer/datafiles \ + base_container.data linux.data linux_rpm.data + + sudo $(RMDIR) $(STAGING_DIR) + $(MKPATH) $(INTERMEDIATE_DIR) + python $(SCXPAL_DIR)/installer/InstallBuilder/installbuilder.py \ + --BASE_DIR=$(BASE_DIR) \ + --TARGET_DIR=$(INTERMEDIATE_DIR) \ + --INTERMEDIATE_DIR=$(INSTALLER_TMPDIR) \ + --STAGING_DIR=$(STAGING_DIR) \ + --BUILD_TYPE=$(BUILD_TYPE) \ + --BUILD_CONFIGURATION=$(BUILD_CONFIGURATION) \ + --PFARCH=$(PF_ARCH) \ + --PFDISTRO=$(PF_DISTRO) \ + --PFMAJOR=$(PF_MAJOR) \ + --PFMINOR=$(PF_MINOR) \ + --VERSION=$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH) \ + --RELEASE=$(CONTAINER_BUILDVERSION_BUILDNR) \ + --CONTAINER_BUILD_LIBRARY=$(CONTAINERLIB_FILENAME) \ + $(DPKG_LOCATION) \ + --OUTPUTFILE=$(OUTPUT_PACKAGE_PREFIX) \ + --DATAFILE_PATH=$(BASE_DIR)/installer/datafiles \ + base_container.data linux.data linux_dpkg.data + + # Strip the package extension from the package filename + sed -re 's/.rpm$$|.deb$$//' 
$(INTERMEDIATE_DIR)/package_filename > $(INTERMEDIATE_DIR)/package_file.tmp; mv $(INTERMEDIATE_DIR)/package_file.tmp $(INTERMEDIATE_DIR)/package_filename + + # Build the tar file containing both .rpm and .deb packages + cd $(INTERMEDIATE_DIR); tar cvf $(OUTPUT_PACKAGE_PREFIX).tar $(OUTPUT_PACKAGE_PREFIX).rpm $(OUTPUT_PACKAGE_PREFIX).deb + + ../installer/bundle/create_bundle.sh $(PF)_$(PF_DISTRO) $(INTERMEDIATE_DIR) $(OUTPUT_PACKAGE_PREFIX) + # Copy the shell bundle to the target directory + $(MKPATH) $(TARGET_DIR) + cd $(INTERMEDIATE_DIR); $(COPY) `cat $(INTERMEDIATE_DIR)/package_filename`.sh $(TARGET_DIR) + +else + + @echo "========================= Performing Building RPM and DPKG packages" + sudo $(RMDIR) $(STAGING_DIR) + $(MKPATH) $(INTERMEDIATE_DIR) + python $(SCXPAL_DIR)/installer/InstallBuilder/installbuilder.py \ + --BASE_DIR=$(BASE_DIR) \ + --TARGET_DIR=$(INTERMEDIATE_DIR) \ + --INTERMEDIATE_DIR=$(INSTALLER_TMPDIR) \ + --STAGING_DIR=$(STAGING_DIR) \ + --BUILD_TYPE=$(BUILD_TYPE) \ + --BUILD_CONFIGURATION=$(BUILD_CONFIGURATION) \ + --PFARCH=$(PF_ARCH) \ + --PFDISTRO=$(PF_DISTRO) \ + --PFMAJOR=$(PF_MAJOR) \ + --PFMINOR=$(PF_MINOR) \ + --VERSION=$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH) \ + --RELEASE=$(CONTAINER_BUILDVERSION_BUILDNR) \ + --CONTAINER_BUILD_LIBRARY=$(CONTAINERLIB_FILENAME) \ + $(DPKG_LOCATION) \ + --OUTPUTFILE=$(OUTPUT_PACKAGE_PREFIX) \ + --DATAFILE_PATH=$(BASE_DIR)/installer/datafiles \ + base_container.data linux.data linux_dpkg.data + +endif diff --git a/installer/conf/out_oms.conf b/installer/conf/out_oms.conf new file mode 100644 index 000000000..d4b797757 --- /dev/null +++ b/installer/conf/out_oms.conf @@ -0,0 +1,6 @@ +omsadmin_conf_path=/etc/opt/microsoft/omsagent/conf/omsadmin.conf +cert_file_path=/etc/opt/microsoft/omsagent/certs/oms.crt +key_file_path=/etc/opt/microsoft/omsagent/certs/oms.key +container_host_file_path=/var/opt/microsoft/docker-cimprov/state/containerhostname 
+container_inventory_refresh_interval=60 +kube_system_containers_refresh_interval=300 diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf new file mode 100644 index 000000000..cf490c077 --- /dev/null +++ b/installer/conf/td-agent-bit.conf @@ -0,0 +1,35 @@ +[SERVICE] + Flush 5 + Log_Level info + Parsers_File /etc/td-agent-bit/parsers.conf + Log_File /var/log/fluent-bit.log + +[INPUT] + Name tail + Tag oms.container.log.* + Path /var/log/containers/*.log + DB /var/log/fblogs.db + Parser docker + Mem_Buf_Limit 30m + Path_Key filepath + +[FILTER] + Name record_modifier + Match oms.container.log.* + Whitelist_key log + Whitelist_key stream + Whitelist_key time + Whitelist_key filepath + +[FILTER] + Name modify + Match oms.container.log.* + Rename log LogEntry + Rename stream LogEntrySource + Rename time LogEntryTimeStamp + Rename filepath Filepath + Add_if_not_present SourceSystem Containers + +[OUTPUT] + Name oms + Match oms.container.log.* \ No newline at end of file diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index ec0728c01..85a128b2a 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -37,7 +37,9 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/in_kube_services.rb; source/code/plugin/in_kube_services.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_nodes.rb; source/code/plugin/in_kube_nodes.rb; 644; root; root - +/opt/td-agent-bit/bin/out_oms.so; intermediate/${{BUILD_CONFIGURATION}}/out_oms.so; 755; root; root +/etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root +/etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root @@ -76,6 +78,9 @@ MAINTAINER: 'Microsoft Corporation' 
/var/opt/microsoft/docker-cimprov/state/ImageInventory; 755; root; root /var/opt/microsoft/docker-cimprov/log; 755; root; root +/opt/td-agent-bit; 755; root; root;sysdir +/opt/td-agent-bit/bin; 755; root; root;sysdir + %Dependencies %Postinstall_10 diff --git a/source/code/go/src/plugins/Makefile b/source/code/go/src/plugins/Makefile new file mode 100644 index 000000000..dfdc65d81 --- /dev/null +++ b/source/code/go/src/plugins/Makefile @@ -0,0 +1,20 @@ +GITVERSION := 0.1 +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Linux) + BUILDDATE := $(shell date --rfc-3339=seconds) +endif +ifeq ($(UNAME_S),Darwin) + BUILDDATE := $(shell gdate --rfc-3339=seconds) +endif + +fbplugin: + go build -ldflags "-X 'main.revision=$(GITVERSION)' -X 'main.builddate=$(BUILDDATE)'" -buildmode=c-shared -o out_oms.so . + +test: + go test -cover -race -coverprofile=coverage.txt -covermode=atomic + +glide: + glide install + +clean: + rm -rf *.so *.h *~ diff --git a/source/code/go/src/plugins/glide.lock b/source/code/go/src/plugins/glide.lock new file mode 100644 index 000000000..79745820b --- /dev/null +++ b/source/code/go/src/plugins/glide.lock @@ -0,0 +1,209 @@ +hash: a4b073d827b5cbb4a772dada9ff3bcf55c55afc3cda83ddec1e6edcdca8e219a +updated: 2018-09-06T04:07:01.808678175Z +imports: +- name: github.com/fluent/fluent-bit-go + version: c4a158a6e3a793166c6ecfa2d5c80d71eada8959 + subpackages: + - output +- name: github.com/ghodss/yaml + version: 73d445a93680fa1a78ae23a5839bad48f32ba1ee +- name: github.com/gogo/protobuf + version: c0656edd0d9eab7c66d1eb0c568f9039345796f7 + subpackages: + - proto + - sortkeys +- name: github.com/golang/glog + version: 44145f04b68cf362d9c4df2182967c2275eaefed +- name: github.com/golang/protobuf + version: b4deda0973fb4c70b50d226b1af49f3da59f5265 + subpackages: + - proto + - ptypes + - ptypes/any + - ptypes/duration + - ptypes/timestamp +- name: github.com/google/btree + version: 7d79101e329e5a3adf994758c578dab82b90c017 +- name: github.com/google/gofuzz + version: 
44d81051d367757e1c7c6a5a86423ece9afcf63c +- name: github.com/googleapis/gnostic + version: 0c5108395e2debce0d731cf0287ddf7242066aba + subpackages: + - OpenAPIv2 + - compiler + - extensions +- name: github.com/gregjones/httpcache + version: 787624de3eb7bd915c329cba748687a3b22666a6 + subpackages: + - diskcache +- name: github.com/json-iterator/go + version: f2b4162afba35581b6d4a50d3b8f34e33c144682 +- name: github.com/mitchellh/mapstructure + version: fa473d140ef3c6adf42d6b391fe76707f1f243c8 +- name: github.com/modern-go/concurrent + version: bacd9c7ef1dd9b15be4a9909b8ac7a4e313eec94 +- name: github.com/modern-go/reflect2 + version: 05fbef0ca5da472bbf96c9322b84a53edc03c9fd +- name: github.com/peterbourgon/diskv + version: 5f041e8faa004a95c88a202771f4cc3e991971e6 +- name: github.com/ugorji/go + version: 00b869d2f4a5e27445c2d916fa106fc72c106d4c + subpackages: + - codec +- name: golang.org/x/crypto + version: 49796115aa4b964c318aad4f3084fdb41e9aa067 + subpackages: + - ssh/terminal +- name: golang.org/x/net + version: 1c05540f6879653db88113bc4a2b70aec4bd491f + subpackages: + - context + - html + - html/atom + - http2 + - http2/hpack + - idna + - lex/httplex + - websocket +- name: golang.org/x/sys + version: 95c6576299259db960f6c5b9b69ea52422860fce + subpackages: + - unix + - windows +- name: golang.org/x/text + version: b19bf474d317b857955b12035d2c5acb57ce8b01 + subpackages: + - secure/bidirule + - transform + - unicode/bidi + - unicode/norm +- name: golang.org/x/time + version: f51c12702a4d776e4c1fa9b0fabab841babae631 + subpackages: + - rate +- name: gopkg.in/inf.v0 + version: 3887ee99ecf07df5b447e9b00d9c0b2adaa9f3e4 +- name: gopkg.in/yaml.v2 + version: 670d4cfef0544295bc27a114dbac37980d83185a +- name: k8s.io/api + version: 072894a440bdee3a891dea811fe42902311cd2a3 + subpackages: + - admissionregistration/v1alpha1 + - admissionregistration/v1beta1 + - apps/v1 + - apps/v1beta1 + - apps/v1beta2 + - authentication/v1 + - authentication/v1beta1 + - authorization/v1 + - 
authorization/v1beta1 + - autoscaling/v1 + - autoscaling/v2beta1 + - batch/v1 + - batch/v1beta1 + - batch/v2alpha1 + - certificates/v1beta1 + - core/v1 + - events/v1beta1 + - extensions/v1beta1 + - imagepolicy/v1alpha1 + - networking/v1 + - policy/v1beta1 + - rbac/v1 + - rbac/v1alpha1 + - rbac/v1beta1 + - scheduling/v1alpha1 + - scheduling/v1beta1 + - settings/v1alpha1 + - storage/v1 + - storage/v1alpha1 + - storage/v1beta1 +- name: k8s.io/apimachinery + version: 103fd098999dc9c0c88536f5c9ad2e5da39373ae + subpackages: + - pkg/api/errors + - pkg/api/meta + - pkg/api/resource + - pkg/apis/meta/v1 + - pkg/apis/meta/v1/unstructured + - pkg/apis/meta/v1beta1 + - pkg/conversion + - pkg/conversion/queryparams + - pkg/fields + - pkg/labels + - pkg/runtime + - pkg/runtime/schema + - pkg/runtime/serializer + - pkg/runtime/serializer/json + - pkg/runtime/serializer/protobuf + - pkg/runtime/serializer/recognizer + - pkg/runtime/serializer/streaming + - pkg/runtime/serializer/versioning + - pkg/selection + - pkg/types + - pkg/util/clock + - pkg/util/errors + - pkg/util/framer + - pkg/util/intstr + - pkg/util/json + - pkg/util/net + - pkg/util/runtime + - pkg/util/sets + - pkg/util/validation + - pkg/util/validation/field + - pkg/util/wait + - pkg/util/yaml + - pkg/version + - pkg/watch + - third_party/forked/golang/reflect +- name: k8s.io/client-go + version: 7d04d0e2a0a1a4d4a1cd6baa432a2301492e4e65 + subpackages: + - discovery + - kubernetes + - kubernetes/scheme + - kubernetes/typed/admissionregistration/v1alpha1 + - kubernetes/typed/admissionregistration/v1beta1 + - kubernetes/typed/apps/v1 + - kubernetes/typed/apps/v1beta1 + - kubernetes/typed/apps/v1beta2 + - kubernetes/typed/authentication/v1 + - kubernetes/typed/authentication/v1beta1 + - kubernetes/typed/authorization/v1 + - kubernetes/typed/authorization/v1beta1 + - kubernetes/typed/autoscaling/v1 + - kubernetes/typed/autoscaling/v2beta1 + - kubernetes/typed/batch/v1 + - kubernetes/typed/batch/v1beta1 + - 
kubernetes/typed/batch/v2alpha1 + - kubernetes/typed/certificates/v1beta1 + - kubernetes/typed/core/v1 + - kubernetes/typed/events/v1beta1 + - kubernetes/typed/extensions/v1beta1 + - kubernetes/typed/networking/v1 + - kubernetes/typed/policy/v1beta1 + - kubernetes/typed/rbac/v1 + - kubernetes/typed/rbac/v1alpha1 + - kubernetes/typed/rbac/v1beta1 + - kubernetes/typed/scheduling/v1alpha1 + - kubernetes/typed/scheduling/v1beta1 + - kubernetes/typed/settings/v1alpha1 + - kubernetes/typed/storage/v1 + - kubernetes/typed/storage/v1alpha1 + - kubernetes/typed/storage/v1beta1 + - pkg/apis/clientauthentication + - pkg/apis/clientauthentication/v1alpha1 + - pkg/apis/clientauthentication/v1beta1 + - pkg/version + - plugin/pkg/client/auth/exec + - rest + - rest/watch + - tools/clientcmd/api + - tools/metrics + - tools/reference + - transport + - util/cert + - util/connrotation + - util/flowcontrol + - util/integer +testImports: [] diff --git a/source/code/go/src/plugins/glide.yaml b/source/code/go/src/plugins/glide.yaml new file mode 100644 index 000000000..b986ece21 --- /dev/null +++ b/source/code/go/src/plugins/glide.yaml @@ -0,0 +1,15 @@ +package: plugins +import: +- package: github.com/fluent/fluent-bit-go + subpackages: + - output +- package: github.com/mitchellh/mapstructure + version: ^1.0.0 +- package: k8s.io/apimachinery + subpackages: + - pkg/apis/meta/v1 +- package: k8s.io/client-go + version: ^8.0.0 + subpackages: + - kubernetes + - rest diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go new file mode 100644 index 000000000..49472c74b --- /dev/null +++ b/source/code/go/src/plugins/oms.go @@ -0,0 +1,359 @@ +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "io/ioutil" + "log" + "net/http" + "os" + "strconv" + "strings" + "sync" + "time" + + "github.com/fluent/fluent-bit-go/output" + "github.com/mitchellh/mapstructure" + lumberjack "gopkg.in/natefinch/lumberjack.v2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + 
"k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +// DataType for Container Log +const DataType = "CONTAINER_LOG_BLOB" + +// IPName for Container Log +const IPName = "Containers" +const containerInventoryPath = "/var/opt/microsoft/docker-cimprov/state/ContainerInventory" +const defaultContainerInventoryRefreshInterval = 60 +const defaultKubeSystemContainersRefreshInterval = 300 + +var ( + // PluginConfiguration the plugins configuration + PluginConfiguration map[string]string + // HTTPClient for making POST requests to OMSEndpoint + HTTPClient http.Client + // OMSEndpoint ingestion endpoint + OMSEndpoint string + // Computer (Hostname) when ingesting into ContainerLog table + Computer string +) + +var ( + // ImageIDMap caches the container id to image mapping + ImageIDMap map[string]string + // NameIDMap caches the container it to Name mapping + NameIDMap map[string]string + // IgnoreIDSet set of container Ids of kube-system pods + IgnoreIDSet map[string]bool + + // DataUpdateMutex read and write mutex access to the container id set + DataUpdateMutex = &sync.Mutex{} +) + +var ( + // FLBLogger stream + FLBLogger = createLogger() + + // Log wrapper function + Log = FLBLogger.Printf +) + +// ContainerInventory represents the container info +type ContainerInventory struct { + ElementName string `json:"ElementName"` + CreatedTime string `json:"CreatedTime"` + State string `json:"State"` + ExitCode int `json:"ExitCode"` + StartedTime string `json:"StartedTime"` + FinishedTime string `json:"FinishedTime"` + ImageID string `json:"ImageId"` + Image string `json:"Image"` + Repository string `json:"Repository"` + ImageTag string `json:"ImageTag"` + ComposeGroup string `json:"ComposeGroup"` + ContainerHostname string `json:"ContainerHostname"` + Computer string `json:"Computer"` + Command string `json:"Command"` + EnvironmentVar string `json:"EnvironmentVar"` + Ports string `json:"Ports"` + Links string `json:"Links"` +} + +// DataItem represents the object 
corresponding to the json that is sent by fluentbit tail plugin +type DataItem struct { + LogEntry string `json:"LogEntry"` + LogEntrySource string `json:"LogEntrySource"` + LogEntryTimeStamp string `json:"LogEntryTimeStamp"` + ID string `json:"Id"` + Image string `json:"Image"` + Name string `json:"Name"` + SourceSystem string `json:"SourceSystem"` + Computer string `json:"Computer"` + Filepath string `json:"Filepath"` +} + +// ContainerLogBlob represents the object corresponding to the payload that is sent to the ODS end point +type ContainerLogBlob struct { + DataType string `json:"DataType"` + IPName string `json:"IPName"` + DataItems []DataItem `json:"DataItems"` +} + +func populateMaps() { + + Log("Updating ImageIDMap and NameIDMap") + + _imageIDMap := make(map[string]string) + _nameIDMap := make(map[string]string) + files, err := ioutil.ReadDir(containerInventoryPath) + + if err != nil { + Log("error when reading container inventory %s\n", err.Error()) + } + + for _, file := range files { + fullPath := fmt.Sprintf("%s/%s", containerInventoryPath, file.Name()) + fileContent, err := ioutil.ReadFile(fullPath) + if err != nil { + Log("Error reading file content %s", fullPath) + Log(err.Error()) + } + var containerInventory ContainerInventory + unmarshallErr := json.Unmarshal(fileContent, &containerInventory) + + if unmarshallErr != nil { + Log("Unmarshall error when reading file %s %s \n", fullPath, unmarshallErr.Error()) + } + + _imageIDMap[file.Name()] = containerInventory.Image + _nameIDMap[file.Name()] = containerInventory.ElementName + } + Log("Locking to update image and name maps") + DataUpdateMutex.Lock() + ImageIDMap = _imageIDMap + NameIDMap = _nameIDMap + DataUpdateMutex.Unlock() + Log("Unlocking after updating image and name maps") +} + +func createLogger() *log.Logger { + + var logfile *os.File + path := "/var/opt/microsoft/docker-cimprov/log/fluent-bit-out-oms-runtime.log" + if _, err := os.Stat(path); err == nil { + fmt.Printf("File Exists. 
Opening file in append mode...\n") + logfile, err = os.OpenFile(path, os.O_APPEND|os.O_WRONLY, 0600) + if err != nil { + fmt.Printf(err.Error()) + } + } + + if _, err := os.Stat(path); os.IsNotExist(err) { + fmt.Printf("File Doesnt Exist. Creating file...\n") + logfile, err = os.Create(path) + if err != nil { + fmt.Printf(err.Error()) + } + } + + logger := log.New(logfile, "", 0) + + logger.SetOutput(&lumberjack.Logger{ + Filename: path, + MaxSize: 10, //megabytes + MaxBackups: 3, + MaxAge: 28, //days + Compress: true, // false by default + }) + + logger.SetFlags(log.Ltime | log.Lshortfile | log.LstdFlags) + return logger +} + +func updateContainersData() { + + containerInventoryRefreshInterval, err := strconv.Atoi(PluginConfiguration["container_inventory_refresh_interval"]) + if err != nil { + Log("Error Reading Container Inventory Refresh Interval %s", err.Error()) + containerInventoryRefreshInterval = defaultContainerInventoryRefreshInterval + } + Log("containerInventoryRefreshInterval = %d \n", containerInventoryRefreshInterval) + go initMaps(containerInventoryRefreshInterval) + + kubeSystemContainersRefreshInterval, err := strconv.Atoi(PluginConfiguration["kube_system_containers_refresh_interval"]) + if err != nil { + Log("Error Reading Kube System Container Ids Refresh Interval %s", err.Error()) + kubeSystemContainersRefreshInterval = defaultKubeSystemContainersRefreshInterval + } + Log("kubeSystemContainersRefreshInterval = %d \n", kubeSystemContainersRefreshInterval) + + go updateIgnoreContainerIds(kubeSystemContainersRefreshInterval) +} + +func initMaps(refreshInterval int) { + ImageIDMap = make(map[string]string) + NameIDMap = make(map[string]string) + + populateMaps() + + for range time.Tick(time.Second * time.Duration(refreshInterval)) { + populateMaps() + } +} + +func updateIgnoreContainerIds(refreshInterval int) { + IgnoreIDSet = make(map[string]bool) + + updateKubeSystemContainerIDs() + + for range time.Tick(time.Second * 
time.Duration(refreshInterval)) { + updateKubeSystemContainerIDs() + } +} + +func updateKubeSystemContainerIDs() { + + if strings.Compare(os.Getenv("DISABLE_KUBE_SYSTEM_LOG_COLLECTION"), "true") != 0 { + Log("Kube System Log Collection is ENABLED.") + return + } + + Log("Kube System Log Collection is DISABLED. Collecting containerIds to drop their records") + config, err := rest.InClusterConfig() + if err != nil { + Log("Error getting config %s\n", err.Error()) + } + + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + Log("Error getting clientset %s", err.Error()) + } + + pods, err := clientset.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) + if err != nil { + Log("Error getting pods %s\n", err.Error()) + } + + _ignoreIDSet := make(map[string]bool) + for _, pod := range pods.Items { + for _, status := range pod.Status.ContainerStatuses { + lastSlashIndex := strings.LastIndex(status.ContainerID, "/") + _ignoreIDSet[status.ContainerID[lastSlashIndex+1:len(status.ContainerID)]] = true + } + } + + Log("Locking to update kube-system container IDs") + DataUpdateMutex.Lock() + IgnoreIDSet = _ignoreIDSet + DataUpdateMutex.Unlock() + Log("Unlocking after updating kube-system container IDs") +} + +// PostDataHelper sends data to the OMS endpoint +func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { + + start := time.Now() + var dataItems []DataItem + DataUpdateMutex.Lock() + + for _, record := range tailPluginRecords { + + containerID := getContainerIDFromFilePath(toString(record["Filepath"])) + + if containsKey(IgnoreIDSet, containerID) { + continue + } + + var dataItem DataItem + stringMap := make(map[string]string) + + // convert map[interface{}]interface{} to map[string]string + for key, value := range record { + strKey := fmt.Sprintf("%v", key) + strValue := toString(value) + stringMap[strKey] = strValue + } + + stringMap["Id"] = containerID + stringMap["Image"] = ImageIDMap[containerID] + stringMap["Name"] = 
NameIDMap[containerID] + stringMap["Computer"] = Computer + mapstructure.Decode(stringMap, &dataItem) + dataItems = append(dataItems, dataItem) + } + DataUpdateMutex.Unlock() + + if len(dataItems) > 0 { + logEntry := ContainerLogBlob{ + DataType: DataType, + IPName: IPName, + DataItems: dataItems} + + marshalled, err := json.Marshal(logEntry) + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) + req.Header.Set("Content-Type", "application/json") + + resp, err := HTTPClient.Do(req) + elapsed := time.Since(start) + + if err != nil { + Log("Error when sending request %s \n", err.Error()) + Log("Failed to flush %d records after %s", len(dataItems), elapsed) + return output.FLB_RETRY + } + + if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("Status %s Status Code %d", resp.Status, resp.StatusCode) + } + return output.FLB_RETRY + } + + Log("Successfully flushed %d records in %s", len(dataItems), elapsed) + } + + return output.FLB_OK +} + +func containsKey(currentMap map[string]bool, key string) bool { + _, c := currentMap[key] + return c +} + +func toString(s interface{}) string { + value := s.([]uint8) + return string([]byte(value[:])) +} + +func getContainerIDFromFilePath(filepath string) string { + start := strings.LastIndex(filepath, "-") + end := strings.LastIndex(filepath, ".") + return filepath[start+1 : end] +} + +// ReadConfig reads and populates plugin configuration +func ReadConfig(pluginConfPath string) map[string]string { + + pluginConf, err := ReadConfiguration(pluginConfPath) + omsadminConf, err := ReadConfiguration(pluginConf["omsadmin_conf_path"]) + + if err != nil { + Log(err.Error()) + } + + containerHostName, err := ioutil.ReadFile(pluginConf["container_host_file_path"]) + if err != nil { + Log("Error when reading containerHostName file %s", err.Error()) + } + + Computer = strings.TrimSuffix(toString(containerHostName), "\n") + Log("Computer == %s \n", Computer) + + OMSEndpoint = omsadminConf["OMS_ENDPOINT"] 
+ Log("OMSEndpoint %s", OMSEndpoint) + + return pluginConf +} diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go new file mode 100644 index 000000000..dad0ede81 --- /dev/null +++ b/source/code/go/src/plugins/out_oms.go @@ -0,0 +1,57 @@ +package main + +import ( + "github.com/fluent/fluent-bit-go/output" +) +import ( + "C" + "unsafe" +) + +//export FLBPluginRegister +func FLBPluginRegister(ctx unsafe.Pointer) int { + return output.FLBPluginRegister(ctx, "oms", "Stdout GO!") +} + +//export FLBPluginInit +// (fluentbit will call this) +// ctx (context) pointer to fluentbit context (state/ c code) +func FLBPluginInit(ctx unsafe.Pointer) int { + Log("Initializing out_oms go plugin for fluentbit") + PluginConfiguration = ReadConfig("/etc/opt/microsoft/docker-cimprov/out_oms.conf") + CreateHTTPClient() + updateContainersData() + return output.FLB_OK +} + +//export FLBPluginFlush +func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { + var count int + var ret int + var record map[interface{}]interface{} + var records []map[interface{}]interface{} + + // Create Fluent Bit decoder + dec := output.NewDecoder(data, int(length)) + + // Iterate Records + count = 0 + for { + // Extract Record + ret, _, record = output.GetRecord(dec) + if ret != 0 { + break + } + records = append(records, record) + count++ + } + return PostDataHelper(records) +} + +// FLBPluginExit exits the plugin +func FLBPluginExit() int { + return output.FLB_OK +} + +func main() { +} diff --git a/source/code/go/src/plugins/utils.go b/source/code/go/src/plugins/utils.go new file mode 100644 index 000000000..0e33f43f9 --- /dev/null +++ b/source/code/go/src/plugins/utils.go @@ -0,0 +1,67 @@ +package main + +import ( + "bufio" + "crypto/tls" + "log" + "net/http" + "os" + "strings" +) + +// ReadConfiguration reads a property file +func ReadConfiguration(filename string) (map[string]string, error) { + config := map[string]string{} + + if len(filename) == 0 
{ + return config, nil + } + + file, err := os.Open(filename) + if err != nil { + log.Fatal(err) + return nil, err + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + currentLine := scanner.Text() + if equalIndex := strings.Index(currentLine, "="); equalIndex >= 0 { + if key := strings.TrimSpace(currentLine[:equalIndex]); len(key) > 0 { + value := "" + if len(currentLine) > equalIndex { + value = strings.TrimSpace(currentLine[equalIndex+1:]) + } + config[key] = value + } + } + } + + if err := scanner.Err(); err != nil { + log.Fatal(err) + return nil, err + } + + return config, nil +} + +// CreateHTTPClient used to create the client for sending post requests to OMSEndpoint +func CreateHTTPClient() { + + cert, err := tls.LoadX509KeyPair(PluginConfiguration["cert_file_path"], PluginConfiguration["key_file_path"]) + if err != nil { + Log("Error when loading cert %s", err.Error()) + } + + tlsConfig := &tls.Config{ + Certificates: []tls.Certificate{cert}, + } + + tlsConfig.BuildNameToCertificate() + transport := &http.Transport{TLSClientConfig: tlsConfig} + + HTTPClient = http.Client{Transport: transport} + + Log("Successfully created HTTP Client") +} From b02f2ec57e47c68648596ef7487bf320fa5e9331 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Fri, 14 Sep 2018 11:24:12 -0700 Subject: [PATCH 007/160] Dilipr/glide updates (#127) * Updating glide.* files to include lumberjack --- source/code/go/src/plugins/glide.lock | 6 ++++-- source/code/go/src/plugins/glide.yaml | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/glide.lock b/source/code/go/src/plugins/glide.lock index 79745820b..4597b594a 100644 --- a/source/code/go/src/plugins/glide.lock +++ b/source/code/go/src/plugins/glide.lock @@ -1,5 +1,5 @@ -hash: a4b073d827b5cbb4a772dada9ff3bcf55c55afc3cda83ddec1e6edcdca8e219a -updated: 2018-09-06T04:07:01.808678175Z +hash: bb32415f402ab29751f29b8e394bc974cbc31861453d817aaeb94ef83dacc488 
+updated: 2018-09-14T18:14:28.748047598Z imports: - name: github.com/fluent/fluent-bit-go version: c4a158a6e3a793166c6ecfa2d5c80d71eada8959 @@ -83,6 +83,8 @@ imports: - rate - name: gopkg.in/inf.v0 version: 3887ee99ecf07df5b447e9b00d9c0b2adaa9f3e4 +- name: gopkg.in/natefinch/lumberjack.v2 + version: a96e63847dc3c67d17befa69c303767e2f84e54f - name: gopkg.in/yaml.v2 version: 670d4cfef0544295bc27a114dbac37980d83185a - name: k8s.io/api diff --git a/source/code/go/src/plugins/glide.yaml b/source/code/go/src/plugins/glide.yaml index b986ece21..403e1efc4 100644 --- a/source/code/go/src/plugins/glide.yaml +++ b/source/code/go/src/plugins/glide.yaml @@ -5,6 +5,8 @@ import: - output - package: github.com/mitchellh/mapstructure version: ^1.0.0 +- package: gopkg.in/natefinch/lumberjack.v2 + version: ^2.1.0 - package: k8s.io/apimachinery subpackages: - pkg/apis/meta/v1 From e01c67845cd5d99f77b8dafd3e579d933984c3af Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Mon, 17 Sep 2018 15:42:01 -0700 Subject: [PATCH 008/160] containerID="" for pull issues --- source/code/plugin/in_kube_podinventory.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index f478705f6..2cd1e1bc3 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -143,7 +143,8 @@ def parse_and_emit_records(podInventory, serviceList) if !container['containerID'].nil? record['ContainerID'] = container['containerID'].split("//")[1] else - record['ContainerID'] = "00000000-0000-0000-0000-000000000000" + # for containers that have image issues (like invalid image/tag etc..) this will be empty. 
do not make it all 0 + record['ContainerID'] = "" end #keeping this as which is same as InstanceName in perf table record['ContainerName'] = podUid + "/" +container['name'] From b0ba22deaf43c29058d61f0dd76c2c64c34f5ac4 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 18 Sep 2018 16:59:46 -0700 Subject: [PATCH 009/160] Using KubeAPI for getting image,name. Adding more logs (#129) * Using KubeAPI for getting image,name. Adding more logs * Moving log file and state file to within the omsagent container * Changing log and state paths --- installer/conf/td-agent-bit.conf | 4 +- source/code/go/src/plugins/oms.go | 105 +++++++++++++------------- source/code/go/src/plugins/out_oms.go | 2 +- 3 files changed, 54 insertions(+), 57 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index cf490c077..84a9fcf94 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -2,13 +2,13 @@ Flush 5 Log_Level info Parsers_File /etc/td-agent-bit/parsers.conf - Log_File /var/log/fluent-bit.log + Log_File /var/opt/microsoft/docker-cimprov/log/fluent-bit.log [INPUT] Name tail Tag oms.container.log.* Path /var/log/containers/*.log - DB /var/log/fblogs.db + DB /var/opt/microsoft/docker-cimprov/state/fblogs.db Parser docker Mem_Buf_Limit 30m Path_Key filepath diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 49472c74b..c18135dcc 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -26,7 +26,6 @@ const DataType = "CONTAINER_LOG_BLOB" // IPName for Container Log const IPName = "Containers" -const containerInventoryPath = "/var/opt/microsoft/docker-cimprov/state/ContainerInventory" const defaultContainerInventoryRefreshInterval = 60 const defaultKubeSystemContainersRefreshInterval = 300 @@ -51,6 +50,9 @@ var ( // DataUpdateMutex read and write mutex access to the container id set DataUpdateMutex = &sync.Mutex{} + + // ClientSet for querying 
KubeAPIs + ClientSet *kubernetes.Clientset ) var ( @@ -61,27 +63,6 @@ var ( Log = FLBLogger.Printf ) -// ContainerInventory represents the container info -type ContainerInventory struct { - ElementName string `json:"ElementName"` - CreatedTime string `json:"CreatedTime"` - State string `json:"State"` - ExitCode int `json:"ExitCode"` - StartedTime string `json:"StartedTime"` - FinishedTime string `json:"FinishedTime"` - ImageID string `json:"ImageId"` - Image string `json:"Image"` - Repository string `json:"Repository"` - ImageTag string `json:"ImageTag"` - ComposeGroup string `json:"ComposeGroup"` - ContainerHostname string `json:"ContainerHostname"` - Computer string `json:"Computer"` - Command string `json:"Command"` - EnvironmentVar string `json:"EnvironmentVar"` - Ports string `json:"Ports"` - Links string `json:"Links"` -} - // DataItem represents the object corresponding to the json that is sent by fluentbit tail plugin type DataItem struct { LogEntry string `json:"LogEntry"` @@ -108,29 +89,25 @@ func populateMaps() { _imageIDMap := make(map[string]string) _nameIDMap := make(map[string]string) - files, err := ioutil.ReadDir(containerInventoryPath) + pods, err := ClientSet.CoreV1().Pods("").List(metav1.ListOptions{}) if err != nil { - Log("error when reading container inventory %s\n", err.Error()) + Log("Error getting pods %s\n", err.Error()) } - for _, file := range files { - fullPath := fmt.Sprintf("%s/%s", containerInventoryPath, file.Name()) - fileContent, err := ioutil.ReadFile(fullPath) - if err != nil { - Log("Error reading file content %s", fullPath) - Log(err.Error()) - } - var containerInventory ContainerInventory - unmarshallErr := json.Unmarshal(fileContent, &containerInventory) - - if unmarshallErr != nil { - Log("Unmarshall error when reading file %s %s \n", fullPath, unmarshallErr.Error()) + for _, pod := range pods.Items { + for _, status := range pod.Status.ContainerStatuses { + lastSlashIndex := strings.LastIndex(status.ContainerID, "/") + 
containerID := status.ContainerID[lastSlashIndex+1 : len(status.ContainerID)] + image := status.Image + name := fmt.Sprintf("%s/%s", pod.UID, status.Name) + if containerID != "" { + _imageIDMap[containerID] = image + _nameIDMap[containerID] = name + } } - - _imageIDMap[file.Name()] = containerInventory.Image - _nameIDMap[file.Name()] = containerInventory.ElementName } + Log("Locking to update image and name maps") DataUpdateMutex.Lock() ImageIDMap = _imageIDMap @@ -164,7 +141,7 @@ func createLogger() *log.Logger { logger.SetOutput(&lumberjack.Logger{ Filename: path, MaxSize: 10, //megabytes - MaxBackups: 3, + MaxBackups: 1, MaxAge: 28, //days Compress: true, // false by default }) @@ -222,17 +199,8 @@ func updateKubeSystemContainerIDs() { } Log("Kube System Log Collection is DISABLED. Collecting containerIds to drop their records") - config, err := rest.InClusterConfig() - if err != nil { - Log("Error getting config %s\n", err.Error()) - } - clientset, err := kubernetes.NewForConfig(config) - if err != nil { - Log("Error getting clientset %s", err.Error()) - } - - pods, err := clientset.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) + pods, err := ClientSet.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) if err != nil { Log("Error getting pods %s\n", err.Error()) } @@ -278,8 +246,27 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { } stringMap["Id"] = containerID - stringMap["Image"] = ImageIDMap[containerID] - stringMap["Name"] = NameIDMap[containerID] + + if val, ok := ImageIDMap[containerID]; ok { + stringMap["Image"] = val + } else { + Log("ContainerId %s not present in Map ", containerID) + Log("CurrentMap Snapshot \n") + for k, v := range ImageIDMap { + Log("%s ==> %s", k, v) + } + } + + if val, ok := NameIDMap[containerID]; ok { + stringMap["Name"] = val + } else { + Log("ContainerId %s not present in Map ", containerID) + Log("CurrentMap Snapshot \n") + for k, v := range NameIDMap { + Log("%s ==> %s", k, v) + } + 
} + stringMap["Computer"] = Computer mapstructure.Decode(stringMap, &dataItem) dataItems = append(dataItems, dataItem) @@ -334,8 +321,8 @@ func getContainerIDFromFilePath(filepath string) string { return filepath[start+1 : end] } -// ReadConfig reads and populates plugin configuration -func ReadConfig(pluginConfPath string) map[string]string { +// InitializeConfig reads and populates plugin configuration +func InitializeConfig(pluginConfPath string) map[string]string { pluginConf, err := ReadConfiguration(pluginConfPath) omsadminConf, err := ReadConfiguration(pluginConf["omsadmin_conf_path"]) @@ -355,5 +342,15 @@ func ReadConfig(pluginConfPath string) map[string]string { OMSEndpoint = omsadminConf["OMS_ENDPOINT"] Log("OMSEndpoint %s", OMSEndpoint) + config, err := rest.InClusterConfig() + if err != nil { + Log("Error getting config %s\n", err.Error()) + } + + ClientSet, err = kubernetes.NewForConfig(config) + if err != nil { + Log("Error getting clientset %s", err.Error()) + } + return pluginConf } diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index dad0ede81..8c23f47a8 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -18,7 +18,7 @@ func FLBPluginRegister(ctx unsafe.Pointer) int { // ctx (context) pointer to fluentbit context (state/ c code) func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") - PluginConfiguration = ReadConfig("/etc/opt/microsoft/docker-cimprov/out_oms.conf") + PluginConfiguration = InitializeConfig("/etc/opt/microsoft/docker-cimprov/out_oms.conf") CreateHTTPClient() updateContainersData() return output.FLB_OK From 97834199721172ba0a67828b19a6f26de1a4b0a0 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Thu, 27 Sep 2018 14:35:29 -0700 Subject: [PATCH 010/160] Dilipr/mark comments (#130) * Marks Comments + Error Handling * Drop records from files that are not in k8s format * Remove unnecessary log line' * 
Adding Log to the file that doesn't conform to the expected format --- source/code/go/src/plugins/oms.go | 227 ++++++++++++++------------ source/code/go/src/plugins/out_oms.go | 6 +- source/code/go/src/plugins/utils.go | 1 + 3 files changed, 123 insertions(+), 111 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index c18135dcc..2e9e2f3d0 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -12,7 +12,8 @@ import ( "strings" "sync" "time" - +) +import ( "github.com/fluent/fluent-bit-go/output" "github.com/mitchellh/mapstructure" lumberjack "gopkg.in/natefinch/lumberjack.v2" @@ -24,6 +25,9 @@ import ( // DataType for Container Log const DataType = "CONTAINER_LOG_BLOB" +// ContainerLogPluginConfFilePath --> config file path for container log plugin +const ContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" + // IPName for Container Log const IPName = "Containers" const defaultContainerInventoryRefreshInterval = 60 @@ -47,18 +51,22 @@ var ( NameIDMap map[string]string // IgnoreIDSet set of container Ids of kube-system pods IgnoreIDSet map[string]bool - // DataUpdateMutex read and write mutex access to the container id set DataUpdateMutex = &sync.Mutex{} - // ClientSet for querying KubeAPIs ClientSet *kubernetes.Clientset ) +var ( + // KubeSystemContainersRefreshTicker updates the kube-system containers + KubeSystemContainersRefreshTicker = time.NewTicker(time.Second * 300) + // ContainerImageNameRefreshTicker updates the container image and names periodically + ContainerImageNameRefreshTicker = time.NewTicker(time.Second * 60) +) + var ( // FLBLogger stream FLBLogger = createLogger() - // Log wrapper function Log = FLBLogger.Printf ) @@ -83,41 +91,7 @@ type ContainerLogBlob struct { DataItems []DataItem `json:"DataItems"` } -func populateMaps() { - - Log("Updating ImageIDMap and NameIDMap") - - _imageIDMap := make(map[string]string) - _nameIDMap := 
make(map[string]string) - - pods, err := ClientSet.CoreV1().Pods("").List(metav1.ListOptions{}) - if err != nil { - Log("Error getting pods %s\n", err.Error()) - } - - for _, pod := range pods.Items { - for _, status := range pod.Status.ContainerStatuses { - lastSlashIndex := strings.LastIndex(status.ContainerID, "/") - containerID := status.ContainerID[lastSlashIndex+1 : len(status.ContainerID)] - image := status.Image - name := fmt.Sprintf("%s/%s", pod.UID, status.Name) - if containerID != "" { - _imageIDMap[containerID] = image - _nameIDMap[containerID] = name - } - } - } - - Log("Locking to update image and name maps") - DataUpdateMutex.Lock() - ImageIDMap = _imageIDMap - NameIDMap = _nameIDMap - DataUpdateMutex.Unlock() - Log("Unlocking after updating image and name maps") -} - func createLogger() *log.Logger { - var logfile *os.File path := "/var/opt/microsoft/docker-cimprov/log/fluent-bit-out-oms-runtime.log" if _, err := os.Stat(path); err == nil { @@ -150,88 +124,85 @@ func createLogger() *log.Logger { return logger } -func updateContainersData() { +func updateContainerImageNameMaps() { + for ; true; <-ContainerImageNameRefreshTicker.C { + Log("Updating ImageIDMap and NameIDMap") - containerInventoryRefreshInterval, err := strconv.Atoi(PluginConfiguration["container_inventory_refresh_interval"]) - if err != nil { - Log("Error Reading Container Inventory Refresh Interval %s", err.Error()) - containerInventoryRefreshInterval = defaultContainerInventoryRefreshInterval - } - Log("containerInventoryRefreshInterval = %d \n", containerInventoryRefreshInterval) - go initMaps(containerInventoryRefreshInterval) + _imageIDMap := make(map[string]string) + _nameIDMap := make(map[string]string) - kubeSystemContainersRefreshInterval, err := strconv.Atoi(PluginConfiguration["kube_system_containers_refresh_interval"]) - if err != nil { - Log("Error Reading Kube System Container Ids Refresh Interval %s", err.Error()) - kubeSystemContainersRefreshInterval = 
defaultKubeSystemContainersRefreshInterval - } - Log("kubeSystemContainersRefreshInterval = %d \n", kubeSystemContainersRefreshInterval) - - go updateIgnoreContainerIds(kubeSystemContainersRefreshInterval) -} - -func initMaps(refreshInterval int) { - ImageIDMap = make(map[string]string) - NameIDMap = make(map[string]string) - - populateMaps() - - for range time.Tick(time.Second * time.Duration(refreshInterval)) { - populateMaps() - } -} - -func updateIgnoreContainerIds(refreshInterval int) { - IgnoreIDSet = make(map[string]bool) + pods, err := ClientSet.CoreV1().Pods("").List(metav1.ListOptions{}) + if err != nil { + Log("Error getting pods %s\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + } - updateKubeSystemContainerIDs() + for _, pod := range pods.Items { + for _, status := range pod.Status.ContainerStatuses { + lastSlashIndex := strings.LastIndex(status.ContainerID, "/") + containerID := status.ContainerID[lastSlashIndex+1 : len(status.ContainerID)] + image := status.Image + name := fmt.Sprintf("%s/%s", pod.UID, status.Name) + if containerID != "" { + _imageIDMap[containerID] = image + _nameIDMap[containerID] = name + } + } + } - for range time.Tick(time.Second * time.Duration(refreshInterval)) { - updateKubeSystemContainerIDs() + Log("Locking to update image and name maps") + DataUpdateMutex.Lock() + ImageIDMap = _imageIDMap + NameIDMap = _nameIDMap + DataUpdateMutex.Unlock() + Log("Unlocking after updating image and name maps") } } func updateKubeSystemContainerIDs() { + for ; true; <-KubeSystemContainersRefreshTicker.C { + if strings.Compare(os.Getenv("DISABLE_KUBE_SYSTEM_LOG_COLLECTION"), "true") != 0 { + Log("Kube System Log Collection is ENABLED.") + return + } - if strings.Compare(os.Getenv("DISABLE_KUBE_SYSTEM_LOG_COLLECTION"), "true") != 0 { - Log("Kube System Log Collection is ENABLED.") - return - } - - Log("Kube System Log Collection is DISABLED. 
Collecting containerIds to drop their records") + Log("Kube System Log Collection is DISABLED. Collecting containerIds to drop their records") - pods, err := ClientSet.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) - if err != nil { - Log("Error getting pods %s\n", err.Error()) - } + pods, err := ClientSet.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) + if err != nil { + Log("Error getting pods %s\nIt is ok to log here and continue. Kube-system logs will be collected", err.Error()) + } - _ignoreIDSet := make(map[string]bool) - for _, pod := range pods.Items { - for _, status := range pod.Status.ContainerStatuses { - lastSlashIndex := strings.LastIndex(status.ContainerID, "/") - _ignoreIDSet[status.ContainerID[lastSlashIndex+1:len(status.ContainerID)]] = true + _ignoreIDSet := make(map[string]bool) + for _, pod := range pods.Items { + for _, status := range pod.Status.ContainerStatuses { + lastSlashIndex := strings.LastIndex(status.ContainerID, "/") + _ignoreIDSet[status.ContainerID[lastSlashIndex+1:len(status.ContainerID)]] = true + } } - } - Log("Locking to update kube-system container IDs") - DataUpdateMutex.Lock() - IgnoreIDSet = _ignoreIDSet - DataUpdateMutex.Unlock() - Log("Unlocking after updating kube-system container IDs") + Log("Locking to update kube-system container IDs") + DataUpdateMutex.Lock() + IgnoreIDSet = _ignoreIDSet + DataUpdateMutex.Unlock() + Log("Unlocking after updating kube-system container IDs") + } } // PostDataHelper sends data to the OMS endpoint func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { + defer DataUpdateMutex.Unlock() + start := time.Now() var dataItems []DataItem DataUpdateMutex.Lock() for _, record := range tailPluginRecords { - containerID := getContainerIDFromFilePath(toString(record["Filepath"])) + filepath := toString(record["Filepath"]) + containerID := getContainerIDFromFilePath(filepath) - if containsKey(IgnoreIDSet, containerID) { + if containerID == "" || 
containsKey(IgnoreIDSet, containerID) { continue } @@ -271,7 +242,6 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { mapstructure.Decode(stringMap, &dataItem) dataItems = append(dataItems, dataItem) } - DataUpdateMutex.Unlock() if len(dataItems) > 0 { logEntry := ContainerLogBlob{ @@ -318,39 +288,80 @@ func toString(s interface{}) string { func getContainerIDFromFilePath(filepath string) string { start := strings.LastIndex(filepath, "-") end := strings.LastIndex(filepath, ".") + if start >= end || start == -1 || end == -1 { + // This means the file is not a managed Kubernetes docker log file. + // Drop all records from the file + Log("File %s is not a Kubernetes managed docker log file. Dropping all records from the file", filepath) + return "" + } return filepath[start+1 : end] } -// InitializeConfig reads and populates plugin configuration -func InitializeConfig(pluginConfPath string) map[string]string { +// InitializePlugin reads and populates plugin configuration +func InitializePlugin(pluginConfPath string) { + + IgnoreIDSet = make(map[string]bool) + ImageIDMap = make(map[string]string) + NameIDMap = make(map[string]string) - pluginConf, err := ReadConfiguration(pluginConfPath) - omsadminConf, err := ReadConfiguration(pluginConf["omsadmin_conf_path"]) + pluginConfig, err := ReadConfiguration(pluginConfPath) + if err != nil { + Log("Error Reading plugin config path : %s \n", err.Error()) + log.Fatalf("Error Reading plugin config path : %s \n", err.Error()) + } + omsadminConf, err := ReadConfiguration(pluginConfig["omsadmin_conf_path"]) if err != nil { Log(err.Error()) + log.Fatalf("Error Reading omsadmin configuration %s\n", err.Error()) } + OMSEndpoint = omsadminConf["OMS_ENDPOINT"] + Log("OMSEndpoint %s", OMSEndpoint) - containerHostName, err := ioutil.ReadFile(pluginConf["container_host_file_path"]) + // Initialize image,name map refresh ticker + containerInventoryRefreshInterval, err := 
strconv.Atoi(pluginConfig["container_inventory_refresh_interval"]) if err != nil { - Log("Error when reading containerHostName file %s", err.Error()) + Log("Error Reading Container Inventory Refresh Interval %s", err.Error()) + Log("Using Default Refresh Interval of %d s\n", defaultContainerInventoryRefreshInterval) + containerInventoryRefreshInterval = defaultContainerInventoryRefreshInterval } + Log("containerInventoryRefreshInterval = %d \n", containerInventoryRefreshInterval) + ContainerImageNameRefreshTicker = time.NewTicker(time.Second * time.Duration(containerInventoryRefreshInterval)) + // Initialize Kube System Refresh Ticker + kubeSystemContainersRefreshInterval, err := strconv.Atoi(pluginConfig["kube_system_containers_refresh_interval"]) + if err != nil { + Log("Error Reading Kube System Container Ids Refresh Interval %s", err.Error()) + Log("Using Default Refresh Interval of %d s\n", defaultKubeSystemContainersRefreshInterval) + kubeSystemContainersRefreshInterval = defaultKubeSystemContainersRefreshInterval + } + Log("kubeSystemContainersRefreshInterval = %d \n", kubeSystemContainersRefreshInterval) + KubeSystemContainersRefreshTicker = time.NewTicker(time.Second * time.Duration(kubeSystemContainersRefreshInterval)) + + // Populate Computer field + containerHostName, err := ioutil.ReadFile(pluginConfig["container_host_file_path"]) + if err != nil { + // It is ok to log here and continue, because only the Computer column will be missing, + // which can be deduced from a combination of containerId, and docker logs on the node + Log("Error when reading containerHostName file %s.\n It is ok to log here and continue, because only the Computer column will be missing, which can be deduced from a combination of containerId, and docker logs on the nodes\n", err.Error()) + } Computer = strings.TrimSuffix(toString(containerHostName), "\n") Log("Computer == %s \n", Computer) - OMSEndpoint = omsadminConf["OMS_ENDPOINT"] - Log("OMSEndpoint %s", OMSEndpoint) - + // 
Initialize KubeAPI Client config, err := rest.InClusterConfig() if err != nil { - Log("Error getting config %s\n", err.Error()) + Log("Error getting config %s.\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) } ClientSet, err = kubernetes.NewForConfig(config) if err != nil { - Log("Error getting clientset %s", err.Error()) + Log("Error getting clientset %s.\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) } - return pluginConf + PluginConfiguration = pluginConfig + + CreateHTTPClient() + go updateKubeSystemContainerIDs() + go updateContainerImageNameMaps() } diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 8c23f47a8..ec9a573d1 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -18,9 +18,7 @@ func FLBPluginRegister(ctx unsafe.Pointer) int { // ctx (context) pointer to fluentbit context (state/ c code) func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") - PluginConfiguration = InitializeConfig("/etc/opt/microsoft/docker-cimprov/out_oms.conf") - CreateHTTPClient() - updateContainersData() + InitializePlugin(ContainerLogPluginConfFilePath) return output.FLB_OK } @@ -50,6 +48,8 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { // FLBPluginExit exits the plugin func FLBPluginExit() int { + KubeSystemContainersRefreshTicker.Stop() + ContainerImageNameRefreshTicker.Stop() return output.FLB_OK } diff --git a/source/code/go/src/plugins/utils.go b/source/code/go/src/plugins/utils.go index 0e33f43f9..1ac9b05a9 100644 --- a/source/code/go/src/plugins/utils.go +++ b/source/code/go/src/plugins/utils.go @@ -52,6 +52,7 @@ func CreateHTTPClient() { cert, err := tls.LoadX509KeyPair(PluginConfiguration["cert_file_path"], 
PluginConfiguration["key_file_path"]) if err != nil { Log("Error when loading cert %s", err.Error()) + log.Fatalf("Error when loading cert %s", err.Error()) } tlsConfig := &tls.Config{ From 8e35b7365bab9de6d087718887d5021167617a0d Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Thu, 27 Sep 2018 15:52:13 -0700 Subject: [PATCH 011/160] Rashmi/segfault latest (#132) * adding null checks in all providers * fixing type * fixing type * adding more null checks * update cjson --- source/code/cjson/cJSON.c | 3478 +++++++++++++---- source/code/cjson/cJSON.h | 398 +- ...iner_ContainerInventory_Class_Provider.cpp | 34 +- ...ner_ContainerStatistics_Class_Provider.cpp | 39 +- .../Container_DaemonEvent_Class_Provider.cpp | 6 +- ...ontainer_ImageInventory_Class_Provider.cpp | 19 +- .../Container_Process_Class_Provider.cpp | 2 +- 7 files changed, 3146 insertions(+), 830 deletions(-) diff --git a/source/code/cjson/cJSON.c b/source/code/cjson/cJSON.c index 77dbfe959..c561c7ceb 100755 --- a/source/code/cjson/cJSON.c +++ b/source/code/cjson/cJSON.c @@ -1,770 +1,2930 @@ /* - Copyright (c) 2009 Dave Gamble - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. +Copyright (c) 2009-2017 Dave Gamble and cJSON contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. */ /* cJSON */ /* JSON parser in C. 
*/ +/* disable warnings about old C89 functions in MSVC */ +#if !defined(_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) +#define _CRT_SECURE_NO_DEPRECATE +#endif + +#ifdef __GNUC__ +#pragma GCC visibility push(default) +#endif +#if defined(_MSC_VER) +#pragma warning (push) +/* disable warning about single line comments in system headers */ +#pragma warning (disable : 4001) +#endif + #include #include #include #include -#include #include #include + +#ifdef ENABLE_LOCALES +#include +#endif + +#if defined(_MSC_VER) +#pragma warning (pop) +#endif +#ifdef __GNUC__ +#pragma GCC visibility pop +#endif + #include "cJSON.h" -static const char *ep; -const char *cJSON_GetErrorPtr(void) {return ep;} +/* define our own boolean type */ +#define true ((cJSON_bool)1) +#define false ((cJSON_bool)0) -static int cJSON_strcasecmp(const char *s1,const char *s2) +typedef struct { + const unsigned char *json; + size_t position; +} error; +static error global_error = { NULL, 0 }; + +CJSON_PUBLIC(const char *) cJSON_GetErrorPtr(void) { - if (!s1) return (s1==s2)?0:1;if (!s2) return 1; - for(; tolower(*s1) == tolower(*s2); ++s1, ++s2) if(*s1 == 0) return 0; - return tolower(*(const unsigned char *)s1) - tolower(*(const unsigned char *)s2); + return (const char*)(global_error.json + global_error.position); } -static void *(*cJSON_malloc)(size_t sz) = malloc; -static void (*cJSON_free)(void *ptr) = free; +CJSON_PUBLIC(char *) cJSON_GetStringValue(cJSON *item) { + if (!cJSON_IsString(item)) { + return NULL; + } + + return item->valuestring; +} -static char* cJSON_strdup(const char* str) +/* This is a safeguard to prevent copy-pasters from using incompatible C and header files */ +#if (CJSON_VERSION_MAJOR != 1) || (CJSON_VERSION_MINOR != 7) || (CJSON_VERSION_PATCH != 8) +#error cJSON.h and cJSON.c have different versions. Make sure that both have the same. 
+#endif + +CJSON_PUBLIC(const char*) cJSON_Version(void) { - size_t len; - char* copy; + static char version[15]; + sprintf(version, "%i.%i.%i", CJSON_VERSION_MAJOR, CJSON_VERSION_MINOR, CJSON_VERSION_PATCH); - len = strlen(str) + 1; - if (!(copy = (char*)cJSON_malloc(len))) return 0; - memcpy(copy,str,len); - return copy; + return version; } -void cJSON_InitHooks(cJSON_Hooks* hooks) +/* Case insensitive string comparison, doesn't consider two NULL pointers equal though */ +static int case_insensitive_strcmp(const unsigned char *string1, const unsigned char *string2) { - if (!hooks) { /* Reset hooks */ - cJSON_malloc = malloc; - cJSON_free = free; - return; - } + if ((string1 == NULL) || (string2 == NULL)) + { + return 1; + } + + if (string1 == string2) + { + return 0; + } + + for (; tolower(*string1) == tolower(*string2); (void)string1++, string2++) + { + if (*string1 == '\0') + { + return 0; + } + } + + return tolower(*string1) - tolower(*string2); +} - cJSON_malloc = (hooks->malloc_fn)?hooks->malloc_fn:malloc; - cJSON_free = (hooks->free_fn)?hooks->free_fn:free; +typedef struct internal_hooks +{ + void *(CJSON_CDECL *allocate)(size_t size); + void (CJSON_CDECL *deallocate)(void *pointer); + void *(CJSON_CDECL *reallocate)(void *pointer, size_t size); +} internal_hooks; + +#if defined(_MSC_VER) +/* work around MSVC error C2322: '...' address of dillimport '...' is not static */ +static void * CJSON_CDECL internal_malloc(size_t size) +{ + return malloc(size); +} +static void CJSON_CDECL internal_free(void *pointer) +{ + free(pointer); +} +static void * CJSON_CDECL internal_realloc(void *pointer, size_t size) +{ + return realloc(pointer, size); } +#else +#define internal_malloc malloc +#define internal_free free +#define internal_realloc realloc +#endif -/* Internal constructor. 
*/ -static cJSON *cJSON_New_Item(void) +static internal_hooks global_hooks = { internal_malloc, internal_free, internal_realloc }; + +static unsigned char* cJSON_strdup(const unsigned char* string, const internal_hooks * const hooks) { - cJSON* node = (cJSON*)cJSON_malloc(sizeof(cJSON)); - if (node) memset(node,0,sizeof(cJSON)); - return node; + size_t length = 0; + unsigned char *copy = NULL; + + if (string == NULL) + { + return NULL; + } + + length = strlen((const char*)string) + sizeof(""); + copy = (unsigned char*)hooks->allocate(length); + if (copy == NULL) + { + return NULL; + } + memcpy(copy, string, length); + + return copy; } +CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks* hooks) +{ + if (hooks == NULL) + { + /* Reset hooks */ + global_hooks.allocate = malloc; + global_hooks.deallocate = free; + global_hooks.reallocate = realloc; + return; + } + + global_hooks.allocate = malloc; + if (hooks->malloc_fn != NULL) + { + global_hooks.allocate = hooks->malloc_fn; + } + + global_hooks.deallocate = free; + if (hooks->free_fn != NULL) + { + global_hooks.deallocate = hooks->free_fn; + } + + /* use realloc only if both free and malloc are used */ + global_hooks.reallocate = NULL; + if ((global_hooks.allocate == malloc) && (global_hooks.deallocate == free)) + { + global_hooks.reallocate = realloc; + } +} + +/* Internal constructor. */ +static cJSON *cJSON_New_Item(const internal_hooks * const hooks) +{ + cJSON* node = (cJSON*)hooks->allocate(sizeof(cJSON)); + if (node) + { + memset(node, '\0', sizeof(cJSON)); + } + + return node; +} /* Delete a cJSON structure. 
*/ -void cJSON_Delete(cJSON *c) +CJSON_PUBLIC(void) cJSON_Delete(cJSON *item) { - cJSON *next; - while (c) - { - next=c->next; - if (!(c->type&cJSON_IsReference) && c->child) cJSON_Delete(c->child); - if (!(c->type&cJSON_IsReference) && c->valuestring) cJSON_free(c->valuestring); - if (!(c->type&cJSON_StringIsConst) && c->string) cJSON_free(c->string); - cJSON_free(c); - c=next; - } + cJSON *next = NULL; + while (item != NULL) + { + next = item->next; + if (!(item->type & cJSON_IsReference) && (item->child != NULL)) + { + cJSON_Delete(item->child); + } + if (!(item->type & cJSON_IsReference) && (item->valuestring != NULL)) + { + global_hooks.deallocate(item->valuestring); + } + if (!(item->type & cJSON_StringIsConst) && (item->string != NULL)) + { + global_hooks.deallocate(item->string); + } + global_hooks.deallocate(item); + item = next; + } } -/* Parse the input text to generate a number, and populate the result into item. */ -static const char *parse_number(cJSON *item,const char *num) +/* get the decimal point character of the current locale */ +static unsigned char get_decimal_point(void) { - double n=0,sign=1,scale=0;int subscale=0,signsubscale=1; +#ifdef ENABLE_LOCALES + struct lconv *lconv = localeconv(); + return (unsigned char)lconv->decimal_point[0]; +#else + return '.'; +#endif +} - if (*num=='-') sign=-1,num++; /* Has sign? */ - if (*num=='0') num++; /* is zero */ - if (*num>='1' && *num<='9') do n=(n*10.0)+(*num++ -'0'); while (*num>='0' && *num<='9'); /* Number? */ - if (*num=='.' && num[1]>='0' && num[1]<='9') {num++; do n=(n*10.0)+(*num++ -'0'),scale--; while (*num>='0' && *num<='9');} /* Fractional part? */ - if (*num=='e' || *num=='E') /* Exponent? */ - { num++;if (*num=='+') num++; else if (*num=='-') signsubscale=-1,num++; /* With sign? */ - while (*num>='0' && *num<='9') subscale=(subscale*10)+(*num++ - '0'); /* Number? 
*/ - } +typedef struct +{ + const unsigned char *content; + size_t length; + size_t offset; + size_t depth; /* How deeply nested (in arrays/objects) is the input at the current offset. */ + internal_hooks hooks; +} parse_buffer; + +/* check if the given size is left to read in a given parse buffer (starting with 1) */ +#define can_read(buffer, size) ((buffer != NULL) && (((buffer)->offset + size) <= (buffer)->length)) +/* check if the buffer can be accessed at the given index (starting with 0) */ +#define can_access_at_index(buffer, index) ((buffer != NULL) && (((buffer)->offset + index) < (buffer)->length)) +#define cannot_access_at_index(buffer, index) (!can_access_at_index(buffer, index)) +/* get a pointer to the buffer at the position */ +#define buffer_at_offset(buffer) ((buffer)->content + (buffer)->offset) - n=sign*n*pow(10.0,(scale+subscale*signsubscale)); /* number = +/- number.fraction * 10^+/- exponent */ - - item->valuedouble=n; - item->valueint=(int)n; - item->type=cJSON_Number; - return num; +/* Parse the input text to generate a number, and populate the result into item. */ +static cJSON_bool parse_number(cJSON * const item, parse_buffer * const input_buffer) +{ + double number = 0; + unsigned char *after_end = NULL; + unsigned char number_c_string[64]; + unsigned char decimal_point = get_decimal_point(); + size_t i = 0; + + if ((input_buffer == NULL) || (input_buffer->content == NULL)) + { + return false; + } + + /* copy the number into a temporary buffer and replace '.' 
with the decimal point + * of the current locale (for strtod) + * This also takes care of '\0' not necessarily being available for marking the end of the input */ + for (i = 0; (i < (sizeof(number_c_string) - 1)) && can_access_at_index(input_buffer, i); i++) + { + switch (buffer_at_offset(input_buffer)[i]) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '+': + case '-': + case 'e': + case 'E': + number_c_string[i] = buffer_at_offset(input_buffer)[i]; + break; + + case '.': + number_c_string[i] = decimal_point; + break; + + default: + goto loop_end; + } + } +loop_end: + number_c_string[i] = '\0'; + + number = strtod((const char*)number_c_string, (char**)&after_end); + if (number_c_string == after_end) + { + return false; /* parse_error */ + } + + item->valuedouble = number; + + /* use saturation in case of overflow */ + if (number >= INT_MAX) + { + item->valueint = INT_MAX; + } + else if (number <= INT_MIN) + { + item->valueint = INT_MIN; + } + else + { + item->valueint = (int)number; + } + + item->type = cJSON_Number; + + input_buffer->offset += (size_t)(after_end - number_c_string); + return true; } -static int pow2gt (int x) { --x; x|=x>>1; x|=x>>2; x|=x>>4; x|=x>>8; x|=x>>16; return x+1; } +/* don't ask me, but the original cJSON_SetNumberValue returns an integer or double */ +CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number) +{ + if (number >= INT_MAX) + { + object->valueint = INT_MAX; + } + else if (number <= INT_MIN) + { + object->valueint = INT_MIN; + } + else + { + object->valueint = (int)number; + } + + return object->valuedouble = number; +} -typedef struct {char *buffer; int length; int offset; } printbuffer; +typedef struct +{ + unsigned char *buffer; + size_t length; + size_t offset; + size_t depth; /* current nesting depth (for formatted printing) */ + cJSON_bool noalloc; + cJSON_bool format; /* is this print a formatted print */ + internal_hooks 
hooks; +} printbuffer; + +/* realloc printbuffer if necessary to have at least "needed" bytes more */ +static unsigned char* ensure(printbuffer * const p, size_t needed) +{ + unsigned char *newbuffer = NULL; + size_t newsize = 0; + + if ((p == NULL) || (p->buffer == NULL)) + { + return NULL; + } + + if ((p->length > 0) && (p->offset >= p->length)) + { + /* make sure that offset is valid */ + return NULL; + } + + if (needed > INT_MAX) + { + /* sizes bigger than INT_MAX are currently not supported */ + return NULL; + } + + needed += p->offset + 1; + if (needed <= p->length) + { + return p->buffer + p->offset; + } + + if (p->noalloc) { + return NULL; + } + + /* calculate new buffer size */ + if (needed > (INT_MAX / 2)) + { + /* overflow of int, use INT_MAX if possible */ + if (needed <= INT_MAX) + { + newsize = INT_MAX; + } + else + { + return NULL; + } + } + else + { + newsize = needed * 2; + } + + if (p->hooks.reallocate != NULL) + { + /* reallocate with realloc if available */ + newbuffer = (unsigned char*)p->hooks.reallocate(p->buffer, newsize); + if (newbuffer == NULL) + { + p->hooks.deallocate(p->buffer); + p->length = 0; + p->buffer = NULL; + + return NULL; + } + } + else + { + /* otherwise reallocate manually */ + newbuffer = (unsigned char*)p->hooks.allocate(newsize); + if (!newbuffer) + { + p->hooks.deallocate(p->buffer); + p->length = 0; + p->buffer = NULL; + + return NULL; + } + if (newbuffer) + { + memcpy(newbuffer, p->buffer, p->offset + 1); + } + p->hooks.deallocate(p->buffer); + } + p->length = newsize; + p->buffer = newbuffer; + + return newbuffer + p->offset; +} -static char* ensure(printbuffer *p,int needed) +/* calculate the new length of the string in a printbuffer and update the offset */ +static void update_offset(printbuffer * const buffer) { - char *newbuffer;int newsize; - if (!p || !p->buffer) return 0; - needed+=p->offset; - if (needed<=p->length) return p->buffer+p->offset; + const unsigned char *buffer_pointer = NULL; + if ((buffer == 
NULL) || (buffer->buffer == NULL)) + { + return; + } + buffer_pointer = buffer->buffer + buffer->offset; + + buffer->offset += strlen((const char*)buffer_pointer); +} - newsize=pow2gt(needed); - newbuffer=(char*)cJSON_malloc(newsize); - if (!newbuffer) {cJSON_free(p->buffer);p->length=0,p->buffer=0;return 0;} - if (newbuffer) memcpy(newbuffer,p->buffer,p->length); - cJSON_free(p->buffer); - p->length=newsize; - p->buffer=newbuffer; - return newbuffer+p->offset; +/* Render the number nicely from the given item into a string. */ +static cJSON_bool print_number(const cJSON * const item, printbuffer * const output_buffer) +{ + unsigned char *output_pointer = NULL; + double d = item->valuedouble; + int length = 0; + size_t i = 0; + unsigned char number_buffer[26]; /* temporary buffer to print the number into */ + unsigned char decimal_point = get_decimal_point(); + double test; + + if (output_buffer == NULL) + { + return false; + } + + /* This checks for NaN and Infinity */ + if ((d * 0) != 0) + { + length = sprintf((char*)number_buffer, "null"); + } + else + { + /* Try 15 decimal places of precision to avoid nonsignificant nonzero digits */ + length = sprintf((char*)number_buffer, "%1.15g", d); + + /* Check whether the original double can be recovered */ + if ((sscanf((char*)number_buffer, "%lg", &test) != 1) || ((double)test != d)) + { + /* If not, print with 17 decimal places of precision */ + length = sprintf((char*)number_buffer, "%1.17g", d); + } + } + + /* sprintf failed or buffer overrun occured */ + if ((length < 0) || (length >(int)(sizeof(number_buffer) - 1))) + { + return false; + } + + /* reserve appropriate space in the output */ + output_pointer = ensure(output_buffer, (size_t)length + sizeof("")); + if (output_pointer == NULL) + { + return false; + } + + /* copy the printed number to the output and replace locale + * dependent decimal point with '.' 
*/ + for (i = 0; i < ((size_t)length); i++) + { + if (number_buffer[i] == decimal_point) + { + output_pointer[i] = '.'; + continue; + } + + output_pointer[i] = number_buffer[i]; + } + output_pointer[i] = '\0'; + + output_buffer->offset += (size_t)length; + + return true; } -static int update(printbuffer *p) +/* parse 4 digit hexadecimal number */ +static unsigned parse_hex4(const unsigned char * const input) { - char *str; - if (!p || !p->buffer) return 0; - str=p->buffer+p->offset; - return p->offset+strlen(str); + unsigned int h = 0; + size_t i = 0; + + for (i = 0; i < 4; i++) + { + /* parse digit */ + if ((input[i] >= '0') && (input[i] <= '9')) + { + h += (unsigned int)input[i] - '0'; + } + else if ((input[i] >= 'A') && (input[i] <= 'F')) + { + h += (unsigned int)10 + input[i] - 'A'; + } + else if ((input[i] >= 'a') && (input[i] <= 'f')) + { + h += (unsigned int)10 + input[i] - 'a'; + } + else /* invalid */ + { + return 0; + } + + if (i < 3) + { + /* shift left to make place for the next nibble */ + h = h << 4; + } + } + + return h; } -/* Render the number nicely from the given item into a string. */ -static char *print_number(cJSON *item,printbuffer *p) -{ - char *str=0; - double d=item->valuedouble; - if (d==0) - { - if (p) str=ensure(p,2); - else str=(char*)cJSON_malloc(2); /* special case for 0. */ - if (str) strcpy(str,"0"); - } - else if (fabs(((double)item->valueint)-d)<=DBL_EPSILON && d<=INT_MAX && d>=INT_MIN) - { - if (p) str=ensure(p,21); - else str=(char*)cJSON_malloc(21); /* 2^64+1 can be represented in 21 chars. */ - if (str) sprintf(str,"%d",item->valueint); - } - else - { - if (p) str=ensure(p,64); - else str=(char*)cJSON_malloc(64); /* This is a nice tradeoff. 
*/ - if (str) - { - if (fabs(floor(d)-d)<=DBL_EPSILON && fabs(d)<1.0e60)sprintf(str,"%.0f",d); - else if (fabs(d)<1.0e-6 || fabs(d)>1.0e9) sprintf(str,"%e",d); - else sprintf(str,"%f",d); - } - } - return str; -} - -static unsigned parse_hex4(const char *str) -{ - unsigned h=0; - if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; - h=h<<4;str++; - if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; - h=h<<4;str++; - if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; - h=h<<4;str++; - if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; - return h; -} - -/* Parse the input text into an unescaped cstring, and populate item. */ -static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; -static const char *parse_string(cJSON *item,const char *str) -{ - const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2; - if (*str!='\"') {ep=str;return 0;} /* not a string! */ - - while (*ptr!='\"' && *ptr && ++len) if (*ptr++ == '\\') ptr++; /* Skip escaped quotes. */ - - out=(char*)cJSON_malloc(len+1); /* This is how long we need for the string, roughly. */ - if (!out) return 0; - - ptr=str+1;ptr2=out; - while (*ptr!='\"' && *ptr) - { - if (*ptr!='\\') *ptr2++=*ptr++; - else - { - ptr++; - switch (*ptr) - { - case 'b': *ptr2++='\b'; break; - case 'f': *ptr2++='\f'; break; - case 'n': *ptr2++='\n'; break; - case 'r': *ptr2++='\r'; break; - case 't': *ptr2++='\t'; break; - case 'u': /* transcode utf16 to utf8. */ - uc=parse_hex4(ptr+1);ptr+=4; /* get the unicode char. 
*/ - - if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) break; /* check for invalid. */ - - if (uc>=0xD800 && uc<=0xDBFF) /* UTF16 surrogate pairs. */ - { - if (ptr[1]!='\\' || ptr[2]!='u') break; /* missing second-half of surrogate. */ - uc2=parse_hex4(ptr+3);ptr+=6; - if (uc2<0xDC00 || uc2>0xDFFF) break; /* invalid second-half of surrogate. */ - uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF)); - } - - len=4;if (uc<0x80) len=1;else if (uc<0x800) len=2;else if (uc<0x10000) len=3; ptr2+=len; - - switch (len) { - case 4: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; - case 3: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; - case 2: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; - case 1: *--ptr2 =(uc | firstByteMark[len]); - } - ptr2+=len; - break; - default: *ptr2++=*ptr; break; - } - ptr++; - } - } - *ptr2=0; - if (*ptr=='\"') ptr++; - item->valuestring=out; - item->type=cJSON_String; - return ptr; +/* converts a UTF-16 literal to UTF-8 +* A literal can be one or two sequences of the form \uXXXX */ +static unsigned char utf16_literal_to_utf8(const unsigned char * const input_pointer, const unsigned char * const input_end, unsigned char **output_pointer) +{ + long unsigned int codepoint = 0; + unsigned int first_code = 0; + const unsigned char *first_sequence = input_pointer; + unsigned char utf8_length = 0; + unsigned char utf8_position = 0; + unsigned char sequence_length = 0; + unsigned char first_byte_mark = 0; + + if ((input_end - first_sequence) < 6) + { + /* input ends unexpectedly */ + goto fail; + } + + /* get the first utf16 sequence */ + first_code = parse_hex4(first_sequence + 2); + + /* check that the code is valid */ + if (((first_code >= 0xDC00) && (first_code <= 0xDFFF))) + { + goto fail; + } + + /* UTF16 surrogate pair */ + if ((first_code >= 0xD800) && (first_code <= 0xDBFF)) + { + const unsigned char *second_sequence = first_sequence + 6; + unsigned int second_code = 0; + sequence_length = 12; /* \uXXXX\uXXXX */ + + if ((input_end - second_sequence) < 6) + { + /* input ends 
unexpectedly */ + goto fail; + } + + if ((second_sequence[0] != '\\') || (second_sequence[1] != 'u')) + { + /* missing second half of the surrogate pair */ + goto fail; + } + + /* get the second utf16 sequence */ + second_code = parse_hex4(second_sequence + 2); + /* check that the code is valid */ + if ((second_code < 0xDC00) || (second_code > 0xDFFF)) + { + /* invalid second half of the surrogate pair */ + goto fail; + } + + + /* calculate the unicode codepoint from the surrogate pair */ + codepoint = 0x10000 + (((first_code & 0x3FF) << 10) | (second_code & 0x3FF)); + } + else + { + sequence_length = 6; /* \uXXXX */ + codepoint = first_code; + } + + /* encode as UTF-8 + * takes at maximum 4 bytes to encode: + * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ + if (codepoint < 0x80) + { + /* normal ascii, encoding 0xxxxxxx */ + utf8_length = 1; + } + else if (codepoint < 0x800) + { + /* two bytes, encoding 110xxxxx 10xxxxxx */ + utf8_length = 2; + first_byte_mark = 0xC0; /* 11000000 */ + } + else if (codepoint < 0x10000) + { + /* three bytes, encoding 1110xxxx 10xxxxxx 10xxxxxx */ + utf8_length = 3; + first_byte_mark = 0xE0; /* 11100000 */ + } + else if (codepoint <= 0x10FFFF) + { + /* four bytes, encoding 1110xxxx 10xxxxxx 10xxxxxx 10xxxxxx */ + utf8_length = 4; + first_byte_mark = 0xF0; /* 11110000 */ + } + else + { + /* invalid unicode codepoint */ + goto fail; + } + + /* encode as utf8 */ + for (utf8_position = (unsigned char)(utf8_length - 1); utf8_position > 0; utf8_position--) + { + /* 10xxxxxx */ + (*output_pointer)[utf8_position] = (unsigned char)((codepoint | 0x80) & 0xBF); + codepoint >>= 6; + } + /* encode first byte */ + if (utf8_length > 1) + { + (*output_pointer)[0] = (unsigned char)((codepoint | first_byte_mark) & 0xFF); + } + else + { + (*output_pointer)[0] = (unsigned char)(codepoint & 0x7F); + } + + *output_pointer += utf8_length; + + return sequence_length; + +fail: + return 0; +} + +/* Parse the input text into an unescaped cinput, and populate item. 
*/ +static cJSON_bool parse_string(cJSON * const item, parse_buffer * const input_buffer) +{ + const unsigned char *input_pointer = buffer_at_offset(input_buffer) + 1; + const unsigned char *input_end = buffer_at_offset(input_buffer) + 1; + unsigned char *output_pointer = NULL; + unsigned char *output = NULL; + + /* not a string */ + if (buffer_at_offset(input_buffer)[0] != '\"') + { + goto fail; + } + + { + /* calculate approximate size of the output (overestimate) */ + size_t allocation_length = 0; + size_t skipped_bytes = 0; + while (((size_t)(input_end - input_buffer->content) < input_buffer->length) && (*input_end != '\"')) + { + /* is escape sequence */ + if (input_end[0] == '\\') + { + if ((size_t)(input_end + 1 - input_buffer->content) >= input_buffer->length) + { + /* prevent buffer overflow when last input character is a backslash */ + goto fail; + } + skipped_bytes++; + input_end++; + } + input_end++; + } + if (((size_t)(input_end - input_buffer->content) >= input_buffer->length) || (*input_end != '\"')) + { + goto fail; /* string ended unexpectedly */ + } + + /* This is at most how much we need for the output */ + allocation_length = (size_t)(input_end - buffer_at_offset(input_buffer)) - skipped_bytes; + output = (unsigned char*)input_buffer->hooks.allocate(allocation_length + sizeof("")); + if (output == NULL) + { + goto fail; /* allocation failure */ + } + } + + output_pointer = output; + /* loop through the string literal */ + while (input_pointer < input_end) + { + if (*input_pointer != '\\') + { + *output_pointer++ = *input_pointer++; + } + /* escape sequence */ + else + { + unsigned char sequence_length = 2; + if ((input_end - input_pointer) < 1) + { + goto fail; + } + + switch (input_pointer[1]) + { + case 'b': + *output_pointer++ = '\b'; + break; + case 'f': + *output_pointer++ = '\f'; + break; + case 'n': + *output_pointer++ = '\n'; + break; + case 'r': + *output_pointer++ = '\r'; + break; + case 't': + *output_pointer++ = '\t'; + break; + case 
'\"': + case '\\': + case '/': + *output_pointer++ = input_pointer[1]; + break; + + /* UTF-16 literal */ + case 'u': + sequence_length = utf16_literal_to_utf8(input_pointer, input_end, &output_pointer); + if (sequence_length == 0) + { + /* failed to convert UTF16-literal to UTF-8 */ + goto fail; + } + break; + + default: + goto fail; + } + input_pointer += sequence_length; + } + } + + /* zero terminate the output */ + *output_pointer = '\0'; + + item->type = cJSON_String; + item->valuestring = (char*)output; + + input_buffer->offset = (size_t)(input_end - input_buffer->content); + input_buffer->offset++; + + return true; + +fail: + if (output != NULL) + { + input_buffer->hooks.deallocate(output); + } + + if (input_pointer != NULL) + { + input_buffer->offset = (size_t)(input_pointer - input_buffer->content); + } + + return false; } /* Render the cstring provided to an escaped version that can be printed. */ -static char *print_string_ptr(const char *str,printbuffer *p) -{ - const char *ptr;char *ptr2,*out;int len=0,flag=0;unsigned char token; - - for (ptr=str;*ptr;ptr++) flag|=((*ptr>0 && *ptr<32)||(*ptr=='\"')||(*ptr=='\\'))?1:0; - if (!flag) - { - len=ptr-str; - if (p) out=ensure(p,len+3); - else out=(char*)cJSON_malloc(len+3); - if (!out) return 0; - ptr2=out;*ptr2++='\"'; - strcpy(ptr2,str); - ptr2[len]='\"'; - ptr2[len+1]=0; - return out; - } - - if (!str) - { - if (p) out=ensure(p,3); - else out=(char*)cJSON_malloc(3); - if (!out) return 0; - strcpy(out,"\"\""); - return out; - } - ptr=str;while ((token=*ptr) && ++len) {if (strchr("\"\\\b\f\n\r\t",token)) len++; else if (token<32) len+=5;ptr++;} - - if (p) out=ensure(p,len+3); - else out=(char*)cJSON_malloc(len+3); - if (!out) return 0; - - ptr2=out;ptr=str; - *ptr2++='\"'; - while (*ptr) - { - if ((unsigned char)*ptr>31 && *ptr!='\"' && *ptr!='\\') *ptr2++=*ptr++; - else - { - *ptr2++='\\'; - switch (token=*ptr++) - { - case '\\': *ptr2++='\\'; break; - case '\"': *ptr2++='\"'; break; - case '\b': 
*ptr2++='b'; break; - case '\f': *ptr2++='f'; break; - case '\n': *ptr2++='n'; break; - case '\r': *ptr2++='r'; break; - case '\t': *ptr2++='t'; break; - default: sprintf(ptr2,"u%04x",token);ptr2+=5; break; /* escape and print */ - } - } - } - *ptr2++='\"';*ptr2++=0; - return out; -} -/* Invote print_string_ptr (which is useful) on an item. */ -static char *print_string(cJSON *item,printbuffer *p) {return print_string_ptr(item->valuestring,p);} +static cJSON_bool print_string_ptr(const unsigned char * const input, printbuffer * const output_buffer) +{ + const unsigned char *input_pointer = NULL; + unsigned char *output = NULL; + unsigned char *output_pointer = NULL; + size_t output_length = 0; + /* numbers of additional characters needed for escaping */ + size_t escape_characters = 0; + + if (output_buffer == NULL) + { + return false; + } + + /* empty string */ + if (input == NULL) + { + output = ensure(output_buffer, sizeof("\"\"")); + if (output == NULL) + { + return false; + } + strcpy((char*)output, "\"\""); + + return true; + } + + /* set "flag" to 1 if something needs to be escaped */ + for (input_pointer = input; *input_pointer; input_pointer++) + { + switch (*input_pointer) + { + case '\"': + case '\\': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + /* one character escape sequence */ + escape_characters++; + break; + default: + if (*input_pointer < 32) + { + /* UTF-16 escape sequence uXXXX */ + escape_characters += 5; + } + break; + } + } + output_length = (size_t)(input_pointer - input) + escape_characters; + + output = ensure(output_buffer, output_length + sizeof("\"\"")); + if (output == NULL) + { + return false; + } + + /* no characters have to be escaped */ + if (escape_characters == 0) + { + output[0] = '\"'; + memcpy(output + 1, input, output_length); + output[output_length + 1] = '\"'; + output[output_length + 2] = '\0'; + + return true; + } + + output[0] = '\"'; + output_pointer = output + 1; + /* copy the string */ + for 
(input_pointer = input; *input_pointer != '\0'; (void)input_pointer++, output_pointer++) + { + if ((*input_pointer > 31) && (*input_pointer != '\"') && (*input_pointer != '\\')) + { + /* normal character, copy */ + *output_pointer = *input_pointer; + } + else + { + /* character needs to be escaped */ + *output_pointer++ = '\\'; + switch (*input_pointer) + { + case '\\': + *output_pointer = '\\'; + break; + case '\"': + *output_pointer = '\"'; + break; + case '\b': + *output_pointer = 'b'; + break; + case '\f': + *output_pointer = 'f'; + break; + case '\n': + *output_pointer = 'n'; + break; + case '\r': + *output_pointer = 'r'; + break; + case '\t': + *output_pointer = 't'; + break; + default: + /* escape and print as unicode codepoint */ + sprintf((char*)output_pointer, "u%04x", *input_pointer); + output_pointer += 4; + break; + } + } + } + output[output_length + 1] = '\"'; + output[output_length + 2] = '\0'; + + return true; +} + +/* Invoke print_string_ptr (which is useful) on an item. */ +static cJSON_bool print_string(const cJSON * const item, printbuffer * const p) +{ + return print_string_ptr((unsigned char*)item->valuestring, p); +} /* Predeclare these prototypes. 
*/ -static const char *parse_value(cJSON *item,const char *value); -static char *print_value(cJSON *item,int depth,int fmt,printbuffer *p); -static const char *parse_array(cJSON *item,const char *value); -static char *print_array(cJSON *item,int depth,int fmt,printbuffer *p); -static const char *parse_object(cJSON *item,const char *value); -static char *print_object(cJSON *item,int depth,int fmt,printbuffer *p); +static cJSON_bool parse_value(cJSON * const item, parse_buffer * const input_buffer); +static cJSON_bool print_value(const cJSON * const item, printbuffer * const output_buffer); +static cJSON_bool parse_array(cJSON * const item, parse_buffer * const input_buffer); +static cJSON_bool print_array(const cJSON * const item, printbuffer * const output_buffer); +static cJSON_bool parse_object(cJSON * const item, parse_buffer * const input_buffer); +static cJSON_bool print_object(const cJSON * const item, printbuffer * const output_buffer); /* Utility to jump whitespace and cr/lf */ -static const char *skip(const char *in) {while (in && *in && (unsigned char)*in<=32) in++; return in;} +static parse_buffer *buffer_skip_whitespace(parse_buffer * const buffer) +{ + if ((buffer == NULL) || (buffer->content == NULL)) + { + return NULL; + } + + while (can_access_at_index(buffer, 0) && (buffer_at_offset(buffer)[0] <= 32)) + { + buffer->offset++; + } + + if (buffer->offset == buffer->length) + { + buffer->offset--; + } + + return buffer; +} -/* Parse an object - create a new root, and populate. 
*/ -cJSON *cJSON_ParseWithOpts(const char *value,const char **return_parse_end,int require_null_terminated) +/* skip the UTF-8 BOM (byte order mark) if it is at the beginning of a buffer */ +static parse_buffer *skip_utf8_bom(parse_buffer * const buffer) { - const char *end=0; - cJSON *c=cJSON_New_Item(); - ep=0; - if (!c) return 0; /* memory fail */ + if ((buffer == NULL) || (buffer->content == NULL) || (buffer->offset != 0)) + { + return NULL; + } - end=parse_value(c,skip(value)); - if (!end) {cJSON_Delete(c);return 0;} /* parse failure. ep is set. */ + if (can_access_at_index(buffer, 4) && (strncmp((const char*)buffer_at_offset(buffer), "\xEF\xBB\xBF", 3) == 0)) + { + buffer->offset += 3; + } - /* if we require null-terminated JSON without appended garbage, skip and then check for a null terminator */ - if (require_null_terminated) {end=skip(end);if (*end) {cJSON_Delete(c);ep=end;return 0;}} - if (return_parse_end) *return_parse_end=end; - return c; + return buffer; } -/* Default options for cJSON_Parse */ -cJSON *cJSON_Parse(const char *value) {return cJSON_ParseWithOpts(value,0,0);} -/* Render a cJSON item/entity/structure to text. */ -char *cJSON_Print(cJSON *item) {return print_value(item,0,1,0);} -char *cJSON_PrintUnformatted(cJSON *item) {return print_value(item,0,0,0);} +/* Parse an object - create a new root, and populate. 
*/ +CJSON_PUBLIC(cJSON *) cJSON_ParseWithOpts(const char *value, const char **return_parse_end, cJSON_bool require_null_terminated) +{ + parse_buffer buffer = { 0, 0, 0, 0,{ 0, 0, 0 } }; + cJSON *item = NULL; + + /* reset error position */ + global_error.json = NULL; + global_error.position = 0; + + if (value == NULL) + { + goto fail; + } + + buffer.content = (const unsigned char*)value; + buffer.length = strlen((const char*)value) + sizeof(""); + buffer.offset = 0; + buffer.hooks = global_hooks; + + item = cJSON_New_Item(&global_hooks); + if (item == NULL) /* memory fail */ + { + goto fail; + } + + if (!parse_value(item, buffer_skip_whitespace(skip_utf8_bom(&buffer)))) + { + /* parse failure. ep is set. */ + goto fail; + } + + /* if we require null-terminated JSON without appended garbage, skip and then check for a null terminator */ + if (require_null_terminated) + { + buffer_skip_whitespace(&buffer); + if ((buffer.offset >= buffer.length) || buffer_at_offset(&buffer)[0] != '\0') + { + goto fail; + } + } + if (return_parse_end) + { + *return_parse_end = (const char*)buffer_at_offset(&buffer); + } + + return item; + +fail: + if (item != NULL) + { + cJSON_Delete(item); + } + + if (value != NULL) + { + error local_error; + local_error.json = (const unsigned char*)value; + local_error.position = 0; + + if (buffer.offset < buffer.length) + { + local_error.position = buffer.offset; + } + else if (buffer.length > 0) + { + local_error.position = buffer.length - 1; + } + + if (return_parse_end != NULL) + { + *return_parse_end = (const char*)local_error.json + local_error.position; + } + + global_error = local_error; + } + + return NULL; +} -char *cJSON_PrintBuffered(cJSON *item,int prebuffer,int fmt) +/* Default options for cJSON_Parse */ +CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value) { - printbuffer p; - p.buffer=(char*)cJSON_malloc(prebuffer); - p.length=prebuffer; - p.offset=0; - return print_value(item,0,fmt,&p); - return p.buffer; + return 
cJSON_ParseWithOpts(value, 0, 0); } +#define cjson_min(a, b) ((a < b) ? a : b) -/* Parser core - when encountering text, process appropriately. */ -static const char *parse_value(cJSON *item,const char *value) +static unsigned char *print(const cJSON * const item, cJSON_bool format, const internal_hooks * const hooks) { - if (!value) return 0; /* Fail on null. */ - if (!strncmp(value,"null",4)) { item->type=cJSON_NULL; return value+4; } - if (!strncmp(value,"false",5)) { item->type=cJSON_False; return value+5; } - if (!strncmp(value,"true",4)) { item->type=cJSON_True; item->valueint=1; return value+4; } - if (*value=='\"') { return parse_string(item,value); } - if (*value=='-' || (*value>='0' && *value<='9')) { return parse_number(item,value); } - if (*value=='[') { return parse_array(item,value); } - if (*value=='{') { return parse_object(item,value); } + static const size_t default_buffer_size = 256; + printbuffer buffer[1]; + unsigned char *printed = NULL; + + memset(buffer, 0, sizeof(buffer)); + + /* create buffer */ + buffer->buffer = (unsigned char*)hooks->allocate(default_buffer_size); + buffer->length = default_buffer_size; + buffer->format = format; + buffer->hooks = *hooks; + if (buffer->buffer == NULL) + { + goto fail; + } + + /* print the value */ + if (!print_value(item, buffer)) + { + goto fail; + } + update_offset(buffer); + + /* check if reallocate is available */ + if (hooks->reallocate != NULL) + { + printed = (unsigned char*)hooks->reallocate(buffer->buffer, buffer->offset + 1); + if (printed == NULL) { + goto fail; + } + buffer->buffer = NULL; + } + else /* otherwise copy the JSON over to a new buffer */ + { + printed = (unsigned char*)hooks->allocate(buffer->offset + 1); + if (printed == NULL) + { + goto fail; + } + memcpy(printed, buffer->buffer, cjson_min(buffer->length, buffer->offset + 1)); + printed[buffer->offset] = '\0'; /* just to be sure */ + + /* free the buffer */ + hooks->deallocate(buffer->buffer); + } + + return printed; + +fail: 
+ if (buffer->buffer != NULL) + { + hooks->deallocate(buffer->buffer); + } + + if (printed != NULL) + { + hooks->deallocate(printed); + } + + return NULL; +} - ep=value;return 0; /* failure. */ +/* Render a cJSON item/entity/structure to text. */ +CJSON_PUBLIC(char *) cJSON_Print(const cJSON *item) +{ + return (char*)print(item, true, &global_hooks); } -/* Render a value to text. */ -static char *print_value(cJSON *item,int depth,int fmt,printbuffer *p) -{ - char *out=0; - if (!item) return 0; - if (p) - { - switch ((item->type)&255) - { - case cJSON_NULL: {out=ensure(p,5); if (out) strcpy(out,"null"); break;} - case cJSON_False: {out=ensure(p,6); if (out) strcpy(out,"false"); break;} - case cJSON_True: {out=ensure(p,5); if (out) strcpy(out,"true"); break;} - case cJSON_Number: out=print_number(item,p);break; - case cJSON_String: out=print_string(item,p);break; - case cJSON_Array: out=print_array(item,depth,fmt,p);break; - case cJSON_Object: out=print_object(item,depth,fmt,p);break; - } - } - else - { - switch ((item->type)&255) - { - case cJSON_NULL: out=cJSON_strdup("null"); break; - case cJSON_False: out=cJSON_strdup("false");break; - case cJSON_True: out=cJSON_strdup("true"); break; - case cJSON_Number: out=print_number(item,0);break; - case cJSON_String: out=print_string(item,0);break; - case cJSON_Array: out=print_array(item,depth,fmt,0);break; - case cJSON_Object: out=print_object(item,depth,fmt,0);break; - } - } - return out; +CJSON_PUBLIC(char *) cJSON_PrintUnformatted(const cJSON *item) +{ + return (char*)print(item, false, &global_hooks); } -/* Build an array from input text. */ -static const char *parse_array(cJSON *item,const char *value) +CJSON_PUBLIC(char *) cJSON_PrintBuffered(const cJSON *item, int prebuffer, cJSON_bool fmt) { - cJSON *child; - if (*value!='[') {ep=value;return 0;} /* not an array! 
*/ + printbuffer p = { 0, 0, 0, 0, 0, 0,{ 0, 0, 0 } }; + + if (prebuffer < 0) + { + return NULL; + } + + p.buffer = (unsigned char*)global_hooks.allocate((size_t)prebuffer); + if (!p.buffer) + { + return NULL; + } + + p.length = (size_t)prebuffer; + p.offset = 0; + p.noalloc = false; + p.format = fmt; + p.hooks = global_hooks; + + if (!print_value(item, &p)) + { + global_hooks.deallocate(p.buffer); + return NULL; + } + + return (char*)p.buffer; +} - item->type=cJSON_Array; - value=skip(value+1); - if (*value==']') return value+1; /* empty array. */ +CJSON_PUBLIC(cJSON_bool) cJSON_PrintPreallocated(cJSON *item, char *buf, const int len, const cJSON_bool fmt) +{ + printbuffer p = { 0, 0, 0, 0, 0, 0,{ 0, 0, 0 } }; - item->child=child=cJSON_New_Item(); - if (!item->child) return 0; /* memory fail */ - value=skip(parse_value(child,skip(value))); /* skip any spacing, get the value. */ - if (!value) return 0; + if ((len < 0) || (buf == NULL)) + { + return false; + } - while (*value==',') - { - cJSON *new_item; - if (!(new_item=cJSON_New_Item())) return 0; /* memory fail */ - child->next=new_item;new_item->prev=child;child=new_item; - value=skip(parse_value(child,skip(value+1))); - if (!value) return 0; /* memory fail */ - } + p.buffer = (unsigned char*)buf; + p.length = (size_t)len; + p.offset = 0; + p.noalloc = true; + p.format = fmt; + p.hooks = global_hooks; + + return print_value(item, &p); +} - if (*value==']') return value+1; /* end of array */ - ep=value;return 0; /* malformed. */ +/* Parser core - when encountering text, process appropriately. 
*/ +static cJSON_bool parse_value(cJSON * const item, parse_buffer * const input_buffer) +{ + if ((input_buffer == NULL) || (input_buffer->content == NULL)) + { + return false; /* no input */ + } + + /* parse the different types of values */ + /* null */ + if (can_read(input_buffer, 4) && (strncmp((const char*)buffer_at_offset(input_buffer), "null", 4) == 0)) + { + item->type = cJSON_NULL; + input_buffer->offset += 4; + return true; + } + /* false */ + if (can_read(input_buffer, 5) && (strncmp((const char*)buffer_at_offset(input_buffer), "false", 5) == 0)) + { + item->type = cJSON_False; + input_buffer->offset += 5; + return true; + } + /* true */ + if (can_read(input_buffer, 4) && (strncmp((const char*)buffer_at_offset(input_buffer), "true", 4) == 0)) + { + item->type = cJSON_True; + item->valueint = 1; + input_buffer->offset += 4; + return true; + } + /* string */ + if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '\"')) + { + return parse_string(item, input_buffer); + } + /* number */ + if (can_access_at_index(input_buffer, 0) && ((buffer_at_offset(input_buffer)[0] == '-') || ((buffer_at_offset(input_buffer)[0] >= '0') && (buffer_at_offset(input_buffer)[0] <= '9')))) + { + return parse_number(item, input_buffer); + } + /* array */ + if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '[')) + { + return parse_array(item, input_buffer); + } + /* object */ + if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '{')) + { + return parse_object(item, input_buffer); + } + + return false; +} + +/* Render a value to text. 
*/ +static cJSON_bool print_value(const cJSON * const item, printbuffer * const output_buffer) +{ + unsigned char *output = NULL; + + if ((item == NULL) || (output_buffer == NULL)) + { + return false; + } + + switch ((item->type) & 0xFF) + { + case cJSON_NULL: + output = ensure(output_buffer, 5); + if (output == NULL) + { + return false; + } + strcpy((char*)output, "null"); + return true; + + case cJSON_False: + output = ensure(output_buffer, 6); + if (output == NULL) + { + return false; + } + strcpy((char*)output, "false"); + return true; + + case cJSON_True: + output = ensure(output_buffer, 5); + if (output == NULL) + { + return false; + } + strcpy((char*)output, "true"); + return true; + + case cJSON_Number: + return print_number(item, output_buffer); + + case cJSON_Raw: + { + size_t raw_length = 0; + if (item->valuestring == NULL) + { + return false; + } + + raw_length = strlen(item->valuestring) + sizeof(""); + output = ensure(output_buffer, raw_length); + if (output == NULL) + { + return false; + } + memcpy(output, item->valuestring, raw_length); + return true; + } + + case cJSON_String: + return print_string(item, output_buffer); + + case cJSON_Array: + return print_array(item, output_buffer); + + case cJSON_Object: + return print_object(item, output_buffer); + + default: + return false; + } +} + +/* Build an array from input text. 
*/ +static cJSON_bool parse_array(cJSON * const item, parse_buffer * const input_buffer) +{ + cJSON *head = NULL; /* head of the linked list */ + cJSON *current_item = NULL; + + if (input_buffer->depth >= CJSON_NESTING_LIMIT) + { + return false; /* to deeply nested */ + } + input_buffer->depth++; + + if (buffer_at_offset(input_buffer)[0] != '[') + { + /* not an array */ + goto fail; + } + + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ']')) + { + /* empty array */ + goto success; + } + + /* check if we skipped to the end of the buffer */ + if (cannot_access_at_index(input_buffer, 0)) + { + input_buffer->offset--; + goto fail; + } + + /* step back to character in front of the first element */ + input_buffer->offset--; + /* loop through the comma separated array elements */ + do + { + /* allocate next item */ + cJSON *new_item = cJSON_New_Item(&(input_buffer->hooks)); + if (new_item == NULL) + { + goto fail; /* allocation failure */ + } + + /* attach next item to list */ + if (head == NULL) + { + /* start the linked list */ + current_item = head = new_item; + } + else + { + /* add to the end and advance */ + current_item->next = new_item; + new_item->prev = current_item; + current_item = new_item; + } + + /* parse next value */ + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (!parse_value(current_item, input_buffer)) + { + goto fail; /* failed to parse value */ + } + buffer_skip_whitespace(input_buffer); + } while (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ',')); + + if (cannot_access_at_index(input_buffer, 0) || buffer_at_offset(input_buffer)[0] != ']') + { + goto fail; /* expected end of array */ + } + +success: + input_buffer->depth--; + + item->type = cJSON_Array; + item->child = head; + + input_buffer->offset++; + + return true; + +fail: + if (head != NULL) + { + cJSON_Delete(head); + } + + return 
false; } /* Render an array to text */ -static char *print_array(cJSON *item,int depth,int fmt,printbuffer *p) -{ - char **entries; - char *out=0,*ptr,*ret;int len=5; - cJSON *child=item->child; - int numentries=0,i=0,fail=0; - size_t tmplen=0; - - /* How many entries in the array? */ - while (child) numentries++,child=child->next; - /* Explicitly handle numentries==0 */ - if (!numentries) - { - if (p) out=ensure(p,3); - else out=(char*)cJSON_malloc(3); - if (out) strcpy(out,"[]"); - return out; - } - - if (p) - { - /* Compose the output array. */ - i=p->offset; - ptr=ensure(p,1);if (!ptr) return 0; *ptr='['; p->offset++; - child=item->child; - while (child && !fail) - { - print_value(child,depth+1,fmt,p); - p->offset=update(p); - if (child->next) {len=fmt?2:1;ptr=ensure(p,len+1);if (!ptr) return 0;*ptr++=',';if(fmt)*ptr++=' ';*ptr=0;p->offset+=len;} - child=child->next; - } - ptr=ensure(p,2);if (!ptr) return 0; *ptr++=']';*ptr=0; - out=(p->buffer)+i; - } - else - { - /* Allocate an array to hold the values for each */ - entries=(char**)cJSON_malloc(numentries*sizeof(char*)); - if (!entries) return 0; - memset(entries,0,numentries*sizeof(char*)); - /* Retrieve all the results: */ - child=item->child; - while (child && !fail) - { - ret=print_value(child,depth+1,fmt,0); - entries[i++]=ret; - if (ret) len+=strlen(ret)+2+(fmt?1:0); else fail=1; - child=child->next; - } - - /* If we didn't fail, try to malloc the output string */ - if (!fail) out=(char*)cJSON_malloc(len); - /* If that fails, we fail. */ - if (!out) fail=1; - - /* Handle failure. */ - if (fail) - { - for (i=0;ichild; + + if (output_buffer == NULL) + { + return false; + } + + /* Compose the output array. 
*/ + /* opening square bracket */ + output_pointer = ensure(output_buffer, 1); + if (output_pointer == NULL) + { + return false; + } + + *output_pointer = '['; + output_buffer->offset++; + output_buffer->depth++; + + while (current_element != NULL) + { + if (!print_value(current_element, output_buffer)) + { + return false; + } + update_offset(output_buffer); + if (current_element->next) + { + length = (size_t)(output_buffer->format ? 2 : 1); + output_pointer = ensure(output_buffer, length + 1); + if (output_pointer == NULL) + { + return false; + } + *output_pointer++ = ','; + if (output_buffer->format) + { + *output_pointer++ = ' '; + } + *output_pointer = '\0'; + output_buffer->offset += length; + } + current_element = current_element->next; + } + + output_pointer = ensure(output_buffer, 2); + if (output_pointer == NULL) + { + return false; + } + *output_pointer++ = ']'; + *output_pointer = '\0'; + output_buffer->depth--; + + return true; } /* Build an object from the text. */ -static const char *parse_object(cJSON *item,const char *value) -{ - cJSON *child; - if (*value!='{') {ep=value;return 0;} /* not an object! */ - - item->type=cJSON_Object; - value=skip(value+1); - if (*value=='}') return value+1; /* empty array. */ - - item->child=child=cJSON_New_Item(); - if (!item->child) return 0; - value=skip(parse_string(child,skip(value))); - if (!value) return 0; - child->string=child->valuestring;child->valuestring=0; - if (*value!=':') {ep=value;return 0;} /* fail! */ - value=skip(parse_value(child,skip(value+1))); /* skip any spacing, get the value. */ - if (!value) return 0; - - while (*value==',') - { - cJSON *new_item; - if (!(new_item=cJSON_New_Item())) return 0; /* memory fail */ - child->next=new_item;new_item->prev=child;child=new_item; - value=skip(parse_string(child,skip(value+1))); - if (!value) return 0; - child->string=child->valuestring;child->valuestring=0; - if (*value!=':') {ep=value;return 0;} /* fail! 
*/ - value=skip(parse_value(child,skip(value+1))); /* skip any spacing, get the value. */ - if (!value) return 0; - } - - if (*value=='}') return value+1; /* end of array */ - ep=value;return 0; /* malformed. */ +static cJSON_bool parse_object(cJSON * const item, parse_buffer * const input_buffer) +{ + cJSON *head = NULL; /* linked list head */ + cJSON *current_item = NULL; + + if (input_buffer->depth >= CJSON_NESTING_LIMIT) + { + return false; /* to deeply nested */ + } + input_buffer->depth++; + + if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != '{')) + { + goto fail; /* not an object */ + } + + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '}')) + { + goto success; /* empty object */ + } + + /* check if we skipped to the end of the buffer */ + if (cannot_access_at_index(input_buffer, 0)) + { + input_buffer->offset--; + goto fail; + } + + /* step back to character in front of the first element */ + input_buffer->offset--; + /* loop through the comma separated array elements */ + do + { + /* allocate next item */ + cJSON *new_item = cJSON_New_Item(&(input_buffer->hooks)); + if (new_item == NULL) + { + goto fail; /* allocation failure */ + } + + /* attach next item to list */ + if (head == NULL) + { + /* start the linked list */ + current_item = head = new_item; + } + else + { + /* add to the end and advance */ + current_item->next = new_item; + new_item->prev = current_item; + current_item = new_item; + } + + /* parse the name of the child */ + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (!parse_string(current_item, input_buffer)) + { + goto fail; /* faile to parse name */ + } + buffer_skip_whitespace(input_buffer); + + /* swap valuestring and string, because we parsed the name */ + current_item->string = current_item->valuestring; + current_item->valuestring = NULL; + + if 
(cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != ':')) + { + goto fail; /* invalid object */ + } + + /* parse the value */ + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (!parse_value(current_item, input_buffer)) + { + goto fail; /* failed to parse value */ + } + buffer_skip_whitespace(input_buffer); + } while (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ',')); + + if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != '}')) + { + goto fail; /* expected end of object */ + } + +success: + input_buffer->depth--; + + item->type = cJSON_Object; + item->child = head; + + input_buffer->offset++; + return true; + +fail: + if (head != NULL) + { + cJSON_Delete(head); + } + + return false; } /* Render an object to text. */ -static char *print_object(cJSON *item,int depth,int fmt,printbuffer *p) -{ - char **entries=0,**names=0; - char *out=0,*ptr,*ret,*str;int len=7,i=0,j; - cJSON *child=item->child; - int numentries=0,fail=0; - size_t tmplen=0; - /* Count the number of entries. 
*/ - while (child) numentries++,child=child->next; - /* Explicitly handle empty object case */ - if (!numentries) - { - if (p) out=ensure(p,fmt?depth+4:3); - else out=(char*)cJSON_malloc(fmt?depth+4:3); - if (!out) return 0; - ptr=out;*ptr++='{'; - if (fmt) {*ptr++='\n';for (i=0;ioffset; - len=fmt?2:1; ptr=ensure(p,len+1); if (!ptr) return 0; - *ptr++='{'; if (fmt) *ptr++='\n'; *ptr=0; p->offset+=len; - child=item->child;depth++; - while (child) - { - if (fmt) - { - ptr=ensure(p,depth); if (!ptr) return 0; - for (j=0;joffset+=depth; - } - print_string_ptr(child->string,p); - p->offset=update(p); - - len=fmt?2:1; - ptr=ensure(p,len); if (!ptr) return 0; - *ptr++=':';if (fmt) *ptr++='\t'; - p->offset+=len; - - print_value(child,depth,fmt,p); - p->offset=update(p); - - len=(fmt?1:0)+(child->next?1:0); - ptr=ensure(p,len+1); if (!ptr) return 0; - if (child->next) *ptr++=','; - if (fmt) *ptr++='\n';*ptr=0; - p->offset+=len; - child=child->next; - } - ptr=ensure(p,fmt?(depth+1):2); if (!ptr) return 0; - if (fmt) for (i=0;ibuffer)+i; - } - else - { - /* Allocate space for the names and the objects */ - entries=(char**)cJSON_malloc(numentries*sizeof(char*)); - if (!entries) return 0; - names=(char**)cJSON_malloc(numentries*sizeof(char*)); - if (!names) {cJSON_free(entries);return 0;} - memset(entries,0,sizeof(char*)*numentries); - memset(names,0,sizeof(char*)*numentries); - - /* Collect all the results into our arrays: */ - child=item->child;depth++;if (fmt) len+=depth; - while (child) - { - names[i]=str=print_string_ptr(child->string,0); - entries[i++]=ret=print_value(child,depth,fmt,0); - if (str && ret) len+=strlen(ret)+strlen(str)+2+(fmt?2+depth:0); else fail=1; - child=child->next; - } - - /* Try to allocate the output string */ - if (!fail) out=(char*)cJSON_malloc(len); - if (!out) fail=1; - - /* Handle failure */ - if (fail) - { - for (i=0;ichild; + + if (output_buffer == NULL) + { + return false; + } + + /* Compose the output: */ + length = 
(size_t)(output_buffer->format ? 2 : 1); /* fmt: {\n */ + output_pointer = ensure(output_buffer, length + 1); + if (output_pointer == NULL) + { + return false; + } + + *output_pointer++ = '{'; + output_buffer->depth++; + if (output_buffer->format) + { + *output_pointer++ = '\n'; + } + output_buffer->offset += length; + + while (current_item) + { + if (output_buffer->format) + { + size_t i; + output_pointer = ensure(output_buffer, output_buffer->depth); + if (output_pointer == NULL) + { + return false; + } + for (i = 0; i < output_buffer->depth; i++) + { + *output_pointer++ = '\t'; + } + output_buffer->offset += output_buffer->depth; + } + + /* print key */ + if (!print_string_ptr((unsigned char*)current_item->string, output_buffer)) + { + return false; + } + update_offset(output_buffer); + + length = (size_t)(output_buffer->format ? 2 : 1); + output_pointer = ensure(output_buffer, length); + if (output_pointer == NULL) + { + return false; + } + *output_pointer++ = ':'; + if (output_buffer->format) + { + *output_pointer++ = '\t'; + } + output_buffer->offset += length; + + /* print value */ + if (!print_value(current_item, output_buffer)) + { + return false; + } + update_offset(output_buffer); + + /* print comma if not last */ + length = (size_t)((output_buffer->format ? 1 : 0) + (current_item->next ? 1 : 0)); + output_pointer = ensure(output_buffer, length + 1); + if (output_pointer == NULL) + { + return false; + } + if (current_item->next) + { + *output_pointer++ = ','; + } + + if (output_buffer->format) + { + *output_pointer++ = '\n'; + } + *output_pointer = '\0'; + output_buffer->offset += length; + + current_item = current_item->next; + } + + output_pointer = ensure(output_buffer, output_buffer->format ? 
(output_buffer->depth + 1) : 2); + if (output_pointer == NULL) + { + return false; + } + if (output_buffer->format) + { + size_t i; + for (i = 0; i < (output_buffer->depth - 1); i++) + { + *output_pointer++ = '\t'; + } + } + *output_pointer++ = '}'; + *output_pointer = '\0'; + output_buffer->depth--; + + return true; } /* Get Array size/item / object item. */ -int cJSON_GetArraySize(cJSON *array) {cJSON *c=array->child;int i=0;while(c)i++,c=c->next;return i;} -cJSON *cJSON_GetArrayItem(cJSON *array,int item) +CJSON_PUBLIC(int) cJSON_GetArraySize(const cJSON *array) +{ + cJSON *child = NULL; + size_t size = 0; + + if (array == NULL) + { + return 0; + } + + child = array->child; + + while (child != NULL) + { + size++; + child = child->next; + } + + /* FIXME: Can overflow here. Cannot be fixed without breaking the API */ + + return (int)size; +} + +static cJSON* get_array_item(const cJSON *array, size_t index) +{ + cJSON *current_child = NULL; + + if (array == NULL) + { + return NULL; + } + + current_child = array->child; + while ((current_child != NULL) && (index > 0)) + { + index--; + current_child = current_child->next; + } + + return current_child; +} + +CJSON_PUBLIC(cJSON *) cJSON_GetArrayItem(const cJSON *array, int index) { - cJSON *c = (array != NULL) ? array->child : NULL; - while ((c != NULL) && (item > 0)) - { - item--; - c = c->next; - } + if (index < 0) + { + return NULL; + } - return c; + return get_array_item(array, (size_t)index); } -cJSON *cJSON_GetObjectItem(cJSON *object, const char *string) +static cJSON *get_object_item(const cJSON * const object, const char * const name, const cJSON_bool case_sensitive) { - cJSON *c = (object != NULL) ? 
object->child : NULL; - while ((c != NULL) && (cJSON_strcasecmp(c->string, string))) - { - c = c->next; - } - return c; + cJSON *current_element = NULL; + + if ((object == NULL) || (name == NULL)) + { + return NULL; + } + + current_element = object->child; + if (case_sensitive) + { + while ((current_element != NULL) && (strcmp(name, current_element->string) != 0)) + { + current_element = current_element->next; + } + } + else + { + while ((current_element != NULL) && (case_insensitive_strcmp((const unsigned char*)name, (const unsigned char*)(current_element->string)) != 0)) + { + current_element = current_element->next; + } + } + + return current_element; +} + +CJSON_PUBLIC(cJSON *) cJSON_GetObjectItem(const cJSON * const object, const char * const string) +{ + return get_object_item(object, string, false); +} + +CJSON_PUBLIC(cJSON *) cJSON_GetObjectItemCaseSensitive(const cJSON * const object, const char * const string) +{ + return get_object_item(object, string, true); +} + +CJSON_PUBLIC(cJSON_bool) cJSON_HasObjectItem(const cJSON *object, const char *string) +{ + return cJSON_GetObjectItem(object, string) ? 1 : 0; } /* Utility for array list handling. */ -static void suffix_object(cJSON *prev,cJSON *item) {prev->next=item;item->prev=prev;} +static void suffix_object(cJSON *prev, cJSON *item) +{ + prev->next = item; + item->prev = prev; +} + /* Utility for handling references. 
*/ -static cJSON *create_reference(cJSON *item) {cJSON *ref=cJSON_New_Item();if (!ref) return 0;memcpy(ref,item,sizeof(cJSON));ref->string=0;ref->type|=cJSON_IsReference;ref->next=ref->prev=0;return ref;} +static cJSON *create_reference(const cJSON *item, const internal_hooks * const hooks) +{ + cJSON *reference = NULL; + if (item == NULL) + { + return NULL; + } + + reference = cJSON_New_Item(hooks); + if (reference == NULL) + { + return NULL; + } + + memcpy(reference, item, sizeof(cJSON)); + reference->string = NULL; + reference->type |= cJSON_IsReference; + reference->next = reference->prev = NULL; + return reference; +} + +static cJSON_bool add_item_to_array(cJSON *array, cJSON *item) +{ + cJSON *child = NULL; + + if ((item == NULL) || (array == NULL)) + { + return false; + } + + child = array->child; + + if (child == NULL) + { + /* list is empty, start new one */ + array->child = item; + } + else + { + /* append to the end */ + while (child->next) + { + child = child->next; + } + suffix_object(child, item); + } + + return true; +} /* Add item to array/object. 
*/ -void cJSON_AddItemToArray(cJSON *array, cJSON *item) {cJSON *c=array->child;if (!item) return; if (!c) {array->child=item;} else {while (c && c->next) c=c->next; suffix_object(c,item);}} -void cJSON_AddItemToObject(cJSON *object,const char *string,cJSON *item) {if (!item) return; if (item->string) cJSON_free(item->string);item->string=cJSON_strdup(string);cJSON_AddItemToArray(object,item);} -void cJSON_AddItemToObjectCS(cJSON *object,const char *string,cJSON *item) {if (!item) return; if (!(item->type&cJSON_StringIsConst) && item->string) cJSON_free(item->string);item->string=(char*)string;item->type|=cJSON_StringIsConst;cJSON_AddItemToArray(object,item);} -void cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item) {cJSON_AddItemToArray(array,create_reference(item));} -void cJSON_AddItemReferenceToObject(cJSON *object,const char *string,cJSON *item) {cJSON_AddItemToObject(object,string,create_reference(item));} - -cJSON *cJSON_DetachItemFromArray(cJSON *array,int which) {cJSON *c=array->child;while (c && which>0) c=c->next,which--;if (!c) return 0; - if (c->prev) c->prev->next=c->next;if (c->next) c->next->prev=c->prev;if (c==array->child) array->child=c->next;c->prev=c->next=0;return c;} -void cJSON_DeleteItemFromArray(cJSON *array,int which) {cJSON_Delete(cJSON_DetachItemFromArray(array,which));} -cJSON *cJSON_DetachItemFromObject(cJSON *object,const char *string) {int i=0;cJSON *c=object->child;while (c && cJSON_strcasecmp(c->string,string)) i++,c=c->next;if (c) return cJSON_DetachItemFromArray(object,i);return 0;} -void cJSON_DeleteItemFromObject(cJSON *object,const char *string) {cJSON_Delete(cJSON_DetachItemFromObject(object,string));} +CJSON_PUBLIC(void) cJSON_AddItemToArray(cJSON *array, cJSON *item) +{ + add_item_to_array(array, item); +} + +#if defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 5)))) +#pragma GCC diagnostic push +#endif +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wcast-qual" 
+#endif +/* helper function to cast away const */ +static void* cast_away_const(const void* string) +{ + return (void*)string; +} +#if defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 5)))) +#pragma GCC diagnostic pop +#endif + + +static cJSON_bool add_item_to_object(cJSON * const object, const char * const string, cJSON * const item, const internal_hooks * const hooks, const cJSON_bool constant_key) +{ + char *new_key = NULL; + int new_type = cJSON_Invalid; + + if ((object == NULL) || (string == NULL) || (item == NULL)) + { + return false; + } + + if (constant_key) + { + new_key = (char*)cast_away_const(string); + new_type = item->type | cJSON_StringIsConst; + } + else + { + new_key = (char*)cJSON_strdup((const unsigned char*)string, hooks); + if (new_key == NULL) + { + return false; + } + + new_type = item->type & ~cJSON_StringIsConst; + } + + if (!(item->type & cJSON_StringIsConst) && (item->string != NULL)) + { + hooks->deallocate(item->string); + } + + item->string = new_key; + item->type = new_type; + + return add_item_to_array(object, item); +} + +CJSON_PUBLIC(void) cJSON_AddItemToObject(cJSON *object, const char *string, cJSON *item) +{ + add_item_to_object(object, string, item, &global_hooks, false); +} + +/* Add an item to an object with constant string as key */ +CJSON_PUBLIC(void) cJSON_AddItemToObjectCS(cJSON *object, const char *string, cJSON *item) +{ + add_item_to_object(object, string, item, &global_hooks, true); +} + +CJSON_PUBLIC(void) cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item) +{ + if (array == NULL) + { + return; + } + + add_item_to_array(array, create_reference(item, &global_hooks)); +} + +CJSON_PUBLIC(void) cJSON_AddItemReferenceToObject(cJSON *object, const char *string, cJSON *item) +{ + if ((object == NULL) || (string == NULL)) + { + return; + } + + add_item_to_object(object, string, create_reference(item, &global_hooks), &global_hooks, false); +} + +CJSON_PUBLIC(cJSON*) 
cJSON_AddNullToObject(cJSON * const object, const char * const name) +{ + cJSON *null = cJSON_CreateNull(); + if (add_item_to_object(object, name, null, &global_hooks, false)) + { + return null; + } + + cJSON_Delete(null); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddTrueToObject(cJSON * const object, const char * const name) +{ + cJSON *true_item = cJSON_CreateTrue(); + if (add_item_to_object(object, name, true_item, &global_hooks, false)) + { + return true_item; + } + + cJSON_Delete(true_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddFalseToObject(cJSON * const object, const char * const name) +{ + cJSON *false_item = cJSON_CreateFalse(); + if (add_item_to_object(object, name, false_item, &global_hooks, false)) + { + return false_item; + } + + cJSON_Delete(false_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddBoolToObject(cJSON * const object, const char * const name, const cJSON_bool boolean) +{ + cJSON *bool_item = cJSON_CreateBool(boolean); + if (add_item_to_object(object, name, bool_item, &global_hooks, false)) + { + return bool_item; + } + + cJSON_Delete(bool_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddNumberToObject(cJSON * const object, const char * const name, const double number) +{ + cJSON *number_item = cJSON_CreateNumber(number); + if (add_item_to_object(object, name, number_item, &global_hooks, false)) + { + return number_item; + } + + cJSON_Delete(number_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddStringToObject(cJSON * const object, const char * const name, const char * const string) +{ + cJSON *string_item = cJSON_CreateString(string); + if (add_item_to_object(object, name, string_item, &global_hooks, false)) + { + return string_item; + } + + cJSON_Delete(string_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddRawToObject(cJSON * const object, const char * const name, const char * const raw) +{ + cJSON *raw_item = cJSON_CreateRaw(raw); + if (add_item_to_object(object, name, raw_item, 
&global_hooks, false)) + { + return raw_item; + } + + cJSON_Delete(raw_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddObjectToObject(cJSON * const object, const char * const name) +{ + cJSON *object_item = cJSON_CreateObject(); + if (add_item_to_object(object, name, object_item, &global_hooks, false)) + { + return object_item; + } + + cJSON_Delete(object_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddArrayToObject(cJSON * const object, const char * const name) +{ + cJSON *array = cJSON_CreateArray(); + if (add_item_to_object(object, name, array, &global_hooks, false)) + { + return array; + } + + cJSON_Delete(array); + return NULL; +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemViaPointer(cJSON *parent, cJSON * const item) +{ + if ((parent == NULL) || (item == NULL)) + { + return NULL; + } + + if (item->prev != NULL) + { + /* not the first element */ + item->prev->next = item->next; + } + if (item->next != NULL) + { + /* not the last element */ + item->next->prev = item->prev; + } + + if (item == parent->child) + { + /* first element */ + parent->child = item->next; + } + /* make sure the detached item doesn't point anywhere anymore */ + item->prev = NULL; + item->next = NULL; + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromArray(cJSON *array, int which) +{ + if (which < 0) + { + return NULL; + } + + return cJSON_DetachItemViaPointer(array, get_array_item(array, (size_t)which)); +} + +CJSON_PUBLIC(void) cJSON_DeleteItemFromArray(cJSON *array, int which) +{ + cJSON_Delete(cJSON_DetachItemFromArray(array, which)); +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObject(cJSON *object, const char *string) +{ + cJSON *to_detach = cJSON_GetObjectItem(object, string); + + return cJSON_DetachItemViaPointer(object, to_detach); +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObjectCaseSensitive(cJSON *object, const char *string) +{ + cJSON *to_detach = cJSON_GetObjectItemCaseSensitive(object, string); + + return 
cJSON_DetachItemViaPointer(object, to_detach); +} + +CJSON_PUBLIC(void) cJSON_DeleteItemFromObject(cJSON *object, const char *string) +{ + cJSON_Delete(cJSON_DetachItemFromObject(object, string)); +} + +CJSON_PUBLIC(void) cJSON_DeleteItemFromObjectCaseSensitive(cJSON *object, const char *string) +{ + cJSON_Delete(cJSON_DetachItemFromObjectCaseSensitive(object, string)); +} /* Replace array/object items with new ones. */ -void cJSON_InsertItemInArray(cJSON *array,int which,cJSON *newitem) {cJSON *c=array->child;while (c && which>0) c=c->next,which--;if (!c) {cJSON_AddItemToArray(array,newitem);return;} - newitem->next=c;newitem->prev=c->prev;c->prev=newitem;if (c==array->child) array->child=newitem; else newitem->prev->next=newitem;} -void cJSON_ReplaceItemInArray(cJSON *array,int which,cJSON *newitem) {cJSON *c=array->child;while (c && which>0) c=c->next,which--;if (!c) return; - newitem->next=c->next;newitem->prev=c->prev;if (newitem->next) newitem->next->prev=newitem; - if (c==array->child) array->child=newitem; else newitem->prev->next=newitem;c->next=c->prev=0;cJSON_Delete(c);} -void cJSON_ReplaceItemInObject(cJSON *object,const char *string,cJSON *newitem){int i=0;cJSON *c=object->child;while(c && cJSON_strcasecmp(c->string,string))i++,c=c->next;if(c){newitem->string=cJSON_strdup(string);cJSON_ReplaceItemInArray(object,i,newitem);}} +CJSON_PUBLIC(void) cJSON_InsertItemInArray(cJSON *array, int which, cJSON *newitem) +{ + cJSON *after_inserted = NULL; + + if (which < 0) + { + return; + } + + after_inserted = get_array_item(array, (size_t)which); + if (after_inserted == NULL) + { + add_item_to_array(array, newitem); + return; + } + + newitem->next = after_inserted; + newitem->prev = after_inserted->prev; + after_inserted->prev = newitem; + if (after_inserted == array->child) + { + array->child = newitem; + } + else + { + newitem->prev->next = newitem; + } +} + +CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemViaPointer(cJSON * const parent, cJSON * const item, cJSON * 
replacement) +{ + if ((parent == NULL) || (replacement == NULL) || (item == NULL)) + { + return false; + } + + if (replacement == item) + { + return true; + } + + replacement->next = item->next; + replacement->prev = item->prev; + + if (replacement->next != NULL) + { + replacement->next->prev = replacement; + } + if (replacement->prev != NULL) + { + replacement->prev->next = replacement; + } + if (parent->child == item) + { + parent->child = replacement; + } + + item->next = NULL; + item->prev = NULL; + cJSON_Delete(item); + + return true; +} + +CJSON_PUBLIC(void) cJSON_ReplaceItemInArray(cJSON *array, int which, cJSON *newitem) +{ + if (which < 0) + { + return; + } + + cJSON_ReplaceItemViaPointer(array, get_array_item(array, (size_t)which), newitem); +} + +static cJSON_bool replace_item_in_object(cJSON *object, const char *string, cJSON *replacement, cJSON_bool case_sensitive) +{ + if ((replacement == NULL) || (string == NULL)) + { + return false; + } + + /* replace the name in the replacement */ + if (!(replacement->type & cJSON_StringIsConst) && (replacement->string != NULL)) + { + cJSON_free(replacement->string); + } + replacement->string = (char*)cJSON_strdup((const unsigned char*)string, &global_hooks); + replacement->type &= ~cJSON_StringIsConst; + + cJSON_ReplaceItemViaPointer(object, get_object_item(object, string, case_sensitive), replacement); + + return true; +} + +CJSON_PUBLIC(void) cJSON_ReplaceItemInObject(cJSON *object, const char *string, cJSON *newitem) +{ + replace_item_in_object(object, string, newitem, false); +} + +CJSON_PUBLIC(void) cJSON_ReplaceItemInObjectCaseSensitive(cJSON *object, const char *string, cJSON *newitem) +{ + replace_item_in_object(object, string, newitem, true); +} /* Create basic types: */ -cJSON *cJSON_CreateNull(void) {cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_NULL;return item;} -cJSON *cJSON_CreateTrue(void) {cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_True;return item;} -cJSON 
*cJSON_CreateFalse(void) {cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_False;return item;} -cJSON *cJSON_CreateBool(int b) {cJSON *item=cJSON_New_Item();if(item)item->type=b?cJSON_True:cJSON_False;return item;} -cJSON *cJSON_CreateNumber(double num) {cJSON *item=cJSON_New_Item();if(item){item->type=cJSON_Number;item->valuedouble=num;item->valueint=(int)num;}return item;} -cJSON *cJSON_CreateString(const char *string) {cJSON *item=cJSON_New_Item();if(item){item->type=cJSON_String;item->valuestring=cJSON_strdup(string);}return item;} -cJSON *cJSON_CreateArray(void) {cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_Array;return item;} -cJSON *cJSON_CreateObject(void) {cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_Object;return item;} +CJSON_PUBLIC(cJSON *) cJSON_CreateNull(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_NULL; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateTrue(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_True; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateFalse(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_False; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateBool(cJSON_bool b) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = b ? 
cJSON_True : cJSON_False; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateNumber(double num) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_Number; + item->valuedouble = num; + + /* use saturation in case of overflow */ + if (num >= INT_MAX) + { + item->valueint = INT_MAX; + } + else if (num <= INT_MIN) + { + item->valueint = INT_MIN; + } + else + { + item->valueint = (int)num; + } + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateString(const char *string) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_String; + item->valuestring = (char*)cJSON_strdup((const unsigned char*)string, &global_hooks); + if (!item->valuestring) + { + cJSON_Delete(item); + return NULL; + } + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateStringReference(const char *string) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item != NULL) + { + item->type = cJSON_String | cJSON_IsReference; + item->valuestring = (char*)cast_away_const(string); + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateObjectReference(const cJSON *child) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item != NULL) { + item->type = cJSON_Object | cJSON_IsReference; + item->child = (cJSON*)cast_away_const(child); + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateArrayReference(const cJSON *child) { + cJSON *item = cJSON_New_Item(&global_hooks); + if (item != NULL) { + item->type = cJSON_Array | cJSON_IsReference; + item->child = (cJSON*)cast_away_const(child); + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateRaw(const char *raw) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_Raw; + item->valuestring = (char*)cJSON_strdup((const unsigned char*)raw, &global_hooks); + if (!item->valuestring) + { + cJSON_Delete(item); + return NULL; + } + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateArray(void) +{ + cJSON 
*item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_Array; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateObject(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_Object; + } + + return item; +} /* Create Arrays: */ -cJSON *cJSON_CreateIntArray(const int *numbers,int count) {int i;cJSON *n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && ichild=n;else suffix_object(p,n);p=n;}return a;} -cJSON *cJSON_CreateFloatArray(const float *numbers,int count) {int i;cJSON *n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && ichild=n;else suffix_object(p,n);p=n;}return a;} -cJSON *cJSON_CreateDoubleArray(const double *numbers,int count) {int i;cJSON *n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && ichild=n;else suffix_object(p,n);p=n;}return a;} -cJSON *cJSON_CreateStringArray(const char **strings,int count) {int i;cJSON *n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && ichild=n;else suffix_object(p,n);p=n;}return a;} +CJSON_PUBLIC(cJSON *) cJSON_CreateIntArray(const int *numbers, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (numbers == NULL)) + { + return NULL; + } + + a = cJSON_CreateArray(); + for (i = 0; a && (i < (size_t)count); i++) + { + n = cJSON_CreateNumber(numbers[i]); + if (!n) + { + cJSON_Delete(a); + return NULL; + } + if (!i) + { + a->child = n; + } + else + { + suffix_object(p, n); + } + p = n; + } + + return a; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateFloatArray(const float *numbers, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (numbers == NULL)) + { + return NULL; + } + + a = cJSON_CreateArray(); + + for (i = 0; a && (i < (size_t)count); i++) + { + n = cJSON_CreateNumber((double)numbers[i]); + if (!n) + { + cJSON_Delete(a); + return NULL; + } + if (!i) + { + a->child = n; + } + else + { + suffix_object(p, n); + } + p = n; + } + + return a; +} + +CJSON_PUBLIC(cJSON *) 
cJSON_CreateDoubleArray(const double *numbers, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (numbers == NULL)) + { + return NULL; + } + + a = cJSON_CreateArray(); + + for (i = 0; a && (i < (size_t)count); i++) + { + n = cJSON_CreateNumber(numbers[i]); + if (!n) + { + cJSON_Delete(a); + return NULL; + } + if (!i) + { + a->child = n; + } + else + { + suffix_object(p, n); + } + p = n; + } + + return a; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateStringArray(const char **strings, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (strings == NULL)) + { + return NULL; + } + + a = cJSON_CreateArray(); + + for (i = 0; a && (i < (size_t)count); i++) + { + n = cJSON_CreateString(strings[i]); + if (!n) + { + cJSON_Delete(a); + return NULL; + } + if (!i) + { + a->child = n; + } + else + { + suffix_object(p, n); + } + p = n; + } + + return a; +} /* Duplication */ -cJSON *cJSON_Duplicate(cJSON *item,int recurse) -{ - cJSON *newitem,*cptr,*nptr=0,*newchild; - /* Bail on bad ptr */ - if (!item) return 0; - /* Create new item */ - newitem=cJSON_New_Item(); - if (!newitem) return 0; - /* Copy over all vars */ - newitem->type=item->type&(~cJSON_IsReference),newitem->valueint=item->valueint,newitem->valuedouble=item->valuedouble; - if (item->valuestring) {newitem->valuestring=cJSON_strdup(item->valuestring); if (!newitem->valuestring) {cJSON_Delete(newitem);return 0;}} - if (item->string) {newitem->string=cJSON_strdup(item->string); if (!newitem->string) {cJSON_Delete(newitem);return 0;}} - /* If non-recursive, then we're done! */ - if (!recurse) return newitem; - /* Walk the ->next chain for the child. 
*/ - cptr=item->child; - while (cptr) - { - newchild=cJSON_Duplicate(cptr,1); /* Duplicate (with recurse) each item in the ->next chain */ - if (!newchild) {cJSON_Delete(newitem);return 0;} - if (nptr) {nptr->next=newchild,newchild->prev=nptr;nptr=newchild;} /* If newitem->child already set, then crosswire ->prev and ->next and move on */ - else {newitem->child=newchild;nptr=newchild;} /* Set newitem->child and move to it */ - cptr=cptr->next; - } - return newitem; -} - -void cJSON_Minify(char *json) -{ - char *into=json; - while (*json) - { - if (*json==' ') json++; - else if (*json=='\t') json++; /* Whitespace characters. */ - else if (*json=='\r') json++; - else if (*json=='\n') json++; - else if (*json=='/' && json[1]=='/') while (*json && *json!='\n') json++; /* double-slash comments, to end of line. */ - else if (*json=='/' && json[1]=='*') {while (*json && !(*json=='*' && json[1]=='/')) json++;json+=2;} /* multiline comments. */ - else if (*json=='\"'){*into++=*json++;while (*json && *json!='\"'){if (*json=='\\') *into++=*json++;*into++=*json++;}*into++=*json++;} /* string literals, which are \" sensitive. */ - else *into++=*json++; /* All other characters. */ - } - *into=0; /* and null-terminate. */ +CJSON_PUBLIC(cJSON *) cJSON_Duplicate(const cJSON *item, cJSON_bool recurse) +{ + cJSON *newitem = NULL; + cJSON *child = NULL; + cJSON *next = NULL; + cJSON *newchild = NULL; + + /* Bail on bad ptr */ + if (!item) + { + goto fail; + } + /* Create new item */ + newitem = cJSON_New_Item(&global_hooks); + if (!newitem) + { + goto fail; + } + /* Copy over all vars */ + newitem->type = item->type & (~cJSON_IsReference); + newitem->valueint = item->valueint; + newitem->valuedouble = item->valuedouble; + if (item->valuestring) + { + newitem->valuestring = (char*)cJSON_strdup((unsigned char*)item->valuestring, &global_hooks); + if (!newitem->valuestring) + { + goto fail; + } + } + if (item->string) + { + newitem->string = (item->type&cJSON_StringIsConst) ? 
item->string : (char*)cJSON_strdup((unsigned char*)item->string, &global_hooks); + if (!newitem->string) + { + goto fail; + } + } + /* If non-recursive, then we're done! */ + if (!recurse) + { + return newitem; + } + /* Walk the ->next chain for the child. */ + child = item->child; + while (child != NULL) + { + newchild = cJSON_Duplicate(child, true); /* Duplicate (with recurse) each item in the ->next chain */ + if (!newchild) + { + goto fail; + } + if (next != NULL) + { + /* If newitem->child already set, then crosswire ->prev and ->next and move on */ + next->next = newchild; + newchild->prev = next; + next = newchild; + } + else + { + /* Set newitem->child and move to it */ + newitem->child = newchild; + next = newchild; + } + child = child->next; + } + + return newitem; + +fail: + if (newitem != NULL) + { + cJSON_Delete(newitem); + } + + return NULL; +} + +CJSON_PUBLIC(void) cJSON_Minify(char *json) +{ + unsigned char *into = (unsigned char*)json; + + if (json == NULL) + { + return; + } + + while (*json) + { + if (*json == ' ') + { + json++; + } + else if (*json == '\t') + { + /* Whitespace characters. */ + json++; + } + else if (*json == '\r') + { + json++; + } + else if (*json == '\n') + { + json++; + } + else if ((*json == '/') && (json[1] == '/')) + { + /* double-slash comments, to end of line. */ + while (*json && (*json != '\n')) + { + json++; + } + } + else if ((*json == '/') && (json[1] == '*')) + { + /* multiline comments. */ + while (*json && !((*json == '*') && (json[1] == '/'))) + { + json++; + } + json += 2; + } + else if (*json == '\"') + { + /* string literals, which are \" sensitive. */ + *into++ = (unsigned char)*json++; + while (*json && (*json != '\"')) + { + if (*json == '\\') + { + *into++ = (unsigned char)*json++; + } + *into++ = (unsigned char)*json++; + } + *into++ = (unsigned char)*json++; + } + else + { + /* All other characters. */ + *into++ = (unsigned char)*json++; + } + } + + /* and null-terminate. 
*/ + *into = '\0'; } +CJSON_PUBLIC(cJSON_bool) cJSON_IsInvalid(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_Invalid; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsFalse(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_False; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsTrue(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xff) == cJSON_True; +} + + +CJSON_PUBLIC(cJSON_bool) cJSON_IsBool(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & (cJSON_True | cJSON_False)) != 0; +} +CJSON_PUBLIC(cJSON_bool) cJSON_IsNull(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_NULL; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsNumber(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_Number; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsString(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_String; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsArray(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_Array; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsObject(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_Object; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsRaw(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_Raw; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_Compare(const cJSON * const a, const cJSON * const b, const cJSON_bool case_sensitive) +{ + if ((a == NULL) || (b == NULL) || ((a->type & 0xFF) != (b->type & 0xFF)) || cJSON_IsInvalid(a)) + { + return false; + } + + /* check if type is valid */ + switch (a->type & 0xFF) + { + case 
cJSON_False: + case cJSON_True: + case cJSON_NULL: + case cJSON_Number: + case cJSON_String: + case cJSON_Raw: + case cJSON_Array: + case cJSON_Object: + break; + + default: + return false; + } + + /* identical objects are equal */ + if (a == b) + { + return true; + } + + switch (a->type & 0xFF) + { + /* in these cases and equal type is enough */ + case cJSON_False: + case cJSON_True: + case cJSON_NULL: + return true; + + case cJSON_Number: + if (a->valuedouble == b->valuedouble) + { + return true; + } + return false; + + case cJSON_String: + case cJSON_Raw: + if ((a->valuestring == NULL) || (b->valuestring == NULL)) + { + return false; + } + if (strcmp(a->valuestring, b->valuestring) == 0) + { + return true; + } + + return false; + + case cJSON_Array: + { + cJSON *a_element = a->child; + cJSON *b_element = b->child; + + for (; (a_element != NULL) && (b_element != NULL);) + { + if (!cJSON_Compare(a_element, b_element, case_sensitive)) + { + return false; + } + + a_element = a_element->next; + b_element = b_element->next; + } + + /* one of the arrays is longer than the other */ + if (a_element != b_element) { + return false; + } + + return true; + } + + case cJSON_Object: + { + cJSON *a_element = NULL; + cJSON *b_element = NULL; + cJSON_ArrayForEach(a_element, a) + { + /* TODO This has O(n^2) runtime, which is horrible! 
*/ + b_element = get_object_item(b, a_element->string, case_sensitive); + if (b_element == NULL) + { + return false; + } + + if (!cJSON_Compare(a_element, b_element, case_sensitive)) + { + return false; + } + } + + /* doing this twice, once on a and b to prevent true comparison if a subset of b + * TODO: Do this the proper way, this is just a fix for now */ + cJSON_ArrayForEach(b_element, b) + { + a_element = get_object_item(a, b_element->string, case_sensitive); + if (a_element == NULL) + { + return false; + } + + if (!cJSON_Compare(b_element, a_element, case_sensitive)) + { + return false; + } + } + + return true; + } + + default: + return false; + } +} + +CJSON_PUBLIC(void *) cJSON_malloc(size_t size) +{ + return global_hooks.allocate(size); +} + +CJSON_PUBLIC(void) cJSON_free(void *object) +{ + global_hooks.deallocate(object); +} \ No newline at end of file diff --git a/source/code/cjson/cJSON.h b/source/code/cjson/cJSON.h index 662948612..d4a2dfed3 100644 --- a/source/code/cjson/cJSON.h +++ b/source/code/cjson/cJSON.h @@ -1,147 +1,285 @@ /* - Copyright (c) 2009 Dave Gamble - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. +Copyright (c) 2009-2017 Dave Gamble and cJSON contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. */ #ifndef cJSON__h #define cJSON__h + #ifdef __cplusplus extern "C" { #endif -/* cJSON Types: */ -#define cJSON_False 0 -#define cJSON_True 1 -#define cJSON_NULL 2 -#define cJSON_Number 3 -#define cJSON_String 4 -#define cJSON_Array 5 -#define cJSON_Object 6 - +#if !defined(__WINDOWS__) && (defined(WIN32) || defined(WIN64) || defined(_MSC_VER) || defined(_WIN32)) +#define __WINDOWS__ +#endif + +#ifdef __WINDOWS__ + + /* When compiling for windows, we specify a specific calling convention to avoid issues where we are being called from a project with a different default calling convention. 
For windows you have 3 define options: + + CJSON_HIDE_SYMBOLS - Define this in the case where you don't want to ever dllexport symbols + CJSON_EXPORT_SYMBOLS - Define this on library build when you want to dllexport symbols (default) + CJSON_IMPORT_SYMBOLS - Define this if you want to dllimport symbol + + For *nix builds that support visibility attribute, you can define similar behavior by + + setting default visibility to hidden by adding + -fvisibility=hidden (for gcc) + or + -xldscope=hidden (for sun cc) + to CFLAGS + + then using the CJSON_API_VISIBILITY flag to "export" the same symbols the way CJSON_EXPORT_SYMBOLS does + + */ + +#define CJSON_CDECL __cdecl +#define CJSON_STDCALL __stdcall + + /* export symbols by default, this is necessary for copy pasting the C and header file */ +#if !defined(CJSON_HIDE_SYMBOLS) && !defined(CJSON_IMPORT_SYMBOLS) && !defined(CJSON_EXPORT_SYMBOLS) +#define CJSON_EXPORT_SYMBOLS +#endif + +#if defined(CJSON_HIDE_SYMBOLS) +#define CJSON_PUBLIC(type) type CJSON_STDCALL +#elif defined(CJSON_EXPORT_SYMBOLS) +#define CJSON_PUBLIC(type) __declspec(dllexport) type CJSON_STDCALL +#elif defined(CJSON_IMPORT_SYMBOLS) +#define CJSON_PUBLIC(type) __declspec(dllimport) type CJSON_STDCALL +#endif +#else /* !__WINDOWS__ */ +#define CJSON_CDECL +#define CJSON_STDCALL + +#if (defined(__GNUC__) || defined(__SUNPRO_CC) || defined (__SUNPRO_C)) && defined(CJSON_API_VISIBILITY) +#define CJSON_PUBLIC(type) __attribute__((visibility("default"))) type +#else +#define CJSON_PUBLIC(type) type +#endif +#endif + + /* project version */ +#define CJSON_VERSION_MAJOR 1 +#define CJSON_VERSION_MINOR 7 +#define CJSON_VERSION_PATCH 8 + +#include + + /* cJSON Types: */ +#define cJSON_Invalid (0) +#define cJSON_False (1 << 0) +#define cJSON_True (1 << 1) +#define cJSON_NULL (1 << 2) +#define cJSON_Number (1 << 3) +#define cJSON_String (1 << 4) +#define cJSON_Array (1 << 5) +#define cJSON_Object (1 << 6) +#define cJSON_Raw (1 << 7) /* raw json */ + #define 
cJSON_IsReference 256 #define cJSON_StringIsConst 512 -/* The cJSON structure: */ - typedef struct cJSON { - struct cJSON *next,*prev; /* next/prev allow you to walk array/object chains. Alternatively, use GetArraySize/GetArrayItem/GetObjectItem */ - struct cJSON *child; /* An array or object item will have a child pointer pointing to a chain of the items in the array/object. */ - - int type; /* The type of the item, as above. */ - - char *valuestring; /* The item's string, if type==cJSON_String */ - int valueint; /* The item's number, if type==cJSON_Number */ - double valuedouble; /* The item's number, if type==cJSON_Number */ - - char *string; /* The item's name string, if this item is the child of, or is in the list of subitems of an object. */ - } cJSON; - - typedef struct cJSON_Hooks { - void *(*malloc_fn)(size_t sz); - void (*free_fn)(void *ptr); - } cJSON_Hooks; - -/* Supply malloc, realloc and free functions to cJSON */ - extern void cJSON_InitHooks(cJSON_Hooks* hooks); - - -/* Supply a block of JSON, and this returns a cJSON object you can interrogate. Call cJSON_Delete when finished. */ - extern cJSON *cJSON_Parse(const char *value); -/* Render a cJSON entity to text for transfer/storage. Free the char* when finished. */ - extern char *cJSON_Print(cJSON *item); -/* Render a cJSON entity to text for transfer/storage without any formatting. Free the char* when finished. */ - extern char *cJSON_PrintUnformatted(cJSON *item); -/* Render a cJSON entity to text using a buffered strategy. prebuffer is a guess at the final size. guessing well reduces reallocation. fmt=0 gives unformatted, =1 gives formatted */ - extern char *cJSON_PrintBuffered(cJSON *item,int prebuffer,int fmt); -/* Delete a cJSON entity and all subentities. */ - extern void cJSON_Delete(cJSON *c); - -/* Returns the number of items in an array (or object). */ - extern int cJSON_GetArraySize(cJSON *array); -/* Retrieve item number "item" from array "array". Returns NULL if unsuccessful. 
*/ - extern cJSON *cJSON_GetArrayItem(cJSON *array,int item); -/* Get item "string" from object. Case insensitive. */ - extern cJSON *cJSON_GetObjectItem(cJSON *object,const char *string); - -/* For analysing failed parses. This returns a pointer to the parse error. You'll probably need to look a few chars back to make sense of it. Defined when cJSON_Parse() returns 0. 0 when cJSON_Parse() succeeds. */ - extern const char *cJSON_GetErrorPtr(void); - -/* These calls create a cJSON item of the appropriate type. */ - extern cJSON *cJSON_CreateNull(void); - extern cJSON *cJSON_CreateTrue(void); - extern cJSON *cJSON_CreateFalse(void); - extern cJSON *cJSON_CreateBool(int b); - extern cJSON *cJSON_CreateNumber(double num); - extern cJSON *cJSON_CreateString(const char *string); - extern cJSON *cJSON_CreateArray(void); - extern cJSON *cJSON_CreateObject(void); - -/* These utilities create an Array of count items. */ - extern cJSON *cJSON_CreateIntArray(const int *numbers,int count); - extern cJSON *cJSON_CreateFloatArray(const float *numbers,int count); - extern cJSON *cJSON_CreateDoubleArray(const double *numbers,int count); - extern cJSON *cJSON_CreateStringArray(const char **strings,int count); - -/* Append item to the specified array/object. */ - extern void cJSON_AddItemToArray(cJSON *array, cJSON *item); - extern void cJSON_AddItemToObject(cJSON *object,const char *string,cJSON *item); - extern void cJSON_AddItemToObjectCS(cJSON *object,const char *string,cJSON *item); /* Use this when string is definitely const (i.e. a literal, or as good as), and will definitely survive the cJSON object */ -/* Append reference to item to the specified array/object. Use this when you want to add an existing cJSON to a new cJSON, but don't want to corrupt your existing cJSON. 
*/ - extern void cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item); - extern void cJSON_AddItemReferenceToObject(cJSON *object,const char *string,cJSON *item); - -/* Remove/Detatch items from Arrays/Objects. */ - extern cJSON *cJSON_DetachItemFromArray(cJSON *array,int which); - extern void cJSON_DeleteItemFromArray(cJSON *array,int which); - extern cJSON *cJSON_DetachItemFromObject(cJSON *object,const char *string); - extern void cJSON_DeleteItemFromObject(cJSON *object,const char *string); - -/* Update array items. */ - extern void cJSON_InsertItemInArray(cJSON *array,int which,cJSON *newitem); /* Shifts pre-existing items to the right. */ - extern void cJSON_ReplaceItemInArray(cJSON *array,int which,cJSON *newitem); - extern void cJSON_ReplaceItemInObject(cJSON *object,const char *string,cJSON *newitem); - -/* Duplicate a cJSON item */ - extern cJSON *cJSON_Duplicate(cJSON *item,int recurse); -/* Duplicate will create a new, identical cJSON item to the one you pass, in new memory that will - need to be released. With recurse!=0, it will duplicate any children connected to the item. - The item->next and ->prev pointers are always zero on return from Duplicate. */ - -/* ParseWithOpts allows you to require (and check) that the JSON is null terminated, and to retrieve the pointer to the final byte parsed. */ - extern cJSON *cJSON_ParseWithOpts(const char *value,const char **return_parse_end,int require_null_terminated); - - extern void cJSON_Minify(char *json); - -/* Macros for creating things quickly. 
*/ -#define cJSON_AddNullToObject(object,name) cJSON_AddItemToObject(object, name, cJSON_CreateNull()) -#define cJSON_AddTrueToObject(object,name) cJSON_AddItemToObject(object, name, cJSON_CreateTrue()) -#define cJSON_AddFalseToObject(object,name) cJSON_AddItemToObject(object, name, cJSON_CreateFalse()) -#define cJSON_AddBoolToObject(object,name,b) cJSON_AddItemToObject(object, name, cJSON_CreateBool(b)) -#define cJSON_AddNumberToObject(object,name,n) cJSON_AddItemToObject(object, name, cJSON_CreateNumber(n)) -#define cJSON_AddStringToObject(object,name,s) cJSON_AddItemToObject(object, name, cJSON_CreateString(s)) - -/* When assigning an integer value, it needs to be propagated to valuedouble too. */ -#define cJSON_SetIntValue(object,val) ((object)?(object)->valueint=(object)->valuedouble=(val):(val)) -#define cJSON_SetNumberValue(object,val) ((object)?(object)->valueint=(object)->valuedouble=(val):(val)) + /* The cJSON structure: */ + typedef struct cJSON + { + /* next/prev allow you to walk array/object chains. Alternatively, use GetArraySize/GetArrayItem/GetObjectItem */ + struct cJSON *next; + struct cJSON *prev; + /* An array or object item will have a child pointer pointing to a chain of the items in the array/object. */ + struct cJSON *child; + + /* The type of the item, as above. */ + int type; + + /* The item's string, if type==cJSON_String and type == cJSON_Raw */ + char *valuestring; + /* writing to valueint is DEPRECATED, use cJSON_SetNumberValue instead */ + int valueint; + /* The item's number, if type==cJSON_Number */ + double valuedouble; + + /* The item's name string, if this item is the child of, or is in the list of subitems of an object. */ + char *string; + } cJSON; + + typedef struct cJSON_Hooks + { + /* malloc/free are CDECL on Windows regardless of the default calling convention of the compiler, so ensure the hooks allow passing those functions directly. 
*/ + void *(CJSON_CDECL *malloc_fn)(size_t sz); + void (CJSON_CDECL *free_fn)(void *ptr); + } cJSON_Hooks; + + typedef int cJSON_bool; + + /* Limits how deeply nested arrays/objects can be before cJSON rejects to parse them. + * This is to prevent stack overflows. */ +#ifndef CJSON_NESTING_LIMIT +#define CJSON_NESTING_LIMIT 1000 +#endif + + /* returns the version of cJSON as a string */ + CJSON_PUBLIC(const char*) cJSON_Version(void); + + /* Supply malloc, realloc and free functions to cJSON */ + CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks* hooks); + + /* Memory Management: the caller is always responsible to free the results from all variants of cJSON_Parse (with cJSON_Delete) and cJSON_Print (with stdlib free, cJSON_Hooks.free_fn, or cJSON_free as appropriate). The exception is cJSON_PrintPreallocated, where the caller has full responsibility of the buffer. */ + /* Supply a block of JSON, and this returns a cJSON object you can interrogate. */ + CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value); + /* ParseWithOpts allows you to require (and check) that the JSON is null terminated, and to retrieve the pointer to the final byte parsed. */ + /* If you supply a ptr in return_parse_end and parsing fails, then return_parse_end will contain a pointer to the error so will match cJSON_GetErrorPtr(). */ + CJSON_PUBLIC(cJSON *) cJSON_ParseWithOpts(const char *value, const char **return_parse_end, cJSON_bool require_null_terminated); + + /* Render a cJSON entity to text for transfer/storage. */ + CJSON_PUBLIC(char *) cJSON_Print(const cJSON *item); + /* Render a cJSON entity to text for transfer/storage without any formatting. */ + CJSON_PUBLIC(char *) cJSON_PrintUnformatted(const cJSON *item); + /* Render a cJSON entity to text using a buffered strategy. prebuffer is a guess at the final size. guessing well reduces reallocation. 
fmt=0 gives unformatted, =1 gives formatted */ + CJSON_PUBLIC(char *) cJSON_PrintBuffered(const cJSON *item, int prebuffer, cJSON_bool fmt); + /* Render a cJSON entity to text using a buffer already allocated in memory with given length. Returns 1 on success and 0 on failure. */ + /* NOTE: cJSON is not always 100% accurate in estimating how much memory it will use, so to be safe allocate 5 bytes more than you actually need */ + CJSON_PUBLIC(cJSON_bool) cJSON_PrintPreallocated(cJSON *item, char *buffer, const int length, const cJSON_bool format); + /* Delete a cJSON entity and all subentities. */ + CJSON_PUBLIC(void) cJSON_Delete(cJSON *c); + + /* Returns the number of items in an array (or object). */ + CJSON_PUBLIC(int) cJSON_GetArraySize(const cJSON *array); + /* Retrieve item number "index" from array "array". Returns NULL if unsuccessful. */ + CJSON_PUBLIC(cJSON *) cJSON_GetArrayItem(const cJSON *array, int index); + /* Get item "string" from object. Case insensitive. */ + CJSON_PUBLIC(cJSON *) cJSON_GetObjectItem(const cJSON * const object, const char * const string); + CJSON_PUBLIC(cJSON *) cJSON_GetObjectItemCaseSensitive(const cJSON * const object, const char * const string); + CJSON_PUBLIC(cJSON_bool) cJSON_HasObjectItem(const cJSON *object, const char *string); + /* For analysing failed parses. This returns a pointer to the parse error. You'll probably need to look a few chars back to make sense of it. Defined when cJSON_Parse() returns 0. 0 when cJSON_Parse() succeeds. 
*/ + CJSON_PUBLIC(const char *) cJSON_GetErrorPtr(void); + + /* Check if the item is a string and return its valuestring */ + CJSON_PUBLIC(char *) cJSON_GetStringValue(cJSON *item); + + /* These functions check the type of an item */ + CJSON_PUBLIC(cJSON_bool) cJSON_IsInvalid(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsFalse(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsTrue(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsBool(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsNull(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsNumber(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsString(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsArray(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsObject(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsRaw(const cJSON * const item); + + /* These calls create a cJSON item of the appropriate type. */ + CJSON_PUBLIC(cJSON *) cJSON_CreateNull(void); + CJSON_PUBLIC(cJSON *) cJSON_CreateTrue(void); + CJSON_PUBLIC(cJSON *) cJSON_CreateFalse(void); + CJSON_PUBLIC(cJSON *) cJSON_CreateBool(cJSON_bool boolean); + CJSON_PUBLIC(cJSON *) cJSON_CreateNumber(double num); + CJSON_PUBLIC(cJSON *) cJSON_CreateString(const char *string); + /* raw json */ + CJSON_PUBLIC(cJSON *) cJSON_CreateRaw(const char *raw); + CJSON_PUBLIC(cJSON *) cJSON_CreateArray(void); + CJSON_PUBLIC(cJSON *) cJSON_CreateObject(void); + + /* Create a string where valuestring references a string so + * it will not be freed by cJSON_Delete */ + CJSON_PUBLIC(cJSON *) cJSON_CreateStringReference(const char *string); + /* Create an object/arrray that only references it's elements so + * they will not be freed by cJSON_Delete */ + CJSON_PUBLIC(cJSON *) cJSON_CreateObjectReference(const cJSON *child); + CJSON_PUBLIC(cJSON *) cJSON_CreateArrayReference(const cJSON *child); + + /* These utilities create an Array of count items. 
*/ + CJSON_PUBLIC(cJSON *) cJSON_CreateIntArray(const int *numbers, int count); + CJSON_PUBLIC(cJSON *) cJSON_CreateFloatArray(const float *numbers, int count); + CJSON_PUBLIC(cJSON *) cJSON_CreateDoubleArray(const double *numbers, int count); + CJSON_PUBLIC(cJSON *) cJSON_CreateStringArray(const char **strings, int count); + + /* Append item to the specified array/object. */ + CJSON_PUBLIC(void) cJSON_AddItemToArray(cJSON *array, cJSON *item); + CJSON_PUBLIC(void) cJSON_AddItemToObject(cJSON *object, const char *string, cJSON *item); + /* Use this when string is definitely const (i.e. a literal, or as good as), and will definitely survive the cJSON object. + * WARNING: When this function was used, make sure to always check that (item->type & cJSON_StringIsConst) is zero before + * writing to `item->string` */ + CJSON_PUBLIC(void) cJSON_AddItemToObjectCS(cJSON *object, const char *string, cJSON *item); + /* Append reference to item to the specified array/object. Use this when you want to add an existing cJSON to a new cJSON, but don't want to corrupt your existing cJSON. */ + CJSON_PUBLIC(void) cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item); + CJSON_PUBLIC(void) cJSON_AddItemReferenceToObject(cJSON *object, const char *string, cJSON *item); + + /* Remove/Detatch items from Arrays/Objects. */ + CJSON_PUBLIC(cJSON *) cJSON_DetachItemViaPointer(cJSON *parent, cJSON * const item); + CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromArray(cJSON *array, int which); + CJSON_PUBLIC(void) cJSON_DeleteItemFromArray(cJSON *array, int which); + CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObject(cJSON *object, const char *string); + CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObjectCaseSensitive(cJSON *object, const char *string); + CJSON_PUBLIC(void) cJSON_DeleteItemFromObject(cJSON *object, const char *string); + CJSON_PUBLIC(void) cJSON_DeleteItemFromObjectCaseSensitive(cJSON *object, const char *string); + + /* Update array items. 
*/ + CJSON_PUBLIC(void) cJSON_InsertItemInArray(cJSON *array, int which, cJSON *newitem); /* Shifts pre-existing items to the right. */ + CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemViaPointer(cJSON * const parent, cJSON * const item, cJSON * replacement); + CJSON_PUBLIC(void) cJSON_ReplaceItemInArray(cJSON *array, int which, cJSON *newitem); + CJSON_PUBLIC(void) cJSON_ReplaceItemInObject(cJSON *object, const char *string, cJSON *newitem); + CJSON_PUBLIC(void) cJSON_ReplaceItemInObjectCaseSensitive(cJSON *object, const char *string, cJSON *newitem); + + /* Duplicate a cJSON item */ + CJSON_PUBLIC(cJSON *) cJSON_Duplicate(const cJSON *item, cJSON_bool recurse); + /* Duplicate will create a new, identical cJSON item to the one you pass, in new memory that will + need to be released. With recurse!=0, it will duplicate any children connected to the item. + The item->next and ->prev pointers are always zero on return from Duplicate. */ + /* Recursively compare two cJSON items for equality. If either a or b is NULL or invalid, they will be considered unequal. + * case_sensitive determines if object keys are treated case sensitive (1) or case insensitive (0) */ + CJSON_PUBLIC(cJSON_bool) cJSON_Compare(const cJSON * const a, const cJSON * const b, const cJSON_bool case_sensitive); + + + CJSON_PUBLIC(void) cJSON_Minify(char *json); + + /* Helper functions for creating and adding items to an object at the same time. + * They return the added item or NULL on failure. 
*/ + CJSON_PUBLIC(cJSON*) cJSON_AddNullToObject(cJSON * const object, const char * const name); + CJSON_PUBLIC(cJSON*) cJSON_AddTrueToObject(cJSON * const object, const char * const name); + CJSON_PUBLIC(cJSON*) cJSON_AddFalseToObject(cJSON * const object, const char * const name); + CJSON_PUBLIC(cJSON*) cJSON_AddBoolToObject(cJSON * const object, const char * const name, const cJSON_bool boolean); + CJSON_PUBLIC(cJSON*) cJSON_AddNumberToObject(cJSON * const object, const char * const name, const double number); + CJSON_PUBLIC(cJSON*) cJSON_AddStringToObject(cJSON * const object, const char * const name, const char * const string); + CJSON_PUBLIC(cJSON*) cJSON_AddRawToObject(cJSON * const object, const char * const name, const char * const raw); + CJSON_PUBLIC(cJSON*) cJSON_AddObjectToObject(cJSON * const object, const char * const name); + CJSON_PUBLIC(cJSON*) cJSON_AddArrayToObject(cJSON * const object, const char * const name); + + /* When assigning an integer value, it needs to be propagated to valuedouble too. */ +#define cJSON_SetIntValue(object, number) ((object) ? (object)->valueint = (object)->valuedouble = (number) : (number)) + /* helper for the cJSON_SetNumberValue macro */ + CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number); +#define cJSON_SetNumberValue(object, number) ((object != NULL) ? cJSON_SetNumberHelper(object, (double)number) : (number)) + + /* Macro for iterating over an array or object */ +#define cJSON_ArrayForEach(element, array) for(element = (array != NULL) ? 
(array)->child : NULL; element != NULL; element = element->next) + + /* malloc/free objects using the malloc/free functions that have been set with cJSON_InitHooks */ + CJSON_PUBLIC(void *) cJSON_malloc(size_t size); + CJSON_PUBLIC(void) cJSON_free(void *object); #ifdef __cplusplus } #endif -#endif + +#endif \ No newline at end of file diff --git a/source/code/providers/Container_ContainerInventory_Class_Provider.cpp b/source/code/providers/Container_ContainerInventory_Class_Provider.cpp index 7fdd746a1..68c13053a 100644 --- a/source/code/providers/Container_ContainerInventory_Class_Provider.cpp +++ b/source/code/providers/Container_ContainerInventory_Class_Provider.cpp @@ -103,11 +103,11 @@ class ContainerQuery { cJSON* entry = cJSON_GetArrayItem(response[0], i); - if (entry) + if (entry != NULL) { cJSON* tags = cJSON_GetObjectItem(entry, "RepoTags"); - if (tags && cJSON_GetArraySize(tags)) + if ((tags != NULL) && cJSON_GetArraySize(tags)) { string value = ""; cJSON* arrItem = cJSON_GetArrayItem(tags, 0); @@ -168,7 +168,7 @@ class ContainerQuery try { cJSON* config = cJSON_GetObjectItem(entry, "Config"); - if (config) + if (config != NULL) { // Hostname of container string hostnamevalue = ""; @@ -232,11 +232,11 @@ class ContainerQuery // Compose group instance.ComposeGroup_value(""); - if (labels) + if (labels != NULL) { cJSON* groupName = cJSON_GetObjectItem(labels, "com.docker.compose.project"); - if (groupName) + if (groupName != NULL) { instance.ComposeGroup_value(groupName->valuestring); } @@ -244,7 +244,10 @@ class ContainerQuery } else { - syslog(LOG_WARNING, "Attempt in ObtainContainerConfig to get container %s config information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + if (cJSON_GetObjectItem(entry, "Id") != NULL) + { + syslog(LOG_WARNING, "Attempt in ObtainContainerConfig to get container %s config information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + } } } catch (std::exception &e) @@ -268,7 +271,7 @@ 
class ContainerQuery try { cJSON* state = cJSON_GetObjectItem(entry, "State"); - if (state) + if (state != NULL) { cJSON* objItem = cJSON_GetObjectItem(state, "ExitCode"); if (objItem != NULL) @@ -278,7 +281,10 @@ class ContainerQuery if (exitCode < 0) { exitCode = 128; - syslog(LOG_NOTICE, "Container %s returned negative exit code", cJSON_GetObjectItem(entry, "Id")->valuestring); + if (cJSON_GetObjectItem(entry, "Id") != NULL) + { + syslog(LOG_NOTICE, "Container %s returned negative exit code", cJSON_GetObjectItem(entry, "Id")->valuestring); + } } instance.ExitCode_value(exitCode); @@ -328,7 +334,10 @@ class ContainerQuery } else { - syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + if (cJSON_GetObjectItem(entry, "Id")) + { + syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + } } } catch (std::exception &e) @@ -352,7 +361,7 @@ class ContainerQuery try { cJSON* hostConfig = cJSON_GetObjectItem(entry, "HostConfig"); - if (hostConfig) + if (hostConfig != NULL) { // Links cJSON* objItem = cJSON_GetObjectItem(hostConfig, "Links"); @@ -372,7 +381,10 @@ class ContainerQuery } else { - syslog(LOG_WARNING, "Attempt in ObtainContainerHostConfig to get container %s host config information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + if (cJSON_GetObjectItem(entry, "Id")) + { + syslog(LOG_WARNING, "Attempt in ObtainContainerHostConfig to get container %s host config information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + } } } catch (std::exception &e) diff --git a/source/code/providers/Container_ContainerStatistics_Class_Provider.cpp b/source/code/providers/Container_ContainerStatistics_Class_Provider.cpp index c43057ec7..08b68b1d8 100644 --- a/source/code/providers/Container_ContainerStatistics_Class_Provider.cpp +++ 
b/source/code/providers/Container_ContainerStatistics_Class_Provider.cpp @@ -34,17 +34,17 @@ class StatsQuery int totalRx = 0; int totalTx = 0; - if (stats) + if (stats != NULL) { cJSON* network = cJSON_GetObjectItem(stats, "networks"); - if (network) + if (network != NULL) { // Docker 1.9+ network = network->child; // Sum the number of bytes from each NIC if there is more than one - while (network) + while (network != NULL) { cJSON* objItem = cJSON_GetObjectItem(network, "rx_bytes"); if (objItem != NULL) { @@ -66,7 +66,7 @@ class StatsQuery { // Docker 1.8.x network = cJSON_GetObjectItem(stats, "network"); - if (network) + if (network != NULL) { cJSON* objItem = cJSON_GetObjectItem(network, "rx_bytes"); if (objItem != NULL) { @@ -110,7 +110,7 @@ class StatsQuery static void TrySetContainerMemoryData(Container_ContainerStatistics_Class& instance, cJSON* stats) { try { - if (stats) + if (stats != NULL) { cJSON* memory_stats = cJSON_GetObjectItem(stats, "memory_stats"); if (memory_stats != NULL) { @@ -150,27 +150,27 @@ class StatsQuery instance.DiskBytesRead_value(0); instance.DiskBytesWritten_value(0); - if (stats) + if (stats != NULL) { cJSON* blkio_stats = cJSON_GetObjectItem(stats, "blkio_stats"); - if (blkio_stats) + if (blkio_stats != NULL) { cJSON* values = cJSON_GetObjectItem(blkio_stats, "io_service_bytes_recursive"); bool readFlag = false; bool writeFlag = false; - for (int i = 0; values && !(readFlag && writeFlag) && i < cJSON_GetArraySize(values); i++) + for (int i = 0; values != NULL && !(readFlag && writeFlag) && i < cJSON_GetArraySize(values); i++) { cJSON* entry = cJSON_GetArrayItem(values, i); - if (entry) + if (entry != NULL) { cJSON* op = cJSON_GetObjectItem(entry, "op"); cJSON* rawValue = cJSON_GetObjectItem(entry, "value"); - if (op && rawValue) + if ((op != NULL) && (rawValue != NULL)) { if (!strcmp(op->valuestring, "Read")) { @@ -215,15 +215,15 @@ class StatsQuery result["system"] = 0; try { - if (stats) + if (stats != NULL) { cJSON* cpu_stats 
= cJSON_GetObjectItem(stats, "cpu_stats"); - if (cpu_stats) + if (cpu_stats != NULL) { cJSON* cpu_usage = cJSON_GetObjectItem(cpu_stats, "cpu_usage"); - if (cpu_usage) + if (cpu_usage != NULL) { cJSON* objItem = cJSON_GetObjectItem(cpu_usage, "total_usage"); if (objItem != NULL) { @@ -269,15 +269,15 @@ class StatsQuery instance.CPUTotal_value(0); instance.CPUTotalPct_value(0); - if (stats) + if (stats != NULL) { cJSON* cpu_stats = cJSON_GetObjectItem(stats, "cpu_stats"); - if (cpu_stats) + if (cpu_stats != NULL) { cJSON* cpu_usage = cJSON_GetObjectItem(cpu_stats, "cpu_usage"); - if (cpu_usage) + if (cpu_usage != NULL) { cJSON* totalUsageItem = cJSON_GetObjectItem(cpu_usage, "total_usage"); cJSON* systemCpuUsageItem = cJSON_GetObjectItem(cpu_stats, "system_cpu_usage"); @@ -333,7 +333,7 @@ class StatsQuery { cJSON* entry = cJSON_GetArrayItem(response[0], i); - if (entry) + if (entry != NULL) { // New perf entry Container_ContainerStatistics_Class instance; @@ -396,7 +396,10 @@ class StatsQuery // See http://docs.docker.com/engine/reference/api/docker_remote_api_v1.21/#get-container-stats-based-on-resource-usage for example output if (!subResponse.empty() && subResponse[0]) { - TrySetContainerCpuData(result[i], subResponse[0], previousStatsList[i]); + if (i < previousStatsList.size()) + { + TrySetContainerCpuData(result[i], subResponse[0], previousStatsList[i]); + } // Set container name in 'InstanceName' field of Perf data. 
result[i].InstanceID_value(result[i].ElementName_value()); diff --git a/source/code/providers/Container_DaemonEvent_Class_Provider.cpp b/source/code/providers/Container_DaemonEvent_Class_Provider.cpp index d5d2ce6f2..bf2ab3b53 100644 --- a/source/code/providers/Container_DaemonEvent_Class_Provider.cpp +++ b/source/code/providers/Container_DaemonEvent_Class_Provider.cpp @@ -137,11 +137,11 @@ class EventQuery { cJSON* entry = cJSON_GetArrayItem(response[0], i); - if (entry) + if (entry != NULL) { cJSON* nameField = cJSON_GetObjectItem(entry, "Names"); - if (nameField && cJSON_GetArraySize(nameField)) + if ((nameField != NULL) && cJSON_GetArraySize(nameField)) { // Docker API documentation says that this field contains the short ID but that is not the case; use full ID instead cJSON* objItem = cJSON_GetObjectItem(entry, "Id"); @@ -239,7 +239,7 @@ class EventQuery cJSON* entry = cJSON_GetArrayItem(response[0], i); // the newer versions of the API may return objects that do not have status or id - if (entry && cJSON_GetObjectItem(entry, "status") != NULL && cJSON_GetObjectItem(entry, "id") != NULL) + if ((entry != NULL) && cJSON_GetObjectItem(entry, "status") != NULL && cJSON_GetObjectItem(entry, "id") != NULL) { // New inventory entry Container_DaemonEvent_Class instance; diff --git a/source/code/providers/Container_ImageInventory_Class_Provider.cpp b/source/code/providers/Container_ImageInventory_Class_Provider.cpp index 3cc088683..01d1c639c 100644 --- a/source/code/providers/Container_ImageInventory_Class_Provider.cpp +++ b/source/code/providers/Container_ImageInventory_Class_Provider.cpp @@ -35,7 +35,7 @@ class InventoryQuery string result = ""; try { - if (tags && cJSON_GetArraySize(tags)) + if ((tags != NULL) && cJSON_GetArraySize(tags)) { bool flag = false; @@ -164,7 +164,7 @@ class InventoryQuery try { cJSON* state = cJSON_GetObjectItem(entry, "State"); - if (state) + if (state != NULL) { cJSON* objItem = cJSON_GetObjectItem(entry, "Image"); if (objItem != NULL) 
@@ -173,10 +173,10 @@ class InventoryQuery { string id = string(objItem->valuestring); - if (cJSON_GetObjectItem(state, "Running")->valueint) + if (cJSON_GetObjectItem(state, "Running") != NULL && cJSON_GetObjectItem(state, "Running")->valueint) { // Running container - if (cJSON_GetObjectItem(state, "Paused")->valueint) + if (cJSON_GetObjectItem(state, "Paused") != NULL && cJSON_GetObjectItem(state, "Paused")->valueint) { // Paused container instances[idTable[id]].Paused_value(instances[idTable[id]].Paused_value() + 1); @@ -188,7 +188,7 @@ class InventoryQuery } else { - if (cJSON_GetObjectItem(state, "ExitCode")->valueint) + if (cJSON_GetObjectItem(state, "ExitCode") != NULL && cJSON_GetObjectItem(state, "ExitCode")->valueint) { // Container exited nonzero instances[idTable[id]].Failed_value(instances[idTable[id]].Failed_value() + 1); @@ -206,7 +206,10 @@ class InventoryQuery } else { - syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + if (cJSON_GetObjectItem(entry, "Id") != NULL) + { + syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + } } } catch (std::exception &e) @@ -239,7 +242,7 @@ class InventoryQuery { cJSON* entry = cJSON_GetArrayItem(response[0], i); - if (entry) + if (entry != NULL) { cJSON* objItem = cJSON_GetObjectItem(entry, "Id"); if (objItem != NULL) @@ -321,7 +324,7 @@ class InventoryQuery { cJSON* entry = cJSON_GetArrayItem(response[0], i); - if (entry) + if (entry != NULL) { // New inventory entry Container_ImageInventory_Class instance; diff --git a/source/code/providers/Container_Process_Class_Provider.cpp b/source/code/providers/Container_Process_Class_Provider.cpp index 76b15bdfc..9adc4edcd 100644 --- a/source/code/providers/Container_Process_Class_Provider.cpp +++ b/source/code/providers/Container_Process_Class_Provider.cpp @@ 
-55,7 +55,7 @@ class ContainerProcessQuery for (int i = 0; i < cJSON_GetArraySize(dockerPsResponse[0]); i++) { cJSON* containerEntry = cJSON_GetArrayItem(dockerPsResponse[0], i); - if (containerEntry) + if (containerEntry != NULL) { cJSON* objItem = cJSON_GetObjectItem(containerEntry, "Id"); if (objItem != NULL) From 4b630215824d85d568fd384b1bbee071996bec1a Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Thu, 27 Sep 2018 16:10:59 -0700 Subject: [PATCH 012/160] Adding a missed null check (#135) --- .../code/providers/Container_DaemonEvent_Class_Provider.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/code/providers/Container_DaemonEvent_Class_Provider.cpp b/source/code/providers/Container_DaemonEvent_Class_Provider.cpp index bf2ab3b53..51e253d73 100644 --- a/source/code/providers/Container_DaemonEvent_Class_Provider.cpp +++ b/source/code/providers/Container_DaemonEvent_Class_Provider.cpp @@ -289,7 +289,10 @@ class EventQuery else { // Image event - instance.ElementName_value(cJSON_GetObjectItem(entry, "id")->valuestring); + if (cJSON_GetObjectItem(entry, "id") != NULL) + { + instance.ElementName_value(cJSON_GetObjectItem(entry, "id")->valuestring); + } instance.Id_value(""); instance.ContainerName_value(""); } From 8b964fd7ee54948b7374ed44f3253d0d89ceb443 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Thu, 27 Sep 2018 17:01:04 -0700 Subject: [PATCH 013/160] reusing some variables (#136) --- ...iner_ContainerInventory_Class_Provider.cpp | 26 ++++++++++++------- .../Container_DaemonEvent_Class_Provider.cpp | 5 ++-- ...ontainer_ImageInventory_Class_Provider.cpp | 16 +++++++----- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/source/code/providers/Container_ContainerInventory_Class_Provider.cpp b/source/code/providers/Container_ContainerInventory_Class_Provider.cpp index 68c13053a..ded8fb869 100644 --- a/source/code/providers/Container_ContainerInventory_Class_Provider.cpp +++ 
b/source/code/providers/Container_ContainerInventory_Class_Provider.cpp @@ -210,7 +210,11 @@ class ContainerQuery correctedstring = stringToTruncate + "\"]"; } instance.EnvironmentVar_value(correctedstring.c_str()); - syslog(LOG_WARNING, "Environment variable truncated for container %s", cJSON_GetObjectItem(entry, "Id")->valuestring); + cJSON* idItem = cJSON_GetObjectItem(entry, "Id"); + if (idItem != NULL) + { + syslog(LOG_WARNING, "Environment variable truncated for container %s", idItem->valuestring); + } } else { instance.EnvironmentVar_value(strcmp(env, "null") ? env : ""); @@ -244,9 +248,10 @@ class ContainerQuery } else { - if (cJSON_GetObjectItem(entry, "Id") != NULL) + cJSON* idItem = cJSON_GetObjectItem(entry, "Id"); + if (idItem != NULL) { - syslog(LOG_WARNING, "Attempt in ObtainContainerConfig to get container %s config information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + syslog(LOG_WARNING, "Attempt in ObtainContainerConfig to get container %s config information returned null", idItem->valuestring); } } } @@ -281,9 +286,10 @@ class ContainerQuery if (exitCode < 0) { exitCode = 128; - if (cJSON_GetObjectItem(entry, "Id") != NULL) + cJSON* idItem = cJSON_GetObjectItem(entry, "Id"); + if (idItem != NULL) { - syslog(LOG_NOTICE, "Container %s returned negative exit code", cJSON_GetObjectItem(entry, "Id")->valuestring); + syslog(LOG_NOTICE, "Container %s returned negative exit code", idItem->valuestring); } } @@ -334,9 +340,10 @@ class ContainerQuery } else { - if (cJSON_GetObjectItem(entry, "Id")) + cJSON* idItem = cJSON_GetObjectItem(entry, "Id"); + if (idItem) { - syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", idItem->valuestring); } } } @@ -381,9 +388,10 @@ class ContainerQuery } else { - if 
(cJSON_GetObjectItem(entry, "Id")) + cJSON* idItem = cJSON_GetObjectItem(entry, "Id"); + if (idItem != NULL) { - syslog(LOG_WARNING, "Attempt in ObtainContainerHostConfig to get container %s host config information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + syslog(LOG_WARNING, "Attempt in ObtainContainerHostConfig to get container %s host config information returned null", idItem->valuestring); } } } diff --git a/source/code/providers/Container_DaemonEvent_Class_Provider.cpp b/source/code/providers/Container_DaemonEvent_Class_Provider.cpp index 51e253d73..0c28e4769 100644 --- a/source/code/providers/Container_DaemonEvent_Class_Provider.cpp +++ b/source/code/providers/Container_DaemonEvent_Class_Provider.cpp @@ -288,10 +288,11 @@ class EventQuery } else { + cJSON* idItem = cJSON_GetObjectItem(entry, "id"); // Image event - if (cJSON_GetObjectItem(entry, "id") != NULL) + if (idItem != NULL) { - instance.ElementName_value(cJSON_GetObjectItem(entry, "id")->valuestring); + instance.ElementName_value(idItem->valuestring); } instance.Id_value(""); instance.ContainerName_value(""); diff --git a/source/code/providers/Container_ImageInventory_Class_Provider.cpp b/source/code/providers/Container_ImageInventory_Class_Provider.cpp index 01d1c639c..f5742ef5f 100644 --- a/source/code/providers/Container_ImageInventory_Class_Provider.cpp +++ b/source/code/providers/Container_ImageInventory_Class_Provider.cpp @@ -173,10 +173,12 @@ class InventoryQuery { string id = string(objItem->valuestring); - if (cJSON_GetObjectItem(state, "Running") != NULL && cJSON_GetObjectItem(state, "Running")->valueint) + cJSON* runningItem = cJSON_GetObjectItem(state, "Running"); + if (runningItem != NULL && runningItem->valueint) { // Running container - if (cJSON_GetObjectItem(state, "Paused") != NULL && cJSON_GetObjectItem(state, "Paused")->valueint) + cJSON* pausedItem = cJSON_GetObjectItem(state, "Paused"); + if (pausedItem != NULL && pausedItem->valueint) { // Paused 
container instances[idTable[id]].Paused_value(instances[idTable[id]].Paused_value() + 1); @@ -188,7 +190,8 @@ class InventoryQuery } else { - if (cJSON_GetObjectItem(state, "ExitCode") != NULL && cJSON_GetObjectItem(state, "ExitCode")->valueint) + cJSON* exitCodeItem = cJSON_GetObjectItem(state, "ExitCode"); + if (exitCodeItem != NULL && exitCodeItem->valueint) { // Container exited nonzero instances[idTable[id]].Failed_value(instances[idTable[id]].Failed_value() + 1); @@ -206,9 +209,10 @@ class InventoryQuery } else { - if (cJSON_GetObjectItem(entry, "Id") != NULL) + cJSON* idItem = cJSON_GetObjectItem(entry, "Id"); + if (idItem != NULL) { - syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", idItem->valuestring); } } } @@ -263,7 +267,7 @@ class InventoryQuery } else { - syslog(LOG_WARNING, "API call in AggregateContainerStatus to inspect container %s returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + syslog(LOG_WARNING, "API call in AggregateContainerStatus to inspect container %s returned null", objItem->valuestring); } } } From 938c2edc0d84917c123c2947c791fa3806fce25c Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 28 Sep 2018 16:00:29 -0700 Subject: [PATCH 014/160] Rashmi/cjson delete null check (#138) * adding null check for cjson-delete * null chk * removing null check --- source/code/providers/Container_Process_Class_Provider.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/code/providers/Container_Process_Class_Provider.cpp b/source/code/providers/Container_Process_Class_Provider.cpp index 9adc4edcd..e27df1788 100644 --- a/source/code/providers/Container_Process_Class_Provider.cpp +++ b/source/code/providers/Container_Process_Class_Provider.cpp @@ -163,7 +163,10 @@ class 
ContainerProcessQuery } } } - cJSON_Delete(dockerPsResponse[0]); + if (!dockerPsResponse.empty() && dockerPsResponse[0]) + { + cJSON_Delete(dockerPsResponse[0]); + } } catch (std::exception &e) { From fbfdf11e98cebbbc623bd845bf3010b46dd3918b Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Tue, 2 Oct 2018 17:33:22 -0700 Subject: [PATCH 015/160] updating log level to debug for some provider workflows (#139) --- installer/conf/container.conf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 9eaed9b47..a41b963a9 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -111,7 +111,7 @@ type out_oms - log_level info + log_level debug buffer_chunk_limit 20m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_containerinventory*.buffer @@ -124,7 +124,7 @@ type out_oms - log_level info + log_level debug buffer_chunk_limit 20m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_imageinventory*.buffer @@ -137,7 +137,7 @@ type out_oms - log_level info + log_level debug buffer_chunk_limit 20m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_servicelog*.buffer From d4260663ccaeae093911052ab47bb2f644f3e56c Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Thu, 4 Oct 2018 14:01:11 -0700 Subject: [PATCH 016/160] Fixing CPU Utilization and removing Fluent-bit filters (#140) Removing fluent-bit filters, CPU optimizations --- installer/conf/td-agent-bit.conf | 20 ++---------- source/code/go/src/plugins/oms.go | 47 ++++++++++++++++----------- source/code/go/src/plugins/out_oms.go | 2 +- 3 files changed, 32 insertions(+), 37 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 84a9fcf94..27916eafd 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -12,23 +12,9 @@ Parser docker Mem_Buf_Limit 30m Path_Key filepath - -[FILTER] - Name record_modifier - Match oms.container.log.* 
- Whitelist_key log - Whitelist_key stream - Whitelist_key time - Whitelist_key filepath - -[FILTER] - Name modify - Match oms.container.log.* - Rename log LogEntry - Rename stream LogEntrySource - Rename time LogEntryTimeStamp - Rename filepath Filepath - Add_if_not_present SourceSystem Containers + Buffer_Chunk_Size 1m + Buffer_Max_Size 1m + Skip_Long_Lines On [OUTPUT] Name oms diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 2e9e2f3d0..c7fe8eb42 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -12,11 +12,11 @@ import ( "strings" "sync" "time" -) -import ( + "github.com/fluent/fluent-bit-go/output" - "github.com/mitchellh/mapstructure" + lumberjack "gopkg.in/natefinch/lumberjack.v2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" @@ -81,7 +81,6 @@ type DataItem struct { Name string `json:"Name"` SourceSystem string `json:"SourceSystem"` Computer string `json:"Computer"` - Filepath string `json:"Filepath"` } // ContainerLogBlob represents the object corresponding to the payload that is sent to the ODS end point @@ -199,23 +198,18 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { for _, record := range tailPluginRecords { - filepath := toString(record["Filepath"]) - containerID := getContainerIDFromFilePath(filepath) + containerID := GetContainerIDFromFilePath(toString(record["filepath"])) if containerID == "" || containsKey(IgnoreIDSet, containerID) { continue } - var dataItem DataItem stringMap := make(map[string]string) - // convert map[interface{}]interface{} to map[string]string - for key, value := range record { - strKey := fmt.Sprintf("%v", key) - strValue := toString(value) - stringMap[strKey] = strValue - } - + stringMap["LogEntry"] = toString(record["log"]) + stringMap["LogEntrySource"] = toString(record["stream"]) + stringMap["LogEntryTimeStamp"] = toString(record["time"]) + 
stringMap["SourceSystem"] = "Containers" stringMap["Id"] = containerID if val, ok := ImageIDMap[containerID]; ok { @@ -238,8 +232,17 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { } } - stringMap["Computer"] = Computer - mapstructure.Decode(stringMap, &dataItem) + dataItem := DataItem{ + ID: stringMap["Id"], + LogEntry: stringMap["LogEntry"], + LogEntrySource: stringMap["LogEntrySource"], + LogEntryTimeStamp: stringMap["LogEntryTimeStamp"], + SourceSystem: stringMap["SourceSystem"], + Computer: Computer, + Image: stringMap["Image"], + Name: stringMap["Name"], + } + dataItems = append(dataItems, dataItem) } @@ -281,11 +284,17 @@ func containsKey(currentMap map[string]bool, key string) bool { } func toString(s interface{}) string { - value := s.([]uint8) - return string([]byte(value[:])) + switch t := s.(type) { + case []byte: + // prevent encoding to base64 + return string(t) + default: + return "" + } } -func getContainerIDFromFilePath(filepath string) string { +// GetContainerIDFromFilePath Gets the container ID From the file Path +func GetContainerIDFromFilePath(filepath string) string { start := strings.LastIndex(filepath, "-") end := strings.LastIndex(filepath, ".") if start >= end || start == -1 || end == -1 { diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index ec9a573d1..0efc1242d 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -10,7 +10,7 @@ import ( //export FLBPluginRegister func FLBPluginRegister(ctx unsafe.Pointer) int { - return output.FLBPluginRegister(ctx, "oms", "Stdout GO!") + return output.FLBPluginRegister(ctx, "oms", "OMS GO!") } //export FLBPluginInit From c2cabab7199870af23bb90de10bca4d8eb50e847 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 9 Oct 2018 14:50:10 -0700 Subject: [PATCH 017/160] Minor tweaks 1. Remove some logging 2. Added more Error Handling 3. 
Continue when there is an error with k8s api (#141) * Removing some logs, added more error checking, continue on kube-api error * Return FLB OK for json Marshall error, instead of RETRY --- source/code/go/src/plugins/oms.go | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index c7fe8eb42..d20f11d57 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -133,6 +133,7 @@ func updateContainerImageNameMaps() { pods, err := ClientSet.CoreV1().Pods("").List(metav1.ListOptions{}) if err != nil { Log("Error getting pods %s\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + continue } for _, pod := range pods.Items { @@ -216,20 +217,12 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { stringMap["Image"] = val } else { Log("ContainerId %s not present in Map ", containerID) - Log("CurrentMap Snapshot \n") - for k, v := range ImageIDMap { - Log("%s ==> %s", k, v) - } } if val, ok := NameIDMap[containerID]; ok { stringMap["Name"] = val } else { Log("ContainerId %s not present in Map ", containerID) - Log("CurrentMap Snapshot \n") - for k, v := range NameIDMap { - Log("%s ==> %s", k, v) - } } dataItem := DataItem{ @@ -253,6 +246,10 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { DataItems: dataItems} marshalled, err := json.Marshal(logEntry) + if err != nil { + Log("Error while Marshalling log Entry: %s", err.Error()) + return output.FLB_OK + } req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) req.Header.Set("Content-Type", "application/json") From 32567db6965f65154663c0204c1a3e2a599530d0 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Wed, 10 Oct 2018 14:09:04 -0700 Subject: [PATCH 018/160] * Change FluentBit flush interval to 30 secs (from 5 secs) * Remove 
ContainerPerf, ContainerServiceLog,ContainerProcess (OMI workflows) for Daemonset --- installer/conf/container.conf | 33 -------------------------------- installer/conf/td-agent-bit.conf | 2 +- 2 files changed, 1 insertion(+), 34 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index a41b963a9..1916300cb 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -7,19 +7,6 @@ bind 127.0.0.1 -# Filter container logs - - type filter_docker_log - log_path "/var/opt/microsoft/omsagent/log/filter_docker_log.txt" - - -# Container perf - - type oms_omi - object_name "Container" - interval 30s - - # Container inventory type omi @@ -40,16 +27,6 @@ ] -# Container service log - - type omi - run_interval 60s - tag oms.container.servicelog - items [ - ["root/cimv2","Container_DaemonEvent"] - ] - - # Container host inventory type omi @@ -60,16 +37,6 @@ ] -# Container processes - - type omi - run_interval 60s - tag oms.api.ContainerProcess - items [ - ["root/cimv2","Container_Process"] - ] - - #cadvisor perf type cadvisorperf diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 27916eafd..b5d2309e1 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -1,5 +1,5 @@ [SERVICE] - Flush 5 + Flush 30 Log_Level info Parsers_File /etc/td-agent-bit/parsers.conf Log_File /var/opt/microsoft/docker-cimprov/log/fluent-bit.log From afc981d504c3f44fd3232892e4823d5d09503d14 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 11 Oct 2018 21:37:09 -0700 Subject: [PATCH 019/160] Container Log Telemetry --- .gitignore | 3 + installer/conf/td-agent-bit.conf | 7 +- source/code/go/src/plugins/glide.lock | 10 +- source/code/go/src/plugins/glide.yaml | 8 +- source/code/go/src/plugins/oms.go | 9 +- source/code/go/src/plugins/out_oms.go | 10 ++ source/code/go/src/plugins/telemetry.go | 151 ++++++++++++++++++++++++ 7 files changed, 188 insertions(+), 10 deletions(-) create mode 
100644 source/code/go/src/plugins/telemetry.go diff --git a/.gitignore b/.gitignore index 92c8c0cf2..e58d69f7b 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ /test/code/providers/TestScriptPath.h /test/code/providers/providertestutils.cpp +source/code/go/src/plugins/profiling +.vscode/launch.json +source/code/go/src/plugins/vendor/ \ No newline at end of file diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index b5d2309e1..5a1c105bf 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -17,5 +17,8 @@ Skip_Long_Lines On [OUTPUT] - Name oms - Match oms.container.log.* \ No newline at end of file + Name oms + EnableTelemetry true + TelemetryPushInterval 300 + Match oms.container.log.* + AgentVersion internaltest1004-2 \ No newline at end of file diff --git a/source/code/go/src/plugins/glide.lock b/source/code/go/src/plugins/glide.lock index 4597b594a..fc147fe74 100644 --- a/source/code/go/src/plugins/glide.lock +++ b/source/code/go/src/plugins/glide.lock @@ -1,5 +1,5 @@ -hash: bb32415f402ab29751f29b8e394bc974cbc31861453d817aaeb94ef83dacc488 -updated: 2018-09-14T18:14:28.748047598Z +hash: a6a873d09ed9c3d890a70122e61efba992ead9850fe48f6fcb020d86800d4ade +updated: 2018-10-10T13:37:51.9703908-07:00 imports: - name: github.com/fluent/fluent-bit-go version: c4a158a6e3a793166c6ecfa2d5c80d71eada8959 @@ -38,8 +38,10 @@ imports: - diskcache - name: github.com/json-iterator/go version: f2b4162afba35581b6d4a50d3b8f34e33c144682 -- name: github.com/mitchellh/mapstructure - version: fa473d140ef3c6adf42d6b391fe76707f1f243c8 +- name: github.com/Microsoft/ApplicationInsights-Go + version: d2df5d440eda5372f24fcac03839a64d6cb5f7e5 + subpackages: + - appinsights - name: github.com/modern-go/concurrent version: bacd9c7ef1dd9b15be4a9909b8ac7a4e313eec94 - name: github.com/modern-go/reflect2 diff --git a/source/code/go/src/plugins/glide.yaml b/source/code/go/src/plugins/glide.yaml index 403e1efc4..b2829391b 100644 
--- a/source/code/go/src/plugins/glide.yaml +++ b/source/code/go/src/plugins/glide.yaml @@ -1,10 +1,8 @@ -package: plugins +package: . import: - package: github.com/fluent/fluent-bit-go subpackages: - output -- package: github.com/mitchellh/mapstructure - version: ^1.0.0 - package: gopkg.in/natefinch/lumberjack.v2 version: ^2.1.0 - package: k8s.io/apimachinery @@ -15,3 +13,7 @@ import: subpackages: - kubernetes - rest +- package: github.com/Microsoft/ApplicationInsights-Go + version: ^0.4.2 + subpackages: + - appinsights diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index d20f11d57..807e00937 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -42,6 +42,8 @@ var ( OMSEndpoint string // Computer (Hostname) when ingesting into ContainerLog table Computer string + // WorkspaceID log analytics workspace id + WorkspaceID string ) var ( @@ -170,6 +172,7 @@ func updateKubeSystemContainerIDs() { pods, err := ClientSet.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) if err != nil { Log("Error getting pods %s\nIt is ok to log here and continue. 
Kube-system logs will be collected", err.Error()) + continue } _ignoreIDSet := make(map[string]bool) @@ -269,7 +272,10 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { return output.FLB_RETRY } - Log("Successfully flushed %d records in %s", len(dataItems), elapsed) + numRecords := len(dataItems) + Log("Successfully flushed %d records in %s", numRecords, elapsed) + FlushedRecordsCount += float64(numRecords) + FlushedRecordsTimeTaken += float64(elapsed / time.Millisecond) } return output.FLB_OK @@ -322,6 +328,7 @@ func InitializePlugin(pluginConfPath string) { log.Fatalf("Error Reading omsadmin configuration %s\n", err.Error()) } OMSEndpoint = omsadminConf["OMS_ENDPOINT"] + WorkspaceID = omsadminConf["WORKSPACE_ID"] Log("OMSEndpoint %s", OMSEndpoint) // Initialize image,name map refresh ticker diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 0efc1242d..37c9eb12b 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -5,6 +5,7 @@ import ( ) import ( "C" + "strings" "unsafe" ) @@ -19,6 +20,14 @@ func FLBPluginRegister(ctx unsafe.Pointer) int { func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") InitializePlugin(ContainerLogPluginConfFilePath) + enablePlugin := output.FLBPluginConfigKey(ctx, "EnableTelemetry") + telemetryPushInterval := output.FLBPluginConfigKey(ctx, "TelemetryPushInterval") + agentVersion := output.FLBPluginConfigKey(ctx, "AgentVersion") + + if strings.Compare(strings.ToLower(enablePlugin), "true") == 0 { + go SendContainerLogFlushRateMetric(telemetryPushInterval, agentVersion) + SendEvent(EventNameContainerLogInit, make(map[string]string)) + } return output.FLB_OK } @@ -48,6 +57,7 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { // FLBPluginExit exits the plugin func FLBPluginExit() int { + defer TelemetryShutdown() KubeSystemContainersRefreshTicker.Stop() 
ContainerImageNameRefreshTicker.Stop() return output.FLB_OK diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go new file mode 100644 index 000000000..4d4ab2371 --- /dev/null +++ b/source/code/go/src/plugins/telemetry.go @@ -0,0 +1,151 @@ +package main + +import ( + "encoding/base64" + "errors" + "os" + "strconv" + "strings" + "time" + + "github.com/Microsoft/ApplicationInsights-Go/appinsights" +) + +var ( + // FlushedRecordsCount indicates the number of flushed records in the current period + FlushedRecordsCount float64 + // FlushedRecordsTimeTaken indicates the cumulative time taken to flush the records for the current period + FlushedRecordsTimeTaken float64 + // CommonProperties indicates the dimensions that are sent with every event/metric + CommonProperties map[string]string + // TelemetryClient is the client used to send the telemetry + TelemetryClient appinsights.TelemetryClient + // ContainerLogTelemetryTicker sends telemetry periodically + ContainerLogTelemetryTicker *time.Ticker +) + +const ( + clusterTypeACS = "ACS" + clusterTypeAKS = "AKS" + controllerTypeDaemonSet = "DaemonSet" + controllerTypeReplicaSet = "ReplicaSet" + envAKSResourceID = "AKS_RESOURCE_ID" + envACSResourceName = "ACS_RESOURCE_NAME" + envAppInsightsInstrumentationKey = "APPLICATIONINSIGHTS_INSTRUMENTATIONKEY" + metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" + defaultTelemetryPushInterval = 300 + + // EventNameContainerLogInit name of the event + EventNameContainerLogInit = "ContainerLogPluginInitialized" +) + +// Initialize initializes the telemetry artifacts +func initialize(telemetryIntervalProperty string, agentVersion string) (int, error) { + + telemetryInterval, err := strconv.Atoi(telemetryIntervalProperty) + if err != nil { + telemetryInterval = defaultTelemetryPushInterval + } + + ContainerLogTelemetryTicker = time.NewTicker(time.Second * time.Duration(telemetryInterval)) + + encodedIkey := 
os.Getenv(envAppInsightsInstrumentationKey) + if encodedIkey == "" { + Log("App Insights IKey missing in Environment Variables \n") + return -1, errors.New("Missing App Insights Instrumentation Key Environment Variable") + } + + decIkey, err := base64.StdEncoding.DecodeString(encodedIkey) + if err != nil { + Log("Error Decoding encoded Instrumentation key %s", err.Error()) + return -1, err + } + + TelemetryClient = appinsights.NewTelemetryClient(string(decIkey)) + + CommonProperties = make(map[string]string) + CommonProperties["Computer"] = Computer + CommonProperties["WorkspaceID"] = WorkspaceID + CommonProperties["ControllerType"] = controllerTypeDaemonSet + CommonProperties["AgentVersion"] = agentVersion + + aksResourceID := os.Getenv(envAKSResourceID) + // if the aks resource id is not defined, it is most likely an ACS Cluster + if aksResourceID == "" { + CommonProperties["ACSResourceName"] = os.Getenv(envACSResourceName) + CommonProperties["ClusterType"] = clusterTypeACS + + CommonProperties["SubscriptionID"] = "" + CommonProperties["ResourceGroupName"] = "" + CommonProperties["ClusterName"] = "" + CommonProperties["Region"] = "" + + } else { + CommonProperties["ACSResourceName"] = "" + splitStrings := strings.Split(aksResourceID, "/") + CommonProperties["SubscriptionID"] = splitStrings[2] + CommonProperties["ResourceGroupName"] = splitStrings[4] + CommonProperties["ClusterName"] = splitStrings[8] + CommonProperties["ClusterType"] = clusterTypeAKS + + region := os.Getenv("AKS_REGION") + if region != "" { + CommonProperties["Region"] = region + } + } + + TelemetryClient.Context().CommonProperties = CommonProperties + return 0, nil +} + +// SendContainerLogFlushRateMetric is a go-routine that flushes the data periodically (every 5 mins to App Insights) +func SendContainerLogFlushRateMetric(telemetryIntervalProperty string, agentVersion string) { + + ret, err := initialize(telemetryIntervalProperty, agentVersion) + if ret != 0 || err != nil { + Log("Error During 
Telemetry Initialization :%s", err.Error()) + return + } + + for ; true; <-ContainerLogTelemetryTicker.C { + flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 + metric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) + Log("Flushed Records : %f Time Taken : %f flush Rate : %f", FlushedRecordsCount, FlushedRecordsTimeTaken, flushRate) + TelemetryClient.Track(metric) + FlushedRecordsCount = 0.0 + FlushedRecordsTimeTaken = 0.0 + } +} + +// TelemetryShutdown stops the ticker that sends data to App Insights periodically +func TelemetryShutdown() { + Log("Shutting down ContainerLog Telemetry\n") + ContainerLogTelemetryTicker.Stop() +} + +// SendEvent sends an event to App Insights +func SendEvent(eventName string, dimensions map[string]string) { + // this is because the TelemetryClient is initialized in a different goroutine. A simple wait loop here is just waiting for it to be initialized. This will happen only for the init event. Any subsequent Event should work just fine + for TelemetryClient == nil { + Log("Waiting for Telemetry Client to be initialized") + time.Sleep(1 * time.Second) + } + + // take a copy so the CommonProperties can be restored later + _commonProps := make(map[string]string) + for k, v := range TelemetryClient.Context().CommonProperties { + _commonProps[k] = v + } + + // add any extra dimensions + for k, v := range dimensions { + TelemetryClient.Context().CommonProperties[k] = v + } + + Log("Sending Event : %s\n", eventName) + event := appinsights.NewEventTelemetry(eventName) + TelemetryClient.Track(event) + + // restore original CommonProperties + TelemetryClient.Context().CommonProperties = _commonProps +} From 4b958dde94450e96d6d46351756c83500df7935f Mon Sep 17 00:00:00 2001 From: r-dilip Date: Fri, 12 Oct 2018 09:18:10 -0700 Subject: [PATCH 020/160] Fixing an issue with Send Init Event if Telemetry is not initialized properly, tab to whitespace in conf file --- installer/conf/td-agent-bit.conf | 2 +- 
source/code/go/src/plugins/out_oms.go | 7 ++-- source/code/go/src/plugins/telemetry.go | 44 ++++++++++++++----------- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 5a1c105bf..6849a3744 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -19,6 +19,6 @@ [OUTPUT] Name oms EnableTelemetry true - TelemetryPushInterval 300 + TelemetryPushInterval 300 Match oms.container.log.* AgentVersion internaltest1004-2 \ No newline at end of file diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 37c9eb12b..2603368ab 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -20,13 +20,14 @@ func FLBPluginRegister(ctx unsafe.Pointer) int { func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") InitializePlugin(ContainerLogPluginConfFilePath) - enablePlugin := output.FLBPluginConfigKey(ctx, "EnableTelemetry") + enableTelemetry := output.FLBPluginConfigKey(ctx, "EnableTelemetry") telemetryPushInterval := output.FLBPluginConfigKey(ctx, "TelemetryPushInterval") agentVersion := output.FLBPluginConfigKey(ctx, "AgentVersion") - if strings.Compare(strings.ToLower(enablePlugin), "true") == 0 { + if strings.Compare(strings.ToLower(enableTelemetry), "true") == 0 { go SendContainerLogFlushRateMetric(telemetryPushInterval, agentVersion) - SendEvent(EventNameContainerLogInit, make(map[string]string)) + } else { + Log("Telemetry is not enabled for the plugin %s \n", output.FLBPluginConfigKey(ctx, "Name")) } return output.FLB_OK } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 4d4ab2371..c2f565a45 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -4,6 +4,7 @@ import ( "encoding/base64" "errors" "os" + "runtime" "strconv" "strings" "time" @@ 
-25,39 +26,40 @@ var ( ) const ( - clusterTypeACS = "ACS" - clusterTypeAKS = "AKS" - controllerTypeDaemonSet = "DaemonSet" - controllerTypeReplicaSet = "ReplicaSet" - envAKSResourceID = "AKS_RESOURCE_ID" - envACSResourceName = "ACS_RESOURCE_NAME" - envAppInsightsInstrumentationKey = "APPLICATIONINSIGHTS_INSTRUMENTATIONKEY" - metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" - defaultTelemetryPushInterval = 300 + clusterTypeACS = "ACS" + clusterTypeAKS = "AKS" + controllerTypeDaemonSet = "DaemonSet" + controllerTypeReplicaSet = "ReplicaSet" + envAKSResourceID = "AKS_RESOURCE_ID" + envACSResourceName = "ACS_RESOURCE_NAME" + envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" + metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" + defaultTelemetryPushInterval = 300 // EventNameContainerLogInit name of the event EventNameContainerLogInit = "ContainerLogPluginInitialized" ) // Initialize initializes the telemetry artifacts -func initialize(telemetryIntervalProperty string, agentVersion string) (int, error) { +func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, error) { - telemetryInterval, err := strconv.Atoi(telemetryIntervalProperty) + telemetryPushInterval, err := strconv.Atoi(telemetryPushIntervalProperty) if err != nil { - telemetryInterval = defaultTelemetryPushInterval + Log("Error Converting telemetryPushIntervalProperty %s. Using Default Interval... 
%d \n", telemetryPushIntervalProperty, defaultTelemetryPushInterval) + telemetryPushInterval = defaultTelemetryPushInterval } - ContainerLogTelemetryTicker = time.NewTicker(time.Second * time.Duration(telemetryInterval)) + ContainerLogTelemetryTicker = time.NewTicker(time.Second * time.Duration(telemetryPushInterval)) - encodedIkey := os.Getenv(envAppInsightsInstrumentationKey) + encodedIkey := os.Getenv(envAppInsightsAuth) if encodedIkey == "" { - Log("App Insights IKey missing in Environment Variables \n") - return -1, errors.New("Missing App Insights Instrumentation Key Environment Variable") + Log("Environment Variable Missing \n") + return -1, errors.New("Missing Environment Variable") } decIkey, err := base64.StdEncoding.DecodeString(encodedIkey) if err != nil { - Log("Error Decoding encoded Instrumentation key %s", err.Error()) + Log("Decoding Error %s", err.Error()) return -1, err } @@ -99,14 +101,16 @@ func initialize(telemetryIntervalProperty string, agentVersion string) (int, err } // SendContainerLogFlushRateMetric is a go-routine that flushes the data periodically (every 5 mins to App Insights) -func SendContainerLogFlushRateMetric(telemetryIntervalProperty string, agentVersion string) { +func SendContainerLogFlushRateMetric(telemetryPushIntervalProperty string, agentVersion string) { - ret, err := initialize(telemetryIntervalProperty, agentVersion) + ret, err := initialize(telemetryPushIntervalProperty, agentVersion) if ret != 0 || err != nil { Log("Error During Telemetry Initialization :%s", err.Error()) - return + runtime.Goexit() } + SendEvent(EventNameContainerLogInit, make(map[string]string)) + for ; true; <-ContainerLogTelemetryTicker.C { flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 metric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) From 510ef9f95b8e5de04e7b5952e24458374d6cbf6b Mon Sep 17 00:00:00 2001 From: r-dilip Date: Fri, 12 Oct 2018 10:45:14 -0700 Subject: [PATCH 021/160] PR feedback --- 
installer/conf/td-agent-bit.conf | 10 ++++----- source/code/go/src/plugins/out_oms.go | 8 +++---- source/code/go/src/plugins/telemetry.go | 30 +++++++++++-------------- 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 6849a3744..b01b3a352 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -17,8 +17,8 @@ Skip_Long_Lines On [OUTPUT] - Name oms - EnableTelemetry true - TelemetryPushInterval 300 - Match oms.container.log.* - AgentVersion internaltest1004-2 \ No newline at end of file + Name oms + EnableTelemetry true + TelemetryPushIntervalSeconds 300 + Match oms.container.log.* + AgentVersion internaltest1004-2 \ No newline at end of file diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 2603368ab..732ae5216 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -21,13 +21,13 @@ func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") InitializePlugin(ContainerLogPluginConfFilePath) enableTelemetry := output.FLBPluginConfigKey(ctx, "EnableTelemetry") - telemetryPushInterval := output.FLBPluginConfigKey(ctx, "TelemetryPushInterval") - agentVersion := output.FLBPluginConfigKey(ctx, "AgentVersion") - if strings.Compare(strings.ToLower(enableTelemetry), "true") == 0 { + telemetryPushInterval := output.FLBPluginConfigKey(ctx, "TelemetryPushIntervalSeconds") + agentVersion := output.FLBPluginConfigKey(ctx, "AgentVersion") go SendContainerLogFlushRateMetric(telemetryPushInterval, agentVersion) } else { Log("Telemetry is not enabled for the plugin %s \n", output.FLBPluginConfigKey(ctx, "Name")) + return output.FLB_OK } return output.FLB_OK } @@ -58,7 +58,7 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { // FLBPluginExit exits the plugin func FLBPluginExit() int { - defer TelemetryShutdown() + 
ContainerLogTelemetryTicker.Stop() KubeSystemContainersRefreshTicker.Stop() ContainerImageNameRefreshTicker.Stop() return output.FLB_OK diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index c2f565a45..4396ea655 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -26,15 +26,15 @@ var ( ) const ( - clusterTypeACS = "ACS" - clusterTypeAKS = "AKS" - controllerTypeDaemonSet = "DaemonSet" - controllerTypeReplicaSet = "ReplicaSet" - envAKSResourceID = "AKS_RESOURCE_ID" - envACSResourceName = "ACS_RESOURCE_NAME" - envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" - metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" - defaultTelemetryPushInterval = 300 + clusterTypeACS = "ACS" + clusterTypeAKS = "AKS" + controllerTypeDaemonSet = "DaemonSet" + controllerTypeReplicaSet = "ReplicaSet" + envAKSResourceID = "AKS_RESOURCE_ID" + envACSResourceName = "ACS_RESOURCE_NAME" + envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" + metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" + defaultTelemetryPushIntervalSeconds = 300 // EventNameContainerLogInit name of the event EventNameContainerLogInit = "ContainerLogPluginInitialized" @@ -45,8 +45,8 @@ func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, telemetryPushInterval, err := strconv.Atoi(telemetryPushIntervalProperty) if err != nil { - Log("Error Converting telemetryPushIntervalProperty %s. Using Default Interval... %d \n", telemetryPushIntervalProperty, defaultTelemetryPushInterval) - telemetryPushInterval = defaultTelemetryPushInterval + Log("Error Converting telemetryPushIntervalProperty %s. Using Default Interval... 
%d \n", telemetryPushIntervalProperty, defaultTelemetryPushIntervalSeconds) + telemetryPushInterval = defaultTelemetryPushIntervalSeconds } ContainerLogTelemetryTicker = time.NewTicker(time.Second * time.Duration(telemetryPushInterval)) @@ -116,17 +116,13 @@ func SendContainerLogFlushRateMetric(telemetryPushIntervalProperty string, agent metric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) Log("Flushed Records : %f Time Taken : %f flush Rate : %f", FlushedRecordsCount, FlushedRecordsTimeTaken, flushRate) TelemetryClient.Track(metric) + DataUpdateMutex.Lock() FlushedRecordsCount = 0.0 FlushedRecordsTimeTaken = 0.0 + DataUpdateMutex.Unlock() } } -// TelemetryShutdown stops the ticker that sends data to App Insights periodically -func TelemetryShutdown() { - Log("Shutting down ContainerLog Telemetry\n") - ContainerLogTelemetryTicker.Stop() -} - // SendEvent sends an event to App Insights func SendEvent(eventName string, dimensions map[string]string) { // this is because the TelemetryClient is initialized in a different goroutine. A simple wait loop here is just waiting for it to be initialized. This will happen only for the init event. 
Any subsequent Event should work just fine From 684c39b63581fab69595885ec2c98942098be4f6 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Fri, 12 Oct 2018 15:44:25 -0700 Subject: [PATCH 022/160] PR feedback --- source/code/go/src/plugins/telemetry.go | 42 +++++++++---------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 4396ea655..621d88eec 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -81,19 +81,21 @@ func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, CommonProperties["ResourceGroupName"] = "" CommonProperties["ClusterName"] = "" CommonProperties["Region"] = "" + CommonProperties["AKS_RESOURCE_ID"] = "" } else { CommonProperties["ACSResourceName"] = "" + CommonProperties["AKS_RESOURCE_ID"] = aksResourceID splitStrings := strings.Split(aksResourceID, "/") - CommonProperties["SubscriptionID"] = splitStrings[2] - CommonProperties["ResourceGroupName"] = splitStrings[4] - CommonProperties["ClusterName"] = splitStrings[8] + if len(aksResourceID) > 0 && len(aksResourceID) < 10 { + CommonProperties["SubscriptionID"] = splitStrings[2] + CommonProperties["ResourceGroupName"] = splitStrings[4] + CommonProperties["ClusterName"] = splitStrings[8] + } CommonProperties["ClusterType"] = clusterTypeAKS region := os.Getenv("AKS_REGION") - if region != "" { - CommonProperties["Region"] = region - } + CommonProperties["Region"] = region } TelemetryClient.Context().CommonProperties = CommonProperties @@ -112,40 +114,26 @@ func SendContainerLogFlushRateMetric(telemetryPushIntervalProperty string, agent SendEvent(EventNameContainerLogInit, make(map[string]string)) for ; true; <-ContainerLogTelemetryTicker.C { + DataUpdateMutex.Lock() flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 - metric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) Log("Flushed Records : 
%f Time Taken : %f flush Rate : %f", FlushedRecordsCount, FlushedRecordsTimeTaken, flushRate) - TelemetryClient.Track(metric) - DataUpdateMutex.Lock() FlushedRecordsCount = 0.0 FlushedRecordsTimeTaken = 0.0 DataUpdateMutex.Unlock() + metric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) + TelemetryClient.Track(metric) } } // SendEvent sends an event to App Insights func SendEvent(eventName string, dimensions map[string]string) { - // this is because the TelemetryClient is initialized in a different goroutine. A simple wait loop here is just waiting for it to be initialized. This will happen only for the init event. Any subsequent Event should work just fine - for TelemetryClient == nil { - Log("Waiting for Telemetry Client to be initialized") - time.Sleep(1 * time.Second) - } - - // take a copy so the CommonProperties can be restored later - _commonProps := make(map[string]string) - for k, v := range TelemetryClient.Context().CommonProperties { - _commonProps[k] = v - } + Log("Sending Event : %s\n", eventName) + event := appinsights.NewEventTelemetry(eventName) - // add any extra dimensions + // add any extra Properties for k, v := range dimensions { - TelemetryClient.Context().CommonProperties[k] = v + event.Properties[k] = v } - Log("Sending Event : %s\n", eventName) - event := appinsights.NewEventTelemetry(eventName) TelemetryClient.Track(event) - - // restore original CommonProperties - TelemetryClient.Context().CommonProperties = _commonProps } From e165275bb8c346051cf851fb36dbb91ad7cf8afc Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Mon, 15 Oct 2018 15:14:41 -0700 Subject: [PATCH 023/160] Sending an event every 5 mins(Heartbeat) (#146) --- installer/conf/td-agent-bit.conf | 2 -- source/code/go/src/plugins/telemetry.go | 7 ++++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index b01b3a352..2553f405f 100644 --- 
a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -12,8 +12,6 @@ Parser docker Mem_Buf_Limit 30m Path_Key filepath - Buffer_Chunk_Size 1m - Buffer_Max_Size 1m Skip_Long_Lines On [OUTPUT] diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 621d88eec..b1bc4439b 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -36,8 +36,8 @@ const ( metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" defaultTelemetryPushIntervalSeconds = 300 - // EventNameContainerLogInit name of the event - EventNameContainerLogInit = "ContainerLogPluginInitialized" + eventNameContainerLogInit = "ContainerLogPluginInitialized" + eventNameDaemonSetHeartbeat = "ContainerLogDaemonSetHeartbeatEvent" ) // Initialize initializes the telemetry artifacts @@ -111,9 +111,10 @@ func SendContainerLogFlushRateMetric(telemetryPushIntervalProperty string, agent runtime.Goexit() } - SendEvent(EventNameContainerLogInit, make(map[string]string)) + SendEvent(eventNameContainerLogInit, make(map[string]string)) for ; true; <-ContainerLogTelemetryTicker.C { + SendEvent(eventNameDaemonSetHeartbeat, make(map[string]string)) DataUpdateMutex.Lock() flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 Log("Flushed Records : %f Time Taken : %f flush Rate : %f", FlushedRecordsCount, FlushedRecordsTimeTaken, flushRate) From cfe1ca94c259c533a938834a54f1279e703d7e4b Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Tue, 16 Oct 2018 13:03:30 -0700 Subject: [PATCH 024/160] PR feedback to cleanup removed workflows --- installer/conf/container.conf | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 1916300cb..17317871c 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -46,7 +46,7 @@ # Filter for correct format to endpoint - + type 
filter_container @@ -63,19 +63,6 @@ max_retry_wait 9m - - type out_oms_api - log_level debug - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_containerprocess*.buffer - buffer_queue_limit 20 - flush_interval 20s - retry_limit 10 - retry_wait 15s - max_retry_wait 9m - - type out_oms log_level debug @@ -102,19 +89,6 @@ max_retry_wait 9m - - type out_oms - log_level debug - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_servicelog*.buffer - buffer_queue_limit 20 - flush_interval 20s - retry_limit 10 - retry_wait 15s - max_retry_wait 9m - - type out_oms log_level debug From 892b51c6b166cf10424bf5b6768633f44aa4cfa7 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Tue, 16 Oct 2018 13:04:55 -0700 Subject: [PATCH 025/160] updating agent version for telemetry --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 2553f405f..667f2edc2 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -19,4 +19,4 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion internaltest1004-2 \ No newline at end of file + AgentVersion ciprod10162018 From 9c83160dfa92a4f9ae1ab2b010678148aab4fc4d Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Tue, 16 Oct 2018 19:33:43 -0700 Subject: [PATCH 026/160] updating agent version --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 667f2edc2..b39587a97 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -19,4 +19,4 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion ciprod10162018 + AgentVersion ciprod10162018-2 From f0b5a61ea7597d8044f0ef3347f3258996c97c39 Mon Sep 17 00:00:00 2001 From: Dilip 
Raghunathan Date: Thu, 25 Oct 2018 11:17:39 -0700 Subject: [PATCH 027/160] Telemetry Updates (#149) * Telemetry Fixes 1. Added Log Generation Rate 2. Fixed parsing bugs 3. Added code to send Exceptions/errors * PR Feedback --- source/code/go/src/plugins/oms.go | 78 +++++++++++++++++++------ source/code/go/src/plugins/out_oms.go | 3 - source/code/go/src/plugins/telemetry.go | 29 ++++++--- source/code/go/src/plugins/utils.go | 8 ++- 4 files changed, 88 insertions(+), 30 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 807e00937..665c3f9f2 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -55,15 +55,18 @@ var ( IgnoreIDSet map[string]bool // DataUpdateMutex read and write mutex access to the container id set DataUpdateMutex = &sync.Mutex{} + // ContainerLogTelemetryMutex read and write mutex access to the Container Log Telemetry + ContainerLogTelemetryMutex = &sync.Mutex{} + // ClientSet for querying KubeAPIs ClientSet *kubernetes.Clientset ) var ( // KubeSystemContainersRefreshTicker updates the kube-system containers - KubeSystemContainersRefreshTicker = time.NewTicker(time.Second * 300) + KubeSystemContainersRefreshTicker *time.Ticker // ContainerImageNameRefreshTicker updates the container image and names periodically - ContainerImageNameRefreshTicker = time.NewTicker(time.Second * 60) + ContainerImageNameRefreshTicker *time.Ticker ) var ( @@ -99,6 +102,7 @@ func createLogger() *log.Logger { fmt.Printf("File Exists. Opening file in append mode...\n") logfile, err = os.OpenFile(path, os.O_APPEND|os.O_WRONLY, 0600) if err != nil { + SendException(err.Error()) fmt.Printf(err.Error()) } } @@ -107,6 +111,7 @@ func createLogger() *log.Logger { fmt.Printf("File Doesnt Exist. 
Creating file...\n") logfile, err = os.Create(path) if err != nil { + SendException(err.Error()) fmt.Printf(err.Error()) } } @@ -134,7 +139,9 @@ func updateContainerImageNameMaps() { pods, err := ClientSet.CoreV1().Pods("").List(metav1.ListOptions{}) if err != nil { - Log("Error getting pods %s\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + message := fmt.Sprintf("Error getting pods %s\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + Log(message) + SendException(message) continue } @@ -171,7 +178,9 @@ func updateKubeSystemContainerIDs() { pods, err := ClientSet.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) if err != nil { - Log("Error getting pods %s\nIt is ok to log here and continue. Kube-system logs will be collected", err.Error()) + message := fmt.Sprintf("Error getting pods %s\nIt is ok to log here and continue. 
Kube-system logs will be collected", err.Error()) + SendException(message) + Log(message) continue } @@ -194,17 +203,29 @@ func updateKubeSystemContainerIDs() { // PostDataHelper sends data to the OMS endpoint func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { - defer DataUpdateMutex.Unlock() - start := time.Now() var dataItems []DataItem + ignoreIDSet := make(map[string]bool) + imageIDMap := make(map[string]string) + nameIDMap := make(map[string]string) + DataUpdateMutex.Lock() + for k, v := range IgnoreIDSet { + ignoreIDSet[k] = v + } + for k, v := range ImageIDMap { + imageIDMap[k] = v + } + for k, v := range NameIDMap { + nameIDMap[k] = v + } + DataUpdateMutex.Unlock() for _, record := range tailPluginRecords { containerID := GetContainerIDFromFilePath(toString(record["filepath"])) - if containerID == "" || containsKey(IgnoreIDSet, containerID) { + if containerID == "" || containsKey(ignoreIDSet, containerID) { continue } @@ -216,13 +237,13 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { stringMap["SourceSystem"] = "Containers" stringMap["Id"] = containerID - if val, ok := ImageIDMap[containerID]; ok { + if val, ok := imageIDMap[containerID]; ok { stringMap["Image"] = val } else { Log("ContainerId %s not present in Map ", containerID) } - if val, ok := NameIDMap[containerID]; ok { + if val, ok := nameIDMap[containerID]; ok { stringMap["Name"] = val } else { Log("ContainerId %s not present in Map ", containerID) @@ -250,7 +271,9 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { marshalled, err := json.Marshal(logEntry) if err != nil { - Log("Error while Marshalling log Entry: %s", err.Error()) + message := fmt.Sprintf("Error while Marshalling log Entry: %s", err.Error()) + Log(message) + SendException(message) return output.FLB_OK } req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) @@ -260,8 +283,11 @@ func PostDataHelper(tailPluginRecords 
[]map[interface{}]interface{}) int { elapsed := time.Since(start) if err != nil { - Log("Error when sending request %s \n", err.Error()) + message := fmt.Sprintf("Error when sending request %s \n", err.Error()) + Log(message) + SendException(message) Log("Failed to flush %d records after %s", len(dataItems), elapsed) + return output.FLB_RETRY } @@ -274,8 +300,10 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { numRecords := len(dataItems) Log("Successfully flushed %d records in %s", numRecords, elapsed) + ContainerLogTelemetryMutex.Lock() FlushedRecordsCount += float64(numRecords) FlushedRecordsTimeTaken += float64(elapsed / time.Millisecond) + ContainerLogTelemetryMutex.Unlock() } return output.FLB_OK @@ -318,13 +346,17 @@ func InitializePlugin(pluginConfPath string) { pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { - Log("Error Reading plugin config path : %s \n", err.Error()) - log.Fatalf("Error Reading plugin config path : %s \n", err.Error()) + message := fmt.Sprintf("Error Reading plugin config path : %s \n", err.Error()) + Log(message) + SendException(message) + time.Sleep(30 * time.Second) + log.Fatalln(message) } omsadminConf, err := ReadConfiguration(pluginConfig["omsadmin_conf_path"]) if err != nil { Log(err.Error()) + SendException(err.Error()) log.Fatalf("Error Reading omsadmin configuration %s\n", err.Error()) } OMSEndpoint = omsadminConf["OMS_ENDPOINT"] @@ -334,7 +366,9 @@ func InitializePlugin(pluginConfPath string) { // Initialize image,name map refresh ticker containerInventoryRefreshInterval, err := strconv.Atoi(pluginConfig["container_inventory_refresh_interval"]) if err != nil { - Log("Error Reading Container Inventory Refresh Interval %s", err.Error()) + message := fmt.Sprintf("Error Reading Container Inventory Refresh Interval %s", err.Error()) + Log(message) + SendException(message) Log("Using Default Refresh Interval of %d s\n", defaultContainerInventoryRefreshInterval) 
containerInventoryRefreshInterval = defaultContainerInventoryRefreshInterval } @@ -344,7 +378,9 @@ func InitializePlugin(pluginConfPath string) { // Initialize Kube System Refresh Ticker kubeSystemContainersRefreshInterval, err := strconv.Atoi(pluginConfig["kube_system_containers_refresh_interval"]) if err != nil { - Log("Error Reading Kube System Container Ids Refresh Interval %s", err.Error()) + message := fmt.Sprintf("Error Reading Kube System Container Ids Refresh Interval %s", err.Error()) + Log(message) + SendException(message) Log("Using Default Refresh Interval of %d s\n", defaultKubeSystemContainersRefreshInterval) kubeSystemContainersRefreshInterval = defaultKubeSystemContainersRefreshInterval } @@ -356,7 +392,9 @@ func InitializePlugin(pluginConfPath string) { if err != nil { // It is ok to log here and continue, because only the Computer column will be missing, // which can be deduced from a combination of containerId, and docker logs on the node - Log("Error when reading containerHostName file %s.\n It is ok to log here and continue, because only the Computer column will be missing, which can be deduced from a combination of containerId, and docker logs on the nodes\n", err.Error()) + message := fmt.Sprintf("Error when reading containerHostName file %s.\n It is ok to log here and continue, because only the Computer column will be missing, which can be deduced from a combination of containerId, and docker logs on the nodes\n", err.Error()) + Log(message) + SendException(message) } Computer = strings.TrimSuffix(toString(containerHostName), "\n") Log("Computer == %s \n", Computer) @@ -364,12 +402,16 @@ func InitializePlugin(pluginConfPath string) { // Initialize KubeAPI Client config, err := rest.InClusterConfig() if err != nil { - Log("Error getting config %s.\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + message := fmt.Sprintf("Error getting config 
%s.\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + Log(message) + SendException(message) } ClientSet, err = kubernetes.NewForConfig(config) if err != nil { - Log("Error getting clientset %s.\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + message := fmt.Sprintf("Error getting clientset %s.\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + SendException(message) + Log(message) } PluginConfiguration = pluginConfig diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 732ae5216..e2ee324e7 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -34,7 +34,6 @@ func FLBPluginInit(ctx unsafe.Pointer) int { //export FLBPluginFlush func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { - var count int var ret int var record map[interface{}]interface{} var records []map[interface{}]interface{} @@ -43,7 +42,6 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { dec := output.NewDecoder(data, int(length)) // Iterate Records - count = 0 for { // Extract Record ret, _, record = output.GetRecord(dec) @@ -51,7 +49,6 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { break } records = append(records, record) - count++ } return PostDataHelper(records) } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index b1bc4439b..72454948d 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -34,13 +34,14 @@ const ( envACSResourceName = "ACS_RESOURCE_NAME" envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" metricNameAvgFlushRate = 
"ContainerLogAvgRecordsFlushedPerSec" + metricNameAvgLogGenerationRate = "ContainerLogsGeneratedPerSec" defaultTelemetryPushIntervalSeconds = 300 eventNameContainerLogInit = "ContainerLogPluginInitialized" eventNameDaemonSetHeartbeat = "ContainerLogDaemonSetHeartbeatEvent" ) -// Initialize initializes the telemetry artifacts +// initialize initializes the telemetry artifacts func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, error) { telemetryPushInterval, err := strconv.Atoi(telemetryPushIntervalProperty) @@ -87,7 +88,7 @@ func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, CommonProperties["ACSResourceName"] = "" CommonProperties["AKS_RESOURCE_ID"] = aksResourceID splitStrings := strings.Split(aksResourceID, "/") - if len(aksResourceID) > 0 && len(aksResourceID) < 10 { + if len(splitStrings) > 0 && len(splitStrings) < 10 { CommonProperties["SubscriptionID"] = splitStrings[2] CommonProperties["ResourceGroupName"] = splitStrings[4] CommonProperties["ClusterName"] = splitStrings[8] @@ -110,19 +111,24 @@ func SendContainerLogFlushRateMetric(telemetryPushIntervalProperty string, agent Log("Error During Telemetry Initialization :%s", err.Error()) runtime.Goexit() } - + start := time.Now() SendEvent(eventNameContainerLogInit, make(map[string]string)) for ; true; <-ContainerLogTelemetryTicker.C { SendEvent(eventNameDaemonSetHeartbeat, make(map[string]string)) - DataUpdateMutex.Lock() + elapsed := time.Since(start) + ContainerLogTelemetryMutex.Lock() flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 - Log("Flushed Records : %f Time Taken : %f flush Rate : %f", FlushedRecordsCount, FlushedRecordsTimeTaken, flushRate) + logRate := FlushedRecordsCount / float64(elapsed/time.Second) FlushedRecordsCount = 0.0 FlushedRecordsTimeTaken = 0.0 - DataUpdateMutex.Unlock() - metric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) - TelemetryClient.Track(metric) + 
ContainerLogTelemetryMutex.Unlock() + + flushRateMetric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) + TelemetryClient.Track(flushRateMetric) + logRateMetric := appinsights.NewMetricTelemetry(metricNameAvgLogGenerationRate, logRate) + TelemetryClient.Track(logRateMetric) + start = time.Now() } } @@ -138,3 +144,10 @@ func SendEvent(eventName string, dimensions map[string]string) { TelemetryClient.Track(event) } + +// SendException send an event to the configured app insights instance +func SendException(err interface{}) { + if TelemetryClient != nil { + TelemetryClient.TrackException(err) + } +} diff --git a/source/code/go/src/plugins/utils.go b/source/code/go/src/plugins/utils.go index 1ac9b05a9..94db033bd 100644 --- a/source/code/go/src/plugins/utils.go +++ b/source/code/go/src/plugins/utils.go @@ -3,6 +3,7 @@ package main import ( "bufio" "crypto/tls" + "fmt" "log" "net/http" "os" @@ -19,7 +20,9 @@ func ReadConfiguration(filename string) (map[string]string, error) { file, err := os.Open(filename) if err != nil { + SendException(err) log.Fatal(err) + return nil, err } defer file.Close() @@ -39,6 +42,7 @@ func ReadConfiguration(filename string) (map[string]string, error) { } if err := scanner.Err(); err != nil { + SendException(err) log.Fatal(err) return nil, err } @@ -51,7 +55,9 @@ func CreateHTTPClient() { cert, err := tls.LoadX509KeyPair(PluginConfiguration["cert_file_path"], PluginConfiguration["key_file_path"]) if err != nil { - Log("Error when loading cert %s", err.Error()) + message := fmt.Sprintf("Error when loading cert %s", err.Error()) + SendException(message) + Log(message) log.Fatalf("Error when loading cert %s", err.Error()) } From a58998ec5a03b3a4bd502a9fb7be5e0bdfd3eee2 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 30 Oct 2018 09:52:36 -0700 Subject: [PATCH 028/160] Changes to send omsagent/omsagent-rs kubectl logs to App Insights (#159) * Changes to send omsagent/omsagent-rs kubectl logs to App Insights * PR 
Feedback --- installer/conf/td-agent-bit.conf | 9 +++ source/code/go/src/plugins/oms.go | 37 +++++---- source/code/go/src/plugins/out_oms.go | 12 ++- source/code/go/src/plugins/telemetry.go | 102 +++++++++++++----------- source/code/go/src/plugins/utils.go | 21 ++++- 5 files changed, 107 insertions(+), 74 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index b39587a97..2a6199987 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -14,6 +14,15 @@ Path_Key filepath Skip_Long_Lines On +[INPUT] + Name tail + Tag oms.container.log.flbplugin.* + Path /var/log/containers/omsagent*.log + DB /var/opt/microsoft/docker-cimprov/state/omsagent-ai.db + Mem_Buf_Limit 30m + Path_Key filepath + Skip_Long_Lines On + [OUTPUT] Name oms EnableTelemetry true diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 665c3f9f2..e0abaea1f 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -223,7 +223,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { for _, record := range tailPluginRecords { - containerID := GetContainerIDFromFilePath(toString(record["filepath"])) + containerID := GetContainerIDFromFilePath(ToString(record["filepath"])) if containerID == "" || containsKey(ignoreIDSet, containerID) { continue @@ -231,9 +231,9 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { stringMap := make(map[string]string) - stringMap["LogEntry"] = toString(record["log"]) - stringMap["LogEntrySource"] = toString(record["stream"]) - stringMap["LogEntryTimeStamp"] = toString(record["time"]) + stringMap["LogEntry"] = ToString(record["log"]) + stringMap["LogEntrySource"] = ToString(record["stream"]) + stringMap["LogEntryTimeStamp"] = ToString(record["time"]) stringMap["SourceSystem"] = "Containers" stringMap["Id"] = containerID @@ -314,16 +314,6 @@ func containsKey(currentMap map[string]bool, key 
string) bool { return c } -func toString(s interface{}) string { - switch t := s.(type) { - case []byte: - // prevent encoding to base64 - return string(t) - default: - return "" - } -} - // GetContainerIDFromFilePath Gets the container ID From the file Path func GetContainerIDFromFilePath(filepath string) string { start := strings.LastIndex(filepath, "-") @@ -338,12 +328,19 @@ func GetContainerIDFromFilePath(filepath string) string { } // InitializePlugin reads and populates plugin configuration -func InitializePlugin(pluginConfPath string) { +func InitializePlugin(pluginConfPath string, agentVersion string) { IgnoreIDSet = make(map[string]bool) ImageIDMap = make(map[string]string) NameIDMap = make(map[string]string) + ret, err := InitializeTelemetryClient(agentVersion) + if ret != 0 || err != nil { + message := fmt.Sprintf("Error During Telemetry Initialization :%s", err.Error()) + fmt.Printf(message) + Log(message) + } + pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { message := fmt.Sprintf("Error Reading plugin config path : %s \n", err.Error()) @@ -355,9 +352,11 @@ func InitializePlugin(pluginConfPath string) { omsadminConf, err := ReadConfiguration(pluginConfig["omsadmin_conf_path"]) if err != nil { - Log(err.Error()) - SendException(err.Error()) - log.Fatalf("Error Reading omsadmin configuration %s\n", err.Error()) + message := fmt.Sprintf("Error Reading omsadmin configuration %s\n", err.Error()) + Log(message) + SendException(message) + time.Sleep(30 * time.Second) + log.Fatalln(message) } OMSEndpoint = omsadminConf["OMS_ENDPOINT"] WorkspaceID = omsadminConf["WORKSPACE_ID"] @@ -396,7 +395,7 @@ func InitializePlugin(pluginConfPath string) { Log(message) SendException(message) } - Computer = strings.TrimSuffix(toString(containerHostName), "\n") + Computer = strings.TrimSuffix(ToString(containerHostName), "\n") Log("Computer == %s \n", Computer) // Initialize KubeAPI Client diff --git a/source/code/go/src/plugins/out_oms.go 
b/source/code/go/src/plugins/out_oms.go index e2ee324e7..133e0f039 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -19,12 +19,12 @@ func FLBPluginRegister(ctx unsafe.Pointer) int { // ctx (context) pointer to fluentbit context (state/ c code) func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") - InitializePlugin(ContainerLogPluginConfFilePath) + agentVersion := output.FLBPluginConfigKey(ctx, "AgentVersion") + InitializePlugin(ContainerLogPluginConfFilePath, agentVersion) enableTelemetry := output.FLBPluginConfigKey(ctx, "EnableTelemetry") if strings.Compare(strings.ToLower(enableTelemetry), "true") == 0 { telemetryPushInterval := output.FLBPluginConfigKey(ctx, "TelemetryPushIntervalSeconds") - agentVersion := output.FLBPluginConfigKey(ctx, "AgentVersion") - go SendContainerLogFlushRateMetric(telemetryPushInterval, agentVersion) + go SendContainerLogPluginMetrics(telemetryPushInterval) } else { Log("Telemetry is not enabled for the plugin %s \n", output.FLBPluginConfigKey(ctx, "Name")) return output.FLB_OK @@ -50,6 +50,12 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { } records = append(records, record) } + + incomingTag := C.GoString(tag) + if strings.Contains(strings.ToLower(incomingTag), "oms.container.log.flbplugin") { + return PushToAppInsightsTraces(records) + } + return PostDataHelper(records) } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 72454948d..d943c8eda 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -4,12 +4,12 @@ import ( "encoding/base64" "errors" "os" - "runtime" "strconv" "strings" "time" "github.com/Microsoft/ApplicationInsights-Go/appinsights" + "github.com/fluent/fluent-bit-go/output" ) var ( @@ -41,8 +41,8 @@ const ( eventNameDaemonSetHeartbeat = "ContainerLogDaemonSetHeartbeatEvent" ) -// initialize initializes 
the telemetry artifacts -func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, error) { +// SendContainerLogPluginMetrics is a go-routine that flushes the data periodically (every 5 mins to App Insights) +func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { telemetryPushInterval, err := strconv.Atoi(telemetryPushIntervalProperty) if err != nil { @@ -52,6 +52,49 @@ func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, ContainerLogTelemetryTicker = time.NewTicker(time.Second * time.Duration(telemetryPushInterval)) + start := time.Now() + SendEvent(eventNameContainerLogInit, make(map[string]string)) + + for ; true; <-ContainerLogTelemetryTicker.C { + SendEvent(eventNameDaemonSetHeartbeat, make(map[string]string)) + elapsed := time.Since(start) + ContainerLogTelemetryMutex.Lock() + flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 + logRate := FlushedRecordsCount / float64(elapsed/time.Second) + FlushedRecordsCount = 0.0 + FlushedRecordsTimeTaken = 0.0 + ContainerLogTelemetryMutex.Unlock() + + flushRateMetric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) + TelemetryClient.Track(flushRateMetric) + logRateMetric := appinsights.NewMetricTelemetry(metricNameAvgLogGenerationRate, logRate) + TelemetryClient.Track(logRateMetric) + start = time.Now() + } +} + +// SendEvent sends an event to App Insights +func SendEvent(eventName string, dimensions map[string]string) { + Log("Sending Event : %s\n", eventName) + event := appinsights.NewEventTelemetry(eventName) + + // add any extra Properties + for k, v := range dimensions { + event.Properties[k] = v + } + + TelemetryClient.Track(event) +} + +// SendException send an event to the configured app insights instance +func SendException(err interface{}) { + if TelemetryClient != nil { + TelemetryClient.TrackException(err) + } +} + +// InitializeTelemetryClient sets up the telemetry client to send telemetry to the App 
Insights instance +func InitializeTelemetryClient(agentVersion string) (int, error) { encodedIkey := os.Getenv(envAppInsightsAuth) if encodedIkey == "" { Log("Environment Variable Missing \n") @@ -103,51 +146,14 @@ func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, return 0, nil } -// SendContainerLogFlushRateMetric is a go-routine that flushes the data periodically (every 5 mins to App Insights) -func SendContainerLogFlushRateMetric(telemetryPushIntervalProperty string, agentVersion string) { - - ret, err := initialize(telemetryPushIntervalProperty, agentVersion) - if ret != 0 || err != nil { - Log("Error During Telemetry Initialization :%s", err.Error()) - runtime.Goexit() - } - start := time.Now() - SendEvent(eventNameContainerLogInit, make(map[string]string)) - - for ; true; <-ContainerLogTelemetryTicker.C { - SendEvent(eventNameDaemonSetHeartbeat, make(map[string]string)) - elapsed := time.Since(start) - ContainerLogTelemetryMutex.Lock() - flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 - logRate := FlushedRecordsCount / float64(elapsed/time.Second) - FlushedRecordsCount = 0.0 - FlushedRecordsTimeTaken = 0.0 - ContainerLogTelemetryMutex.Unlock() - - flushRateMetric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) - TelemetryClient.Track(flushRateMetric) - logRateMetric := appinsights.NewMetricTelemetry(metricNameAvgLogGenerationRate, logRate) - TelemetryClient.Track(logRateMetric) - start = time.Now() - } -} - -// SendEvent sends an event to App Insights -func SendEvent(eventName string, dimensions map[string]string) { - Log("Sending Event : %s\n", eventName) - event := appinsights.NewEventTelemetry(eventName) - - // add any extra Properties - for k, v := range dimensions { - event.Properties[k] = v +// PushToAppInsightsTraces sends the log lines as trace messages to the configured App Insights Instance +func PushToAppInsightsTraces(records []map[interface{}]interface{}) int { + var logLines 
[]string + for _, record := range records { + logLines = append(logLines, ToString(record["log"])) } - TelemetryClient.Track(event) -} - -// SendException send an event to the configured app insights instance -func SendException(err interface{}) { - if TelemetryClient != nil { - TelemetryClient.TrackException(err) - } + traceEntry := strings.Join(logLines, "\n") + TelemetryClient.TrackTrace(traceEntry, 1) + return output.FLB_OK } diff --git a/source/code/go/src/plugins/utils.go b/source/code/go/src/plugins/utils.go index 94db033bd..91e433a0f 100644 --- a/source/code/go/src/plugins/utils.go +++ b/source/code/go/src/plugins/utils.go @@ -8,6 +8,7 @@ import ( "net/http" "os" "strings" + "time" ) // ReadConfiguration reads a property file @@ -21,8 +22,8 @@ func ReadConfiguration(filename string) (map[string]string, error) { file, err := os.Open(filename) if err != nil { SendException(err) - log.Fatal(err) - + time.Sleep(30 * time.Second) + fmt.Printf("%s", err.Error()) return nil, err } defer file.Close() @@ -43,7 +44,8 @@ func ReadConfiguration(filename string) (map[string]string, error) { if err := scanner.Err(); err != nil { SendException(err) - log.Fatal(err) + time.Sleep(30 * time.Second) + log.Fatalf("%s", err.Error()) return nil, err } @@ -52,11 +54,11 @@ func ReadConfiguration(filename string) (map[string]string, error) { // CreateHTTPClient used to create the client for sending post requests to OMSEndpoint func CreateHTTPClient() { - cert, err := tls.LoadX509KeyPair(PluginConfiguration["cert_file_path"], PluginConfiguration["key_file_path"]) if err != nil { message := fmt.Sprintf("Error when loading cert %s", err.Error()) SendException(message) + time.Sleep(30 * time.Second) Log(message) log.Fatalf("Error when loading cert %s", err.Error()) } @@ -72,3 +74,14 @@ func CreateHTTPClient() { Log("Successfully created HTTP Client") } + +// ToString converts an interface into a string +func ToString(s interface{}) string { + switch t := s.(type) { + case []byte: + // 
prevent encoding to base64 + return string(t) + default: + return "" + } +} From 4c2da9f831d5aa39edc3c0096ad639f3c01243a1 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Mon, 5 Nov 2018 15:46:02 -0800 Subject: [PATCH 029/160] Rashmi/fluentd docker inventory (#160) * first stab * changes * changes * docker util changes * working tested util * input plugin and conf * changes * changes * changes * changes * changes * working containerinventory * fixing omi removal from container.conf * removing comments * file write and read * deleted containers working * changes * changes * socket timeout * deleting test files * adding log * fixing comment * appinsights changes * changes * tel changes * changes * changes * changes * changes * lib changes * changes * changes * fixes * PR comments * changes * updating the ownership * changes * changes * changes to container data * removing comment * changes * adding collection time * bug fix * env string truncation * changes for acs-engine test --- installer/conf/container.conf | 46 +-- installer/datafiles/base_container.data | 61 +++- .../code/plugin/ApplicationInsightsUtility.rb | 142 ++++++++++ source/code/plugin/ContainerInventoryState.rb | 65 +++++ source/code/plugin/DockerApiClient.rb | 162 +++++++++++ source/code/plugin/DockerApiRestHelper.rb | 55 ++++ source/code/plugin/in_containerinventory.rb | 266 ++++++++++++++++++ .../code/plugin/lib/application_insights.rb | 9 + .../channel/asynchronous_queue.rb | 58 ++++ .../channel/asynchronous_sender.rb | 133 +++++++++ .../channel/contracts/application.rb | 13 + .../channel/contracts/availability_data.rb | 34 +++ .../channel/contracts/base.rb | 13 + .../channel/contracts/cloud.rb | 14 + .../channel/contracts/data.rb | 14 + .../channel/contracts/data_point.rb | 25 ++ .../channel/contracts/data_point_type.rb | 7 + .../channel/contracts/dependency_kind.rb | 9 + .../contracts/dependency_source_type.rb | 9 + .../channel/contracts/device.rb | 18 ++ .../channel/contracts/domain.rb | 
10 + .../channel/contracts/envelope.rb | 32 +++ .../channel/contracts/event_data.rb | 28 ++ .../channel/contracts/exception_data.rb | 35 +++ .../channel/contracts/exception_details.rb | 28 ++ .../channel/contracts/internal.rb | 15 + .../channel/contracts/json_serializable.rb | 59 ++++ .../channel/contracts/location.rb | 13 + .../channel/contracts/message_data.rb | 24 ++ .../channel/contracts/metric_data.rb | 27 ++ .../channel/contracts/operation.rb | 17 ++ .../channel/contracts/page_view_data.rb | 33 +++ .../channel/contracts/page_view_perf_data.rb | 39 +++ .../contracts/remote_dependency_data.rb | 40 +++ .../channel/contracts/reopenings.rb | 27 ++ .../channel/contracts/request_data.rb | 35 +++ .../channel/contracts/session.rb | 14 + .../channel/contracts/severity_level.rb | 13 + .../channel/contracts/stack_frame.rb | 17 ++ .../channel/contracts/user.rb | 15 + .../lib/application_insights/channel/event.rb | 68 +++++ .../channel/queue_base.rb | 73 +++++ .../channel/sender_base.rb | 88 ++++++ .../channel/synchronous_queue.rb | 45 +++ .../channel/synchronous_sender.rb | 17 ++ .../channel/telemetry_channel.rb | 131 +++++++++ .../channel/telemetry_context.rb | 85 ++++++ .../rack/track_request.rb | 154 ++++++++++ .../application_insights/telemetry_client.rb | 232 +++++++++++++++ .../unhandled_exception.rb | 49 ++++ .../lib/application_insights/version.rb | 3 + 51 files changed, 2581 insertions(+), 38 deletions(-) create mode 100644 source/code/plugin/ApplicationInsightsUtility.rb create mode 100644 source/code/plugin/ContainerInventoryState.rb create mode 100644 source/code/plugin/DockerApiClient.rb create mode 100644 source/code/plugin/DockerApiRestHelper.rb create mode 100644 source/code/plugin/in_containerinventory.rb create mode 100644 source/code/plugin/lib/application_insights.rb create mode 100644 source/code/plugin/lib/application_insights/channel/asynchronous_queue.rb create mode 100644 source/code/plugin/lib/application_insights/channel/asynchronous_sender.rb 
create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/application.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/availability_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/base.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/cloud.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/data_point.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/data_point_type.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/dependency_kind.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/device.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/domain.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/envelope.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/event_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/exception_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/exception_details.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/internal.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/location.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/message_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/metric_data.rb create mode 100644 
source/code/plugin/lib/application_insights/channel/contracts/operation.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/page_view_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/reopenings.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/request_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/session.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/severity_level.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/stack_frame.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/user.rb create mode 100644 source/code/plugin/lib/application_insights/channel/event.rb create mode 100644 source/code/plugin/lib/application_insights/channel/queue_base.rb create mode 100644 source/code/plugin/lib/application_insights/channel/sender_base.rb create mode 100644 source/code/plugin/lib/application_insights/channel/synchronous_queue.rb create mode 100644 source/code/plugin/lib/application_insights/channel/synchronous_sender.rb create mode 100644 source/code/plugin/lib/application_insights/channel/telemetry_channel.rb create mode 100644 source/code/plugin/lib/application_insights/channel/telemetry_context.rb create mode 100644 source/code/plugin/lib/application_insights/rack/track_request.rb create mode 100644 source/code/plugin/lib/application_insights/telemetry_client.rb create mode 100644 source/code/plugin/lib/application_insights/unhandled_exception.rb create mode 100644 source/code/plugin/lib/application_insights/version.rb diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 
17317871c..798bd8eb6 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -9,22 +9,10 @@ # Container inventory - type omi - run_interval 60s - tag oms.container.containerinventory - items [ - ["root/cimv2","Container_ContainerInventory"] - ] - - -# Image inventory - - type omi - run_interval 60s - tag oms.container.imageinventory - items [ - ["root/cimv2","Container_ImageInventory"] - ] + type containerinventory + tag oms.containerinsights.containerinventory + run_interval 60s + log_level debug # Container host inventory @@ -45,11 +33,6 @@ log_level debug -# Filter for correct format to endpoint - - type filter_container - - type out_oms_api log_level debug @@ -63,33 +46,22 @@ max_retry_wait 9m - + type out_oms log_level debug + num_threads 5 buffer_chunk_limit 20m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_containerinventory*.buffer buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk flush_interval 20s retry_limit 10 - retry_wait 15s - max_retry_wait 9m - - - - type out_oms - log_level debug - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_imageinventory*.buffer - buffer_queue_limit 20 - flush_interval 20s - retry_limit 10 - retry_wait 15s + retry_wait 30s max_retry_wait 9m - + type out_oms log_level debug num_threads 5 diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 85a128b2a..7181929e2 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -37,6 +37,57 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/in_kube_services.rb; source/code/plugin/in_kube_services.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_nodes.rb; source/code/plugin/in_kube_nodes.rb; 644; root; root +/opt/microsoft/omsagent/plugin/ApplicationInsightsUtility.rb; source/code/plugin/ApplicationInsightsUtility.rb; 644; root; root 
+/opt/microsoft/omsagent/plugin/ContainerInventoryState.rb; source/code/plugin/ContainerInventoryState.rb; 644; root; root +/opt/microsoft/omsagent/plugin/DockerApiClient.rb; source/code/plugin/DockerApiClient.rb; 644; root; root +/opt/microsoft/omsagent/plugin/DockerApiRestHelper.rb; source/code/plugin/DockerApiRestHelper.rb; 644; root; root +/opt/microsoft/omsagent/plugin/in_containerinventory.rb; source/code/plugin/in_containerinventory.rb; 644; root; root + +/opt/microsoft/omsagent/plugin/lib/application_insights/version.rb; source/code/plugin/lib/application_insights/version.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/rack/track_request.rb; source/code/plugin/lib/application_insights/rack/track_request.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/unhandled_exception.rb; source/code/plugin/lib/application_insights/unhandled_exception.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/telemetry_client.rb; source/code/plugin/lib/application_insights/telemetry_client.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/queue_base.rb; source/code/plugin/lib/application_insights/channel/queue_base.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/asynchronous_queue.rb; source/code/plugin/lib/application_insights/channel/asynchronous_queue.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/synchronous_sender.rb; source/code/plugin/lib/application_insights/channel/synchronous_sender.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/data_point_type.rb; source/code/plugin/lib/application_insights/channel/contracts/data_point_type.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/data_point.rb; source/code/plugin/lib/application_insights/channel/contracts/data_point.rb; 644; root; root 
+/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/stack_frame.rb; source/code/plugin/lib/application_insights/channel/contracts/stack_frame.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/request_data.rb; source/code/plugin/lib/application_insights/channel/contracts/request_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/session.rb; source/code/plugin/lib/application_insights/channel/contracts/session.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/page_view_data.rb; source/code/plugin/lib/application_insights/channel/contracts/page_view_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb; source/code/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/exception_data.rb; source/code/plugin/lib/application_insights/channel/contracts/exception_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/location.rb; source/code/plugin/lib/application_insights/channel/contracts/location.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/operation.rb; source/code/plugin/lib/application_insights/channel/contracts/operation.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/data.rb; source/code/plugin/lib/application_insights/channel/contracts/data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/event_data.rb; source/code/plugin/lib/application_insights/channel/contracts/event_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/metric_data.rb; 
source/code/plugin/lib/application_insights/channel/contracts/metric_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/device.rb; source/code/plugin/lib/application_insights/channel/contracts/device.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/message_data.rb; source/code/plugin/lib/application_insights/channel/contracts/message_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb; source/code/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/user.rb; source/code/plugin/lib/application_insights/channel/contracts/user.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/severity_level.rb; source/code/plugin/lib/application_insights/channel/contracts/severity_level.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/application.rb; source/code/plugin/lib/application_insights/channel/contracts/application.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/dependency_kind.rb; source/code/plugin/lib/application_insights/channel/contracts/dependency_kind.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/cloud.rb; source/code/plugin/lib/application_insights/channel/contracts/cloud.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/envelope.rb; source/code/plugin/lib/application_insights/channel/contracts/envelope.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/json_serializable.rb; source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb; 644; root; root 
+/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/domain.rb; source/code/plugin/lib/application_insights/channel/contracts/domain.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/base.rb; source/code/plugin/lib/application_insights/channel/contracts/base.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/reopenings.rb; source/code/plugin/lib/application_insights/channel/contracts/reopenings.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb; source/code/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/internal.rb; source/code/plugin/lib/application_insights/channel/contracts/internal.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/availability_data.rb; source/code/plugin/lib/application_insights/channel/contracts/availability_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/exception_details.rb; source/code/plugin/lib/application_insights/channel/contracts/exception_details.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/synchronous_queue.rb; source/code/plugin/lib/application_insights/channel/synchronous_queue.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/sender_base.rb; source/code/plugin/lib/application_insights/channel/sender_base.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/telemetry_context.rb; source/code/plugin/lib/application_insights/channel/telemetry_context.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/asynchronous_sender.rb; source/code/plugin/lib/application_insights/channel/asynchronous_sender.rb; 644; root; 
root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/telemetry_channel.rb; source/code/plugin/lib/application_insights/channel/telemetry_channel.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/event.rb; source/code/plugin/lib/application_insights/channel/event.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights.rb; source/code/plugin/lib/application_insights.rb; 644; root; root + /opt/td-agent-bit/bin/out_oms.so; intermediate/${{BUILD_CONFIGURATION}}/out_oms.so; 755; root; root /etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root @@ -75,12 +126,17 @@ MAINTAINER: 'Microsoft Corporation' /var/opt/microsoft/docker-cimprov; 755; root; root /var/opt/microsoft/docker-cimprov/state; 755; root; root /var/opt/microsoft/docker-cimprov/state/ContainerInventory; 755; root; root -/var/opt/microsoft/docker-cimprov/state/ImageInventory; 755; root; root /var/opt/microsoft/docker-cimprov/log; 755; root; root /opt/td-agent-bit; 755; root; root;sysdir /opt/td-agent-bit/bin; 755; root; root;sysdir +/opt/microsoft/omsagent/plugin/lib; 755; root; root; sysdir +/opt/microsoft/omsagent/plugin/lib/application_insights; 755; root; root; sysdir +/opt/microsoft/omsagent/plugin/lib/application_insights/channel; 755; root; root; sysdir +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts; 755; root; root; sysdir +/opt/microsoft/omsagent/plugin/lib/application_insights/rack; 755; root; root; sysdir + %Dependencies %Postinstall_10 @@ -90,6 +146,9 @@ WriteInstallInfo() { } WriteInstallInfo +#Make omsagent owner for ContainerInventory directory. 
This is needed for ruby plugin to have access +chown omsagent:omsagent /var/opt/microsoft/docker-cimprov/state/ContainerInventory + # Get the state file in place with proper permissions touch /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt chmod 644 /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb new file mode 100644 index 000000000..14fc9f2f8 --- /dev/null +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -0,0 +1,142 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +class ApplicationInsightsUtility + require_relative 'lib/application_insights' + require_relative 'omslog' + require_relative 'DockerApiClient' + require 'json' + require 'base64' + + @@HeartBeat = 'HeartBeatEvent' + @@Exception = 'ExceptionEvent' + @@AcsClusterType = 'ACS' + @@AksClusterType = 'AKS' + @@DaemonsetControllerType = 'DaemonSet' + @OmsAdminFilePath = '/etc/opt/microsoft/omsagent/conf/omsadmin.conf' + @@EnvAcsResourceName = 'ACS_RESOURCE_NAME' + @@EnvAksRegion = 'AKS_REGION' + @@EnvAgentVersion = 'AGENT_VERSION' + @@EnvApplicationInsightsKey = 'APPLICATIONINSIGHTS_AUTH' + @@CustomProperties = {} + @@Tc = nil + + def initialize + end + + class << self + #Set default properties for telemetry event + def initializeUtility() + begin + resourceInfo = ENV['AKS_RESOURCE_ID'] + if resourceInfo.nil? || resourceInfo.empty? 
+ @@CustomProperties["ACSResourceName"] = ENV[@@EnvAcsResourceName] + @@CustomProperties["ClusterType"] = @@AcsClusterType + @@CustomProperties["SubscriptionID"] = "" + @@CustomProperties["ResourceGroupName"] = "" + @@CustomProperties["ClusterName"] = "" + @@CustomProperties["Region"] = "" + else + @@CustomProperties["AKS_RESOURCE_ID"] = resourceInfo + begin + splitStrings = resourceInfo.split('/') + subscriptionId = splitStrings[2] + resourceGroupName = splitStrings[4] + clusterName = splitStrings[8] + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: parsing AKS resourceId: #{resourceInfo}, error: #{errorStr}") + end + @@CustomProperties["ClusterType"] = @@AksClusterType + @@CustomProperties["SubscriptionID"] = subscriptionId + @@CustomProperties["ResourceGroupName"] = resourceGroupName + @@CustomProperties["ClusterName"] = clusterName + @@CustomProperties["Region"] = ENV[@@EnvAksRegion] + end + @@CustomProperties['ControllerType'] = @@DaemonsetControllerType + dockerInfo = DockerApiClient.dockerInfo + @@CustomProperties['DockerVersion'] = dockerInfo['Version'] + @@CustomProperties['DockerApiVersion'] = dockerInfo['ApiVersion'] + @@CustomProperties['WorkspaceID'] = getWorkspaceId + @@CustomProperties['AgentVersion'] = ENV[@@EnvAgentVersion] + encodedAppInsightsKey = ENV[@@EnvApplicationInsightsKey] + if !encodedAppInsightsKey.nil? + decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey) + @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey + end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: initilizeUtility - error: #{errorStr}") + end + end + + def sendHeartBeatEvent(pluginName) + begin + eventName = pluginName + @@HeartBeat + if !(@@Tc.nil?) 
+ @@Tc.track_event eventName , :properties => @@CustomProperties + @@Tc.flush + $log.info("AppInsights Heartbeat Telemetry sent successfully") + end + rescue =>errorStr + $log.warn("Exception in AppInsightsUtility: sendHeartBeatEvent - error: #{errorStr}") + end + end + + def sendCustomEvent(pluginName, properties) + begin + if !(@@Tc.nil?) + @@Tc.track_metric 'LastProcessedContainerInventoryCount', properties['ContainerCount'], + :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, + :properties => @@CustomProperties + @@Tc.flush + $log.info("AppInsights Container Count Telemetry sent successfully") + end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendCustomEvent - error: #{errorStr}") + end + end + + def sendExceptionTelemetry(errorStr) + begin + if @@CustomProperties.empty? || @@CustomProperties.nil? + initializeUtility + end + if !(@@Tc.nil?) + @@Tc.track_exception errorStr , :properties => @@CustomProperties + @@Tc.flush + $log.info("AppInsights Exception Telemetry sent successfully") + end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendExceptionTelemetry - error: #{errorStr}") + end + end + + #Method to send heartbeat and container inventory count + def sendTelemetry(pluginName, properties) + begin + if @@CustomProperties.empty? || @@CustomProperties.nil? 
+ initializeUtility + end + @@CustomProperties['Computer'] = properties['Computer'] + sendHeartBeatEvent(pluginName) + sendCustomEvent(pluginName, properties) + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendTelemetry - error: #{errorStr}") + end + end + + def getWorkspaceId() + begin + adminConf = {} + confFile = File.open(@OmsAdminFilePath, "r") + confFile.each_line do |line| + splitStrings = line.split('=') + adminConf[splitStrings[0]] = splitStrings[1] + end + workspaceId = adminConf['WORKSPACE_ID'] + return workspaceId + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: getWorkspaceId - error: #{errorStr}") + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/ContainerInventoryState.rb b/source/code/plugin/ContainerInventoryState.rb new file mode 100644 index 000000000..7e5ca18e8 --- /dev/null +++ b/source/code/plugin/ContainerInventoryState.rb @@ -0,0 +1,65 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +class ContainerInventoryState + require 'json' + require_relative 'omslog' + @@InventoryDirectory = "/var/opt/microsoft/docker-cimprov/state/ContainerInventory/" + + def initialize + end + + class << self + # Write the container information to disk with the data that is obtained from the current plugin execution + def writeContainerState(container) + containerId = container['InstanceID'] + if !containerId.nil? && !containerId.empty? + begin + file = File.open(@@InventoryDirectory + containerId, "w") + if !file.nil? + file.write(container.to_json) + file.close + else + $log.warn("Exception while opening file with id: #{containerId}") + end + rescue => errorStr + $log.warn("Exception in writeContainerState: #{errorStr}") + end + end + end + + # Reads the container state for the deleted container + def readContainerState(containerId) + begin + containerObject = nil + filepath = @@InventoryDirectory + containerId + file = File.open(filepath, "r") + if !file.nil? 
+ fileContents = file.read + containerObject = JSON.parse(fileContents) + file.close + # Delete the file since the state is update to deleted + File.delete(filepath) if File.exist?(filepath) + else + $log.warn("Open file for container with id returned nil: #{containerId}") + end + rescue => errorStr + $log.warn("Exception in readContainerState: #{errorStr}") + end + return containerObject + end + + # Gets the containers that were written to the disk with the previous plugin invocation but do not exist in the current container list + # Doing this because we need to update the container state to deleted. Else this will stay running forever. + def getDeletedContainers(containerIds) + deletedContainers = nil + begin + previousContainerList = Dir.entries(@@InventoryDirectory) - [".", ".."] + deletedContainers = previousContainerList - containerIds + rescue => errorStr + $log.warn("Exception in getDeletedContainers: #{errorStr}") + end + return deletedContainers + end + end +end \ No newline at end of file diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb new file mode 100644 index 000000000..b93411980 --- /dev/null +++ b/source/code/plugin/DockerApiClient.rb @@ -0,0 +1,162 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +class DockerApiClient + + require 'socket' + require 'json' + require 'timeout' + require_relative 'omslog' + require_relative 'DockerApiRestHelper' + require_relative 'ApplicationInsightsUtility' + + @@SocketPath = "/var/run/docker.sock" + @@ChunkSize = 4096 + @@TimeoutInSeconds = 5 + @@PluginName = 'ContainerInventory' + def initialize + end + + class << self + # Make docker socket call for requests + def getResponse(request, isMultiJson) + begin + socket = UNIXSocket.new(@@SocketPath) + dockerResponse = "" + isTimeOut = false + socket.write(request) + # iterate through the response until the last chunk is less than the chunk size so that we can read all data in socket. 
+ loop do + begin + responseChunk = "" + timeout(@@TimeoutInSeconds) do + responseChunk = socket.recv(@@ChunkSize) + end + dockerResponse += responseChunk + rescue Timeout::Error + $log.warn("Socket read timedout for request: #{request} @ #{Time.now.utc.iso8601}") + isTimeOut = true + end + break if responseChunk.length < @@ChunkSize + end + socket.close + return (isTimeOut)? nil : parseResponse(dockerResponse, isMultiJson) + rescue => errorStr + $log.warn("Socket call failed for request: #{request} error: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def parseResponse(dockerResponse, isMultiJson) + # Doing this because the response is in the raw format and includes headers. + # Need to do a regex match to extract the json part of the response - Anything between [{}] in response + parsedJsonResponse = nil + begin + jsonResponse = isMultiJson ? dockerResponse[/\[{.+}\]/] : dockerResponse[/{.+}/] + rescue => errorStr + $log.warn("Regex match for docker response failed: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") + end + begin + if jsonResponse != nil + parsedJsonResponse = JSON.parse(jsonResponse) + end + rescue => errorStr + $log.warn("Json parsing for docker response failed: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + return parsedJsonResponse + end + + + def getDockerHostName() + dockerHostName = "" + request = DockerApiRestHelper.restDockerInfo + response = getResponse(request, false) + if (response != nil) + dockerHostName = response['Name'] + end + return dockerHostName + end + + def listContainers() + ids = [] + request = DockerApiRestHelper.restDockerPs + containers = getResponse(request, true) + if !containers.nil? && !containers.empty? 
+ containers.each do |container| + ids.push(container['Id']) + end + end + return ids + end + + # This method splits the tag value into an array - repository, image and tag + def getImageRepositoryImageTag(tagValue) + result = ["", "", ""] + begin + if !tagValue.empty? + # Find delimiters in the string of format repository/image:imagetag + slashLocation = tagValue.index('/') + colonLocation = tagValue.index(':') + if !colonLocation.nil? + if slashLocation.nil? + # image:imagetag + result[1] = tagValue[0..(colonLocation-1)] + else + # repository/image:imagetag + result[0] = tagValue[0..(slashLocation-1)] + result[1] = tagValue[(slashLocation + 1)..(colonLocation - 1)] + end + result[2] = tagValue[(colonLocation + 1)..-1] + end + end + rescue => errorStr + $log.warn("Exception at getImageRepositoryImageTag: #{errorStr} @ #{Time.now.utc.iso8601}") + end + return result + end + + # Image is in the format repository/image:imagetag - This method creates a hash of image id and repository, image and tag + def getImageIdMap() + result = nil + begin + request = DockerApiRestHelper.restDockerImages + images = getResponse(request, true) + if !images.nil? && !images.empty? + result = {} + images.each do |image| + tagValue = "" + tags = image['RepoTags'] + if !tags.nil? && tags.kind_of?(Array) && tags.length > 0 + tagValue = tags[0] + end + idValue = image['Id'] + if !idValue.nil? 
+ result[idValue] = getImageRepositoryImageTag(tagValue) + end + end + end + rescue => errorStr + $log.warn("Exception at getImageIdMap: #{errorStr} @ #{Time.now.utc.iso8601}") + end + return result + end + + def dockerInspectContainer(id) + request = DockerApiRestHelper.restDockerInspect(id) + return getResponse(request, false) + end + + # This method returns docker version and docker api version for telemetry + def dockerInfo() + request = DockerApiRestHelper.restDockerVersion + response = getResponse(request, false) + dockerInfo = {} + if (response != nil) + dockerInfo['Version'] = response['Version'] + dockerInfo['ApiVersion'] = response['ApiVersion'] + end + return dockerInfo + end + end +end \ No newline at end of file diff --git a/source/code/plugin/DockerApiRestHelper.rb b/source/code/plugin/DockerApiRestHelper.rb new file mode 100644 index 000000000..76361b122 --- /dev/null +++ b/source/code/plugin/DockerApiRestHelper.rb @@ -0,0 +1,55 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +class DockerApiRestHelper + def initialize + end + + class << self + # Create the REST request to list images + # https://docs.docker.com/engine/reference/api/docker_remote_api_v1.21/#list-images + # returns Request in string format + def restDockerImages() + begin + return "GET /images/json?all=0 HTTP/1.1\r\nHost: localhost\r\n\r\n"; + end + end + + # Create the REST request to list containers + # https://docs.docker.com/engine/reference/api/docker_remote_api_v1.21/#list-containers + # returns Request in string format + def restDockerPs() + begin + return "GET /containers/json?all=1 HTTP/1.1\r\nHost: localhost\r\n\r\n"; + end + end + + # Create the REST request to inspect a container + # https://docs.docker.com/engine/reference/api/docker_remote_api_v1.21/#inspect-a-container + # parameter - ID of the container to be inspected + # returns Request in string format + def restDockerInspect(id) + begin + return "GET /containers/" + id + "/json HTTP/1.1\r\nHost: 
localhost\r\n\r\n"; + end + end + + # Create the REST request to get docker info + # https://docs.docker.com/engine/reference/api/docker_remote_api_v1.21/#get-container-stats-based-on-resource-usage + # returns Request in string format + def restDockerInfo() + begin + return "GET /info HTTP/1.1\r\nHost: localhost\r\n\r\n"; + end + end + + # Create the REST request to get docker info + # https://docs.docker.com/engine/api/v1.21/#21-containers + # returns Request in string format + def restDockerVersion() + begin + return "GET /version HTTP/1.1\r\nHost: localhost\r\n\r\n"; + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb new file mode 100644 index 000000000..43811e1e1 --- /dev/null +++ b/source/code/plugin/in_containerinventory.rb @@ -0,0 +1,266 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +module Fluent + + class Container_Inventory_Input < Input + Plugin.register_input('containerinventory', self) + + @@PluginName = 'ContainerInventory' + @@RunningState = 'Running' + @@FailedState = 'Failed' + @@StoppedState = 'Stopped' + @@PausedState = 'Paused' + + def initialize + super + require 'json' + require_relative 'DockerApiClient' + require_relative 'ContainerInventoryState' + require_relative 'ApplicationInsightsUtility' + require_relative 'omslog' + end + + config_param :run_interval, :time, :default => '1m' + config_param :tag, :string, :default => "oms.containerinsights.containerinventory" + + def configure (conf) + super + end + + def start + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) + @@telemetryTimeTracker = DateTime.now.to_time.to_i + end + end + + def shutdown + if @run_interval + @mutex.synchronize { + @finished = true + @condition.signal + } + @thread.join + end + end + + def obtainContainerConfig(instance, container) + begin + configValue = 
container['Config'] + if !configValue.nil? + instance['ContainerHostname'] = configValue['Hostname'] + + envValue = configValue['Env'] + envValueString = (envValue.nil?) ? "" : envValue.to_s + # Restricting the ENV string value to 200kb since the size of this string can go very high + if envValueString.length > 200000 + envValueStringTruncated = envValueString.slice(0..200000) + lastIndex = envValueStringTruncated.rindex("\", ") + if !lastIndex.nil? + envValueStringTruncated = envValueStringTruncated.slice(0..lastIndex) + "]" + end + instance['EnvironmentVar'] = envValueStringTruncated + else + instance['EnvironmentVar'] = envValueString + end + + cmdValue = configValue['Cmd'] + cmdValueString = (cmdValue.nil?) ? "" : cmdValue.to_s + instance['Command'] = cmdValueString + + instance['ComposeGroup'] = "" + labelsValue = configValue['Labels'] + if !labelsValue.nil? && !labelsValue.empty? + instance['ComposeGroup'] = labelsValue['com.docker.compose.project'] + end + else + $log.warn("Attempt in ObtainContainerConfig to get container: #{container['Id']} config information returned null") + end + rescue => errorStr + $log.warn("Exception in obtainContainerConfig: #{errorStr}") + end + end + + def obtainContainerState(instance, container) + begin + stateValue = container['State'] + if !stateValue.nil? 
+ exitCodeValue = stateValue['ExitCode'] + # Exit codes less than 0 are not supported by the engine + if exitCodeValue < 0 + exitCodeValue = 128 + $log.info("obtainContainerState::Container: #{container['Id']} returned negative exit code") + end + instance['ExitCode'] = exitCodeValue + if exitCodeValue > 0 + instance['State'] = @@FailedState + else + # Set the Container status : Running/Paused/Stopped + runningValue = stateValue['Running'] + if runningValue + pausedValue = stateValue['Paused'] + # Checking for paused within running is true state because docker returns true for both Running and Paused fields when the container is paused + if pausedValue + instance['State'] = @@PausedState + else + instance['State'] = @@RunningState + end + else + instance['State'] = @@StoppedState + end + end + instance['StartedTime'] = stateValue['StartedAt'] + instance['FinishedTime'] = stateValue['FinishedAt'] + else + $log.info("Attempt in ObtainContainerState to get container: #{container['Id']} state information returned null") + end + rescue => errorStr + $log.warn("Exception in obtainContainerState: #{errorStr}") + end + end + + def obtainContainerHostConfig(instance, container) + begin + hostConfig = container['HostConfig'] + if !hostConfig.nil? + links = hostConfig['Links'] + instance['Links'] = "" + if !links.nil? + linksString = links.to_s + instance['Links'] = (linksString == "null")? "" : linksString + end + portBindings = hostConfig['PortBindings'] + instance['Ports'] = "" + if !portBindings.nil? + portBindingsString = portBindings.to_s + instance['Ports'] = (portBindingsString == "null")? 
"" : portBindingsString + end + else + $log.info("Attempt in ObtainContainerHostConfig to get container: #{container['Id']} host config information returned null") + end + rescue => errorStr + $log.warn("Exception in obtainContainerHostConfig: #{errorStr}") + end + end + + def inspectContainer(id, nameMap) + containerInstance = {} + begin + container = DockerApiClient.dockerInspectContainer(id) + if !container.nil? && !container.empty? + containerInstance['InstanceID'] = container['Id'] + containerInstance['CreatedTime'] = container['Created'] + containerName = container['Name'] + if !containerName.nil? && !containerName.empty? + # Remove the leading / from the name if it exists (this is an API issue) + containerInstance['ElementName'] = (containerName[0] == '/') ? containerName[1..-1] : containerName + end + imageValue = container['Image'] + if !imageValue.nil? && !imageValue.empty? + containerInstance['ImageId'] = imageValue + repoImageTagArray = nameMap[imageValue] + if nameMap.has_key? imageValue + containerInstance['Repository'] = repoImageTagArray[0] + containerInstance['Image'] = repoImageTagArray[1] + containerInstance['ImageTag'] = repoImageTagArray[2] + end + end + obtainContainerConfig(containerInstance, container); + obtainContainerState(containerInstance, container); + obtainContainerHostConfig(containerInstance, container); + end + rescue => errorStr + $log.warn("Exception in inspectContainer: #{errorStr} for container: #{id}") + end + return containerInstance + end + + def enumerate + currentTime = Time.now + emitTime = currentTime.to_f + batchTime = currentTime.utc.iso8601 + containerInventory = Array.new + $log.info("in_container_inventory::enumerate : Begin processing @ #{Time.now.utc.iso8601}") + hostname = DockerApiClient.getDockerHostName + begin + containerIds = DockerApiClient.listContainers + if !containerIds.empty? 
+ eventStream = MultiEventStream.new + nameMap = DockerApiClient.getImageIdMap + containerIds.each do |containerId| + inspectedContainer = {} + inspectedContainer = inspectContainer(containerId, nameMap) + inspectedContainer['Computer'] = hostname + inspectedContainer['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated + containerInventory.push inspectedContainer + ContainerInventoryState.writeContainerState(inspectedContainer) + end + # Update the state for deleted containers + deletedContainers = ContainerInventoryState.getDeletedContainers(containerIds) + if !deletedContainers.nil? && !deletedContainers.empty? + deletedContainers.each do |deletedContainer| + container = ContainerInventoryState.readContainerState(deletedContainer) + if !container.nil? + container.each{|k,v| container[k]=v} + container['State'] = "Deleted" + containerInventory.push container + end + end + end + + containerInventory.each do |record| + wrapper = { + "DataType"=>"CONTAINER_INVENTORY_BLOB", + "IPName"=>"ContainerInsights", + "DataItems"=>[record.each{|k,v| record[k]=v}] + } + eventStream.add(emitTime, wrapper) if wrapper + end + router.emit_stream(@tag, eventStream) if eventStream + @@istestvar = ENV['ISTEST'] + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp('true') == 0 && eventStream.count > 0) + $log.info("containerInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference/60 + if (timeDifferenceInMinutes >= 5) + @@telemetryTimeTracker = DateTime.now.to_time.to_i + telemetryProperties = {} + telemetryProperties['Computer'] = hostname + telemetryProperties['ContainerCount'] = containerInventory.length + ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) + end + $log.info("in_container_inventory::enumerate : Processing complete - emitted stream @ #{Time.now.utc.iso8601}") + end + rescue => errorStr + $log.warn("Exception in enumerate container inventory: #{errorStr}") + end + end + + def run_periodic + @mutex.lock + done = @finished + until done + @condition.wait(@mutex, @run_interval) + done = @finished + @mutex.unlock + if !done + begin + $log.info("in_container_inventory::run_periodic @ #{Time.now.utc.iso8601}") + enumerate + rescue => errorStr + $log.warn "in_container_inventory::run_periodic: Failed in enumerate container inventory: #{errorStr}" + end + end + @mutex.lock + end + @mutex.unlock + end + + end # Container_Inventory_Input + +end # module \ No newline at end of file diff --git a/source/code/plugin/lib/application_insights.rb b/source/code/plugin/lib/application_insights.rb new file mode 100644 index 000000000..0a683d484 --- /dev/null +++ b/source/code/plugin/lib/application_insights.rb @@ -0,0 +1,9 @@ +require_relative 'application_insights/telemetry_client' +require_relative 'application_insights/unhandled_exception' +require_relative 'application_insights/version' + +module ApplicationInsights + module Rack + autoload :TrackRequest, "application_insights/rack/track_request" + end +end diff --git a/source/code/plugin/lib/application_insights/channel/asynchronous_queue.rb 
b/source/code/plugin/lib/application_insights/channel/asynchronous_queue.rb new file mode 100644 index 000000000..333f6968b --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/asynchronous_queue.rb @@ -0,0 +1,58 @@ +require_relative 'event' +require_relative 'queue_base' + +module ApplicationInsights + module Channel + # An asynchronous queue for use in conjunction with the {AsynchronousSender}. + # The queue will notify the sender that it needs to pick up items when it + # reaches {#max_queue_length}, or when the consumer calls {#flush} via the + # {#flush_notification} event. + # + # @example + # require 'application_insights' + # require 'thread' + # queue = ApplicationInsights::Channel::AsynchronousQueue.new nil + # Thread.new do + # sleep 1 + # queue.push 1 + # queue.flush + # end + # queue.flush_notification.wait + # queue.flush_notification.clear + # result = queue.pop + class AsynchronousQueue < QueueBase + # Initializes a new instance of the class. + # @param [SenderBase] sender the sender object that will be used in + # conjunction with this queue. In addition to the sender object must + # support a {AsynchronousSender#start} method which is invoked each time + # an item is pushed to the queue as well as use the {#flush_notification} + # event. + def initialize(sender) + @flush_notification = Event.new + super sender + end + + # The flush notification {ApplicationInsights::Channel::Event} that the {#sender} + # will use to get notified that a flush is needed. + # @return [Event] object that the {#sender} can wait on. + attr_reader :flush_notification + + # Adds the passed in item object to the queue and notifies the {#sender} + # to start an asynchronous send operation + # by calling {AsynchronousSender#start}. + # @param [Contracts::Envelope] item the telemetry envelope object to send + # to the service. 
+ def push(item) + super item + @sender.start if @sender + end + + # Flushes the current queue by notifying the {#sender} via the + # {#flush_notification} event. + def flush + @flush_notification.set + @sender.start if @sender + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/asynchronous_sender.rb b/source/code/plugin/lib/application_insights/channel/asynchronous_sender.rb new file mode 100644 index 000000000..da573f08c --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/asynchronous_sender.rb @@ -0,0 +1,133 @@ +require_relative 'sender_base' +require 'thread' + +module ApplicationInsights + module Channel + # An asynchronous sender that works in conjunction with the {AsynchronousQueue}. + # The sender object will start a worker thread that will pull items from the + # {#queue}. The thread will be created when the client calls {#start} and + # will check for queue items every {#send_interval} seconds. The worker thread + # can also be forced to check the queue by setting the + # {AsynchronousQueue#flush_notification} event. + # + # - If no items are found, the thread will go back to sleep. + # - If items are found, the worker thread will send items to the specified + # service in batches of {#send_buffer_size}. + # + # If no queue items are found for {#send_time} seconds, the worker thread + # will shut down (and {#start} will need to be called again). + class AsynchronousSender < SenderBase + SERVICE_ENDPOINT_URI = 'https://dc.services.visualstudio.com/v2/track' + # Initializes a new instance of the class. + # @param [String] service_endpoint_uri the address of the service to send + # telemetry data to. 
+ def initialize(service_endpoint_uri = SERVICE_ENDPOINT_URI) + @send_interval = 1.0 + @send_remaining_time = 0 + @send_time = 3.0 + @lock_work_thread = Mutex.new + @work_thread = nil + @start_notification_processed = true + super service_endpoint_uri + end + + # The time span in seconds at which the the worker thread will check the + # {#queue} for items (defaults to: 1.0). + # @return [Fixnum] the interval in seconds. + attr_accessor :send_interval + + # The time span in seconds for which the worker thread will stay alive if + # no items are found in the {#queue} (defaults to 3.0). + # @return [Fixnum] the interval in seconds. + attr_accessor :send_time + + # The worker thread which checks queue items and send data every + # (#send_interval) seconds or upon flush. + # @return [Thread] the work thread + attr_reader :work_thread + + # Calling this method will create a worker thread that checks the {#queue} + # every {#send_interval} seconds for a total duration of {#send_time} + # seconds for new items. If a worker thread has already been created, + # calling this method does nothing. + def start + @start_notification_processed = false + # Maintain one working thread at one time + unless @work_thread + @lock_work_thread.synchronize do + unless @work_thread + local_send_interval = [@send_interval, 0.1].max + @send_remaining_time = [@send_time, local_send_interval].max + @work_thread = Thread.new { run } + @work_thread.abort_on_exception = false + end + end + end + end + + private + + def run + # save the queue locally + local_queue = @queue + if local_queue.nil? 
+ @work_thread = nil + return + end + + begin + # fix up the send interval (can't be lower than 100ms) + local_send_interval = [@send_interval, 0.1].max + + while true + @start_notification_processed = true + while true + # get at most @send_buffer_size items from the queue + data = [] + @send_buffer_size.downto(1) do + item = local_queue.pop + break if not item + data.push item + end + + # if we didn't get any items from the queue, we're done here + break if data.length == 0 + + # reset the send time + @send_remaining_time = @send_time + + # finally send the data + send data + end + + # wait at most @send_interval ms (or until we get signalled) + result = local_queue.flush_notification.wait local_send_interval + if result + local_queue.flush_notification.clear + next + end + + # decrement the remaining time + @send_remaining_time -= local_send_interval + # If remaining time <=0 and there is no start notification unprocessed, + # then stop the working thread + if @send_remaining_time <= 0 && @start_notification_processed + # Note: there is still a chance some start notification could be + # missed, e.g., the start method got triggered between the above and + # following line. However the data is not lost as it would be + # processed later when next start notification comes after the worker + # thread stops. The cost to ensure no notification miss is high where + # a lock is required each time the start method calls. 
+ @work_thread = nil + break + end + end + rescue Exception => e + # Make sure work_thread sets to nil when it terminates abnormally + @work_thread = nil + @logger.error('application_insights') { "Asynchronous sender work thread terminated abnormally: #{e.to_s}" } + end + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/application.rb b/source/code/plugin/lib/application_insights/channel/contracts/application.rb new file mode 100644 index 000000000..071c37385 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/application.rb @@ -0,0 +1,13 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Application + include JsonSerializable + + attr_accessor :ver + + attribute_mapping( + ver: 'ai.application.ver' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/availability_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/availability_data.rb new file mode 100644 index 000000000..d560dd15b --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/availability_data.rb @@ -0,0 +1,34 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class AvailabilityData + include JsonSerializable + + attr_accessor :ver, :id, :name, :duration, :success, :run_location, :message, + :properties, :measurements + + attribute_mapping( + ver: 'ver', + id: 'id', + name: 'name', + duration: 'duration', + success: 'success', + run_location: 'runLocation', + message: 'message', + properties: 'properties', + measurements: 'measurements' + ) + + def ver + @ver ||= 2 + end + + def properties + @properties ||= {} + end + + def measurements + @measurements ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/base.rb b/source/code/plugin/lib/application_insights/channel/contracts/base.rb new file mode 100644 index 
000000000..bb88a4625 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/base.rb @@ -0,0 +1,13 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Base + include JsonSerializable + + attr_accessor :base_type + + attribute_mapping( + base_type: 'baseType' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/cloud.rb b/source/code/plugin/lib/application_insights/channel/contracts/cloud.rb new file mode 100644 index 000000000..5aaeeee04 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/cloud.rb @@ -0,0 +1,14 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Cloud + include JsonSerializable + + attr_accessor :role, :role_instance + + attribute_mapping( + role: 'ai.cloud.role', + role_instance: 'ai.cloud.roleInstance' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/data.rb b/source/code/plugin/lib/application_insights/channel/contracts/data.rb new file mode 100644 index 000000000..c7184edfd --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/data.rb @@ -0,0 +1,14 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Data + include JsonSerializable + + attr_accessor :base_type, :base_data + + attribute_mapping( + base_type: 'baseType', + base_data: 'baseData' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/data_point.rb b/source/code/plugin/lib/application_insights/channel/contracts/data_point.rb new file mode 100644 index 000000000..6556b351b --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/data_point.rb @@ -0,0 +1,25 @@ +require_relative 'json_serializable' +require_relative 'data_point_type' + +module ApplicationInsights::Channel::Contracts + class DataPoint + include JsonSerializable + + 
attr_accessor :ns, :name, :kind, :value, :count, :min, :max, :std_dev + + attribute_mapping( + ns: 'ns', + name: 'name', + kind: 'kind', + value: 'value', + count: 'count', + min: 'min', + max: 'max', + std_dev: 'stdDev' + ) + + def kind + @kind ||= DataPointType::MEASUREMENT + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/data_point_type.rb b/source/code/plugin/lib/application_insights/channel/contracts/data_point_type.rb new file mode 100644 index 000000000..f9816e4a9 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/data_point_type.rb @@ -0,0 +1,7 @@ +module ApplicationInsights::Channel::Contracts + class DataPointType + MEASUREMENT = 0 + + AGGREGATION = 1 + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/dependency_kind.rb b/source/code/plugin/lib/application_insights/channel/contracts/dependency_kind.rb new file mode 100644 index 000000000..38a441499 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/dependency_kind.rb @@ -0,0 +1,9 @@ +module ApplicationInsights::Channel::Contracts + class DependencyKind + SQL = 0 + + HTTP = 1 + + OTHER = 2 + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb b/source/code/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb new file mode 100644 index 000000000..a68dad72b --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb @@ -0,0 +1,9 @@ +module ApplicationInsights::Channel::Contracts + class DependencySourceType + UNDEFINED = 0 + + AIC = 1 + + APMC = 2 + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/device.rb b/source/code/plugin/lib/application_insights/channel/contracts/device.rb new file mode 100644 index 000000000..af6855102 --- /dev/null +++ 
b/source/code/plugin/lib/application_insights/channel/contracts/device.rb @@ -0,0 +1,18 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Device + include JsonSerializable + + attr_accessor :id, :locale, :model, :oem_name, :os_version, :type + + attribute_mapping( + id: 'ai.device.id', + locale: 'ai.device.locale', + model: 'ai.device.model', + oem_name: 'ai.device.oemName', + os_version: 'ai.device.osVersion', + type: 'ai.device.type' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/domain.rb b/source/code/plugin/lib/application_insights/channel/contracts/domain.rb new file mode 100644 index 000000000..8a7ba880d --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/domain.rb @@ -0,0 +1,10 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Domain + include JsonSerializable + + attribute_mapping( + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/envelope.rb b/source/code/plugin/lib/application_insights/channel/contracts/envelope.rb new file mode 100644 index 000000000..b8608e388 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/envelope.rb @@ -0,0 +1,32 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Envelope + include JsonSerializable + + attr_accessor :ver, :name, :time, :sample_rate, :seq, :i_key, :tags, :data + + attribute_mapping( + ver: 'ver', + name: 'name', + time: 'time', + sample_rate: 'sampleRate', + seq: 'seq', + i_key: 'iKey', + tags: 'tags', + data: 'data' + ) + + def ver + @ver ||= 1 + end + + def sample_rate + @sample_rate ||= 100.0 + end + + def tags + @tags ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/event_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/event_data.rb new file mode 100644 
index 000000000..4bfb16124 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/event_data.rb @@ -0,0 +1,28 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class EventData + include JsonSerializable + + attr_accessor :ver, :name, :properties, :measurements + + attribute_mapping( + ver: 'ver', + name: 'name', + properties: 'properties', + measurements: 'measurements' + ) + + def ver + @ver ||= 2 + end + + def properties + @properties ||= {} + end + + def measurements + @measurements ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/exception_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/exception_data.rb new file mode 100644 index 000000000..5cffd1253 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/exception_data.rb @@ -0,0 +1,35 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class ExceptionData + include JsonSerializable + + attr_accessor :ver, :exceptions, :severity_level, :problem_id, :properties, + :measurements + + attribute_mapping( + ver: 'ver', + exceptions: 'exceptions', + severity_level: 'severityLevel', + problem_id: 'problemId', + properties: 'properties', + measurements: 'measurements' + ) + + def ver + @ver ||= 2 + end + + def exceptions + @exceptions ||= [] + end + + def properties + @properties ||= {} + end + + def measurements + @measurements ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/exception_details.rb b/source/code/plugin/lib/application_insights/channel/contracts/exception_details.rb new file mode 100644 index 000000000..85bfc6282 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/exception_details.rb @@ -0,0 +1,28 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class ExceptionDetails + 
include JsonSerializable + + attr_accessor :id, :outer_id, :type_name, :message, :has_full_stack, :stack, + :parsed_stack + + attribute_mapping( + id: 'id', + outer_id: 'outerId', + type_name: 'typeName', + message: 'message', + has_full_stack: 'hasFullStack', + stack: 'stack', + parsed_stack: 'parsedStack' + ) + + def has_full_stack + @has_full_stack.nil? ? true : @has_full_stack + end + + def parsed_stack + @parsed_stack ||= [] + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/internal.rb b/source/code/plugin/lib/application_insights/channel/contracts/internal.rb new file mode 100644 index 000000000..6e8f3d300 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/internal.rb @@ -0,0 +1,15 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Internal + include JsonSerializable + + attr_accessor :sdk_version, :agent_version, :node_name + + attribute_mapping( + sdk_version: 'ai.internal.sdkVersion', + agent_version: 'ai.internal.agentVersion', + node_name: 'ai.internal.nodeName' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb b/source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb new file mode 100644 index 000000000..8f4677044 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb @@ -0,0 +1,59 @@ +require 'json' + +module ApplicationInsights + module Channel + module Contracts + module JsonSerializable + module ClassMethods + attr_reader :json_mappings + + def attribute_mapping(mappings = {}) + @json_mappings = mappings + end + end + + def self.included(klass) + klass.extend JsonSerializable::ClassMethods + end + + def initialize(attributes = {}) + attributes.each { |k, v| send(:"#{k}=", v) } + end + + def to_h + output = {} + klass = self.class + + klass.json_mappings.each do |attr, name| + value = visit 
self.send(attr) + is_empty = value.respond_to?(:empty?) && value.empty? + + output[name] = value unless value.nil? || is_empty + end + + output + end + + def to_json(args = {}) + JSON.generate self.to_h, args + end + + private + + def visit(object) + return if object.nil? + + if object.is_a? Array + object.map { |e| visit e } + elsif object.is_a? Hash + Hash[object.map { |k, v| [k, visit(v)] }] + elsif object.respond_to? :to_h + object.to_h + else + object + end + end + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/location.rb b/source/code/plugin/lib/application_insights/channel/contracts/location.rb new file mode 100644 index 000000000..4136c869b --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/location.rb @@ -0,0 +1,13 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Location + include JsonSerializable + + attr_accessor :ip + + attribute_mapping( + ip: 'ai.location.ip' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/message_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/message_data.rb new file mode 100644 index 000000000..1340f5ba7 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/message_data.rb @@ -0,0 +1,24 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class MessageData + include JsonSerializable + + attr_accessor :ver, :message, :severity_level, :properties + + attribute_mapping( + ver: 'ver', + message: 'message', + severity_level: 'severityLevel', + properties: 'properties' + ) + + def ver + @ver ||= 2 + end + + def properties + @properties ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/metric_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/metric_data.rb new file mode 100644 index 000000000..bcb5739d6 --- 
/dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/metric_data.rb @@ -0,0 +1,27 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class MetricData + include JsonSerializable + + attr_accessor :ver, :metrics, :properties + + attribute_mapping( + ver: 'ver', + metrics: 'metrics', + properties: 'properties' + ) + + def ver + @ver ||= 2 + end + + def metrics + @metrics ||= [] + end + + def properties + @properties ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/operation.rb b/source/code/plugin/lib/application_insights/channel/contracts/operation.rb new file mode 100644 index 000000000..c86dd111b --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/operation.rb @@ -0,0 +1,17 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Operation + include JsonSerializable + + attr_accessor :id, :name, :parent_id, :synthetic_source, :correlation_vector + + attribute_mapping( + id: 'ai.operation.id', + name: 'ai.operation.name', + parent_id: 'ai.operation.parentId', + synthetic_source: 'ai.operation.syntheticSource', + correlation_vector: 'ai.operation.correlationVector' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/page_view_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/page_view_data.rb new file mode 100644 index 000000000..d17dd2f79 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/page_view_data.rb @@ -0,0 +1,33 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class PageViewData + include JsonSerializable + + attr_accessor :ver, :url, :name, :duration, :id, :referrer_uri, :properties, + :measurements + + attribute_mapping( + ver: 'ver', + url: 'url', + name: 'name', + duration: 'duration', + id: 'id', + referrer_uri: 'referrerUri', + 
properties: 'properties', + measurements: 'measurements' + ) + + def ver + @ver ||= 2 + end + + def properties + @properties ||= {} + end + + def measurements + @measurements ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb new file mode 100644 index 000000000..adde3f3ad --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb @@ -0,0 +1,39 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class PageViewPerfData + include JsonSerializable + + attr_accessor :ver, :url, :perf_total, :name, :duration, :network_connect, + :sent_request, :received_response, :id, :dom_processing, :referrer_uri, + :properties, :measurements + + attribute_mapping( + ver: 'ver', + url: 'url', + perf_total: 'perfTotal', + name: 'name', + duration: 'duration', + network_connect: 'networkConnect', + sent_request: 'sentRequest', + received_response: 'receivedResponse', + id: 'id', + dom_processing: 'domProcessing', + referrer_uri: 'referrerUri', + properties: 'properties', + measurements: 'measurements' + ) + + def ver + @ver ||= 2 + end + + def properties + @properties ||= {} + end + + def measurements + @measurements ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb new file mode 100644 index 000000000..a238841f6 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb @@ -0,0 +1,40 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class RemoteDependencyData + include JsonSerializable + + attr_accessor :ver, :name, :id, :result_code, :duration, :success, :data, + :target, :type, :properties, 
:measurements + + attribute_mapping( + ver: 'ver', + name: 'name', + id: 'id', + result_code: 'resultCode', + duration: 'duration', + success: 'success', + data: 'data', + target: 'target', + type: 'type', + properties: 'properties', + measurements: 'measurements' + ) + + def ver + @ver ||= 2 + end + + def success + @success.nil? ? true : @success + end + + def properties + @properties ||= {} + end + + def measurements + @measurements ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/reopenings.rb b/source/code/plugin/lib/application_insights/channel/contracts/reopenings.rb new file mode 100644 index 000000000..394bf8afb --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/reopenings.rb @@ -0,0 +1,27 @@ +module ApplicationInsights::Channel::Contracts + class ExceptionData + def handled_at + @properties["handledAt"] if @properties + end + + def handled_at=(handled_at) + if handled_at + @properties ||= {} + @properties["handledAt"] = handled_at + end + end + end + + class RequestData + def http_method + @properties["httpMethod"] if @properties + end + + def http_method=(http_method) + if http_method + @properties ||= {} + @properties["httpMethod"] = http_method + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/lib/application_insights/channel/contracts/request_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/request_data.rb new file mode 100644 index 000000000..af2581c2b --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/request_data.rb @@ -0,0 +1,35 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class RequestData + include JsonSerializable + + attr_accessor :ver, :id, :source, :name, :duration, :response_code, :success, + :url, :properties, :measurements + + attribute_mapping( + ver: 'ver', + id: 'id', + source: 'source', + name: 'name', + duration: 
'duration', + response_code: 'responseCode', + success: 'success', + url: 'url', + properties: 'properties', + measurements: 'measurements' + ) + + def ver + @ver ||= 2 + end + + def properties + @properties ||= {} + end + + def measurements + @measurements ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/session.rb b/source/code/plugin/lib/application_insights/channel/contracts/session.rb new file mode 100644 index 000000000..a761c51c5 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/session.rb @@ -0,0 +1,14 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Session + include JsonSerializable + + attr_accessor :id, :is_first + + attribute_mapping( + id: 'ai.session.id', + is_first: 'ai.session.isFirst' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/severity_level.rb b/source/code/plugin/lib/application_insights/channel/contracts/severity_level.rb new file mode 100644 index 000000000..322a00ec3 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/severity_level.rb @@ -0,0 +1,13 @@ +module ApplicationInsights::Channel::Contracts + class SeverityLevel + VERBOSE = 0 + + INFORMATION = 1 + + WARNING = 2 + + ERROR = 3 + + CRITICAL = 4 + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/stack_frame.rb b/source/code/plugin/lib/application_insights/channel/contracts/stack_frame.rb new file mode 100644 index 000000000..b4f4b9844 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/stack_frame.rb @@ -0,0 +1,17 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class StackFrame + include JsonSerializable + + attr_accessor :level, :method, :assembly, :file_name, :line + + attribute_mapping( + level: 'level', + method: 'method', + assembly: 'assembly', + file_name: 'fileName', 
+ line: 'line' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/user.rb b/source/code/plugin/lib/application_insights/channel/contracts/user.rb new file mode 100644 index 000000000..a7ff8a7cf --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/user.rb @@ -0,0 +1,15 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class User + include JsonSerializable + + attr_accessor :account_id, :id, :auth_user_id + + attribute_mapping( + account_id: 'ai.user.accountId', + id: 'ai.user.id', + auth_user_id: 'ai.user.authUserId' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/event.rb b/source/code/plugin/lib/application_insights/channel/event.rb new file mode 100644 index 000000000..ae61064f8 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/event.rb @@ -0,0 +1,68 @@ +require_relative 'queue_base' +require 'thread' + +module ApplicationInsights + module Channel + # An event class that allows simple cross-thread signalling. + # + # An object of this type managers an internal flag that can be set to true + # via the {#set} method and reset via the {#clear} method. Calling the + # {#wait} method will block until the flag is set to true. + # + # @example + # require 'application_insights' + # require 'thread' + # event = ApplicationInsights::Channel::Event.new + # Thread.new do + # sleep 1 + # event.set + # end + # puts 'Main screen turn on.' + # result = event.wait + # puts 'All your base are belong to us.' + class Event + # Initializes a new instance of the class. + def initialize + @mutex = Mutex.new + @condition_variable = ConditionVariable.new + @signal = false + end + + # The signal value for this object. Note that the value of this property is + # not synchronized with respect to {#set} and {#clear} meaning that it + # could return false positives or negatives. + # @return [Boolean] the signal value. 
+ attr_reader :signal + + # Sets the internal flag to true. Calling this method will also cause all + # waiting threads to awaken. + def set + @mutex.synchronize do + @signal = true + @condition_variable.broadcast + end + end + + # Sets the internal flag to false. + def clear + @mutex.synchronize do + @signal = false + end + end + + # Calling this method will block until the internal flag is set to true. + # If the flag is set to true before calling this method, we will return + # immediately. If the timeout parameter is specified, the method will + # unblock after the specified number of seconds. + # @param [Fixnum] timeout the timeout for the operation in seconds. + # @return [Boolean] the value of the internal flag on exit. + def wait(timeout=nil) + @mutex.synchronize do + @condition_variable.wait(@mutex, timeout) unless @signal + end + + @signal + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/queue_base.rb b/source/code/plugin/lib/application_insights/channel/queue_base.rb new file mode 100644 index 000000000..91226b17f --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/queue_base.rb @@ -0,0 +1,73 @@ +require 'thread' + +module ApplicationInsights + module Channel + # The base class for all types of queues for use in conjunction with an + # implementation of {SenderBase}. The queue will notify the sender that it + # needs to pick up items when it reaches {#max_queue_length}, or when the + # consumer calls {#flush}. + class QueueBase + # Initializes a new instance of the class. + # @param [SenderBase] sender the sender object that will be used in + # conjunction with this queue. + def initialize(sender) + @queue = Queue.new + @max_queue_length = 500 + self.sender = sender + end + + # The maximum number of items that will be held by the queue before the + # queue will call the {#flush} method. + # @return [Fixnum] the maximum queue size. 
(defaults to: 500) + attr_accessor :max_queue_length + + # The sender that is associated with this queue that this queue will use to + # send data to the service. + # @return [SenderBase] the sender object. + attr_reader :sender + + # Change the sender that is associated with this queue. + # @param [SenderBase] sender the sender object. + # @return [SenderBase] the sender object. + def sender=(sender) + @sender = sender + @sender.queue = self if sender + @sender + end + + # Adds the passed in item object to the queue and calls {#flush} if the + # size of the queue is larger than {#max_queue_length}. This method does + # nothing if the passed in item is nil. + # @param [Contracts::Envelope] item the telemetry envelope object to send + # to the service. + def push(item) + return unless item + + @queue.push(item) + + flush if @queue.length >= @max_queue_length + end + + # Pops a single item from the queue and returns it. If the queue is empty, + # this method will return nil. + # @return [Contracts::Envelope] a telemetry envelope object or nil if the + # queue is empty. + def pop + return @queue.pop(true) + rescue ThreadError + return nil + end + + # Flushes the current queue by notifying the {#sender}. This method needs + # to be overridden by a concrete implementations of the queue class. + def flush + end + + # Indicates whether the queue is empty. + # @return [Boolean] true if the queue is empty + def empty? + @queue.empty? 
+ end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/sender_base.rb b/source/code/plugin/lib/application_insights/channel/sender_base.rb new file mode 100644 index 000000000..2431bf748 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/sender_base.rb @@ -0,0 +1,88 @@ +require 'json' +require 'net/http' +require 'openssl' +require 'stringio' +require 'zlib' +require 'logger' + +module ApplicationInsights + module Channel + # The base class for all types of senders for use in conjunction with an + # implementation of {QueueBase}. The queue will notify the sender that it + # needs to pick up items. The concrete sender implementation will listen to + # these notifications and will pull items from the queue using + # {QueueBase#pop} getting at most {#send_buffer_size} items. + # It will then call {#send} using the list of items pulled from the queue. + class SenderBase + # Initializes a new instance of the class. + # @param [String] service_endpoint_uri the address of the service to send + # telemetry data to. + def initialize(service_endpoint_uri) + @service_endpoint_uri = service_endpoint_uri + @queue = nil + @send_buffer_size = 100 + @logger = Logger.new(STDOUT) + end + + # The service endpoint URI where this sender will send data to. + # @return [String] the service endpoint URI. + attr_accessor :service_endpoint_uri + + # The queue that this sender is draining. While {SenderBase} doesn't + # implement any means of doing so, derivations of this class do. + # @return [QueueBase] the queue instance that this sender is draining. + attr_accessor :queue + + # The buffer size for a single batch of telemetry. This is the maximum number + # of items in a single service request that this sender is going to send. + # @return [Fixnum] the maximum number of items in a telemetry batch. + attr_accessor :send_buffer_size + + # The logger for the sender. 
+ attr_accessor :logger + + # Immediately sends the data passed in to {#service_endpoint_uri}. If the + # service request fails, the passed in items are pushed back to the {#queue}. + # @param [Array] data_to_send an array of + # {Contracts::Envelope} objects to send to the service. + def send(data_to_send) + uri = URI(@service_endpoint_uri) + headers = { + 'Accept' => 'application/json', + 'Content-Type' => 'application/json; charset=utf-8', + 'Content-Encoding' => 'gzip' + } + request = Net::HTTP::Post.new(uri.path, headers) + + # Use JSON.generate instead of to_json, otherwise it will + # default to ActiveSupport::JSON.encode for Rails app + json = JSON.generate(data_to_send) + compressed_data = compress(json) + request.body = compressed_data + + http = Net::HTTP.new uri.hostname, uri.port + if uri.scheme.downcase == 'https' + http.use_ssl = true + http.verify_mode = OpenSSL::SSL::VERIFY_NONE + end + + response = http.request(request) + http.finish if http.started? + + if !response.kind_of? Net::HTTPSuccess + @logger.warn('application_insights') { "Failed to send data: #{response.message}" } + end + end + + private + + def compress(string) + wio = StringIO.new("w") + w_gz = Zlib::GzipWriter.new wio, nil, nil + w_gz.write(string) + w_gz.close + wio.string + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/synchronous_queue.rb b/source/code/plugin/lib/application_insights/channel/synchronous_queue.rb new file mode 100644 index 000000000..13c2281ac --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/synchronous_queue.rb @@ -0,0 +1,45 @@ +require_relative 'queue_base' + +module ApplicationInsights + module Channel + # A synchronous queue for use in conjunction with the {SynchronousSender}. + # The queue will call {SenderBase#send} when it reaches {#max_queue_length}, + # or when the consumer calls {#flush}. 
+ # + # @example + # require 'application_insights' + # require 'thread' + # queue = ApplicationInsights::Channel::SynchronousQueue.new nil + # queue.max_queue_length = 1 + # queue.push 1 + class SynchronousQueue < QueueBase + # Initializes a new instance of the class. + # @param [SenderBase] sender the sender object that will be used in + # conjunction with this queue. + def initialize(sender) + super sender + end + + # Flushes the current queue by by calling {#sender}'s + # {SenderBase#send} method. + def flush + local_sender = @sender + return unless local_sender + + while true + # get at most send_buffer_size items and send them + data = [] + while data.length < local_sender.send_buffer_size + item = pop() + break if not item + data.push item + end + + break if data.length == 0 + + local_sender.send(data) + end + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/synchronous_sender.rb b/source/code/plugin/lib/application_insights/channel/synchronous_sender.rb new file mode 100644 index 000000000..ade2f086c --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/synchronous_sender.rb @@ -0,0 +1,17 @@ +require_relative 'sender_base' + +module ApplicationInsights + module Channel + # A synchronous sender that works in conjunction with the {SynchronousQueue}. + # The queue will call {#send} on the current instance with the data to send. + class SynchronousSender < SenderBase + SERVICE_ENDPOINT_URI = 'https://dc.services.visualstudio.com/v2/track' + # Initializes a new instance of the class. + # @param [String] service_endpoint_uri the address of the service to send + # telemetry data to. 
+ def initialize(service_endpoint_uri = SERVICE_ENDPOINT_URI) + super service_endpoint_uri + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/telemetry_channel.rb b/source/code/plugin/lib/application_insights/channel/telemetry_channel.rb new file mode 100644 index 000000000..e026ebf7d --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/telemetry_channel.rb @@ -0,0 +1,131 @@ +require 'time' +require_relative 'asynchronous_queue' +require_relative 'asynchronous_sender' +require_relative 'telemetry_context' +require_relative 'synchronous_queue' +require_relative 'synchronous_sender' +require_relative 'contracts/envelope' +require_relative 'contracts/data' +require_relative 'contracts/internal' +require_relative '../../application_insights/version' + +module ApplicationInsights + module Channel + # The telemetry channel is responsible for constructing a + # {Contracts::Envelope} object from the passed in data and specified + # telemetry context. + # + # @example + # require 'application_insights' + # channel = ApplicationInsights::Channel::TelemetryChannel.new + # event = ApplicationInsights::Channel::Contracts::EventData.new name: 'My event' + # channel.write event + class TelemetryChannel + # Initializes a new instance of the class. + # @param [TelemetryContext] context the telemetry context to use when + # sending telemetry data. + # @param [QueueBase] queue the queue to enqueue the resulting + # {Contracts::Envelope} to. + def initialize(context=nil, queue=nil) + @context = context || TelemetryContext.new + @queue = queue || SynchronousQueue.new(SynchronousSender.new) + end + + # The context associated with this channel. All {Contracts::Envelope} + # objects created by this channel will use this value if it's present or if + # none is specified as part of the {#write} call. 
+ # @return [TelemetryContext] the context instance + # (defaults to: TelemetryContext.new) + attr_reader :context + + # The queue associated with this channel. All {Contracts::Envelope} objects + # created by this channel will be pushed to this queue. + # @return [QueueBase] the queue instance (defaults to: SynchronousQueue.new) + attr_reader :queue + + # The sender associated with this channel. This instance will be used to + # transmit telemetry to the service. + # @return [SenderBase] the sender instance (defaults to: SynchronousSender.new) + def sender + @queue.sender + end + + # Flushes the enqueued data by calling {QueueBase#flush}. + def flush + @queue.flush + end + + # Enqueues the passed in data to the {#queue}. If the caller specifies a + # context as well, it will take precedence over the instance in {#context}. + # @param [Object] data the telemetry data to send. This will be wrapped in + # an {Contracts::Envelope} before being enqueued to the {#queue}. + # @param [TelemetryContext] context the override context to use when + # constructing the {Contracts::Envelope}. + # @param [Time|String] time the timestamp of the telemetry used to construct the + # {Contracts::Envelope}. + def write(data, context=nil, time=nil) + local_context = context || @context + raise ArgumentError, 'Context was required but not provided' unless local_context + + if time && time.is_a?(String) + local_time = time + elsif time && time.is_a?(Time) + local_time = time.iso8601(7) + else + local_time = Time.now.iso8601(7) + end + + data_type = data.class.name.gsub(/^.*::/, '') + set_properties data, local_context + data_attributes = { + :base_type => data_type, + :base_data => data + } + envelope_attributes = { + :name => 'Microsoft.ApplicationInsights.' 
+ data_type[0..-5], + :time => local_time, + :i_key => local_context.instrumentation_key, + :tags => get_tags(local_context), + :data => Contracts::Data.new(data_attributes) + } + envelope = Contracts::Envelope.new envelope_attributes + @queue.push(envelope) + end + + private + + def get_tags(context) + hash = {} + internal_context_attributes = { + :sdk_version => 'rb:' + ApplicationInsights::VERSION + } + internal_context = Contracts::Internal.new internal_context_attributes + + [internal_context, + context.application, + context.cloud, + context.device, + context.user, + context.session, + context.location, + context.operation].each { |c| hash.merge!(c.to_h) if c } + + hash.delete_if { |k, v| v.nil? } + + hash + end + + def set_properties(data, context) + if context.properties + properties = data.properties || {} + context.properties.each do |key, value| + unless properties.key?(key) + properties[key] = value + end + end + data.properties = properties + end + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/telemetry_context.rb b/source/code/plugin/lib/application_insights/channel/telemetry_context.rb new file mode 100644 index 000000000..bb24af24e --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/telemetry_context.rb @@ -0,0 +1,85 @@ +require_relative 'contracts/application' +require_relative 'contracts/cloud' +require_relative 'contracts/device' +require_relative 'contracts/user' +require_relative 'contracts/session' +require_relative 'contracts/operation' +require_relative 'contracts/location' + +module ApplicationInsights + module Channel + # Represents the context for sending telemetry to the + # Application Insights service. 
+ # + # @example + # require 'application_insights' + # context = ApplicationInsights::Channel::TelemetryContext.new + # context.instrumentation_key = '' + # context.application.id = 'My application' + # context.application.ver = '1.2.3' + # context.device.id = 'My current device' + # context.device.oem_name = 'Asus' + # context.device.model = 'X31A' + # context.device.type = "Other" + # context.user.id = 'santa@northpole.net' + class TelemetryContext + # Initializes a new instance of the class. + def initialize + @instrumentation_key = nil + @application = Contracts::Application.new + @cloud = Contracts::Cloud.new + @device = Contracts::Device.new + @user = Contracts::User.new + @session = Contracts::Session.new + @operation = Contracts::Operation.new + @location = Contracts::Location.new + @properties = {} + end + + # The instrumentation key that is used to identify which + # Application Insights application this data is for. + # @return [String] the instrumentation key. + attr_accessor :instrumentation_key + + # The application context. This contains properties of the + # application you are running. + # @return [Contracts::Application] the context object. + attr_accessor :application + + # The cloud context. This contains properties of the + # cloud role you are generating telemetry for. + # @return [Contracts::Cloud] the context object. + attr_accessor :cloud + + # The device context. This contains properties of the + # device you are running on. + # @return [Contracts::Device] the context object. + attr_accessor :device + + # The user context. This contains properties of the + # user you are generating telemetry for. + # @return [Contracts::User] the context object. + attr_accessor :user + + # The session context. This contains properties of the + # session you are generating telemetry for. + # @return [Contracts::Session] the context object. + attr_accessor :session + + # The operation context. 
This contains properties of the + # operation you are generating telemetry for. + # @return [Contracts::Operation] the context object. + attr_accessor :operation + + # The location context. This contains properties of the + # location you are generating telemetry from. + # @return [Contracts::Location] the context object. + attr_accessor :location + + # The property context. This contains free-form properties + # that you can add to your telemetry. + # @return [Hash] the context object. + attr_accessor :properties + end + end +end diff --git a/source/code/plugin/lib/application_insights/rack/track_request.rb b/source/code/plugin/lib/application_insights/rack/track_request.rb new file mode 100644 index 000000000..62c2b0844 --- /dev/null +++ b/source/code/plugin/lib/application_insights/rack/track_request.rb @@ -0,0 +1,154 @@ +require 'rack' +require 'securerandom' +require_relative '../channel/contracts/request_data' +require_relative '../telemetry_client' + +module ApplicationInsights + module Rack + # Track every request and sends the request data to Application Insights. + class TrackRequest + # Initializes a new instance of the class. + # @param [Object] app the inner rack application. + # @param [String] instrumentation_key to identify which Application Insights + # application this data is for. + # @param [Fixnum] buffer_size the buffer size and the buffered requests would + # send to Application Insights when buffer is full. + # @param [Fixnum] send_interval the frequency (in seconds) to check buffer + # and send buffered requests to Application Insights if any. 
+ def initialize(app, instrumentation_key, buffer_size = 500, send_interval = 60) + @app = app + @instrumentation_key = instrumentation_key + @buffer_size = buffer_size + @send_interval = send_interval + + @sender = Channel::AsynchronousSender.new + @sender.send_interval = @send_interval + queue = Channel::AsynchronousQueue.new @sender + queue.max_queue_length = @buffer_size + @channel = Channel::TelemetryChannel.new nil, queue + + @client = TelemetryClient.new @instrumentation_key, @channel + end + + # Track requests and send data to Application Insights asynchronously. + # @param [Hash] env the rack environment. + def call(env) + # Build a request ID, incorporating one from our request if one exists. + request_id = request_id_header(env['HTTP_REQUEST_ID']) + env['ApplicationInsights.request.id'] = request_id + + start = Time.now + begin + status, headers, response = @app.call(env) + rescue Exception => ex + status = 500 + exception = ex + end + stop = Time.now + + start_time = start.iso8601(7) + duration = format_request_duration(stop - start) + success = status.to_i < 400 + + request = ::Rack::Request.new env + options = options_hash(request) + + data = request_data(request_id, start_time, duration, status, success, options) + context = telemetry_context(request_id, env['HTTP_REQUEST_ID']) + + @client.channel.write data, context, start_time + + if exception + @client.track_exception exception, handled_at: 'Unhandled' + raise exception + end + + [status, headers, response] + end + + private + + def sender=(sender) + if sender.is_a? Channel::AsynchronousSender + @sender = sender + @client.channel.queue.sender = @sender + end + end + + def client + @client + end + + def format_request_duration(duration_seconds) + if duration_seconds >= 86400 + # just return 1 day when it takes more than 1 day which should not happen for requests. 
+ return "%02d.%02d:%02d:%02d.%07d" % [1, 0, 0, 0, 0] + end + + Time.at(duration_seconds).gmtime.strftime("00.%H:%M:%S.%7N") + end + + def request_id_header(request_id) + valid_request_id_header = valid_request_id(request_id) + + length = valid_request_id_header ? 5 : 10 + id = SecureRandom.base64(length) + + if valid_request_id_header + request_id_has_end = %w[. _].include?(request_id[-1]) + request_id << '.' unless request_id_has_end + + return "#{request_id}#{id}_" + end + + "|#{id}." + end + + def valid_request_id(request_id) + request_id && request_id[0] == '|' + end + + def operation_id(id) + # Returns the root ID from the '|' to the first '.' if any. + root_start = id[0] == '|' ? 1 : 0 + + root_end = id.index('.') + root_end = root_end ? root_end - 1 : id.length - root_start + + id[root_start..root_end] + end + + def options_hash(request) + { + name: "#{request.request_method} #{request.path}", + http_method: request.request_method, + url: request.url + } + end + + def request_data(request_id, start_time, duration, status, success, options) + Channel::Contracts::RequestData.new( + :id => request_id || 'Null', + :duration => duration || '0:00:00:00.0000000', + :response_code => status || 200, + :success => success == nil ? 
true : success, + :name => options[:name], + :url => options[:url], + :properties => options[:properties] || {}, + :measurements => options[:measurements] || {}, + # Must initialize http_method after properties because it's actually stored in properties + :http_method => options[:http_method] + ) + end + + def telemetry_context(request_id, request_id_header) + context = Channel::TelemetryContext.new + context.instrumentation_key = @instrumentation_key + context.operation.id = operation_id(request_id) + context.operation.parent_id = request_id_header + + context + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/telemetry_client.rb b/source/code/plugin/lib/application_insights/telemetry_client.rb new file mode 100644 index 000000000..bd066ae70 --- /dev/null +++ b/source/code/plugin/lib/application_insights/telemetry_client.rb @@ -0,0 +1,232 @@ +require_relative 'channel/telemetry_context' +require_relative 'channel/telemetry_channel' +require_relative 'channel/contracts/page_view_data' +require_relative 'channel/contracts/remote_dependency_data' +require_relative 'channel/contracts/exception_data' +require_relative 'channel/contracts/exception_details' +require_relative 'channel/contracts/event_data' +require_relative 'channel/contracts/data_point' +require_relative 'channel/contracts/data_point_type' +require_relative 'channel/contracts/metric_data' +require_relative 'channel/contracts/message_data' +require_relative 'channel/contracts/stack_frame' +require_relative 'channel/contracts/request_data' +require_relative 'channel/contracts/severity_level' +require_relative 'channel/contracts/reopenings' + +module ApplicationInsights + # The telemetry client used for sending all types of telemetry. It serves as + # the main entry point for interacting with the Application Insights service. + class TelemetryClient + # Initializes a new instance of the class. 
+ # @param [String] instrumentation_key to identify which Application Insights + # application this data is for. + # @param [Channel::TelemetryChannel] telemetry_channel the optional telemetry + # channel to be used instead of constructing a default one. + def initialize(instrumentation_key = nil, telemetry_channel = nil) + @context = Channel::TelemetryContext.new + @context.instrumentation_key = instrumentation_key + @channel = telemetry_channel || Channel::TelemetryChannel.new + end + + # The context associated with this client. All data objects created by this + # client will be accompanied by this value. + # @return [Channel::TelemetryContext] the context instance. + attr_reader :context + + # The channel associated with this telemetry client. All data created by this + # client will be passed along with the {#context} object to + # {Channel::TelemetryChannel#write} + # @return [Channel::TelemetryChannel] the channel instance. + attr_reader :channel + + # Send information about the page viewed in the application (a web page for + # instance). + # @param [String] name the name of the page that was viewed. + # @param [String] url the URL of the page that was viewed. + # @param [Hash] options the options to create the + # {Channel::Contracts::PageViewData} object. + # @option options [Fixnum] :duration the duration of the page view in + # milliseconds. (defaults to: 0) + # @option options [Hash] :properties the set of custom properties the client + # wants attached to this data item. 
(defaults to: {}) + # @option options [Hash] :measurements the set of custom measurements the + # client wants to attach to this data item (defaults to: {}) + def track_page_view(name, url, options={}) + data_attributes = { + :name => name || 'Null', + :url => url, + :duration => options[:duration], + :properties => options[:properties] || {}, + :measurements => options[:measurements] || {} + } + data = Channel::Contracts::PageViewData.new data_attributes + self.channel.write(data, self.context) + end + + # Send information about a single exception that occurred in the application. + # @param [Exception] exception the exception that the client wants to send. + # @param [Hash] options the options to create the + # {Channel::Contracts::ExceptionData} object. + # @option options [String] :handled_at the type of exception + # (defaults to: 'UserCode') + # @option options [Hash] :properties the set of custom properties the client + # wants attached to this data item. (defaults to: {}) + # @option options [Hash] :measurements the set of custom measurements the + # client wants to attach to this data item (defaults to: {}) + def track_exception(exception, options={}) + return unless exception.is_a? 
Exception + + parsed_stack = [] + if exception.backtrace + frame_pattern = /^(?.*):(?\d+)(\.|:in `((?.*)'$))/ + + exception.backtrace.each_with_index do |frame, counter| + match = frame_pattern.match frame + stack_frame = Channel::Contracts::StackFrame.new( + :assembly => 'Unknown', + :file_name => match['file'], + :level => counter, + :line => match['line'], + :method => match['method'] + ) + + parsed_stack << stack_frame + end + end + + details = Channel::Contracts::ExceptionDetails.new( + :id => 1, + :outer_id => 0, + :type_name => exception.class.name, + :message => exception.message, + :has_full_stack => exception.backtrace != nil, + :stack => (exception.backtrace.join("\n") if exception.backtrace), + :parsed_stack => parsed_stack + ) + + data = Channel::Contracts::ExceptionData.new( + :exceptions => [details], + :properties => options[:properties] || {}, + :measurements => options[:measurements] || {}, + # Must initialize handled_at after properties because it's actually stored in properties + :handled_at => options.fetch(:handled_at, 'UserCode') + ) + + self.channel.write(data, self.context) + end + + # Send information about a single event that has occurred in the context of + # the application. + # @param [String] name the data to associate to this event. + # @param [Hash] options the options to create the + # {Channel::Contracts::EventData} object. + # @option options [Hash] :properties the set of custom properties the client + # wants attached to this data item. 
(defaults to: {}) + # @option options [Hash] :measurements the set of custom measurements the + # client wants to attach to this data item (defaults to: {}) + def track_event(name, options={}) + data = Channel::Contracts::EventData.new( + :name => name || 'Null', + :properties => options[:properties] || {}, + :measurements => options[:measurements] || {} + ) + + self.channel.write(data, self.context) + end + + # Send information about a single metric data point that was captured for + # the application. + # @param [String] name the name of the metric that was captured. + # @param [Fixnum] value the value of the metric that was captured. + # @param [Hash] options the options to create the + # {Channel::Contracts::MetricData} object. + # @option options [Channel::Contracts::DataPointType] :type the type of the + # metric (defaults to: {Channel::Contracts::DataPointType::AGGREGATION}) + # @option options [Fixnum] :count the number of metrics that were aggregated + # into this data point (defaults to: 0) + # @option options [Fixnum] :min the minimum of all metrics collected that + # were aggregated into this data point (defaults to: 0) + # @option options [Fixnum] :max the maximum of all metrics collected that + # were aggregated into this data point (defaults to: 0) + # @option options [Fixnum] :std_dev the standard deviation of all metrics + # collected that were aggregated into this data point (defaults to: 0) + # @option options [Hash] :properties the set of custom properties the client + # wants attached to this data item. 
(defaults to: {}) + # @option options [Hash] :measurements the set of custom measurements the + # client wants to attach to this data item (defaults to: {}) + def track_metric(name, value, options={}) + data_point = Channel::Contracts::DataPoint.new( + :name => name || 'Null', + :value => value || 0, + :kind => options[:type] || Channel::Contracts::DataPointType::AGGREGATION, + :count => options[:count], + :min => options[:min], + :max => options[:max], + :std_dev => options[:std_dev] + ) + + data = Channel::Contracts::MetricData.new( + :metrics => [data_point], + :properties => options[:properties] || {} + ) + + self.channel.write(data, self.context) + end + + # Sends a single trace statement. + # @param [String] name the trace statement. + # @param [Channel::Contracts::SeverityLevel] severity_level the severity level. + # @param [Hash] options the options to create the + # {Channel::Contracts::EventData} object. + # @option options [Hash] :properties the set of custom properties the client + # wants attached to this data item. (defaults to: {}) + def track_trace(name, severity_level = nil, options={}) + data = Channel::Contracts::MessageData.new( + :message => name || 'Null', + :severity_level => severity_level || Channel::Contracts::SeverityLevel::INFORMATION, + :properties => options[:properties] || {} + ) + + self.channel.write(data, self.context) + end + + # Sends a single request. + # @param [String] id the unique identifier of the request. + # @param (String) start_time the start time of the request. + # @param [String] duration the duration to process the request. + # @param [String] response_code the response code of the request. + # @param [Boolean] success indicates whether the request succeeds or not. + # @param [Hash] options the options to create the + # {Channel::Contracts::RequestData} object. + # @option options [String] :name the name of the request. + # @option options [String] :http_method the http method used for the request. 
+ # @option options [String] :url the url of the request. + # @option options [Hash] :properties the set of custom properties the client + # wants attached to this data item. (defaults to: {}) + # @option options [Hash] :measurements the set of custom measurements the + # client wants to attach to this data item (defaults to: {}) + def track_request(id, start_time, duration, response_code, success, options={}) + data = Channel::Contracts::RequestData.new( + :id => id || 'Null', + :duration => duration || '0:00:00:00.0000000', + :response_code => response_code || 200, + :success => success = nil ? true : success, + :name => options[:name], + :url => options[:url], + :properties => options[:properties] || {}, + :measurements => options[:measurements] || {}, + # Must initialize http_method after properties because it's actually stored in properties + :http_method => options[:http_method] + ) + + self.channel.write(data, self.context, start_time) + end + + # Flushes data in the queue. Data in the queue will be sent either immediately + # irrespective of what sender is being used. + def flush + self.channel.flush + end + end +end diff --git a/source/code/plugin/lib/application_insights/unhandled_exception.rb b/source/code/plugin/lib/application_insights/unhandled_exception.rb new file mode 100644 index 000000000..aa87b6f85 --- /dev/null +++ b/source/code/plugin/lib/application_insights/unhandled_exception.rb @@ -0,0 +1,49 @@ +require_relative 'telemetry_client' +require_relative 'channel/telemetry_channel' +require_relative 'channel/synchronous_queue' +require_relative 'channel/synchronous_sender' + +include ApplicationInsights + +module ApplicationInsights + module UnhandledException + @sender = nil + + # Auto collects unhandled exception and send to the Application Insights service. + # @param (string) instrumentation_key used to identify which Application + # Insights application this data is for. 
+ # @example + # require 'application_insights' + # ApplicationInsights::UnhandledException.collect('') + # raise Exception, 'Boom!' + def self.collect(instrumentation_key) + at_exit do + # Avoid sending exception more than once if this method got invoked multiple times + send(instrumentation_key) unless @sender + end + end + + # @api private + # Send the last raised exception to the Application Insights service if + # telemetry_sender is not customized. + # @param (string) instrumentation_key used to identify which Application + # Insights application this data is for. + # @param (SenderBase) telemetry_sender used to send the last raised exception. + def self.send(instrumentation_key, telemetry_sender = nil) + if $! && !$!.is_a?(SystemExit) && !$!.is_a?(SignalException) + if telemetry_sender + @sender = telemetry_sender + elsif !@sender + # Use a synchronized sender to guarantee the data would be sent out once flush + @sender = Channel::SynchronousSender.new + end + + queue = Channel::SynchronousQueue.new @sender + channel = Channel::TelemetryChannel.new nil, queue + client = TelemetryClient.new instrumentation_key, channel + client.track_exception($!, handled_at: 'Unhandled') + client.flush + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/version.rb b/source/code/plugin/lib/application_insights/version.rb new file mode 100644 index 000000000..d2d56e833 --- /dev/null +++ b/source/code/plugin/lib/application_insights/version.rb @@ -0,0 +1,3 @@ +module ApplicationInsights + VERSION = '0.5.7'.freeze +end From 6698fcd365328f31b7cbda6fec205cec1ef7933c Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Wed, 7 Nov 2018 16:21:53 -0800 Subject: [PATCH 030/160] Fix Telemetry Bug -- Initialize Telemetry Client after Initializing all required properties (#162) --- source/code/go/src/plugins/oms.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/source/code/go/src/plugins/oms.go 
b/source/code/go/src/plugins/oms.go index e0abaea1f..51a2bd47e 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -334,13 +334,6 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { ImageIDMap = make(map[string]string) NameIDMap = make(map[string]string) - ret, err := InitializeTelemetryClient(agentVersion) - if ret != 0 || err != nil { - message := fmt.Sprintf("Error During Telemetry Initialization :%s", err.Error()) - fmt.Printf(message) - Log(message) - } - pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { message := fmt.Sprintf("Error Reading plugin config path : %s \n", err.Error()) @@ -398,6 +391,13 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { Computer = strings.TrimSuffix(ToString(containerHostName), "\n") Log("Computer == %s \n", Computer) + ret, err := InitializeTelemetryClient(agentVersion) + if ret != 0 || err != nil { + message := fmt.Sprintf("Error During Telemetry Initialization :%s", err.Error()) + fmt.Printf(message) + Log(message) + } + // Initialize KubeAPI Client config, err := rest.InClusterConfig() if err != nil { From ad6bb933f64c7d32c3eb779d031327c76e12d2e4 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Mon, 12 Nov 2018 11:45:57 -0800 Subject: [PATCH 031/160] Fix kube events memory leak due to yaml serialization for > 5k events (#163) --- source/code/plugin/in_kube_events.rb | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index 6a6ae9296..5df31df95 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -10,7 +10,6 @@ class Kube_Event_Input < Input def initialize super - require 'yaml' require 'json' require_relative 'KubernetesApiClient' @@ -62,6 +61,7 @@ def enumerate(eventList = nil) eventStream = MultiEventStream.new events['items'].each do |items| record = {} + # - Not sure if 
ingestion has the below mapping for this custom type. Fix it as part of fixed type conversion record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated eventId = items['metadata']['uid'] + "/" + items['count'].to_s newEventQueryState.push(eventId) @@ -86,7 +86,7 @@ def enumerate(eventList = nil) end record['ClusterName'] = KubernetesApiClient.getClusterName record['ClusterId'] = KubernetesApiClient.getClusterId - eventStream.add(emitTime, record) if record + eventStream.add(emitTime, record) if record end router.emit_stream(@tag, eventStream) if eventStream end @@ -121,7 +121,10 @@ def getEventQueryState eventQueryState = [] begin if File.file?(@@KubeEventsStateFile) - eventQueryState = YAML.load_file(@@KubeEventsStateFile, []) + # Do not read the entire file in one shot as it spikes memory (50+MB) for ~5k events + File.foreach(@@KubeEventsStateFile) do |line| + eventQueryState.push(line.chomp) #puts will append newline which needs to be removed + end end rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s @@ -132,7 +135,12 @@ def getEventQueryState def writeEventQueryState(eventQueryState) begin - File.write(@@KubeEventsStateFile, eventQueryState.to_yaml) + if(!eventQueryState.nil? && !eventQueryState.empty?) 
+ # No need to close file handle (f) due to block scope + File.open(@@KubeEventsStateFile, "w") do |f| + f.puts(eventQueryState) + end + end rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) From eff92df54914482b91604b90622fd9fdf2d917eb Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Wed, 14 Nov 2018 15:48:23 -0800 Subject: [PATCH 032/160] Setting Timeout for HTTP Client in PostDataHelper in outoms go plugin(#164) --- source/code/go/src/plugins/utils.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/utils.go b/source/code/go/src/plugins/utils.go index 91e433a0f..85af80d7a 100644 --- a/source/code/go/src/plugins/utils.go +++ b/source/code/go/src/plugins/utils.go @@ -70,7 +70,10 @@ func CreateHTTPClient() { tlsConfig.BuildNameToCertificate() transport := &http.Transport{TLSClientConfig: tlsConfig} - HTTPClient = http.Client{Transport: transport} + HTTPClient = http.Client{ + Transport: transport, + Timeout: 30 * time.Second, + } Log("Successfully created HTTP Client") } From 9893e36d3aeb6a05259a45d449ad2b04453418ea Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 15 Nov 2018 17:01:18 -0800 Subject: [PATCH 033/160] Vishwa/perftelemetry 2 (#165) * add cpu usage telemetry for ds & rs * add cpu & memory usage telemetry for ds & rs --- .../code/plugin/ApplicationInsightsUtility.rb | 32 ++++++++++++ .../code/plugin/CAdvisorMetricsAPIClient.rb | 51 +++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 14fc9f2f8..78553a83f 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -5,6 +5,7 @@ class ApplicationInsightsUtility require_relative 'lib/application_insights' require_relative 'omslog' require_relative 'DockerApiClient' + require_relative 'oms_common' require 'json' 
require 'base64' @@ -20,6 +21,7 @@ class ApplicationInsightsUtility @@EnvApplicationInsightsKey = 'APPLICATIONINSIGHTS_AUTH' @@CustomProperties = {} @@Tc = nil + @@hostName = (OMS::Common.get_hostname) def initialize end @@ -124,6 +126,36 @@ def sendTelemetry(pluginName, properties) end end + #Method to send metric. It will merge passed-in properties with common custom properties + def sendMetricTelemetry(metricName, metricValue, properties) + begin + if (metricName.empty? || metricName.nil?) + $log.warn("SendMetricTelemetry: metricName is missing") + return + end + if @@CustomProperties.empty? || @@CustomProperties.nil? + initializeUtility + end + telemetryProps = {} + telemetryProps["Computer"] = @@hostName + # add common dimensions + @@CustomProperties.each{ |k,v| telemetryProps[k]=v} + # add passed-in dimensions if any + if (!properties.nil? && !properties.empty?) + properties.each{ |k,v| telemetryProps[k]=v} + end + if !(@@Tc.nil?) + @@Tc.track_metric metricName, metricValue, + :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, + :properties => telemetryProps + @@Tc.flush + $log.info("AppInsights metric Telemetry #{metricName} sent successfully") + end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendMetricTelemetry - error: #{errorStr}") + end + end + def getWorkspaceId() begin adminConf = {} diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index c10cbad4a..9e47e5a9e 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -12,6 +12,7 @@ class CAdvisorMetricsAPIClient require_relative 'oms_common' require_relative 'KubernetesApiClient' + require_relative 'ApplicationInsightsUtility' @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt" @Log = Logger.new(@LogPath, 2, 10*1048576) #keep last 2 files, max log file size = 10M @@ -19,6 +20,8 @@ class CAdvisorMetricsAPIClient 
@@rxBytesTimeLast = nil @@txBytesLast = nil @@txBytesTimeLast = nil + @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i + @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i def initialize end @@ -97,10 +100,15 @@ def getMetrics() def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn) metricItems = [] clusterId = KubernetesApiClient.getClusterId + timeDifference = (DateTime.now.to_time.to_i - @@telemetryCpuMetricTimeTracker).abs + timeDifferenceInMinutes = timeDifference/60 begin metricInfo = metricJSON metricInfo['pods'].each do |pod| podUid = pod['podRef']['uid'] + podName = pod['podRef']['name'] + podNamespace = pod['podRef']['namespace'] + if (!pod['containers'].nil?) pod['containers'].each do |container| #cpu metric @@ -124,9 +132,29 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met metricProps['Collections'].push(metricCollections) metricItem['DataItems'].push(metricProps) metricItems.push(metricItem) + #Telemetry about agent performance + begin + # we can only do this much now. Ideally would like to use the docker image repository to find our pods/containers + # cadvisor does not have pod/container metadata. 
so would need more work to cache as pv & use + if (podName.downcase.start_with?('omsagent-') && podNamespace.eql?("kube-system") && containerName.downcase.start_with?('omsagent') && metricNametoReturn.eql?("cpuUsageNanoCores")) + + if (timeDifferenceInMinutes >= 10) + telemetryProps = {} + telemetryProps['PodName'] = podName + telemetryProps['ContainerName'] = containerName + ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) + end + end + rescue => errorStr + $log.warn("Exception while generating Telemetry from getcontainerCpuMetricItems failed: #{errorStr} for metric #{cpuMetricNameToCollect}") + end end end end + # reset time outside pod iterator as we use one timer per metric for 2 pods (ds & rs) + if (timeDifferenceInMinutes >= 10 && metricNametoReturn.eql?("cpuUsageNanoCores")) + @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i + end rescue => error @Log.warn("getcontainerCpuMetricItems failed: #{error} for metric #{cpuMetricNameToCollect}") return metricItems @@ -137,10 +165,14 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollect, metricNametoReturn) metricItems = [] clusterId = KubernetesApiClient.getClusterId + timeDifference = (DateTime.now.to_time.to_i - @@telemetryMemoryMetricTimeTracker).abs + timeDifferenceInMinutes = timeDifference/60 begin metricInfo = metricJSON metricInfo['pods'].each do |pod| podUid = pod['podRef']['uid'] + podName = pod['podRef']['name'] + podNamespace = pod['podRef']['namespace'] if (!pod['containers'].nil?) 
pod['containers'].each do |container| containerName = container['name'] @@ -164,9 +196,28 @@ def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollec metricProps['Collections'].push(metricCollections) metricItem['DataItems'].push(metricProps) metricItems.push(metricItem) + #Telemetry about agent performance + begin + # we can only do this much now. Ideally would like to use the docker image repository to find our pods/containers + # cadvisor does not have pod/container metadata. so would need more work to cache as pv & use + if (podName.downcase.start_with?('omsagent-') && podNamespace.eql?("kube-system") && containerName.downcase.start_with?('omsagent') && metricNametoReturn.eql?("memoryRssBytes")) + if (timeDifferenceInMinutes >= 10) + telemetryProps = {} + telemetryProps['PodName'] = podName + telemetryProps['ContainerName'] = containerName + ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) + end + end + rescue => errorStr + $log.warn("Exception while generating Telemetry from getcontainerMemoryMetricItems failed: #{errorStr} for metric #{memoryMetricNameToCollect}") + end end end end + # reset time outside pod iterator as we use one timer per metric for 2 pods (ds & rs) + if (timeDifferenceInMinutes >= 10 && metricNametoReturn.eql?("memoryRssBytes")) + @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i + end rescue => error @Log.warn("getcontainerMemoryMetricItems failed: #{error} for metric #{memoryMetricNameToCollect}") @Log.warn metricJSON From 4f3c8988e4d1a989f8e9ab0e897443f1f4a94563 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Tue, 27 Nov 2018 10:39:41 -0800 Subject: [PATCH 034/160] environment variable fix (#166) * environment variable fix * updating agent version --- installer/conf/td-agent-bit.conf | 2 +- source/code/plugin/in_containerinventory.rb | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf 
b/installer/conf/td-agent-bit.conf index 2a6199987..fe174f9a5 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -28,4 +28,4 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion ciprod10162018-2 + AgentVersion internaltest1126 diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb index 43811e1e1..f501421a2 100644 --- a/source/code/plugin/in_containerinventory.rb +++ b/source/code/plugin/in_containerinventory.rb @@ -56,6 +56,11 @@ def obtainContainerConfig(instance, container) envValue = configValue['Env'] envValueString = (envValue.nil?) ? "" : envValue.to_s + # Skip environment variable processing if it contains the flag AZMON_COLLECT_ENV=FALSE + if /AZMON_COLLECT_ENV=FALSE/i.match(envValueString) + envValueString = ["AZMON_COLLECT_ENV=FALSE"] + $log.warn("Environment Variable collection for container: #{container['Id']} skipped because AZMON_COLLECT_ENV is set to false") + end # Restricting the ENV string value to 200kb since the size of this string can go very high if envValueString.length > 200000 envValueStringTruncated = envValueString.slice(0..200000) From 5e16467696df96d59d32d7219b901c1450b44201 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Tue, 27 Nov 2018 11:20:51 -0800 Subject: [PATCH 035/160] Fixing a bug where we were crashing due to container statuses not present when not was lost (#167) --- source/code/plugin/in_kube_podinventory.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 2cd1e1bc3..ec76bac61 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -101,7 +101,7 @@ def parse_and_emit_records(podInventory, serviceList) #podStatus # the below is for accounting 'NodeLost' scenario, where-in the pod(s) in the lost node is still being reported as running 
podReadyCondition = true - if !items['status']['reason'].nil? && items['status']['reason'] == "NodeLost" + if !items['status']['reason'].nil? && items['status']['reason'] == "NodeLost" && !items['status']['conditions'].nil? items['status']['conditions'].each do |condition| if condition['type'] == "Ready" && condition['status'] == "False" podReadyCondition = false From b482b1ecb667d4f75cd3902c5baf6debd25990ef Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 28 Nov 2018 17:37:41 -0800 Subject: [PATCH 036/160] Updating title --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0c543e716..8755cedb3 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# AKS Container Health monitoring +# Azure Monitor for Containers ## Code of Conduct @@ -40,4 +40,4 @@ additional questions or comments. - Kubernetes RBAC enablement - Latest released omsagent (1.6.0-42) - Bug fix so that we do not collect kube-system namespace container logs when kube api calls fail occasionally (Bug #215107) -- .yaml changes (for RBAC) \ No newline at end of file +- .yaml changes (for RBAC) From d75ba897b9ccd58a4ad8a049b87b09a990ea7934 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 28 Nov 2018 17:40:41 -0800 Subject: [PATCH 037/160] updating right versions for last release --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8755cedb3..ace2ff57b 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ additional questions or comments. 
## Release History -### 10/16/2018 - Version microsoft/oms:ciprod10162018 +### 10/16/2018 - Version microsoft/oms:ciprod10162018-2 - Fix for containerID being 00000-00000-00000 - Move from fluentD to fluentbit for container log collection - Seg fault fixes in json parsing for container inventory & container image inventory From cbd815c90bea4f7878eb6c0908f3d0456737dbd5 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Thu, 29 Nov 2018 11:25:15 -0800 Subject: [PATCH 038/160] Updating the break condition to look for end of response (#168) * Updating the break condition to look for end of response * changes for docker response --- source/code/plugin/DockerApiClient.rb | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index b93411980..e12ef13ec 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -19,7 +19,7 @@ def initialize class << self # Make docker socket call for requests - def getResponse(request, isMultiJson) + def getResponse(request, isMultiJson, isVersion) begin socket = UNIXSocket.new(@@SocketPath) dockerResponse = "" @@ -36,8 +36,9 @@ def getResponse(request, isMultiJson) rescue Timeout::Error $log.warn("Socket read timedout for request: #{request} @ #{Time.now.utc.iso8601}") isTimeOut = true + break end - break if responseChunk.length < @@ChunkSize + break if (isVersion)? (responseChunk.length < @@ChunkSize) : (responseChunk.end_with? "0\r\n\r\n") end socket.close return (isTimeOut)? 
nil : parseResponse(dockerResponse, isMultiJson) @@ -71,7 +72,7 @@ def parseResponse(dockerResponse, isMultiJson) def getDockerHostName() dockerHostName = "" request = DockerApiRestHelper.restDockerInfo - response = getResponse(request, false) + response = getResponse(request, false, false) if (response != nil) dockerHostName = response['Name'] end @@ -81,7 +82,7 @@ def getDockerHostName() def listContainers() ids = [] request = DockerApiRestHelper.restDockerPs - containers = getResponse(request, true) + containers = getResponse(request, true, false) if !containers.nil? && !containers.empty? containers.each do |container| ids.push(container['Id']) @@ -121,7 +122,7 @@ def getImageIdMap() result = nil begin request = DockerApiRestHelper.restDockerImages - images = getResponse(request, true) + images = getResponse(request, true, false) if !images.nil? && !images.empty? result = {} images.each do |image| @@ -144,13 +145,13 @@ def getImageIdMap() def dockerInspectContainer(id) request = DockerApiRestHelper.restDockerInspect(id) - return getResponse(request, false) + return getResponse(request, false, false) end # This method returns docker version and docker api version for telemetry def dockerInfo() request = DockerApiRestHelper.restDockerVersion - response = getResponse(request, false) + response = getResponse(request, false, true) dockerInfo = {} if (response != nil) dockerInfo['Version'] = response['Version'] @@ -159,4 +160,4 @@ def dockerInfo() return dockerInfo end end -end \ No newline at end of file +end From d0d5bf78798e3d90655fc08f8a1666daa30c47d3 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 29 Nov 2018 12:01:11 -0800 Subject: [PATCH 039/160] updating AgentVersion for telemetry --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index fe174f9a5..c92bcdf07 100644 --- a/installer/conf/td-agent-bit.conf +++ 
b/installer/conf/td-agent-bit.conf @@ -28,4 +28,4 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion internaltest1126 + AgentVersion ciprod11292018 From bfe27e5c6f7c3a97dc98f9e7296f25ea2c1d5a36 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 29 Nov 2018 12:16:35 -0800 Subject: [PATCH 040/160] Updating readme for latest release changes --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index ace2ff57b..17a3cf3ad 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,22 @@ additional questions or comments. ## Release History +### 11/29/2018 - Version microsoft/oms:ciprod11292018 +- Disable Container Image inventory workflow +- Kube_Events memory leak fix for replica-set +- Timeout (30 secs) for outOMS +- Reduce critical lock duration for quicker log processing (for log enrichment) +- Disable OMI based Container Inventory workflow to fluentD based Container Inventory +- Moby support for the new Container Inventory workflow +- Ability to disable environment variables collection by individual container +- Bugfix - No inventory data due to container status(es) not available +- Agent telemetry cpu usage & memory usage (for DaemonSet and ReplicaSet) +- Agent telemetry - log generation rate +- Agent telemetry - container count per node +- Agent telemetry - collect container logs from agent (DaemonSet and ReplicaSet) as AI trace +- Agent telemetry - errors/exceptions for Container Inventory workflow +- Agent telemetry - Container Inventory Heartbeat + ### 10/16/2018 - Version microsoft/oms:ciprod10162018-2 - Fix for containerID being 00000-00000-00000 - Move from fluentD to fluentbit for container log collection From a621f883b0059db69ea1c2df48eef9671bc07b7e Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Sun, 16 Dec 2018 20:17:56 -0800 Subject: [PATCH 041/160] Changes - (#173) * use /var/log for state * new metric ContainerLogsAgentSideLatencyMs * new field 'timeOfComand' --- 
installer/conf/td-agent-bit.conf | 2 +- source/code/go/src/plugins/oms.go | 43 ++++++++++++++++++------- source/code/go/src/plugins/telemetry.go | 12 +++++++ 3 files changed, 45 insertions(+), 12 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index c3252a185..b6b9bcc44 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -8,7 +8,7 @@ Name tail Tag oms.container.log.* Path /var/log/containers/*.log - DB /var/opt/microsoft/docker-cimprov/state/fblogs.db + DB /var/log/omsagent-fblogs.db Parser docker Mem_Buf_Limit 30m Path_Key filepath diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 9876acc42..30e844915 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -77,9 +77,10 @@ var ( // DataItem represents the object corresponding to the json that is sent by fluentbit tail plugin type DataItem struct { - LogEntry string `json:"LogEntry"` - LogEntrySource string `json:"LogEntrySource"` - LogEntryTimeStamp string `json:"LogEntryTimeStamp"` + LogEntry string `json:"LogEntry"` + LogEntrySource string `json:"LogEntrySource"` + LogEntryTimeStamp string `json:"LogEntryTimeStamp"` + LogEntryTimeOfCommand string `json:"TimeOfCommand"` ID string `json:"Id"` Image string `json:"Image"` Name string `json:"Name"` @@ -204,6 +205,8 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { start := time.Now() var dataItems []DataItem + var maxLatency float64 + var maxLatencyContainer string ignoreIDSet := make(map[string]bool) imageIDMap := make(map[string]string) nameIDMap := make(map[string]string) @@ -248,18 +251,32 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { Log("ContainerId %s not present in Map ", containerID) } + dataItem := DataItem{ - ID: stringMap["Id"], - LogEntry: stringMap["LogEntry"], - LogEntrySource: stringMap["LogEntrySource"], - LogEntryTimeStamp: 
stringMap["LogEntryTimeStamp"], - SourceSystem: stringMap["SourceSystem"], - Computer: Computer, - Image: stringMap["Image"], - Name: stringMap["Name"], + ID: stringMap["Id"], + LogEntry: stringMap["LogEntry"], + LogEntrySource: stringMap["LogEntrySource"], + LogEntryTimeStamp: stringMap["LogEntryTimeStamp"], + LogEntryTimeOfCommand: start.Format(time.RFC3339), + SourceSystem: stringMap["SourceSystem"], + Computer: Computer, + Image: stringMap["Image"], + Name: stringMap["Name"], } dataItems = append(dataItems, dataItem) + loggedTime, e := time.Parse(time.RFC3339, dataItem.LogEntryTimeStamp) + if e!= nil { + message := fmt.Sprintf("Error while converting LogEntryTimeStamp for telemetry purposes: %s", e.Error()) + Log(message) + SendException(message) + } else { + ltncy := float64(start.Sub(loggedTime) / time.Millisecond) + if ltncy >= maxLatency { + maxLatency = ltncy + maxLatencyContainer = dataItem.Name + "=" + dataItem.ID + } + } } if len(dataItems) > 0 { @@ -302,6 +319,10 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { ContainerLogTelemetryMutex.Lock() FlushedRecordsCount += float64(numRecords) FlushedRecordsTimeTaken += float64(elapsed / time.Millisecond) + if maxLatency >= AgentLogProcessingMaxLatencyMs { + AgentLogProcessingMaxLatencyMs = maxLatency + AgentLogProcessingMaxLatencyMsContainer = maxLatencyContainer + } ContainerLogTelemetryMutex.Unlock() } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 5952ac9ac..0d5513362 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -17,6 +17,10 @@ var ( FlushedRecordsCount float64 // FlushedRecordsTimeTaken indicates the cumulative time taken to flush the records for the current period FlushedRecordsTimeTaken float64 + // This is telemetry for how old/latent logs we are processing in milliseconds (max over a period of time) + AgentLogProcessingMaxLatencyMs float64 + // This is telemetry 
for which container logs were latent (max over a period of time) + AgentLogProcessingMaxLatencyMsContainer string // CommonProperties indicates the dimensions that are sent with every event/metric CommonProperties map[string]string // TelemetryClient is the client used to send the telemetry @@ -35,6 +39,7 @@ const ( envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" metricNameAvgLogGenerationRate = "ContainerLogsGeneratedPerSec" + metricNameAgentLogProcessingMaxLatencyMs = "ContainerLogsAgentSideLatencyMs" defaultTelemetryPushIntervalSeconds = 300 eventNameContainerLogInit = "ContainerLogPluginInitialized" @@ -62,12 +67,19 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { logRate := FlushedRecordsCount / float64(elapsed/time.Second) FlushedRecordsCount = 0.0 FlushedRecordsTimeTaken = 0.0 + logLatencyMs := AgentLogProcessingMaxLatencyMs + logLatencyMsContainer := AgentLogProcessingMaxLatencyMsContainer + AgentLogProcessingMaxLatencyMs = 0 + AgentLogProcessingMaxLatencyMsContainer = "" ContainerLogTelemetryMutex.Unlock() flushRateMetric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) TelemetryClient.Track(flushRateMetric) logRateMetric := appinsights.NewMetricTelemetry(metricNameAvgLogGenerationRate, logRate) TelemetryClient.Track(logRateMetric) + logLatencyMetric := appinsights.NewMetricTelemetry(metricNameAgentLogProcessingMaxLatencyMs, logLatencyMs) + logLatencyMetric.Properties["Container"] = logLatencyMsContainer + TelemetryClient.Track(logLatencyMetric) start = time.Now() } } From c9cf4fd7e5b3176136b47390ba405ee6afd6719b Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Mon, 17 Dec 2018 13:58:09 -0800 Subject: [PATCH 042/160] Rashmi/kubenodeinventory (#174) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner 
references as controller id --- installer/conf/container.conf | 23 ---------- installer/conf/kube.conf | 13 ++++++ .../code/plugin/ApplicationInsightsUtility.rb | 6 +-- source/code/plugin/in_kube_nodes.rb | 45 ++++++++++++++++--- source/code/plugin/in_kube_podinventory.rb | 20 +++++++++ 5 files changed, 76 insertions(+), 31 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 798bd8eb6..091753230 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -15,16 +15,6 @@ log_level debug -# Container host inventory - - type omi - run_interval 60s - tag oms.api.ContainerNodeInventory - items [ - ["root/cimv2","Container_HostInventory"] - ] - - #cadvisor perf type cadvisorperf @@ -33,19 +23,6 @@ log_level debug - - type out_oms_api - log_level debug - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer - buffer_queue_limit 20 - flush_interval 20s - retry_limit 10 - retry_wait 15s - max_retry_wait 9m - - type out_oms log_level debug diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 94fe2ef0b..22c51ad0e 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -118,6 +118,19 @@ max_retry_wait 9m + + type out_oms_api + log_level debug + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer + buffer_queue_limit 20 + flush_interval 20s + retry_limit 10 + retry_wait 15s + max_retry_wait 9m + + type out_oms log_level debug diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 78553a83f..76e0b2926 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -83,7 +83,7 @@ def sendHeartBeatEvent(pluginName) end end - def sendCustomEvent(pluginName, properties) + def sendCustomMetric(pluginName, properties) begin if !(@@Tc.nil?) 
@@Tc.track_metric 'LastProcessedContainerInventoryCount', properties['ContainerCount'], @@ -93,7 +93,7 @@ def sendCustomEvent(pluginName, properties) $log.info("AppInsights Container Count Telemetry sent successfully") end rescue => errorStr - $log.warn("Exception in AppInsightsUtility: sendCustomEvent - error: #{errorStr}") + $log.warn("Exception in AppInsightsUtility: sendCustomMetric - error: #{errorStr}") end end @@ -120,7 +120,7 @@ def sendTelemetry(pluginName, properties) end @@CustomProperties['Computer'] = properties['Computer'] sendHeartBeatEvent(pluginName) - sendCustomEvent(pluginName, properties) + sendCustomMetric(pluginName, properties) rescue => errorStr $log.warn("Exception in AppInsightsUtility: sendTelemetry - error: #{errorStr}") end diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index edbbdd37f..1c792d0da 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -6,12 +6,15 @@ module Fluent class Kube_nodeInventory_Input < Input Plugin.register_input('kubenodeinventory', self) + @@ContainerNodeInventoryTag = 'oms.api.ContainerNodeInventory' + def initialize super require 'yaml' require 'json' require_relative 'KubernetesApiClient' + require_relative 'ApplicationInsightsUtility' require_relative 'oms_common' require_relative 'omslog' end @@ -29,6 +32,7 @@ def start @condition = ConditionVariable.new @mutex = Mutex.new @thread = Thread.new(&method(:run_periodic)) + @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i end end @@ -46,15 +50,22 @@ def enumerate currentTime = Time.now emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 - $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo('nodes').body) - $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + telemetrySent = false + 
$log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") + nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo('nodes').body) + $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") begin if(!nodeInventory.empty?) eventStream = MultiEventStream.new + containerNodeInventoryEventStream = MultiEventStream.new #get node inventory nodeInventory['items'].each do |items| record = {} + # Sending records for ContainerNodeInventory + containerNodeInventoryRecord = {} + containerNodeInventoryRecord['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated + containerNodeInventoryRecord['Computer'] = items['metadata']['name'] + record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated record['Computer'] = items['metadata']['name'] record['ClusterName'] = KubernetesApiClient.getClusterName @@ -89,16 +100,40 @@ def enumerate end - record['KubeletVersion'] = items['status']['nodeInfo']['kubeletVersion'] - record['KubeProxyVersion'] = items['status']['nodeInfo']['kubeProxyVersion'] + nodeInfo = items['status']['nodeInfo'] + record['KubeletVersion'] = nodeInfo['kubeletVersion'] + record['KubeProxyVersion'] = nodeInfo['kubeProxyVersion'] + containerNodeInventoryRecord['OperatingSystem'] = nodeInfo['osImage'] + dockerVersion = nodeInfo['containerRuntimeVersion'] + dockerVersion.slice! "docker://" + containerNodeInventoryRecord['DockerVersion'] = dockerVersion + # ContainerNodeInventory data for docker version and operating system. 
+ containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryRecord) if containerNodeInventoryRecord + wrapper = { "DataType"=>"KUBE_NODE_INVENTORY_BLOB", "IPName"=>"ContainerInsights", "DataItems"=>[record.each{|k,v| record[k]=v}] } eventStream.add(emitTime, wrapper) if wrapper + # Adding telemetry to send node telemetry every 5 minutes + timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference/60 + if (timeDifferenceInMinutes >= 5) + properties = {} + properties["Computer"] = record["Computer"] + ApplicationInsightsUtility.sendMetricTelemetry("KubeletVersion", record["KubeletVersion"] , properties) + capacityInfo = items['status']['capacity'] + ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"] , properties) + ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"] , properties) + telemetrySent = true + end end router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream + if telemetrySent == true + @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i + end @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp('true') == 0 && eventStream.count > 0) $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index ec76bac61..c6873e8fe 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -10,8 +10,10 @@ def initialize super require 'yaml' require 'json' + require 'set' require_relative 'KubernetesApiClient' + require_relative 'ApplicationInsightsUtility' require_relative 'oms_common' require_relative 'omslog' end @@ -29,6 +31,7 @@ def start @condition = ConditionVariable.new @mutex = Mutex.new @thread = Thread.new(&method(:run_periodic)) + @@podTelemetryTimeTracker = DateTime.now.to_time.to_i end end @@ -71,6 +74,8 @@ def parse_and_emit_records(podInventory, serviceList) emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 eventStream = MultiEventStream.new + controllerSet = Set.new [] + telemetryFlush = false begin #begin block start podInventory['items'].each do |items| #podInventory block start records = [] @@ -78,6 +83,7 @@ def parse_and_emit_records(podInventory, serviceList) record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated record['Name'] = items['metadata']['name'] podNameSpace = items['metadata']['namespace'] + if podNameSpace.eql?("kube-system") && !items['metadata'].key?("ownerReferences") # The above case seems to be the only case where you have horizontal scaling of pods # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash @@ -129,9 +135,18 @@ def parse_and_emit_records(podInventory, serviceList) record['ClusterId'] = KubernetesApiClient.getClusterId record['ClusterName'] = KubernetesApiClient.getClusterName record['ServiceName'] = getServiceNameFromLabels(items['metadata']['namespace'], items['metadata']['labels'], serviceList) + # Adding telemetry to send pod telemetry every 5 minutes + 
timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference/60 + if (timeDifferenceInMinutes >= 5) + telemetryFlush = true + end if !items['metadata']['ownerReferences'].nil? record['ControllerKind'] = items['metadata']['ownerReferences'][0]['kind'] record['ControllerName'] = items['metadata']['ownerReferences'][0]['name'] + if telemetryFlush == true + controllerSet.add(record['ControllerKind'] + record['ControllerName']) + end end podRestartCount = 0 record['PodRestartCount'] = 0 @@ -191,6 +206,11 @@ def parse_and_emit_records(podInventory, serviceList) end end #podInventory block end router.emit_stream(@tag, eventStream) if eventStream + if telemetryFlush == true + ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory['items'].length , {}) + ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length , {}) + @@podTelemetryTimeTracker = DateTime.now.to_time.to_i + end @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp('true') == 0 && eventStream.count > 0) $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") From df6f1228a4649df3fb1bae1c9ea02f22daca8efd Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 20 Dec 2018 15:27:18 -0800 Subject: [PATCH 043/160] Get cpuusage from usageseconds (#175) --- .../code/plugin/CAdvisorMetricsAPIClient.rb | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index 9e47e5a9e..03d6f89f5 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -20,8 +20,11 @@ class CAdvisorMetricsAPIClient @@rxBytesTimeLast = nil @@txBytesLast = nil @@txBytesTimeLast = nil + @@nodeCpuUsageNanoSecondsLast = nil + @@nodeCpuUsageNanoSecondsTimeLast = nil @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i + def initialize end @@ -73,7 +76,7 @@ def getMetrics() metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "rssBytes", "memoryRssBytes")) metricDataItems.concat(getContainerStartTimeMetricItems(metricInfo, hostName, "restartTimeEpoch")) - metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "cpu", "usageNanoCores", "cpuUsageNanoCores")) + metricDataItems.push(getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", "cpuUsageNanoCores")) metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "workingSetBytes", "memoryWorkingSetBytes")) metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "rssBytes", "memoryRssBytes")) metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "rxBytes", "networkRxBytes")) @@ -274,24 +277,41 @@ def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToColl metricValue = node[metricCategory][metricNameToCollect] metricTime = 
node[metricCategory]['time'] - if !(metricNameToCollect == "rxBytes" || metricNameToCollect == "txBytes" ) - @Log.warn("getNodeMetricItemRate : rateMetric is supported only for rxBytes & txBytes and not for #{metricNameToCollect}") + if !(metricNameToCollect == "rxBytes" || metricNameToCollect == "txBytes" || metricNameToCollect == "usageCoreNanoSeconds" ) + @Log.warn("getNodeMetricItemRate : rateMetric is supported only for rxBytes, txBytes & usageCoreNanoSeconds and not for #{metricNameToCollect}") return nil elsif metricNameToCollect == "rxBytes" - if @@rxBytesLast.nil? || @@rxBytesTimeLast.nil? + if @@rxBytesLast.nil? || @@rxBytesTimeLast.nil? || @@rxBytesLast > metricValue #when kubelet is restarted the last condition will be true @@rxBytesLast = metricValue @@rxBytesTimeLast = metricTime return nil else - metricValue = ((metricValue - @@rxBytesLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@rxBytesTimeLast).to_time) + metricRateValue = ((metricValue - @@rxBytesLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@rxBytesTimeLast).to_time) + @@rxBytesLast = metricValue + @@rxBytesTimeLast = metricTime + metricValue = metricRateValue end - else - if @@txBytesLast.nil? || @@txBytesTimeLast.nil? + elsif metricNameToCollect == "txBytes" + if @@txBytesLast.nil? || @@txBytesTimeLast.nil? || @@txBytesLast > metricValue #when kubelet is restarted the last condition will be true @@txBytesLast = metricValue @@txBytesTimeLast = metricTime return nil else - metricValue = ((metricValue - @@txBytesLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@txBytesTimeLast).to_time) + metricRateValue = ((metricValue - @@txBytesLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@txBytesTimeLast).to_time) + @@txBytesLast = metricValue + @@txBytesTimeLast = metricTime + metricValue = metricRateValue + end + else + if @@nodeCpuUsageNanoSecondsLast.nil? || @@nodeCpuUsageNanoSecondsTimeLast.nil? 
|| @@nodeCpuUsageNanoSecondsLast > metricValue #when kubelet is restarted the last condition will be true + @@nodeCpuUsageNanoSecondsLast = metricValue + @@nodeCpuUsageNanoSecondsTimeLast = metricTime + return nil + else + metricRateValue = ((metricValue - @@nodeCpuUsageNanoSecondsLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@nodeCpuUsageNanoSecondsTimeLast).to_time) + @@nodeCpuUsageNanoSecondsLast = metricValue + @@nodeCpuUsageNanoSecondsTimeLast = metricTime + metricValue = metricRateValue end end From dac99311485f2600f9a1fd7b6c48470ada40e8ef Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 21 Dec 2018 10:46:56 -0800 Subject: [PATCH 044/160] Rashmi/kubenodeinventory (#176) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs --- .../code/plugin/ApplicationInsightsUtility.rb | 28 +++++++++++++------ source/code/plugin/DockerApiClient.rb | 3 +- source/code/plugin/in_containerinventory.rb | 1 + source/code/plugin/in_kube_events.rb | 6 ++++ source/code/plugin/in_kube_nodes.rb | 3 ++ source/code/plugin/in_kube_podinventory.rb | 6 ++++ source/code/plugin/in_kube_services.rb | 4 +++ 7 files changed, 42 insertions(+), 9 deletions(-) diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 76e0b2926..2b2db673b 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -13,12 +13,12 @@ class ApplicationInsightsUtility @@Exception = 'ExceptionEvent' @@AcsClusterType = 'ACS' @@AksClusterType = 'AKS' - @@DaemonsetControllerType = 'DaemonSet' 
@OmsAdminFilePath = '/etc/opt/microsoft/omsagent/conf/omsadmin.conf' @@EnvAcsResourceName = 'ACS_RESOURCE_NAME' @@EnvAksRegion = 'AKS_REGION' @@EnvAgentVersion = 'AGENT_VERSION' @@EnvApplicationInsightsKey = 'APPLICATIONINSIGHTS_AUTH' + @@EnvControllerType = 'CONTROLLER_TYPE' @@CustomProperties = {} @@Tc = nil @@hostName = (OMS::Common.get_hostname) @@ -54,12 +54,10 @@ def initializeUtility() @@CustomProperties["ClusterName"] = clusterName @@CustomProperties["Region"] = ENV[@@EnvAksRegion] end - @@CustomProperties['ControllerType'] = @@DaemonsetControllerType - dockerInfo = DockerApiClient.dockerInfo - @@CustomProperties['DockerVersion'] = dockerInfo['Version'] - @@CustomProperties['DockerApiVersion'] = dockerInfo['ApiVersion'] + getDockerInfo() @@CustomProperties['WorkspaceID'] = getWorkspaceId @@CustomProperties['AgentVersion'] = ENV[@@EnvAgentVersion] + @@CustomProperties['ControllerType'] = ENV[@@EnvControllerType] encodedAppInsightsKey = ENV[@@EnvApplicationInsightsKey] if !encodedAppInsightsKey.nil? decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey) @@ -70,6 +68,14 @@ def initializeUtility() end end + def getDockerInfo() + dockerInfo = DockerApiClient.dockerInfo + if (!dockerInfo.nil? && !dockerInfo.empty?) + @@CustomProperties['DockerVersion'] = dockerInfo['Version'] + @@CustomProperties['DockerApiVersion'] = dockerInfo['ApiVersion'] + end + end + def sendHeartBeatEvent(pluginName) begin eventName = pluginName + @@HeartBeat @@ -100,7 +106,9 @@ def sendCustomMetric(pluginName, properties) def sendExceptionTelemetry(errorStr) begin if @@CustomProperties.empty? || @@CustomProperties.nil? - initializeUtility + initializeUtility() + elsif @@CustomProperties['DockerVersion'].nil? + getDockerInfo() end if !(@@Tc.nil?) @@Tc.track_exception errorStr , :properties => @@CustomProperties @@ -116,7 +124,9 @@ def sendExceptionTelemetry(errorStr) def sendTelemetry(pluginName, properties) begin if @@CustomProperties.empty? || @@CustomProperties.nil? 
- initializeUtility + initializeUtility() + elsif @@CustomProperties['DockerVersion'].nil? + getDockerInfo() end @@CustomProperties['Computer'] = properties['Computer'] sendHeartBeatEvent(pluginName) @@ -134,7 +144,9 @@ def sendMetricTelemetry(metricName, metricValue, properties) return end if @@CustomProperties.empty? || @@CustomProperties.nil? - initializeUtility + initializeUtility() + elsif @@CustomProperties['DockerVersion'].nil? + getDockerInfo() end telemetryProps = {} telemetryProps["Computer"] = @@hostName diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index e12ef13ec..903256f6d 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -10,10 +10,11 @@ class DockerApiClient require_relative 'DockerApiRestHelper' require_relative 'ApplicationInsightsUtility' - @@SocketPath = "/var/run/docker.sock" + @@SocketPath = "/var/run/host/docker.sock" @@ChunkSize = 4096 @@TimeoutInSeconds = 5 @@PluginName = 'ContainerInventory' + def initialize end diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb index f501421a2..a38697741 100644 --- a/source/code/plugin/in_containerinventory.rb +++ b/source/code/plugin/in_containerinventory.rb @@ -19,6 +19,7 @@ def initialize require_relative 'ContainerInventoryState' require_relative 'ApplicationInsightsUtility' require_relative 'omslog' + end config_param :run_interval, :time, :default => '1m' diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index 5df31df95..b7be24510 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -15,6 +15,8 @@ def initialize require_relative 'KubernetesApiClient' require_relative 'oms_common' require_relative 'omslog' + require_relative 'ApplicationInsightsUtility' + end config_param :run_interval, :time, :default => '1m' @@ -94,6 +96,7 @@ def enumerate(eventList = nil) rescue => errorStr 
$log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -110,6 +113,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_events::run_periodic: enumerate Failed to retrieve kube events: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock @@ -129,6 +133,7 @@ def getEventQueryState rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end return eventQueryState end @@ -144,6 +149,7 @@ def writeEventQueryState(eventQueryState) rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 1c792d0da..85153b21c 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -17,6 +17,7 @@ def initialize require_relative 'ApplicationInsightsUtility' require_relative 'oms_common' require_relative 'omslog' + end config_param :run_interval, :time, :default => '1m' @@ -142,6 +143,7 @@ def enumerate rescue => errorStr $log.warn "Failed to retrieve node inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -158,6 +160,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_nodes::run_periodic: enumerate Failed to retrieve node inventory: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index c6873e8fe..eaf14b035 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -16,6 
+16,7 @@ def initialize require_relative 'ApplicationInsightsUtility' require_relative 'oms_common' require_relative 'omslog' + end config_param :run_interval, :time, :default => '1m' @@ -66,6 +67,7 @@ def enumerate(podList = nil) rescue => errorStr $log.warn "Failed in enumerate pod inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -207,6 +209,7 @@ def parse_and_emit_records(podInventory, serviceList) end #podInventory block end router.emit_stream(@tag, eventStream) if eventStream if telemetryFlush == true + ApplicationInsightsUtility.sendHeartBeatEvent("KubePodInventory") ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory['items'].length , {}) ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length , {}) @@podTelemetryTimeTracker = DateTime.now.to_time.to_i @@ -218,6 +221,7 @@ def parse_and_emit_records(podInventory, serviceList) rescue => errorStr $log.warn "Failed in parse_and_emit_record pod inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end #begin block end end @@ -234,6 +238,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_podinventory::run_periodic: enumerate Failed to retrieve pod inventory: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock @@ -268,6 +273,7 @@ def getServiceNameFromLabels(namespace, labels, serviceList) rescue => errorStr $log.warn "Failed to retrieve service name from labels: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end return serviceName end diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb index 9a33f4581..655beef59 100644 --- a/source/code/plugin/in_kube_services.rb +++ b/source/code/plugin/in_kube_services.rb @@ -14,6 +14,8 @@ def 
initialize require_relative 'KubernetesApiClient' require_relative 'oms_common' require_relative 'omslog' + require_relative 'ApplicationInsightsUtility' + end config_param :run_interval, :time, :default => '1m' @@ -70,6 +72,7 @@ def enumerate rescue => errorStr $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(e.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -86,6 +89,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_services::run_periodic: enumerate Failed to kube services: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock From 04cc1a87e64cae65ffeba3b061312dcb35959b51 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Wed, 26 Dec 2018 10:32:22 -0800 Subject: [PATCH 045/160] Rashmi/kubenodeinventory (#178) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs * changes to fixed type * removing comments * changes for fixed type --- installer/conf/kube.conf | 19 ++++++++++--------- source/code/plugin/in_kube_events.rb | 9 +++++++-- source/code/plugin/in_kube_services.rb | 9 +++++++-- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 22c51ad0e..6331d257e 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -11,7 +11,7 @@ #Kubernetes events type kubeevents - tag oms.api.KubeEvents.CollectionTime + tag oms.containerinsights.KubeEvents run_interval 60s log_level debug @@ -26,7 +26,7 @@ #Kubernetes services type kubeservices - tag oms.api.KubeServices.CollectionTime + tag 
oms.containerinsights.KubeServices run_interval 60s log_level debug @@ -62,18 +62,19 @@ max_retry_wait 9m - - type out_oms_api + + type out_oms log_level debug - num_threads 5 + num_threads 5 buffer_chunk_limit 5m buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_kubeevents*.buffer + buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer buffer_queue_limit 10 - buffer_queue_full_action drop_oldest_chunk + buffer_queue_full_action drop_oldest_chunk flush_interval 20s retry_limit 10 retry_wait 30s + max_retry_wait 9m @@ -88,8 +89,8 @@ retry_wait 30s - - type out_oms_api + + type out_oms log_level debug num_threads 5 buffer_chunk_limit 20m diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index b7be24510..309dd8034 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -20,7 +20,7 @@ def initialize end config_param :run_interval, :time, :default => '1m' - config_param :tag, :string, :default => "oms.api.KubeEvents.CollectionTime" + config_param :tag, :string, :default => "oms.containerinsights.KubeEvents" def configure (conf) super @@ -88,7 +88,12 @@ def enumerate(eventList = nil) end record['ClusterName'] = KubernetesApiClient.getClusterName record['ClusterId'] = KubernetesApiClient.getClusterId - eventStream.add(emitTime, record) if record + wrapper = { + "DataType"=>"KUBE_EVENTS_BLOB", + "IPName"=>"ContainerInsights", + "DataItems"=>[record.each{|k,v| record[k]=v}] + } + eventStream.add(emitTime, wrapper) if wrapper end router.emit_stream(@tag, eventStream) if eventStream end diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb index 655beef59..e1bb93f30 100644 --- a/source/code/plugin/in_kube_services.rb +++ b/source/code/plugin/in_kube_services.rb @@ -19,7 +19,7 @@ def initialize end config_param :run_interval, :time, :default => '1m' - config_param :tag, :string, :default => "oms.api.KubeServices.CollectionTime" + config_param :tag, 
:string, :default => "oms.containerinsights.KubeServices" def configure (conf) super @@ -65,7 +65,12 @@ def enumerate record['ClusterIP'] = items['spec']['clusterIP'] record['ServiceType'] = items['spec']['type'] # : Add ports and status fields - eventStream.add(emitTime, record) if record + wrapper = { + "DataType"=>"KUBE_SERVICES_BLOB", + "IPName"=>"ContainerInsights", + "DataItems"=>[record.each{|k,v| record[k]=v}] + } + eventStream.add(emitTime, wrapper) if wrapper end router.emit_stream(@tag, eventStream) if eventStream end From 5883f5368cc9704879b25a145fec80906d91d826 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 26 Dec 2018 13:36:48 -0800 Subject: [PATCH 046/160] Fixing an issue on the cpurate metric, which happens for the first time (when cache is empty) (#179) --- source/code/plugin/CAdvisorMetricsAPIClient.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index 03d6f89f5..97eec06ab 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -76,7 +76,10 @@ def getMetrics() metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "rssBytes", "memoryRssBytes")) metricDataItems.concat(getContainerStartTimeMetricItems(metricInfo, hostName, "restartTimeEpoch")) - metricDataItems.push(getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", "cpuUsageNanoCores")) + cpuUsageNanoSecondsRate = getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", "cpuUsageNanoCores") + if cpuUsageNanoSecondsRate && !cpuUsageNanoSecondsRate.empty? && !cpuUsageNanoSecondsRate.nil? 
+ metricDataItems.push(cpuUsageNanoSecondsRate) + end metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "workingSetBytes", "memoryWorkingSetBytes")) metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "rssBytes", "memoryRssBytes")) metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "rxBytes", "networkRxBytes")) From 191f3285dad2065f83b57b4b3e55fad6709b15ab Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 28 Dec 2018 12:27:46 -0800 Subject: [PATCH 047/160] Rashmi/kubenodeinventory (#180) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs * changes to fixed type * removing comments * changes for fixed type * adding kubelet version as a dimension --- source/code/plugin/in_kube_nodes.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 85153b21c..a6908fc99 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -123,7 +123,7 @@ def enumerate if (timeDifferenceInMinutes >= 5) properties = {} properties["Computer"] = record["Computer"] - ApplicationInsightsUtility.sendMetricTelemetry("KubeletVersion", record["KubeletVersion"] , properties) + properties["KubeletVersion"] = record["KubeletVersion"] capacityInfo = items['status']['capacity'] ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"] , properties) ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"] , properties) From 7e52e8c5553bda70dd33a4afccbcb134657b42be Mon Sep 
17 00:00:00 2001 From: rashmichandrashekar Date: Mon, 7 Jan 2019 15:44:25 -0800 Subject: [PATCH 048/160] Exclude docker containers from container inventory (#181) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs * changes to fixed type * removing comments * changes for fixed type * adding kubelet version as a dimension * Excluding raw docker containers from container inventory * making labels key case insensitive * make poduid label case insensitive --- source/code/plugin/DockerApiClient.rb | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index 903256f6d..d04bf0589 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -86,7 +86,15 @@ def listContainers() containers = getResponse(request, true, false) if !containers.nil? && !containers.empty? containers.each do |container| - ids.push(container['Id']) + labels = (!container['Labels'].nil?)? container['Labels'] : container['labels'] + if !labels.nil? + labelKeys = labels.keys + #Case insensitive lookup for pod uid label + keyValue = labelKeys.find {|k| 'io.kubernetes.pod.uid'.downcase == k.downcase} + if !labels[keyValue].nil? 
+ ids.push(container['Id']) + end + end end end return ids From f0591f9e70056c61269f3a961906a908845a1cdd Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Tue, 8 Jan 2019 15:10:41 -0800 Subject: [PATCH 049/160] Exclude pauseamd64 containers from container inventory (#182) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs * changes to fixed type * removing comments * changes for fixed type * adding kubelet version as a dimension * Excluding raw docker containers from container inventory * making labels key case insensitive * make poduid label case insensitive * changes to exclude pause amd 64 containers --- source/code/plugin/DockerApiClient.rb | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index d04bf0589..5a46b5fdb 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -89,10 +89,18 @@ def listContainers() labels = (!container['Labels'].nil?)? container['Labels'] : container['labels'] if !labels.nil? labelKeys = labels.keys - #Case insensitive lookup for pod uid label - keyValue = labelKeys.find {|k| 'io.kubernetes.pod.uid'.downcase == k.downcase} - if !labels[keyValue].nil? - ids.push(container['Id']) + dockerTypeLabel = labelKeys.find {|k| 'io.kubernetes.docker.type'.downcase == k.downcase} + if !dockerTypeLabel.nil? 
+ dockerTypeLabelValue = labels[dockerTypeLabel] + # Checking for 'io.kubernetes.docker.type' label for docker containers to exclude the pause-amd64 containers + if !(dockerTypeLabelValue.downcase == "podsandbox".downcase) + # Case insensitive lookup for pod uid label - This is to exclude containers created using docker run and only include containers that + # are created in the pods for ContainerInventory + keyValue = labelKeys.find {|k| 'io.kubernetes.pod.uid'.downcase == k.downcase} + if !labels[keyValue].nil? + ids.push(container['Id']) + end + end end end end From 4782435a228c3626b25d8bf1682a0d977e79eb23 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 9 Jan 2019 11:22:53 -0800 Subject: [PATCH 050/160] Update agent version --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index b6b9bcc44..29c98bdf1 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -28,5 +28,5 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion ciprod11292018 + AgentVersion ciprod01092019 From 23bcc4198c3ead32fb0404afeaddac83b3c23b78 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 9 Jan 2019 13:19:06 -0800 Subject: [PATCH 051/160] Updating readme for the latest release --- README.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fc43d6605..5c65308fb 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,29 @@ additional questions or comments. 
## Release History Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) - + +### 10/09/2018 - Version microsoft/oms:ciprod01092019 +- Omsagent - 1.8.1.256 (nov 2018 release) +- Persist fluentbit state between container restarts +- Populate 'TimeOfCommand' for agent ingest time for container logs +- Get node cpu usage from cpuusagenanoseconds (and convert to cpuusgaenanocores) +- Container Node Inventory - move to fluentD from OMI +- Mount docker.sock (Daemon set) as /var/run/host +- Liveness probe (Daemon set) - check for omsagent user permissions in docker.sock and update as necessary (required when docker daemon gets restarted) +- Move to fixed type for kubeevents & kubeservices +- Disable collecting ENV for our oms agent container (daemonset & replicaset) +- Disable container inventory collection for 'sandbox' containers & non kubernetes managed containers +- Agent telemetry - ContainerLogsAgentSideLatencyMs +- Agent telemetry - PodCount +- Agent telemetry - ControllerCount +- Agent telemetry - K8S Version +- Agent telemetry - NodeCoreCapacity +- Agent telemetry - NodeMemoryCapacity +- Agent telemetry - KubeEvents (exceptions) +- Agent telemetry - Kubenodes (exceptions) +- Agent telemetry - kubepods (exceptions) +- Agent telemetry - kubeservices (exceptions) +- Agent telemetry - Daemonset , Replicaset as dimensions (bug fix) ### 11/29/2018 - Version microsoft/oms:ciprod11292018 - Disable Container Image inventory workflow From 51d5e938d436584bc094d72361d8652dd51db8bd Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 11 Jan 2019 13:08:56 -0800 Subject: [PATCH 052/160] Fix indentation in kube.conf and update readme (#184) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception 
telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs * changes to fixed type * removing comments * changes for fixed type * adding kubelet version as a dimension * Excluding raw docker containers from container inventory * making labels key case insensitive * make poduid label case insensitive * changes to exclude pause amd 64 containers * fixing indentation so that kube.conf contents can be used in config map in the yaml * updating readme to fix date and agent version --- README.md | 6 +- installer/conf/kube.conf | 270 +++++++++++++++++++-------------------- 2 files changed, 138 insertions(+), 138 deletions(-) diff --git a/README.md b/README.md index 5c65308fb..dd55f810e 100644 --- a/README.md +++ b/README.md @@ -11,21 +11,21 @@ additional questions or comments. Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) -### 10/09/2018 - Version microsoft/oms:ciprod01092019 +### 01/09/2018 - Version microsoft/oms:ciprod01092019-2 - Omsagent - 1.8.1.256 (nov 2018 release) - Persist fluentbit state between container restarts - Populate 'TimeOfCommand' for agent ingest time for container logs - Get node cpu usage from cpuusagenanoseconds (and convert to cpuusgaenanocores) - Container Node Inventory - move to fluentD from OMI - Mount docker.sock (Daemon set) as /var/run/host -- Liveness probe (Daemon set) - check for omsagent user permissions in docker.sock and update as necessary (required when docker daemon gets restarted) +- Add omsagent user to docker group - Move to fixed type for kubeevents & kubeservices - Disable collecting ENV for our oms agent container (daemonset & replicaset) - Disable container inventory collection for 'sandbox' containers & non kubernetes managed containers - Agent telemetry - ContainerLogsAgentSideLatencyMs - Agent telemetry - PodCount - Agent telemetry - ControllerCount -- 
Agent telemetry - K8S Version +- Agent telemetry - K8S Version - Agent telemetry - NodeCoreCapacity - Agent telemetry - NodeMemoryCapacity - Agent telemetry - KubeEvents (exceptions) diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 6331d257e..164865022 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -1,148 +1,148 @@ -# Fluentd config file for OMS Docker - cluster components (kubeAPI) + # Fluentd config file for OMS Docker - cluster components (kubeAPI) -#Kubernetes pod inventory - - type kubepodinventory - tag oms.containerinsights.KubePodInventory - run_interval 60s - log_level debug - + #Kubernetes pod inventory + + type kubepodinventory + tag oms.containerinsights.KubePodInventory + run_interval 60s + log_level debug + -#Kubernetes events - - type kubeevents - tag oms.containerinsights.KubeEvents - run_interval 60s - log_level debug - + #Kubernetes events + + type kubeevents + tag oms.containerinsights.KubeEvents + run_interval 60s + log_level debug + -#Kubernetes logs - - type kubelogs - tag oms.api.KubeLogs - run_interval 60s - + #Kubernetes logs + + type kubelogs + tag oms.api.KubeLogs + run_interval 60s + -#Kubernetes services - - type kubeservices - tag oms.containerinsights.KubeServices - run_interval 60s - log_level debug - + #Kubernetes services + + type kubeservices + tag oms.containerinsights.KubeServices + run_interval 60s + log_level debug + -#Kubernetes Nodes - - type kubenodeinventory - tag oms.containerinsights.KubeNodeInventory - run_interval 60s - log_level debug - + #Kubernetes Nodes + + type kubenodeinventory + tag oms.containerinsights.KubeNodeInventory + run_interval 60s + log_level debug + -#Kubernetes perf - - type kubeperf - tag oms.api.KubePerf - run_interval 60s - log_level debug - + #Kubernetes perf + + type kubeperf + tag oms.api.KubePerf + run_interval 60s + log_level debug + - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path 
%STATE_DIR_WS%/out_oms_kubepods*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 5m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer - buffer_queue_limit 10 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 5m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer + buffer_queue_limit 10 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + - - type out_oms_api - log_level debug - buffer_chunk_limit 10m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_kubernetes_logs*.buffer - buffer_queue_limit 10 - flush_interval 20s - retry_limit 10 - retry_wait 30s - + + type out_oms_api + log_level debug + buffer_chunk_limit 10m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_api_kubernetes_logs*.buffer + buffer_queue_limit 10 + flush_interval 20s + retry_limit 10 + retry_wait 30s + - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer + 
buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + - - type out_oms_api - log_level debug - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer - buffer_queue_limit 20 - flush_interval 20s - retry_limit 10 - retry_wait 15s - max_retry_wait 9m - + + type out_oms_api + log_level debug + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer + buffer_queue_limit 20 + flush_interval 20s + retry_limit 10 + retry_wait 15s + max_retry_wait 9m + - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + \ No newline at end of file From decf86a3d24dece047ea4b780d10c799fbe1a1ce Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 11 Jan 2019 13:16:21 
-0800 Subject: [PATCH 053/160] updating agent tag --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index dd55f810e..099a065e8 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ additional questions or comments. Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) -### 01/09/2018 - Version microsoft/oms:ciprod01092019-2 +### 01/09/2018 - Version microsoft/oms:ciprod01092019 - Omsagent - 1.8.1.256 (nov 2018 release) - Persist fluentbit state between container restarts - Populate 'TimeOfCommand' for agent ingest time for container logs From a1b35db565c9cc324733534b90e3c4f5a98651d7 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 29 Jan 2019 15:33:59 -0800 Subject: [PATCH 054/160] Get Pods for current Node Only (#185) * Fix KubeAPI Calls to filter to get pods for current node * Reinstate log line --- source/code/go/src/plugins/oms.go | 48 ++++++++++++++++--------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 5d9269d1e..49e91f87f 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -77,15 +77,15 @@ var ( // DataItem represents the object corresponding to the json that is sent by fluentbit tail plugin type DataItem struct { - LogEntry string `json:"LogEntry"` - LogEntrySource string `json:"LogEntrySource"` - LogEntryTimeStamp string `json:"LogEntryTimeStamp"` - LogEntryTimeOfCommand string `json:"TimeOfCommand"` - ID string `json:"Id"` - Image string `json:"Image"` - Name string `json:"Name"` - SourceSystem string `json:"SourceSystem"` - Computer string `json:"Computer"` + LogEntry string `json:"LogEntry"` + LogEntrySource string `json:"LogEntrySource"` + LogEntryTimeStamp string `json:"LogEntryTimeStamp"` + LogEntryTimeOfCommand string `json:"TimeOfCommand"` + ID string `json:"Id"` + Image string 
`json:"Image"` + Name string `json:"Name"` + SourceSystem string `json:"SourceSystem"` + Computer string `json:"Computer"` } // ContainerLogBlob represents the object corresponding to the payload that is sent to the ODS end point @@ -137,7 +137,10 @@ func updateContainerImageNameMaps() { _imageIDMap := make(map[string]string) _nameIDMap := make(map[string]string) - pods, err := ClientSet.CoreV1().Pods("").List(metav1.ListOptions{}) + listOptions := metav1.ListOptions{} + listOptions.FieldSelector = fmt.Sprintf("spec.nodeName=%s", Computer) + pods, err := ClientSet.CoreV1().Pods("").List(listOptions) + if err != nil { message := fmt.Sprintf("Error getting pods %s\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) Log(message) @@ -244,31 +247,30 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { if val, ok := imageIDMap[containerID]; ok { stringMap["Image"] = val } else { - Log("ContainerId %s not present in Map ", containerID) + Log("ContainerId %s not present in Name Map ", containerID) } if val, ok := nameIDMap[containerID]; ok { stringMap["Name"] = val } else { - Log("ContainerId %s not present in Map ", containerID) + Log("ContainerId %s not present in Image Map ", containerID) } - dataItem := DataItem{ - ID: stringMap["Id"], - LogEntry: stringMap["LogEntry"], - LogEntrySource: stringMap["LogEntrySource"], - LogEntryTimeStamp: stringMap["LogEntryTimeStamp"], - LogEntryTimeOfCommand: start.Format(time.RFC3339), - SourceSystem: stringMap["SourceSystem"], - Computer: Computer, - Image: stringMap["Image"], - Name: stringMap["Name"], + ID: stringMap["Id"], + LogEntry: stringMap["LogEntry"], + LogEntrySource: stringMap["LogEntrySource"], + LogEntryTimeStamp: stringMap["LogEntryTimeStamp"], + LogEntryTimeOfCommand: start.Format(time.RFC3339), + SourceSystem: stringMap["SourceSystem"], + Computer: Computer, + Image: stringMap["Image"], + Name: 
stringMap["Name"], } dataItems = append(dataItems, dataItem) loggedTime, e := time.Parse(time.RFC3339, dataItem.LogEntryTimeStamp) - if e!= nil { + if e != nil { message := fmt.Sprintf("Error while converting LogEntryTimeStamp for telemetry purposes: %s", e.Error()) Log(message) SendException(message) From 22649bad0090c05eb809f0521d9222b514084b9b Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Wed, 30 Jan 2019 15:50:28 -0800 Subject: [PATCH 055/160] changes for container node inventory fixed type (#186) --- installer/conf/kube.conf | 4 ++-- source/code/plugin/in_kube_nodes.rb | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 164865022..d0ef0517d 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -119,8 +119,8 @@ max_retry_wait 9m - - type out_oms_api + + type out_oms log_level debug buffer_chunk_limit 20m buffer_type file diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index a6908fc99..2e48e3f1f 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -6,7 +6,7 @@ module Fluent class Kube_nodeInventory_Input < Input Plugin.register_input('kubenodeinventory', self) - @@ContainerNodeInventoryTag = 'oms.api.ContainerNodeInventory' + @@ContainerNodeInventoryTag = 'oms.containerinsights.ContainerNodeInventory' def initialize super @@ -109,7 +109,12 @@ def enumerate dockerVersion.slice! "docker://" containerNodeInventoryRecord['DockerVersion'] = dockerVersion # ContainerNodeInventory data for docker version and operating system. 
- containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryRecord) if containerNodeInventoryRecord + containerNodeInventoryWrapper = { + "DataType"=>"CONTAINER_NODE_INVENTORY_BLOB", + "IPName"=>"ContainerInsights", + "DataItems"=>[containerNodeInventoryRecord.each{|k,v| containerNodeInventoryRecord[k]=v}] + } + containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper wrapper = { "DataType"=>"KUBE_NODE_INVENTORY_BLOB", From 61e2eaffe3e60b51d83459a494435f3dd6002821 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 13 Feb 2019 11:38:07 -0800 Subject: [PATCH 056/160] Fix for mooncake (disable telemetry optionally) (#191) * disable telemetry option * fix a typo --- source/code/go/src/plugins/telemetry.go | 5 +++++ source/code/plugin/ApplicationInsightsUtility.rb | 9 ++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 82f970d3a..a64ca2218 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -120,6 +120,11 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { } TelemetryClient = appinsights.NewTelemetryClient(string(decIkey)) + telemetryOffSwitch := os.Getenv("DISABLE_TELEMETRY") + if strings.Compare(strings.ToLower(telemetryOffSwitch), "true") == 0 { + Log("Appinsights telemetry is disabled \n") + TelemetryClient.SetIsEnabled(false) + } CommonProperties = make(map[string]string) CommonProperties["Computer"] = Computer diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 27660d708..683be0db4 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -61,9 +61,16 @@ def initializeUtility() @@CustomProperties['AgentVersion'] = ENV[@@EnvAgentVersion] @@CustomProperties['ControllerType'] = 
ENV[@@EnvControllerType] encodedAppInsightsKey = ENV[@@EnvApplicationInsightsKey] - if !encodedAppInsightsKey.nil? + + #Check if telemetry is turned off + telemetryOffSwitch = ENV['DISABLE_TELEMETRY'] + if telemetryOffSwitch && !telemetryOffSwitch.nil? && !telemetryOffSwitch.empty? && telemetryOffSwitch.downcase == "true".downcase + $log.warn("AppInsightsUtility: Telemetry is disabled") + @@Tc = ApplicationInsights::TelemetryClient.new + elsif !encodedAppInsightsKey.nil? decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey) @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey + end rescue => errorStr $log.warn("Exception in AppInsightsUtility: initilizeUtility - error: #{errorStr}") From 30dff41106981b9855a89db9227ef9fccbea0158 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Fri, 15 Feb 2019 14:27:33 -0800 Subject: [PATCH 057/160] CustomMetrics to ci_feature (#193) Custom Metrics changes to ci_feature --- installer/conf/container.conf | 24 ++ installer/conf/kube.conf | 25 +- installer/datafiles/base_container.data | 14 + source/code/go/src/plugins/oms.go | 2 +- .../code/plugin/ApplicationInsightsUtility.rb | 19 +- source/code/plugin/CustomMetricsUtils.rb | 26 ++ source/code/plugin/filter_cadvisor2mdm.rb | 215 ++++++++++++++++ source/code/plugin/filter_inventory2mdm.rb | 235 +++++++++++++++++ source/code/plugin/in_cadvisor_perf.rb | 2 + source/code/plugin/in_kube_nodes.rb | 2 + source/code/plugin/in_kube_podinventory.rb | 3 + source/code/plugin/out_mdm.rb | 239 ++++++++++++++++++ 12 files changed, 802 insertions(+), 4 deletions(-) create mode 100644 source/code/plugin/CustomMetricsUtils.rb create mode 100644 source/code/plugin/filter_cadvisor2mdm.rb create mode 100644 source/code/plugin/filter_inventory2mdm.rb create mode 100644 source/code/plugin/out_mdm.rb diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 091753230..f41bd6f98 100755 --- a/installer/conf/container.conf +++ 
b/installer/conf/container.conf @@ -23,6 +23,14 @@ log_level debug +#custom_metrics_mdm filter plugin + + type filter_cadvisor2mdm + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope + metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes + log_level info + + type out_oms log_level debug @@ -52,3 +60,19 @@ retry_wait 30s max_retry_wait 9m + + + type out_mdm + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + retry_mdm_post_wait_minutes 60 + diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index d0ef0517d..50a88295e 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -47,6 +47,12 @@ log_level debug + + type filter_inventory2mdm + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope + log_level info + + type out_oms log_level debug @@ -145,4 +151,21 @@ retry_limit 10 retry_wait 30s max_retry_wait 9m - \ No newline at end of file + + + + type out_mdm + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path /var/opt/microsoft/omsagent/6bb1e963-b08c-43a8-b708-1628305e964a/state/out_mdm_*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + retry_mdm_post_wait_minutes 60 + + diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 7181929e2..c263aa505 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -36,6 +36,9 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/in_cadvisor_perf.rb; source/code/plugin/in_cadvisor_perf.rb; 644; 
root; root /opt/microsoft/omsagent/plugin/in_kube_services.rb; source/code/plugin/in_kube_services.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_nodes.rb; source/code/plugin/in_kube_nodes.rb; 644; root; root +/opt/microsoft/omsagent/plugin/filter_inventory2mdm.rb; source/code/plugin/filter_inventory2mdm.rb; 644; root; root +/opt/microsoft/omsagent/plugin/CustomMetricsUtils.rb; source/code/plugin/CustomMetricsUtils.rb; 644; root; root + /opt/microsoft/omsagent/plugin/ApplicationInsightsUtility.rb; source/code/plugin/ApplicationInsightsUtility.rb; 644; root; root /opt/microsoft/omsagent/plugin/ContainerInventoryState.rb; source/code/plugin/ContainerInventoryState.rb; 644; root; root @@ -43,6 +46,9 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/DockerApiRestHelper.rb; source/code/plugin/DockerApiRestHelper.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_containerinventory.rb; source/code/plugin/in_containerinventory.rb; 644; root; root +/opt/microsoft/omsagent/plugin/out_mdm.rb; source/code/plugin/out_mdm.rb; 644; root; root +/opt/microsoft/omsagent/plugin/filter_cadvisor2mdm.rb; source/code/plugin/filter_cadvisor2mdm.rb; 644; root; root + /opt/microsoft/omsagent/plugin/lib/application_insights/version.rb; source/code/plugin/lib/application_insights/version.rb; 644; root; root /opt/microsoft/omsagent/plugin/lib/application_insights/rack/track_request.rb; source/code/plugin/lib/application_insights/rack/track_request.rb; 644; root; root /opt/microsoft/omsagent/plugin/lib/application_insights/unhandled_exception.rb; source/code/plugin/lib/application_insights/unhandled_exception.rb; 644; root; root @@ -170,6 +176,14 @@ touch /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt chmod 666 /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt +touch /var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log +chmod 666 
/var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log +chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log + +touch /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log +chmod 666 /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log +chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log + mv /etc/opt/microsoft/docker-cimprov/container.conf /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf chown omsagent:omsagent /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 49e91f87f..27ae6df5c 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -140,7 +140,7 @@ func updateContainerImageNameMaps() { listOptions := metav1.ListOptions{} listOptions.FieldSelector = fmt.Sprintf("spec.nodeName=%s", Computer) pods, err := ClientSet.CoreV1().Pods("").List(listOptions) - + if err != nil { message := fmt.Sprintf("Error getting pods %s\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) Log(message) diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 683be0db4..5c5e92a6c 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -98,7 +98,7 @@ def sendHeartBeatEvent(pluginName) end end - def sendCustomMetric(pluginName, properties) + def sendLastProcessedContainerInventoryCountMetric(pluginName, properties) begin if !(@@Tc.nil?) @@Tc.track_metric 'LastProcessedContainerInventoryCount', properties['ContainerCount'], @@ -112,6 +112,21 @@ def sendCustomMetric(pluginName, properties) end end + def sendCustomEvent(eventName, properties) + begin + if @@CustomProperties.empty? || @@CustomProperties.nil? 
+ initializeUtility() + end + if !(@@Tc.nil?) + @@Tc.track_event eventName, :properties => @@CustomProperties + @@Tc.flush + $log.info("AppInsights Custom Event #{eventName} sent successfully") + end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendCustomEvent - error: #{errorStr}") + end + end + def sendExceptionTelemetry(errorStr) begin if @@CustomProperties.empty? || @@CustomProperties.nil? @@ -139,7 +154,7 @@ def sendTelemetry(pluginName, properties) end @@CustomProperties['Computer'] = properties['Computer'] sendHeartBeatEvent(pluginName) - sendCustomMetric(pluginName, properties) + sendLastProcessedContainerInventoryCountMetric(pluginName, properties) rescue => errorStr $log.warn("Exception in AppInsightsUtility: sendTelemetry - error: #{errorStr}") end diff --git a/source/code/plugin/CustomMetricsUtils.rb b/source/code/plugin/CustomMetricsUtils.rb new file mode 100644 index 000000000..d06c9ad91 --- /dev/null +++ b/source/code/plugin/CustomMetricsUtils.rb @@ -0,0 +1,26 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +class CustomMetricsUtils + def initialize + end + + class << self + def check_custom_metrics_availability(custom_metric_regions) + aks_region = ENV['AKS_REGION'] + aks_resource_id = ENV['AKS_RESOURCE_ID'] + if aks_region.to_s.empty? && aks_resource_id.to_s.empty? + false # This will also take care of AKS-Engine Scenario. AKS_REGION/AKS_RESOURCE_ID is not set for AKS-Engine. 
Only ACS_RESOURCE_NAME is set + end + + custom_metrics_regions_arr = custom_metric_regions.split(',') + custom_metrics_regions_hash = custom_metrics_regions_arr.map {|x| [x.downcase,true]}.to_h + + if custom_metrics_regions_hash.key?(aks_region.downcase) + true + else + false + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/filter_cadvisor2mdm.rb b/source/code/plugin/filter_cadvisor2mdm.rb new file mode 100644 index 000000000..85f9f688e --- /dev/null +++ b/source/code/plugin/filter_cadvisor2mdm.rb @@ -0,0 +1,215 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. + +# frozen_string_literal: true + +module Fluent + require 'logger' + require 'json' + require_relative 'oms_common' + require_relative 'CustomMetricsUtils' + + class CAdvisor2MdmFilter < Filter + Fluent::Plugin.register_filter('filter_cadvisor2mdm', self) + + config_param :enable_log, :integer, :default => 0 + config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log' + config_param :custom_metrics_azure_regions, :string + config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes' + + @@cpu_usage_milli_cores = 'cpuUsageMillicores' + @@cpu_usage_nano_cores = 'cpuusagenanocores' + @@object_name_k8s_node = 'K8SNode' + @@hostName = (OMS::Common.get_hostname) + @@custom_metrics_template = ' + { + "time": "%{timestamp}", + "data": { + "baseData": { + "metric": "%{metricName}", + "namespace": "Insights.Container/nodes", + "dimNames": [ + "host" + ], + "series": [ + { + "dimValues": [ + "%{hostvalue}" + ], + "min": %{metricminvalue}, + "max": %{metricmaxvalue}, + "sum": %{metricsumvalue}, + "count": 1 + } + ] + } + } + }' + + @@metric_name_metric_percentage_name_hash = { + @@cpu_usage_milli_cores => "cpuUsagePercentage", + "memoryRssBytes" => "memoryRssPercentage", + "memoryWorkingSetBytes" => "memoryWorkingSetPercentage" + } + + @process_incoming_stream = true + 
@metrics_to_collect_hash = {} + + def initialize + super + end + + def configure(conf) + super + @log = nil + + if @enable_log + @log = Logger.new(@log_path, 'weekly') + @log.debug {'Starting filter_cadvisor2mdm plugin'} + end + end + + def start + super + @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability(@custom_metrics_azure_regions) + @metrics_to_collect_hash = build_metrics_hash + @log.debug "After check_custom_metrics_availability process_incoming_stream #{@process_incoming_stream}" + + # initialize cpu and memory limit + if @process_incoming_stream + @cpu_capacity = 0.0 + @memory_capacity = 0.0 + ensure_cpu_memory_capacity_set + end + end + + def build_metrics_hash + @log.debug "Building Hash of Metrics to Collect" + metrics_to_collect_arr = @metrics_to_collect.split(',').map(&:strip) + metrics_hash = metrics_to_collect_arr.map {|x| [x.downcase,true]}.to_h + @log.info "Metrics Collected : #{metrics_hash}" + return metrics_hash + end + + def shutdown + super + end + + def filter(tag, time, record) + begin + if @process_incoming_stream + object_name = record['DataItems'][0]['ObjectName'] + counter_name = record['DataItems'][0]['Collections'][0]['CounterName'] + if object_name == @@object_name_k8s_node && @metrics_to_collect_hash.key?(counter_name.downcase) + percentage_metric_value = 0.0 + + # Compute and send % CPU and Memory + metric_value = record['DataItems'][0]['Collections'][0]['Value'] + if counter_name.downcase == @@cpu_usage_nano_cores + metric_name = @@cpu_usage_milli_cores + metric_value = metric_value/1000000 + if @cpu_capacity != 0.0 + percentage_metric_value = (metric_value*1000000)*100/@cpu_capacity + end + end + + if counter_name.start_with?("memory") + metric_name = counter_name + if @memory_capacity != 0.0 + percentage_metric_value = metric_value*100/@memory_capacity + end + end + return get_metric_records(record, metric_name, metric_value, percentage_metric_value) + else + return [] + end + else + return [] + end 
+ rescue Exception => e + @log.info "Error processing cadvisor record Exception: #{e.class} Message: #{e.message}" + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + return [] + end + end + + def ensure_cpu_memory_capacity_set + + @log.info "ensure_cpu_memory_capacity_set @cpu_capacity #{@cpu_capacity} @memory_capacity #{@memory_capacity}" + if @cpu_capacity != 0.0 && @memory_capacity != 0.0 + @log.info "CPU And Memory Capacity are already set" + return + end + + begin + nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes?fieldSelector=metadata.name%3D#{@@hostName}").body) + rescue Exception => e + @log.info "Error when getting nodeInventory from kube API. Exception: #{e.class} Message: #{e.message} " + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + end + if !nodeInventory.nil? + cpu_capacity_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") + if !cpu_capacity_json.nil? && !cpu_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'].to_s.nil? + @cpu_capacity = cpu_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'] + @log.info "CPU Limit #{@cpu_capacity}" + else + @log.info "Error getting cpu_capacity" + end + memory_capacity_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes") + if !memory_capacity_json.nil? && !memory_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'].to_s.nil? 
+ @memory_capacity = memory_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'] + @log.info "Memory Limit #{@memory_capacity}" + else + @log.info "Error getting memory_capacity" + end + end + end + + def get_metric_records(record, metric_name, metric_value, percentage_metric_value) + records = [] + custommetricrecord = @@custom_metrics_template % { + timestamp: record['DataItems'][0]['Timestamp'], + metricName: metric_name, + hostvalue: record['DataItems'][0]['Host'], + objectnamevalue: record['DataItems'][0]['ObjectName'], + instancenamevalue: record['DataItems'][0]['InstanceName'], + metricminvalue: metric_value, + metricmaxvalue: metric_value, + metricsumvalue: metric_value + } + records.push(JSON.parse(custommetricrecord)) + + if !percentage_metric_value.nil? + additional_record = @@custom_metrics_template % { + timestamp: record['DataItems'][0]['Timestamp'], + metricName: @@metric_name_metric_percentage_name_hash[metric_name], + hostvalue: record['DataItems'][0]['Host'], + objectnamevalue: record['DataItems'][0]['ObjectName'], + instancenamevalue: record['DataItems'][0]['InstanceName'], + metricminvalue: percentage_metric_value, + metricmaxvalue: percentage_metric_value, + metricsumvalue: percentage_metric_value + } + records.push(JSON.parse(additional_record)) + end + @log.info "Metric Name: #{metric_name} Metric Value: #{metric_value} Percentage Metric Value: #{percentage_metric_value}" + return records + end + + + def filter_stream(tag, es) + new_es = MultiEventStream.new + ensure_cpu_memory_capacity_set + es.each { |time, record| + begin + filtered_records = filter(tag, time, record) + filtered_records.each {|filtered_record| + new_es.add(time, filtered_record) if filtered_record + } if filtered_records + rescue => e + router.emit_error_event(tag, time, record, e) + end + } + new_es + end + end +end diff --git a/source/code/plugin/filter_inventory2mdm.rb b/source/code/plugin/filter_inventory2mdm.rb new file mode 100644 index 000000000..d9864bc1a 
--- /dev/null +++ b/source/code/plugin/filter_inventory2mdm.rb @@ -0,0 +1,235 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. + +# frozen_string_literal: true + +module Fluent + require 'logger' + require 'json' + require_relative 'oms_common' + require_relative 'CustomMetricsUtils' + + class Inventory2MdmFilter < Filter + Fluent::Plugin.register_filter('filter_inventory2mdm', self) + + config_param :enable_log, :integer, :default => 0 + config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log' + config_param :custom_metrics_azure_regions, :string + + @@node_count_metric_name = 'nodesCount' + @@pod_count_metric_name = 'podCount' + @@pod_inventory_tag = 'mdm.kubepodinventory' + @@node_inventory_tag = 'mdm.kubenodeinventory' + @@node_status_ready = 'Ready' + @@node_status_not_ready = 'NotReady' + + @@node_inventory_custom_metrics_template = ' + { + "time": "%{timestamp}", + "data": { + "baseData": { + "metric": "%{metricName}", + "namespace": "insights.container/nodes", + "dimNames": [ + "status" + ], + "series": [ + { + "dimValues": [ + "%{statusValue}" + ], + "min": %{node_status_count}, + "max": %{node_status_count}, + "sum": %{node_status_count}, + "count": 1 + } + ] + } + } + }' + + @@pod_inventory_custom_metrics_template = ' + { + "time": "%{timestamp}", + "data": { + "baseData": { + "metric": "%{metricName}", + "namespace": "insights.container/pods", + "dimNames": [ + "phase", + "namespace", + "node", + "controllerName" + ], + "series": [ + { + "dimValues": [ + "%{phaseDimValue}", + "%{namespaceDimValue}", + "%{nodeDimValue}", + "%{controllerNameDimValue}" + ], + "min": %{podCountMetricValue}, + "max": %{podCountMetricValue}, + "sum": %{podCountMetricValue}, + "count": 1 + } + ] + } + } + }' + + @process_incoming_stream = true + + def initialize + super + end + + def configure(conf) + super + @log = nil + + if @enable_log + @log = Logger.new(@log_path, 'weekly') + @log.debug {'Starting 
filter_inventory2mdm plugin'} + end + end + + def start + super + @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability(@custom_metrics_azure_regions) + @log.debug "After check_custom_metrics_availability process_incoming_stream #{@process_incoming_stream}" + end + + def shutdown + super + end + + def process_node_inventory_records(es) + timestamp = DateTime.now + + begin + node_ready_count = 0 + node_not_ready_count = 0 + records = [] + + es.each{|time,record| + begin + timestamp = record['DataItems'][0]['CollectionTime'] + node_status = record['DataItems'][0]['Status'] + if node_status.downcase == @@node_status_ready.downcase + node_ready_count = node_ready_count+1 + else + node_not_ready_count = node_not_ready_count + 1 + end + rescue => e + end + } + + ready_record = @@node_inventory_custom_metrics_template % { + timestamp: timestamp, + metricName: @@node_count_metric_name, + statusValue: @@node_status_ready, + node_status_count: node_ready_count + } + records.push(JSON.parse(ready_record)) + + not_ready_record = @@node_inventory_custom_metrics_template % { + timestamp: timestamp, + metricName: @@node_count_metric_name, + statusValue: @@node_status_not_ready, + node_status_count: node_not_ready_count + } + records.push(JSON.parse(not_ready_record)) + rescue Exception => e + @log.info "Error processing node inventory records Exception: #{e.class} Message: #{e.message}" + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + return [],timestamp + end + return records,timestamp + end + + def process_pod_inventory_records(es) + timestamp = DateTime.now + pod_count_hash = Hash.new + + begin + records = [] + es.each{|time,record| + + timestamp = record['DataItems'][0]['CollectionTime'] + podPhaseDimValue = record['DataItems'][0]['PodStatus'] + podNamespaceDimValue = record['DataItems'][0]['Namespace'] + podControllerNameDimValue = record['DataItems'][0]['ControllerName'] + podNodeDimValue = record['DataItems'][0]['Computer'] + + # 
group by distinct dimension values + pod_key = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue, podPhaseDimValue].join('~~') + + if pod_count_hash.key?(pod_key) + pod_count = pod_count_hash[pod_key] + pod_count = pod_count + 1 + pod_count_hash[pod_key] = pod_count + else + pod_count = 1 + pod_count_hash[pod_key] = pod_count + end + } + + pod_count_hash.each {|key, value| + + key_elements = key.split('~~') + if key_elements.length != 4 + next + end + + # get dimension values by key + podNodeDimValue = key_elements[0] + podNamespaceDimValue = key_elements[1] + podControllerNameDimValue = key_elements[2] + podPhaseDimValue = key_elements[3] + + record = @@pod_inventory_custom_metrics_template % { + timestamp: timestamp, + metricName: @@pod_count_metric_name, + phaseDimValue: podPhaseDimValue, + namespaceDimValue: podNamespaceDimValue, + nodeDimValue: podNodeDimValue, + controllerNameDimValue: podControllerNameDimValue, + podCountMetricValue: value + } + records.push(JSON.parse(record)) + } + rescue Exception => e + @log.info "Error processing pod inventory record Exception: #{e.class} Message: #{e.message}" + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + return [],timestamp + end + return records, timestamp + end + + def filter_stream(tag, es) + new_es = MultiEventStream.new + filtered_records = [] + time = DateTime.now + begin + if @process_incoming_stream + @log.info 'Processing NODE inventory records in filter plugin to send to MDM' + if tag.downcase.start_with?(@@node_inventory_tag) + filtered_records, time = process_node_inventory_records(es) + elsif tag.downcase.start_with?(@@pod_inventory_tag) + @log.info 'Processing POD inventory records in filter plugin to send to MDM' + filtered_records, time = process_pod_inventory_records(es) + else + filtered_records = [] + end + end + filtered_records.each {|filtered_record| + new_es.add(time, filtered_record) if filtered_record + } if filtered_records + rescue => e + @log.info 
"Exception in filter_stream #{e}" + end + new_es + end + end +end diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index 5b551f74e..a857aa6b9 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -18,6 +18,7 @@ def initialize config_param :run_interval, :time, :default => '1m' config_param :tag, :string, :default => "oms.api.cadvisorperf" + config_param :mdmtag, :string, :default => "mdm.cadvisorperf" def configure (conf) super @@ -55,6 +56,7 @@ def enumerate() end router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@mdmtag, eventStream) if eventStream @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) $log.info("cAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 2e48e3f1f..ba1dacbe0 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -7,6 +7,7 @@ class Kube_nodeInventory_Input < Input Plugin.register_input('kubenodeinventory', self) @@ContainerNodeInventoryTag = 'oms.containerinsights.ContainerNodeInventory' + @@MDMKubeNodeInventoryTag = 'mdm.kubenodeinventory' def initialize super @@ -136,6 +137,7 @@ def enumerate end end router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream if telemetrySent == true @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index eaf14b035..dee3df30b 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -6,6 +6,8 @@ module Fluent class Kube_PodInventory_Input 
< Input Plugin.register_input('kubepodinventory', self) + @@MDMKubePodInventoryTag = 'mdm.kubepodinventory' + def initialize super require 'yaml' @@ -208,6 +210,7 @@ def parse_and_emit_records(podInventory, serviceList) end end #podInventory block end router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@@MDMKubePodInventoryTag, eventStream) if eventStream if telemetryFlush == true ApplicationInsightsUtility.sendHeartBeatEvent("KubePodInventory") ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory['items'].length , {}) diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb new file mode 100644 index 000000000..2f36ea7d5 --- /dev/null +++ b/source/code/plugin/out_mdm.rb @@ -0,0 +1,239 @@ +module Fluent + + class OutputMDM < BufferedOutput + + config_param :retry_mdm_post_wait_minutes, :integer + + Plugin.register_output('out_mdm', self) + + def initialize + super + require 'net/http' + require 'net/https' + require 'uri' + require 'json' + require_relative 'KubernetesApiClient' + require_relative 'ApplicationInsightsUtility' + + @@token_resource_url = 'https://monitoring.azure.com/' + @@grant_type = 'client_credentials' + @@azure_json_path = '/etc/kubernetes/host/azure.json' + @@post_request_url_template = "https://%{aks_region}.monitoring.azure.com%{aks_resource_id}/metrics" + @@token_url_template = "https://login.microsoftonline.com/%{tenant_id}/oauth2/token" + @@plugin_name = "AKSCustomMetricsMDM" + + @data_hash = {} + @token_url = nil + @http_client = nil + @token_expiry_time = Time.now + @cached_access_token = String.new + @last_post_attempt_time = Time.now + @first_post_attempt_made = false + end + + def configure(conf) + s = conf.add_element("secondary") + s["type"] = ChunkErrorHandler::SecondaryName + super + end + + def start + super + file = File.read(@@azure_json_path) + # Handle the case where the file read fails. Send Telemetry and exit the plugin? 
+ @data_hash = JSON.parse(file) + @token_url = @@token_url_template % {tenant_id: @data_hash['tenantId']} + @cached_access_token = get_access_token + aks_resource_id = ENV['AKS_RESOURCE_ID'] + aks_region = ENV['AKS_REGION'] + if aks_resource_id.to_s.empty? + @log.info "Environment Variable AKS_RESOURCE_ID is not set.. " + raise Exception.new "Environment Variable AKS_RESOURCE_ID is not set!!" + end + if aks_region.to_s.empty? + @log.info "Environment Variable AKS_REGION is not set.. " + raise Exception.new "Environment Variable AKS_REGION is not set!!" + end + + @@post_request_url = @@post_request_url_template % {aks_region: aks_region, aks_resource_id: aks_resource_id} + @post_request_uri = URI.parse(@@post_request_url) + @http_client = Net::HTTP.new(@post_request_uri.host, @post_request_uri.port) + @http_client.use_ssl = true + @log.info "POST Request url: #{@@post_request_url}" + ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMPluginStart", {}) + end + + # get the access token only if the time to expiry is less than 5 minutes + def get_access_token + if @cached_access_token.to_s.empty? || (Time.now + 5*60 > @token_expiry_time) # token is valid for 60 minutes. Refresh token 5 minutes from expiration + @log.info "Refreshing access token for out_mdm plugin.." 
+ token_uri = URI.parse(@token_url) + http_access_token = Net::HTTP.new(token_uri.host, token_uri.port) + http_access_token.use_ssl = true + token_request = Net::HTTP::Post.new(token_uri.request_uri) + token_request.set_form_data( + { + 'grant_type' => @@grant_type, + 'client_id' => @data_hash['aadClientId'], + 'client_secret' => @data_hash['aadClientSecret'], + 'resource' => @@token_resource_url + } + ) + + token_response = http_access_token.request(token_request) + # Handle the case where the response is not 200 + parsed_json = JSON.parse(token_response.body) + @token_expiry_time = Time.now + 59*60 # set the expiry time to be ~one hour from current time + @cached_access_token = parsed_json['access_token'] + end + @cached_access_token + end + + def write_status_file(success, message) + fn = '/var/opt/microsoft/omsagent/log/MDMIngestion.status' + status = '{ "operation": "MDMIngestion", "success": "%s", "message": "%s" }' % [success, message] + begin + File.open(fn,'w') { |file| file.write(status) } + rescue => e + @log.debug "Error:'#{e}'" + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + end + end + + # This method is called when an event reaches to Fluentd. + # Convert the event to a raw string. + def format(tag, time, record) + if record != {} + @log.trace "Buffering #{tag}" + return [tag, record].to_msgpack + else + return "" + end + end + + # This method is called every flush interval. Send the buffer chunk to MDM. + # 'chunk' is a buffer chunk that includes multiple formatted records + def write(chunk) + begin + if !@first_post_attempt_made || (Time.now > @last_post_attempt_time + retry_mdm_post_wait_minutes*60) + post_body = [] + chunk.msgpack_each {|(tag, record)| + post_body.push(record.to_json) + } + send_to_mdm post_body + else + @log.info "Last Failed POST attempt to MDM was made #{((Time.now - @last_post_attempt_time)/60).round(1)} min ago. This is less than the current retry threshold of #{@retry_mdm_post_wait_minutes} min. 
NO-OP" + end + rescue Exception => e + @log.info "Exception when writing to MDM: #{e}" + end + end + + def send_to_mdm(post_body) + begin + access_token = get_access_token + request = Net::HTTP::Post.new(@post_request_uri.request_uri) + request['Content-Type'] = "application/x-ndjson" + request['Authorization'] = "Bearer #{access_token}" + request.body = post_body.join("\n") + response = @http_client.request(request) + response.value # this throws for non 200 HTTP response code + @log.info "HTTP Post Response Code : #{response.code}" + ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMSendSuccessful", {}) + rescue Net::HTTPServerException => e + @log.info "Failed to Post Metrics to MDM : #{e} Response: #{response}" + @log.debug_backtrace(e.backtrace) + if !response.code.empty? && response.code == 403.to_s + @log.info "Response Code #{response.code} Updating @last_post_attempt_time" + @last_post_attempt_time = Time.now + @first_post_attempt_made = true + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + # Not raising exception, as that will cause retries to happen + else + @log.info "HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}" + raise e + end + rescue Errno::ETIMEDOUT => e + @log.info "Timed out when POSTing Metrics to MDM : #{e} Response: #{response}" + @log.debug_backtrace(e.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + raise e + rescue Exception => e + @log.info "Exception POSTing Metrics to MDM : #{e} Response: #{response}" + @log.debug_backtrace(e.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + raise e + end + end + private + + class ChunkErrorHandler + include Configurable + include PluginId + include PluginLoggerMixin + + SecondaryName = "__ChunkErrorHandler__" + + Plugin.register_output(SecondaryName, self) + + def initialize + @router = nil + end + + def secondary_init(primary) + @error_handlers = create_error_handlers @router + end + + 
def start + # NOP + end + + def shutdown + # NOP + end + + def router=(r) + @router = r + end + + def write(chunk) + chunk.msgpack_each {|(tag, record)| + @error_handlers[tag].emit(record) + } + end + + private + + def create_error_handlers(router) + nop_handler = NopErrorHandler.new + Hash.new() { |hash, tag| + etag = OMS::Common.create_error_tag tag + hash[tag] = router.match?(etag) ? + ErrorHandler.new(router, etag) : + nop_handler + } + end + + class ErrorHandler + def initialize(router, etag) + @router = router + @etag = etag + end + + def emit(record) + @router.emit(@etag, Fluent::Engine.now, record) + end + end + + class NopErrorHandler + def emit(record) + # NOP + end + end + + end + + end # class OutputMDM + +end # module Fluent + From f1b0cd2a1945057340dc48f85ea685b3a5a69b08 Mon Sep 17 00:00:00 2001 From: Kaveesh Dubey Date: Thu, 24 Jan 2019 12:12:01 -0800 Subject: [PATCH 058/160] add ContainerNotRunning column to KubePodInventory --- source/code/plugin/in_kube_podinventory.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index dee3df30b..9b8ee1fb8 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -173,6 +173,7 @@ def parse_and_emit_records(podInventory, serviceList) containerRestartCount = container['restartCount'] record['ContainerRestartCount'] = containerRestartCount containerStatus = container['state'] + record['ContainerNotRunningReason'] = '' # state is of the following form , so just picking up the first key name # "state": { # "waiting": { @@ -190,6 +191,10 @@ def parse_and_emit_records(podInventory, serviceList) #Picking up both container and node start time from cAdvisor to be consistent if containerStatus.keys[0] == "running" record['ContainerCreationTimeStamp'] = container['state']['running']['startedAt'] + else + if !containerStatus[containerStatus.keys[0]]['reason'].nil? 
&& !containerStatus[containerStatus.keys[0]]['reason'].empty? + record['ContainerNotRunningReason'] = containerStatus[containerStatus.keys[0]]['reason'] + end end podRestartCount += containerRestartCount records.push(record.dup) From 616a803a4c962511a2a27e3f8382b8b82c09362c Mon Sep 17 00:00:00 2001 From: Kaveesh Dubey Date: Thu, 24 Jan 2019 13:52:38 -0800 Subject: [PATCH 059/160] merge pr feedback: update name to ContainerStatusReason --- source/code/plugin/in_kube_podinventory.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 9b8ee1fb8..3d026b05f 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -173,7 +173,7 @@ def parse_and_emit_records(podInventory, serviceList) containerRestartCount = container['restartCount'] record['ContainerRestartCount'] = containerRestartCount containerStatus = container['state'] - record['ContainerNotRunningReason'] = '' + record['ContainerStatusReason'] = '' # state is of the following form , so just picking up the first key name # "state": { # "waiting": { @@ -193,7 +193,7 @@ def parse_and_emit_records(podInventory, serviceList) record['ContainerCreationTimeStamp'] = container['state']['running']['startedAt'] else if !containerStatus[containerStatus.keys[0]]['reason'].nil? && !containerStatus[containerStatus.keys[0]]['reason'].empty? 
- record['ContainerNotRunningReason'] = containerStatus[containerStatus.keys[0]]['reason'] + record['ContainerStatusReason'] = containerStatus[containerStatus.keys[0]]['reason'] end end podRestartCount += containerRestartCount From c33ca34233f9adbe02b55c36e7148258041f997d Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 19 Feb 2019 13:10:03 -0800 Subject: [PATCH 060/160] Zero Fill for Missing Pod Phases, Change Namespace Dimension to Kubernetes namespace, as it might be confused with metrics namespace in Metrics Explorer (#194) * Zero Fill for Pod Counts by Phase * Change namespace dimension to Kubernetes namespace --- source/code/plugin/filter_inventory2mdm.rb | 31 +++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/source/code/plugin/filter_inventory2mdm.rb b/source/code/plugin/filter_inventory2mdm.rb index d9864bc1a..8aaa5ff01 100644 --- a/source/code/plugin/filter_inventory2mdm.rb +++ b/source/code/plugin/filter_inventory2mdm.rb @@ -56,7 +56,7 @@ class Inventory2MdmFilter < Filter "namespace": "insights.container/pods", "dimNames": [ "phase", - "namespace", + "Kubernetes namespace", "node", "controllerName" ], @@ -77,7 +77,9 @@ class Inventory2MdmFilter < Filter } } }' - + + @@pod_phase_values = ['Running', 'Pending', 'Succeeded', 'Failed', 'Unknown'] + @process_incoming_stream = true def initialize @@ -151,7 +153,7 @@ def process_node_inventory_records(es) def process_pod_inventory_records(es) timestamp = DateTime.now pod_count_hash = Hash.new - + no_phase_dim_values_hash = Hash.new begin records = [] es.each{|time,record| @@ -173,6 +175,29 @@ def process_pod_inventory_records(es) pod_count = 1 pod_count_hash[pod_key] = pod_count end + + # Collect all possible combinations of dimension values other than pod phase + key_without_phase_dim_value = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue].join('~~') + if no_phase_dim_values_hash.key?(key_without_phase_dim_value) + @log.info 
"#{key_without_phase_dim_value} already present in #{no_phase_dim_values_hash}" + next + else + @log.info "Adding #{key_without_phase_dim_value} to #{no_phase_dim_values_hash}" + no_phase_dim_values_hash[key_without_phase_dim_value] = true + end + } + + # generate all possible values of non_phase_dim_values X pod Phases and zero-fill the ones that are not already present + no_phase_dim_values_hash.each {|key, value| + @@pod_phase_values.each{|phase| + pod_key = [key, phase].join('~~') + if !pod_count_hash.key?(pod_key) + pod_count_hash[pod_key] = 0 + @log.info "Zero filled #{pod_key}" + else + next + end + } } pod_count_hash.each {|key, value| From 2651750f04932a808a214f84cc7a5742fd075591 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Wed, 20 Feb 2019 13:31:23 -0800 Subject: [PATCH 061/160] No Retries for non 404 4xx errors (#196) --- source/code/plugin/out_mdm.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 2f36ea7d5..6bde98534 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -126,6 +126,7 @@ def write(chunk) end rescue Exception => e @log.info "Exception when writing to MDM: #{e}" + raise e end end @@ -149,7 +150,11 @@ def send_to_mdm(post_body) @first_post_attempt_made = true ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) # Not raising exception, as that will cause retries to happen - else + elsif !response.code.empty? 
&& response.code.start_with?('4') + # Log 400 errors and continue + @log.info "Non-retryable HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}" + else + # raise if the response code is non-400 @log.info "HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}" raise e end From 195bc3382342c2dfe1f7bd28e623486553b5d59f Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 14:51:56 -0800 Subject: [PATCH 062/160] Update agent version for telemetry --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 29c98bdf1..863e2d86a 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -28,5 +28,5 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion ciprod01092019 + AgentVersion ciprod01202019 From 59d6c61e6a5d0841333dca6a685fd0e633b9b53c Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 15:51:09 -0800 Subject: [PATCH 063/160] Update readme for upcoming (ciprod01202019) release --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index 099a065e8..8b5898e92 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,22 @@ additional questions or comments. 
Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) +### 01/20/2019 - Version microsoft/oms:ciprod01202019 +- Container logs enrichment optimization +- Get container meta data only for containers in current node (vs cluster before) +- Update fluent bit 0.13.7 => 0.14.4 +- This fixes the escaping issue in the container logs +- Mooncake cloud support for agent +- Ability to disable agent telemetry +- Ability to onboard and ingest to mooncake cloud +- Add & populate 'ContainerStatusReason' column to KubePodInventory +- Alertable (custom) metrics (to AzureMonitor - only for AKS clusters) +- Cpuusagenanocores & % +- MemoryWorkingsetBytes & % +- MemoryRssBytes & % +- Podcount by node, phase & namespace +- Nodecount + ### 01/09/2018 - Version microsoft/oms:ciprod01092019 - Omsagent - 1.8.1.256 (nov 2018 release) - Persist fluentbit state between container restarts From 0189bc0a7a8cc5bd1f657baea8a12895e5861ffe Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 15:53:35 -0800 Subject: [PATCH 064/160] fix readme formatting --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 8b5898e92..14c07e948 100644 --- a/README.md +++ b/README.md @@ -13,19 +13,19 @@ Note : The agent version(s) below has dates (ciprod), which indicate t ### 01/20/2019 - Version microsoft/oms:ciprod01202019 - Container logs enrichment optimization -- Get container meta data only for containers in current node (vs cluster before) +..*Get container meta data only for containers in current node (vs cluster before) - Update fluent bit 0.13.7 => 0.14.4 -- This fixes the escaping issue in the container logs +..*This fixes the escaping issue in the container logs - Mooncake cloud support for agent -- Ability to disable agent telemetry -- Ability to onboard and ingest to mooncake cloud +..*Ability to disable agent telemetry +..*Ability to onboard and ingest to mooncake 
cloud - Add & populate 'ContainerStatusReason' column to KubePodInventory - Alertable (custom) metrics (to AzureMonitor - only for AKS clusters) -- Cpuusagenanocores & % -- MemoryWorkingsetBytes & % -- MemoryRssBytes & % -- Podcount by node, phase & namespace -- Nodecount +..*Cpuusagenanocores & % +..*MemoryWorkingsetBytes & % +..*MemoryRssBytes & % +..*Podcount by node, phase & namespace +..*Nodecount ### 01/09/2018 - Version microsoft/oms:ciprod01092019 - Omsagent - 1.8.1.256 (nov 2018 release) From 8221d2dd849427a08c0dcd6781cd050a8380c551 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 15:54:08 -0800 Subject: [PATCH 065/160] fix formatting for readme --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 14c07e948..1a4506f1e 100644 --- a/README.md +++ b/README.md @@ -13,19 +13,19 @@ Note : The agent version(s) below has dates (ciprod), which indicate t ### 01/20/2019 - Version microsoft/oms:ciprod01202019 - Container logs enrichment optimization -..*Get container meta data only for containers in current node (vs cluster before) +..* Get container meta data only for containers in current node (vs cluster before) - Update fluent bit 0.13.7 => 0.14.4 -..*This fixes the escaping issue in the container logs +..* This fixes the escaping issue in the container logs - Mooncake cloud support for agent -..*Ability to disable agent telemetry -..*Ability to onboard and ingest to mooncake cloud +..* Ability to disable agent telemetry +..* Ability to onboard and ingest to mooncake cloud - Add & populate 'ContainerStatusReason' column to KubePodInventory - Alertable (custom) metrics (to AzureMonitor - only for AKS clusters) -..*Cpuusagenanocores & % -..*MemoryWorkingsetBytes & % -..*MemoryRssBytes & % -..*Podcount by node, phase & namespace -..*Nodecount +..* Cpuusagenanocores & % +..* MemoryWorkingsetBytes & % +..* MemoryRssBytes & % +..* Podcount by node, phase & namespace +..* Nodecount 
### 01/09/2018 - Version microsoft/oms:ciprod01092019 - Omsagent - 1.8.1.256 (nov 2018 release) From 30aa305a0546474d55889ea63c7ab8ef84ae9dca Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 15:57:17 -0800 Subject: [PATCH 066/160] fix formatting for readme --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 1a4506f1e..ab621104a 100644 --- a/README.md +++ b/README.md @@ -13,19 +13,19 @@ Note : The agent version(s) below has dates (ciprod), which indicate t ### 01/20/2019 - Version microsoft/oms:ciprod01202019 - Container logs enrichment optimization -..* Get container meta data only for containers in current node (vs cluster before) + * Get container meta data only for containers in current node (vs cluster before) - Update fluent bit 0.13.7 => 0.14.4 -..* This fixes the escaping issue in the container logs + * This fixes the escaping issue in the container logs - Mooncake cloud support for agent -..* Ability to disable agent telemetry -..* Ability to onboard and ingest to mooncake cloud + * Ability to disable agent telemetry + * Ability to onboard and ingest to mooncake cloud - Add & populate 'ContainerStatusReason' column to KubePodInventory - Alertable (custom) metrics (to AzureMonitor - only for AKS clusters) -..* Cpuusagenanocores & % -..* MemoryWorkingsetBytes & % -..* MemoryRssBytes & % -..* Podcount by node, phase & namespace -..* Nodecount + * Cpuusagenanocores & % + * MemoryWorkingsetBytes & % + * MemoryRssBytes & % + * Podcount by node, phase & namespace + * Nodecount ### 01/09/2018 - Version microsoft/oms:ciprod01092019 - Omsagent - 1.8.1.256 (nov 2018 release) From f401116124985b1c24f56557f957f00da423d6cd Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 15:59:29 -0800 Subject: [PATCH 067/160] fix readme --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ab621104a..125aec3bb 
100644 --- a/README.md +++ b/README.md @@ -16,16 +16,16 @@ Note : The agent version(s) below has dates (ciprod), which indicate t * Get container meta data only for containers in current node (vs cluster before) - Update fluent bit 0.13.7 => 0.14.4 * This fixes the escaping issue in the container logs -- Mooncake cloud support for agent +- Mooncake cloud support for agent (AKS only) * Ability to disable agent telemetry * Ability to onboard and ingest to mooncake cloud - Add & populate 'ContainerStatusReason' column to KubePodInventory - Alertable (custom) metrics (to AzureMonitor - only for AKS clusters) - * Cpuusagenanocores & % - * MemoryWorkingsetBytes & % - * MemoryRssBytes & % - * Podcount by node, phase & namespace - * Nodecount + * Cpuusagenanocores & % metric + * MemoryWorkingsetBytes & % metric + * MemoryRssBytes & % metric + * Podcount by node, phase & namespace metric + * Nodecount metric ### 01/09/2018 - Version microsoft/oms:ciprod01092019 - Omsagent - 1.8.1.256 (nov 2018 release) From a2f45afdac70173c994d73cd88ba34b20cd817d9 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 16:13:57 -0800 Subject: [PATCH 068/160] fix readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 125aec3bb..4313de5c0 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ additional questions or comments. 
Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) -### 01/20/2019 - Version microsoft/oms:ciprod01202019 +### 01/20/2019 - Version microsoft/oms:ciprod02202019 - Container logs enrichment optimization * Get container meta data only for containers in current node (vs cluster before) - Update fluent bit 0.13.7 => 0.14.4 From 759dbb57e1472df8476ad7acfd8fbc9231207e3a Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 16:14:48 -0800 Subject: [PATCH 069/160] fix agent version for telemetry --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 863e2d86a..467489d1c 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -28,5 +28,5 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion ciprod01202019 + AgentVersion ciprod02202019 From 7956f40d075476dc85633b53d72ed4eb8dfdc303 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 17:16:25 -0800 Subject: [PATCH 070/160] fix date in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4313de5c0..59faf7e4d 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ additional questions or comments. 
Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) -### 01/20/2019 - Version microsoft/oms:ciprod02202019 +### 02/20/2019 - Version microsoft/oms:ciprod02202019 - Container logs enrichment optimization * Get container meta data only for containers in current node (vs cluster before) - Update fluent bit 0.13.7 => 0.14.4 From ee056568eee328b2d37a0d7a75e1ccec370f1729 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 21 Feb 2019 09:15:08 -0800 Subject: [PATCH 071/160] update readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 59faf7e4d..b8d08b05a 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ Note : The agent version(s) below has dates (ciprod), which indicate t * MemoryRssBytes & % metric * Podcount by node, phase & namespace metric * Nodecount metric +- ContainerNodeInventory_CL to fixed type ### 01/09/2018 - Version microsoft/oms:ciprod01092019 - Omsagent - 1.8.1.256 (nov 2018 release) From 2abcf67413b7c3fcbc8d1cd80511e1566fc124ba Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Thu, 21 Feb 2019 12:56:09 -0800 Subject: [PATCH 072/160] Restart logs every 10MB instead of weekly (#198) * Rotate logs every 10MB instead of weekly * Removing some logging, fixed log rotation --- source/code/plugin/filter_cadvisor2mdm.rb | 3 +-- source/code/plugin/filter_inventory2mdm.rb | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/source/code/plugin/filter_cadvisor2mdm.rb b/source/code/plugin/filter_cadvisor2mdm.rb index 85f9f688e..94f2107cc 100644 --- a/source/code/plugin/filter_cadvisor2mdm.rb +++ b/source/code/plugin/filter_cadvisor2mdm.rb @@ -63,7 +63,7 @@ def configure(conf) @log = nil if @enable_log - @log = Logger.new(@log_path, 'weekly') + @log = Logger.new(@log_path, 1, 5000000) @log.debug {'Starting filter_cadvisor2mdm plugin'} end end @@ -191,7 +191,6 @@ def get_metric_records(record, metric_name, metric_value, 
percentage_metric_valu } records.push(JSON.parse(additional_record)) end - @log.info "Metric Name: #{metric_name} Metric Value: #{metric_value} Percentage Metric Value: #{percentage_metric_value}" return records end diff --git a/source/code/plugin/filter_inventory2mdm.rb b/source/code/plugin/filter_inventory2mdm.rb index 8aaa5ff01..84f12dd06 100644 --- a/source/code/plugin/filter_inventory2mdm.rb +++ b/source/code/plugin/filter_inventory2mdm.rb @@ -91,7 +91,7 @@ def configure(conf) @log = nil if @enable_log - @log = Logger.new(@log_path, 'weekly') + @log = Logger.new(@log_path, 1, 5000000) @log.debug {'Starting filter_inventory2mdm plugin'} end end @@ -179,10 +179,8 @@ def process_pod_inventory_records(es) # Collect all possible combinations of dimension values other than pod phase key_without_phase_dim_value = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue].join('~~') if no_phase_dim_values_hash.key?(key_without_phase_dim_value) - @log.info "#{key_without_phase_dim_value} already present in #{no_phase_dim_values_hash}" next else - @log.info "Adding #{key_without_phase_dim_value} to #{no_phase_dim_values_hash}" no_phase_dim_values_hash[key_without_phase_dim_value] = true end } From 18c107c4678cbbc53f14829458e781cc3b07d2c3 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 21 Feb 2019 13:30:42 -0800 Subject: [PATCH 073/160] update agent version for telemetry --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 467489d1c..974e8564a 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -28,5 +28,5 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion ciprod02202019 + AgentVersion ciprod02212019 From 14b2b87c15bd4d49e2e5982789a5ba2649b3fc32 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 21 Feb 2019 13:33:02 -0800 Subject: [PATCH 074/160] update 
readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b8d08b05a..f72a16f1e 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ additional questions or comments. Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) -### 02/20/2019 - Version microsoft/oms:ciprod02202019 +### 02/21/2019 - Version microsoft/oms:ciprod02212019 - Container logs enrichment optimization * Get container meta data only for containers in current node (vs cluster before) - Update fluent bit 0.13.7 => 0.14.4 From 5479dff7a93cc8f640412a90cac8523c283c201d Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 22 Feb 2019 11:44:15 -0800 Subject: [PATCH 075/160] Update kube.conf to use %STATE_DIR_WS% instead of hardcoded path --- installer/conf/kube.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 31a0778d3..454df6e91 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -159,7 +159,7 @@ num_threads 5 buffer_chunk_limit 20m buffer_type file - buffer_path /var/opt/microsoft/omsagent/6bb1e963-b08c-43a8-b708-1628305e964a/state/out_mdm_*.buffer + buffer_path %STATE_DIR_WS%/out_mdm_*.buffer buffer_queue_limit 20 buffer_queue_full_action drop_oldest_chunk flush_interval 20s @@ -167,4 +167,4 @@ retry_wait 30s max_retry_wait 9m retry_mdm_post_wait_minutes 60 - \ No newline at end of file + From cdded2ee004d2c72e09cb881448dfc4fde49332f Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Mon, 4 Mar 2019 15:38:18 -0800 Subject: [PATCH 076/160] Fix AKSEngine Crash (#200) --- source/code/plugin/CustomMetricsUtils.rb | 4 ++-- source/code/plugin/out_mdm.rb | 23 ++++++++++++++++++----- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/source/code/plugin/CustomMetricsUtils.rb b/source/code/plugin/CustomMetricsUtils.rb index d06c9ad91..a19580630 100644 --- 
a/source/code/plugin/CustomMetricsUtils.rb +++ b/source/code/plugin/CustomMetricsUtils.rb @@ -9,8 +9,8 @@ class << self def check_custom_metrics_availability(custom_metric_regions) aks_region = ENV['AKS_REGION'] aks_resource_id = ENV['AKS_RESOURCE_ID'] - if aks_region.to_s.empty? && aks_resource_id.to_s.empty? - false # This will also take care of AKS-Engine Scenario. AKS_REGION/AKS_RESOURCE_ID is not set for AKS-Engine. Only ACS_RESOURCE_NAME is set + if aks_region.to_s.empty? || aks_resource_id.to_s.empty? + return false # This will also take care of AKS-Engine Scenario. AKS_REGION/AKS_RESOURCE_ID is not set for AKS-Engine. Only ACS_RESOURCE_NAME is set end custom_metrics_regions_arr = custom_metric_regions.split(',') diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 6bde98534..274f450fd 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -29,6 +29,7 @@ def initialize @cached_access_token = String.new @last_post_attempt_time = Time.now @first_post_attempt_made = false + @can_send_data_to_mdm = true end def configure(conf) @@ -39,7 +40,13 @@ def configure(conf) def start super - file = File.read(@@azure_json_path) + begin + file = File.read(@@azure_json_path) + rescue => e + @log.info "Unable to read file #{@@azure_json_path} #{e}" + @can_send_data_to_mdm = false + return + end # Handle the case where the file read fails. Send Telemetry and exit the plugin? @data_hash = JSON.parse(file) @token_url = @@token_url_template % {tenant_id: @data_hash['tenantId']} @@ -48,11 +55,13 @@ def start aks_region = ENV['AKS_REGION'] if aks_resource_id.to_s.empty? @log.info "Environment Variable AKS_RESOURCE_ID is not set.. " - raise Exception.new "Environment Variable AKS_RESOURCE_ID is not set!!" + @can_send_data_to_mdm = false + return end if aks_region.to_s.empty? @log.info "Environment Variable AKS_REGION is not set.. " - raise Exception.new "Environment Variable AKS_REGION is not set!!" 
+ @can_send_data_to_mdm = false + return end @@post_request_url = @@post_request_url_template % {aks_region: aks_region, aks_resource_id: aks_resource_id} @@ -115,14 +124,18 @@ def format(tag, time, record) # 'chunk' is a buffer chunk that includes multiple formatted records def write(chunk) begin - if !@first_post_attempt_made || (Time.now > @last_post_attempt_time + retry_mdm_post_wait_minutes*60) + if (!@first_post_attempt_made || (Time.now > @last_post_attempt_time + retry_mdm_post_wait_minutes*60)) && @can_send_data_to_mdm post_body = [] chunk.msgpack_each {|(tag, record)| post_body.push(record.to_json) } send_to_mdm post_body else - @log.info "Last Failed POST attempt to MDM was made #{((Time.now - @last_post_attempt_time)/60).round(1)} min ago. This is less than the current retry threshold of #{@retry_mdm_post_wait_minutes} min. NO-OP" + if !@can_send_data_to_mdm + @log.info "Cannot send data to MDM since all required conditions were not met" + else + @log.info "Last Failed POST attempt to MDM was made #{((Time.now - @last_post_attempt_time)/60).round(1)} min ago. This is less than the current retry threshold of #{@retry_mdm_post_wait_minutes} min. 
NO-OP" + end end rescue Exception => e @log.info "Exception when writing to MDM: #{e}" From 57be1c4be9f3a6234a9aff130da2ef327c958d1c Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Tue, 12 Mar 2019 17:47:17 -0700 Subject: [PATCH 077/160] hotfix * close resp.Body * remove chatty logs * membuf=5m and ignore files not updated since 5 mins --- installer/conf/td-agent-bit.conf | 7 ++++--- source/code/go/src/plugins/oms.go | 11 ++++------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index f01857cd7..9175b68ce 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -10,16 +10,17 @@ Path /var/log/containers/*.log DB /var/log/omsagent-fblogs.db Parser docker - Mem_Buf_Limit 30m + Mem_Buf_Limit 5m Path_Key filepath Skip_Long_Lines On + Ignore_Older 5m [INPUT] Name tail Tag oms.container.log.flbplugin.* Path /var/log/containers/omsagent*.log DB /var/opt/microsoft/docker-cimprov/state/omsagent-ai.db - Mem_Buf_Limit 30m + Mem_Buf_Limit 2m Path_Key filepath Skip_Long_Lines On @@ -28,6 +29,6 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion ciprod02212019 + AgentVersion ciprod03122019 diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index d913c6c32..36cf20273 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -246,16 +246,11 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { if val, ok := imageIDMap[containerID]; ok { stringMap["Image"] = val - } else { - Log("ContainerId %s not present in Name Map ", containerID) - } + } if val, ok := nameIDMap[containerID]; ok { stringMap["Name"] = val - } else { - Log("ContainerId %s not present in Image Map ", containerID) - } - + } dataItem := DataItem{ ID: stringMap["Id"], @@ -319,6 +314,8 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { 
return output.FLB_RETRY } + defer resp.Body.Close() + numRecords := len(dataItems) Log("Successfully flushed %d records in %s", numRecords, elapsed) ContainerLogTelemetryMutex.Lock() From 940a6eb2c1adc215e0dccdc33579159a961f4b9a Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Tue, 12 Mar 2019 17:59:57 -0700 Subject: [PATCH 078/160] fix readme for new version --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index f72a16f1e..0a0b9ce08 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,13 @@ additional questions or comments. ## Release History Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) + +### 03/12/2019 - Version microsoft/oms:ciprod03122019 +- Fix for closing response.Body in outoms +- Update Mem_Buf_Limit to 5m for fluentbit +- Tail only files that were modified since 5 minutes +- Remove some unwanted logs that are chatty in outoms +- Fix for MDM disablement for AKS-Engine ### 02/21/2019 - Version microsoft/oms:ciprod02212019 - Container logs enrichment optimization From 411582432119d9d2ace3b8f3b9b0a2aad12089c5 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Wed, 13 Mar 2019 11:25:12 -0700 Subject: [PATCH 079/160] Fix the pod count in mdm agent plugin (#203) --- source/code/plugin/filter_inventory2mdm.rb | 35 ++++++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/source/code/plugin/filter_inventory2mdm.rb b/source/code/plugin/filter_inventory2mdm.rb index 84f12dd06..553c857b7 100644 --- a/source/code/plugin/filter_inventory2mdm.rb +++ b/source/code/plugin/filter_inventory2mdm.rb @@ -154,20 +154,42 @@ def process_pod_inventory_records(es) timestamp = DateTime.now pod_count_hash = Hash.new no_phase_dim_values_hash = Hash.new + total_pod_count = 0 + pod_count_by_phase = {} + podUids = {} + record_count = 0 begin records = [] es.each{|time,record| - + record_count += 1 timestamp = 
record['DataItems'][0]['CollectionTime'] + podUid = record['DataItems'][0]['PodUid'] + + if podUids.key?(podUid) + #@log.info "pod with #{podUid} already counted" + next + end + + podUids[podUid] = true podPhaseDimValue = record['DataItems'][0]['PodStatus'] podNamespaceDimValue = record['DataItems'][0]['Namespace'] podControllerNameDimValue = record['DataItems'][0]['ControllerName'] podNodeDimValue = record['DataItems'][0]['Computer'] - + # group by distinct dimension values pod_key = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue, podPhaseDimValue].join('~~') - - if pod_count_hash.key?(pod_key) + + if pod_count_by_phase.key?(podPhaseDimValue) + phase_count = pod_count_by_phase[podPhaseDimValue] + phase_count += 1 + pod_count_by_phase[podPhaseDimValue] = phase_count + else + pod_count_by_phase[podPhaseDimValue] = 1 + end + + total_pod_count += 1 + + if pod_count_hash.key?(pod_key) pod_count = pod_count_hash[pod_key] pod_count = pod_count + 1 pod_count_hash[pod_key] = pod_count @@ -175,7 +197,7 @@ def process_pod_inventory_records(es) pod_count = 1 pod_count_hash[pod_key] = pod_count end - + # Collect all possible combinations of dimension values other than pod phase key_without_phase_dim_value = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue].join('~~') if no_phase_dim_values_hash.key?(key_without_phase_dim_value) @@ -191,7 +213,7 @@ def process_pod_inventory_records(es) pod_key = [key, phase].join('~~') if !pod_count_hash.key?(pod_key) pod_count_hash[pod_key] = 0 - @log.info "Zero filled #{pod_key}" + #@log.info "Zero filled #{pod_key}" else next end @@ -227,6 +249,7 @@ def process_pod_inventory_records(es) ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) return [],timestamp end + @log.info "Record Count #{record_count} pod count = #{total_pod_count} Pod Count To Phase #{pod_count_by_phase} " return records, timestamp end From df2e64c19bc9e427c72ffe492375b598a8933bfe Mon Sep 17 00:00:00 2001 From: Vishwanath 
Date: Wed, 13 Mar 2019 11:27:48 -0700 Subject: [PATCH 080/160] Update readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 0a0b9ce08..916863dbf 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ Note : The agent version(s) below has dates (ciprod), which indicate t - Tail only files that were modified since 5 minutes - Remove some unwanted logs that are chatty in outoms - Fix for MDM disablement for AKS-Engine +- Fix for Pod count metric (same as container count) in MDM ### 02/21/2019 - Version microsoft/oms:ciprod02212019 - Container logs enrichment optimization From 19c2bc7864a4aabade944c327101ddc789850059 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Wed, 13 Mar 2019 12:13:12 -0700 Subject: [PATCH 081/160] string freeze for out_mdm plugin --- source/code/plugin/out_mdm.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 274f450fd..93b32ef50 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -1,3 +1,6 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + module Fluent class OutputMDM < BufferedOutput From 69935b305ab3552bc8626c8f81a802ec559a31e4 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Mon, 1 Apr 2019 11:09:27 -0700 Subject: [PATCH 082/160] Vishwa/resourcecentric (#208) * resourceid fix (for AKS only) * fix name --- source/code/go/src/plugins/oms.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 36cf20273..a1ca3d6ee 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -44,6 +44,10 @@ var ( Computer string // WorkspaceID log analytics workspace id WorkspaceID string + // ResourceID for resource-centric log analytics data + ResourceID string + // Resource-centric flag (will be true if we determine if above RseourceID is non-empty - default is false) + 
ResourceCentric bool ) var ( @@ -294,6 +298,10 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { } req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) req.Header.Set("Content-Type", "application/json") + //expensive to do string len for every request, so use a flag + if ResourceCentric == true { + req.Header.Set("x-ms-AzureResourceId", ResourceID) + } resp, err := HTTPClient.Do(req) elapsed := time.Since(start) @@ -377,6 +385,11 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { } OMSEndpoint = omsadminConf["OMS_ENDPOINT"] WorkspaceID = omsadminConf["WORKSPACE_ID"] + ResourceID = os.Getenv("customResourceId") + if len(ResourceID) > 0 { + ResourceCentric = true + Log("OMS ResourceId=%s",ResourceID) + } Log("OMSEndpoint %s", OMSEndpoint) // Initialize image,name map refresh ticker From 6953f50a62c7faade0db553e0839f137b252309b Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Mon, 1 Apr 2019 14:48:19 -0700 Subject: [PATCH 083/160] Rashmi/win nodepool - PR (#206) * changes for win nodes enumeration * changes * changes * changes * node cpu metric rate changes * container cpu rate * changes * changes * changes * changes * changes * changes to include in_win_cadvisor_perf.rb file * send containerinventoryheartbeatevent * changes * cahnges for mdm metrics * changes * cahnges * changes * container states * changes * changes * changes for env variables * changes * changes * changes * changes * delete comments * changes * mutex changes * changes * changes * changes * telemetry fix for docker version * removing hardcoded values for mdm * update docker version * telemetry for windows cadvisor timeouts * exeception key update to computer * PR comments --- installer/conf/kube.conf | 47 + installer/datafiles/base_container.data | 1 + .../code/plugin/ApplicationInsightsUtility.rb | 379 +++--- .../code/plugin/CAdvisorMetricsAPIClient.rb | 1020 ++++++++++------- source/code/plugin/KubernetesApiClient.rb | 
938 +++++++-------- source/code/plugin/in_cadvisor_perf.rb | 152 ++- source/code/plugin/in_containerinventory.rb | 179 ++- source/code/plugin/in_kube_nodes.rb | 319 +++--- source/code/plugin/in_kube_podinventory.rb | 397 ++++--- source/code/plugin/in_win_cadvisor_perf.rb | 120 ++ source/code/plugin/out_mdm.rb | 94 +- 11 files changed, 2096 insertions(+), 1550 deletions(-) create mode 100644 source/code/plugin/in_win_cadvisor_perf.rb diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 454df6e91..0dfa3710e 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -47,12 +47,44 @@ log_level debug +#cadvisor perf- Windows nodes + + type wincadvisorperf + tag oms.api.wincadvisorperf + run_interval 60s + log_level debug + + type filter_inventory2mdm custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope log_level info +#custom_metrics_mdm filter plugin for perf data from windows nodes + + type filter_cadvisor2mdm + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope + metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes + log_level info + + + + type out_mdm + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + retry_mdm_post_wait_minutes 60 + + type out_oms log_level debug @@ -168,3 +200,18 @@ max_retry_wait 9m retry_mdm_post_wait_minutes 60 + + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_api_wincadvisorperf*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + \ No newline at end of file diff --git 
a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index c263aa505..9c4d563f8 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -34,6 +34,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/CAdvisorMetricsAPIClient.rb; source/code/plugin/CAdvisorMetricsAPIClient.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_perf.rb; source/code/plugin/in_kube_perf.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_cadvisor_perf.rb; source/code/plugin/in_cadvisor_perf.rb; 644; root; root +/opt/microsoft/omsagent/plugin/in_win_cadvisor_perf.rb; source/code/plugin/in_win_cadvisor_perf.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_services.rb; source/code/plugin/in_kube_services.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_nodes.rb; source/code/plugin/in_kube_nodes.rb; 644; root; root /opt/microsoft/omsagent/plugin/filter_inventory2mdm.rb; source/code/plugin/filter_inventory2mdm.rb; 644; root; root diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 5c5e92a6c..5dc2bfab8 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -2,209 +2,222 @@ # frozen_string_literal: true class ApplicationInsightsUtility - require_relative 'lib/application_insights' - require_relative 'omslog' - require_relative 'DockerApiClient' - require_relative 'oms_common' - require 'json' - require 'base64' + require_relative "lib/application_insights" + require_relative "omslog" + require_relative "DockerApiClient" + require_relative "oms_common" + require "json" + require "base64" - @@HeartBeat = 'HeartBeatEvent' - @@Exception = 'ExceptionEvent' - @@AcsClusterType = 'ACS' - @@AksClusterType = 'AKS' - @OmsAdminFilePath = '/etc/opt/microsoft/omsagent/conf/omsadmin.conf' - @@EnvAcsResourceName = 'ACS_RESOURCE_NAME' - @@EnvAksRegion = 
'AKS_REGION' - @@EnvAgentVersion = 'AGENT_VERSION' - @@EnvApplicationInsightsKey = 'APPLICATIONINSIGHTS_AUTH' - @@EnvControllerType = 'CONTROLLER_TYPE' + @@HeartBeat = "HeartBeatEvent" + @@Exception = "ExceptionEvent" + @@AcsClusterType = "ACS" + @@AksClusterType = "AKS" + @OmsAdminFilePath = "/etc/opt/microsoft/omsagent/conf/omsadmin.conf" + @@EnvAcsResourceName = "ACS_RESOURCE_NAME" + @@EnvAksRegion = "AKS_REGION" + @@EnvAgentVersion = "AGENT_VERSION" + @@EnvApplicationInsightsKey = "APPLICATIONINSIGHTS_AUTH" + @@EnvControllerType = "CONTROLLER_TYPE" - @@CustomProperties = {} - @@Tc = nil - @@hostName = (OMS::Common.get_hostname) + @@CustomProperties = {} + @@Tc = nil + @@hostName = (OMS::Common.get_hostname) - def initialize - end + def initialize + end - class << self - #Set default properties for telemetry event - def initializeUtility() - begin - resourceInfo = ENV['AKS_RESOURCE_ID'] - if resourceInfo.nil? || resourceInfo.empty? - @@CustomProperties["ACSResourceName"] = ENV[@@EnvAcsResourceName] - @@CustomProperties["ClusterType"] = @@AcsClusterType - @@CustomProperties["SubscriptionID"] = "" - @@CustomProperties["ResourceGroupName"] = "" - @@CustomProperties["ClusterName"] = "" - @@CustomProperties["Region"] = "" - else - @@CustomProperties["AKS_RESOURCE_ID"] = resourceInfo - begin - splitStrings = resourceInfo.split('/') - subscriptionId = splitStrings[2] - resourceGroupName = splitStrings[4] - clusterName = splitStrings[8] - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: parsing AKS resourceId: #{resourceInfo}, error: #{errorStr}") - end - @@CustomProperties["ClusterType"] = @@AksClusterType - @@CustomProperties["SubscriptionID"] = subscriptionId - @@CustomProperties["ResourceGroupName"] = resourceGroupName - @@CustomProperties["ClusterName"] = clusterName - @@CustomProperties["Region"] = ENV[@@EnvAksRegion] - end + class << self + #Set default properties for telemetry event + def initializeUtility() + begin + resourceInfo = 
ENV["AKS_RESOURCE_ID"] + if resourceInfo.nil? || resourceInfo.empty? + @@CustomProperties["ACSResourceName"] = ENV[@@EnvAcsResourceName] + @@CustomProperties["ClusterType"] = @@AcsClusterType + @@CustomProperties["SubscriptionID"] = "" + @@CustomProperties["ResourceGroupName"] = "" + @@CustomProperties["ClusterName"] = "" + @@CustomProperties["Region"] = "" + else + @@CustomProperties["AKS_RESOURCE_ID"] = resourceInfo + begin + splitStrings = resourceInfo.split("/") + subscriptionId = splitStrings[2] + resourceGroupName = splitStrings[4] + clusterName = splitStrings[8] + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: parsing AKS resourceId: #{resourceInfo}, error: #{errorStr}") + end + @@CustomProperties["ClusterType"] = @@AksClusterType + @@CustomProperties["SubscriptionID"] = subscriptionId + @@CustomProperties["ResourceGroupName"] = resourceGroupName + @@CustomProperties["ClusterName"] = clusterName + @@CustomProperties["Region"] = ENV[@@EnvAksRegion] + end - getDockerInfo() - @@CustomProperties['WorkspaceID'] = getWorkspaceId - @@CustomProperties['AgentVersion'] = ENV[@@EnvAgentVersion] - @@CustomProperties['ControllerType'] = ENV[@@EnvControllerType] - encodedAppInsightsKey = ENV[@@EnvApplicationInsightsKey] + #Commenting it for now from initilize method, we need to pivot all telemetry off of kubenode docker version + #getDockerInfo() + @@CustomProperties["WorkspaceID"] = getWorkspaceId + @@CustomProperties["AgentVersion"] = ENV[@@EnvAgentVersion] + @@CustomProperties["ControllerType"] = ENV[@@EnvControllerType] + encodedAppInsightsKey = ENV[@@EnvApplicationInsightsKey] - #Check if telemetry is turned off - telemetryOffSwitch = ENV['DISABLE_TELEMETRY'] - if telemetryOffSwitch && !telemetryOffSwitch.nil? && !telemetryOffSwitch.empty? && telemetryOffSwitch.downcase == "true".downcase - $log.warn("AppInsightsUtility: Telemetry is disabled") - @@Tc = ApplicationInsights::TelemetryClient.new - elsif !encodedAppInsightsKey.nil? 
- decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey) - @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey - - end - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: initilizeUtility - error: #{errorStr}") - end + #Check if telemetry is turned off + telemetryOffSwitch = ENV["DISABLE_TELEMETRY"] + if telemetryOffSwitch && !telemetryOffSwitch.nil? && !telemetryOffSwitch.empty? && telemetryOffSwitch.downcase == "true".downcase + $log.warn("AppInsightsUtility: Telemetry is disabled") + @@Tc = ApplicationInsights::TelemetryClient.new + elsif !encodedAppInsightsKey.nil? + decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey) + @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: initilizeUtility - error: #{errorStr}") + end + end - def getDockerInfo() - dockerInfo = DockerApiClient.dockerInfo - if (!dockerInfo.nil? && !dockerInfo.empty?) - @@CustomProperties['DockerVersion'] = dockerInfo['Version'] - @@CustomProperties['DockerApiVersion'] = dockerInfo['ApiVersion'] - end - end + def getDockerInfo() + dockerInfo = DockerApiClient.dockerInfo + if (!dockerInfo.nil? && !dockerInfo.empty?) + @@CustomProperties["DockerVersion"] = dockerInfo["Version"] + #@@CustomProperties["DockerApiVersion"] = dockerInfo["ApiVersion"] + end + end - def sendHeartBeatEvent(pluginName) - begin - eventName = pluginName + @@HeartBeat - if !(@@Tc.nil?) - @@Tc.track_event eventName , :properties => @@CustomProperties - @@Tc.flush - $log.info("AppInsights Heartbeat Telemetry sent successfully") - end - rescue =>errorStr - $log.warn("Exception in AppInsightsUtility: sendHeartBeatEvent - error: #{errorStr}") - end + def sendHeartBeatEvent(pluginName) + begin + eventName = pluginName + @@HeartBeat + if !(@@Tc.nil?) 
+ @@Tc.track_event eventName, :properties => @@CustomProperties + @@Tc.flush + $log.info("AppInsights Heartbeat Telemetry sent successfully") end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendHeartBeatEvent - error: #{errorStr}") + end + end - def sendLastProcessedContainerInventoryCountMetric(pluginName, properties) - begin - if !(@@Tc.nil?) - @@Tc.track_metric 'LastProcessedContainerInventoryCount', properties['ContainerCount'], - :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, - :properties => @@CustomProperties - @@Tc.flush - $log.info("AppInsights Container Count Telemetry sent successfully") - end - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: sendCustomMetric - error: #{errorStr}") - end + def sendLastProcessedContainerInventoryCountMetric(pluginName, properties) + begin + if !(@@Tc.nil?) + @@Tc.track_metric "LastProcessedContainerInventoryCount", properties["ContainerCount"], + :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, + :properties => @@CustomProperties + @@Tc.flush + $log.info("AppInsights Container Count Telemetry sent successfully") end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendCustomMetric - error: #{errorStr}") + end + end - def sendCustomEvent(eventName, properties) - begin - if @@CustomProperties.empty? || @@CustomProperties.nil? - initializeUtility() - end - if !(@@Tc.nil?) - @@Tc.track_event eventName, :properties => @@CustomProperties - @@Tc.flush - $log.info("AppInsights Custom Event #{eventName} sent successfully") - end - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: sendCustomEvent - error: #{errorStr}") - end + def sendCustomEvent(eventName, properties) + begin + if @@CustomProperties.empty? || @@CustomProperties.nil? 
+ initializeUtility() + end + telemetryProps = {} + # add common dimensions + @@CustomProperties.each { |k, v| telemetryProps[k] = v } + # add passed-in dimensions if any + if (!properties.nil? && !properties.empty?) + properties.each { |k, v| telemetryProps[k] = v } + end + if !(@@Tc.nil?) + @@Tc.track_event eventName, :properties => telemetryProps + @@Tc.flush + $log.info("AppInsights Custom Event #{eventName} sent successfully") end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendCustomEvent - error: #{errorStr}") + end + end - def sendExceptionTelemetry(errorStr) - begin - if @@CustomProperties.empty? || @@CustomProperties.nil? - initializeUtility() - elsif @@CustomProperties['DockerVersion'].nil? - getDockerInfo() - end - if !(@@Tc.nil?) - @@Tc.track_exception errorStr , :properties => @@CustomProperties - @@Tc.flush - $log.info("AppInsights Exception Telemetry sent successfully") - end - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: sendExceptionTelemetry - error: #{errorStr}") - end + def sendExceptionTelemetry(errorStr, properties = nil) + begin + if @@CustomProperties.empty? || @@CustomProperties.nil? + initializeUtility() + elsif @@CustomProperties["DockerVersion"].nil? + getDockerInfo() + end + telemetryProps = {} + # add common dimensions + @@CustomProperties.each { |k, v| telemetryProps[k] = v } + # add passed-in dimensions if any + if (!properties.nil? && !properties.empty?) + properties.each { |k, v| telemetryProps[k] = v } + end + if !(@@Tc.nil?) + @@Tc.track_exception errorStr, :properties => telemetryProps + @@Tc.flush + $log.info("AppInsights Exception Telemetry sent successfully") end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendExceptionTelemetry - error: #{errorStr}") + end + end - #Method to send heartbeat and container inventory count - def sendTelemetry(pluginName, properties) - begin - if @@CustomProperties.empty? || @@CustomProperties.nil? 
- initializeUtility() - elsif @@CustomProperties['DockerVersion'].nil? - getDockerInfo() - end - @@CustomProperties['Computer'] = properties['Computer'] - sendHeartBeatEvent(pluginName) - sendLastProcessedContainerInventoryCountMetric(pluginName, properties) - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: sendTelemetry - error: #{errorStr}") - end + #Method to send heartbeat and container inventory count + def sendTelemetry(pluginName, properties) + begin + if @@CustomProperties.empty? || @@CustomProperties.nil? + initializeUtility() + elsif @@CustomProperties["DockerVersion"].nil? + getDockerInfo() end + @@CustomProperties["Computer"] = properties["Computer"] + sendHeartBeatEvent(pluginName) + sendLastProcessedContainerInventoryCountMetric(pluginName, properties) + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendTelemetry - error: #{errorStr}") + end + end - #Method to send metric. It will merge passed-in properties with common custom properties - def sendMetricTelemetry(metricName, metricValue, properties) - begin - if (metricName.empty? || metricName.nil?) - $log.warn("SendMetricTelemetry: metricName is missing") - return - end - if @@CustomProperties.empty? || @@CustomProperties.nil? - initializeUtility() - elsif @@CustomProperties['DockerVersion'].nil? - getDockerInfo() - end - telemetryProps = {} - telemetryProps["Computer"] = @@hostName - # add common dimensions - @@CustomProperties.each{ |k,v| telemetryProps[k]=v} - # add passed-in dimensions if any - if (!properties.nil? && !properties.empty?) - properties.each{ |k,v| telemetryProps[k]=v} - end - if !(@@Tc.nil?) 
- @@Tc.track_metric metricName, metricValue, - :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, - :properties => telemetryProps - @@Tc.flush - $log.info("AppInsights metric Telemetry #{metricName} sent successfully") - end - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: sendMetricTelemetry - error: #{errorStr}") - end + #Method to send metric. It will merge passed-in properties with common custom properties + def sendMetricTelemetry(metricName, metricValue, properties) + begin + if (metricName.empty? || metricName.nil?) + $log.warn("SendMetricTelemetry: metricName is missing") + return end + if @@CustomProperties.empty? || @@CustomProperties.nil? + initializeUtility() + elsif @@CustomProperties["DockerVersion"].nil? + getDockerInfo() + end + telemetryProps = {} + # add common dimensions + @@CustomProperties.each { |k, v| telemetryProps[k] = v } + # add passed-in dimensions if any + if (!properties.nil? && !properties.empty?) + properties.each { |k, v| telemetryProps[k] = v } + end + if !(@@Tc.nil?) 
+ @@Tc.track_metric metricName, metricValue, + :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, + :properties => telemetryProps + @@Tc.flush + $log.info("AppInsights metric Telemetry #{metricName} sent successfully") + end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendMetricTelemetry - error: #{errorStr}") + end + end - def getWorkspaceId() - begin - adminConf = {} - confFile = File.open(@OmsAdminFilePath, "r") - confFile.each_line do |line| - splitStrings = line.split('=') - adminConf[splitStrings[0]] = splitStrings[1] - end - workspaceId = adminConf['WORKSPACE_ID'] - return workspaceId - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: getWorkspaceId - error: #{errorStr}") - end + def getWorkspaceId() + begin + adminConf = {} + confFile = File.open(@OmsAdminFilePath, "r") + confFile.each_line do |line| + splitStrings = line.split("=") + adminConf[splitStrings[0]] = splitStrings[1] end + workspaceId = adminConf["WORKSPACE_ID"] + return workspaceId + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: getWorkspaceId - error: #{errorStr}") + end end -end \ No newline at end of file + end +end diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index 3c36775af..8b4fd9fcf 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -2,424 +2,628 @@ # frozen_string_literal: true class CAdvisorMetricsAPIClient - - require 'json' - require 'logger' - require 'net/http' - require 'net/https' - require 'uri' - require 'date' - - require_relative 'oms_common' - require_relative 'KubernetesApiClient' - require_relative 'ApplicationInsightsUtility' - - @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt" - @Log = Logger.new(@LogPath, 2, 10*1048576) #keep last 2 files, max log file size = 10M - @@rxBytesLast = nil - @@rxBytesTimeLast = nil - @@txBytesLast = nil - 
@@txBytesTimeLast = nil - @@nodeCpuUsageNanoSecondsLast = nil - @@nodeCpuUsageNanoSecondsTimeLast = nil - @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i - @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i - - - def initialize + require "json" + require "logger" + require "net/http" + require "net/https" + require "uri" + require "date" + + require_relative "oms_common" + require_relative "KubernetesApiClient" + require_relative "ApplicationInsightsUtility" + + @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt" + @Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M + # @@rxBytesLast = nil + # @@rxBytesTimeLast = nil + # @@txBytesLast = nil + # @@txBytesTimeLast = nil + @@nodeCpuUsageNanoSecondsLast = nil + @@nodeCpuUsageNanoSecondsTimeLast = nil + @@winNodeCpuUsageNanoSecondsLast = {} + @@winNodeCpuUsageNanoSecondsTimeLast = {} + @@winContainerCpuUsageNanoSecondsLast = {} + @@winContainerCpuUsageNanoSecondsTimeLast = {} + @@winContainerPrevMetricRate = {} + @@linuxNodePrevMetricRate = nil + @@winNodePrevMetricRate = {} + @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i + @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i + + #Containers a hash of node name and the last time telemetry was sent for this node + @@nodeTelemetryTimeTracker = {} + + # Keeping track of containers so that can delete the container from the container cpu cache when the container is deleted + # as a part of the cleanup routine + @@winContainerIdCache = [] + + def initialize + end + + class << self + def getSummaryStatsFromCAdvisor(winNode) + headers = {} + response = nil + @Log.info "Getting CAdvisor Uri" + begin + cAdvisorUri = getCAdvisorUri(winNode) + if !cAdvisorUri.nil? 
+ uri = URI.parse(cAdvisorUri) + http = Net::HTTP.new(uri.host, uri.port) + http.use_ssl = false + + cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) + response = http.request(cAdvisorApiRequest) + @Log.info "Got response code #{response.code} from #{uri.request_uri}" + end + rescue => error + @Log.warn("CAdvisor api request failed: #{error}") + telemetryProps = {} + telemetryProps["Computer"] = winNode["Hostname"] + ApplicationInsightsUtility.sendExceptionTelemetry(error, telemetryProps) + end + return response + end + + def getCAdvisorUri(winNode) + begin + defaultHost = "http://localhost:10255" + relativeUri = "/stats/summary" + if !winNode.nil? + nodeIP = winNode["InternalIP"] + else + nodeIP = ENV["NODE_IP"] + end + if !nodeIP.nil? + @Log.info("Using #{nodeIP + relativeUri} for CAdvisor Uri") + return "http://#{nodeIP}:10255" + relativeUri + else + @Log.warn ("NODE_IP environment variable not set. Using default as : #{defaultHost + relativeUri} ") + if !winNode.nil? + return nil + else + return defaultHost + relativeUri + end + end + end + end + + def getMetrics(winNode = nil) + metricDataItems = [] + begin + if !winNode.nil? + hostName = winNode["Hostname"] + operatingSystem = "Windows" + else + hostName = (OMS::Common.get_hostname) + operatingSystem = "Linux" + end + cAdvisorStats = getSummaryStatsFromCAdvisor(winNode) + if !cAdvisorStats.nil? + metricInfo = JSON.parse(cAdvisorStats.body) + end + if !metricInfo.nil? 
+ metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "workingSetBytes", "memoryWorkingSetBytes")) + metricDataItems.concat(getContainerStartTimeMetricItems(metricInfo, hostName, "restartTimeEpoch")) + + if operatingSystem == "Linux" + metricDataItems.concat(getContainerCpuMetricItems(metricInfo, hostName, "usageNanoCores", "cpuUsageNanoCores")) + metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "rssBytes", "memoryRssBytes")) + metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "rssBytes", "memoryRssBytes")) + elsif operatingSystem == "Windows" + containerCpuUsageNanoSecondsRate = getContainerCpuMetricItemRate(metricInfo, hostName, "usageCoreNanoSeconds", "cpuUsageNanoCores") + if containerCpuUsageNanoSecondsRate && !containerCpuUsageNanoSecondsRate.empty? && !containerCpuUsageNanoSecondsRate.nil? + metricDataItems.concat(containerCpuUsageNanoSecondsRate) end - - class << self - def getSummaryStatsFromCAdvisor() - headers = {} - response = nil - @Log.info 'Getting CAdvisor Uri' - begin - cAdvisorUri = getCAdvisorUri() - if !cAdvisorUri.nil? - uri = URI.parse(cAdvisorUri) - http = Net::HTTP.new(uri.host, uri.port) - http.use_ssl = false - - cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) - response = http.request(cAdvisorApiRequest) - @Log.info "Got response code #{response.code} from #{uri.request_uri}" - end - rescue => error - @Log.warn("CAdvisor api request failed: #{error}") - end - return response - end - - def getCAdvisorUri() - begin - defaultHost = "http://localhost:10255" - relativeUri = "/stats/summary" - nodeIP = ENV['NODE_IP'] - if !nodeIP.nil? - @Log.info("Using #{nodeIP + relativeUri} for CAdvisor Uri") - return "http://#{nodeIP}:10255" + relativeUri - else - @Log.warn ("NODE_IP environment variable not set. 
Using default as : #{defaultHost + relativeUri} ") - return defaultHost + relativeUri - end - end - end - - def getMetrics() - metricDataItems = [] - begin - hostName = (OMS::Common.get_hostname) - metricInfo = JSON.parse(getSummaryStatsFromCAdvisor().body) - metricDataItems.concat(getContainerCpuMetricItems(metricInfo, hostName, "usageNanoCores","cpuUsageNanoCores")) - metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "workingSetBytes", "memoryWorkingSetBytes")) - metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "rssBytes", "memoryRssBytes")) - metricDataItems.concat(getContainerStartTimeMetricItems(metricInfo, hostName, "restartTimeEpoch")) - - cpuUsageNanoSecondsRate = getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", "cpuUsageNanoCores") - if cpuUsageNanoSecondsRate && !cpuUsageNanoSecondsRate.empty? && !cpuUsageNanoSecondsRate.nil? - metricDataItems.push(cpuUsageNanoSecondsRate) - end - metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "workingSetBytes", "memoryWorkingSetBytes")) - metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "rssBytes", "memoryRssBytes")) - metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "rxBytes", "networkRxBytes")) - metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "txBytes", "networkTxBytes")) - metricDataItems.push(getNodeLastRebootTimeMetric(metricInfo, hostName, "restartTimeEpoch")) - - networkRxRate = getNodeMetricItemRate(metricInfo, hostName, "network", "rxBytes", "networkRxBytesPerSec") - if networkRxRate && !networkRxRate.empty? && !networkRxRate.nil? - metricDataItems.push(networkRxRate) - end - networkTxRate = getNodeMetricItemRate(metricInfo, hostName, "network", "txBytes", "networkTxBytesPerSec") - if networkTxRate && !networkTxRate.empty? && !networkTxRate.nil? 
- metricDataItems.push(networkTxRate) - end - - - rescue => error - @Log.warn("getContainerMetrics failed: #{error}") - return metricDataItems - end - return metricDataItems - end + end - def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn) - metricItems = [] - clusterId = KubernetesApiClient.getClusterId - timeDifference = (DateTime.now.to_time.to_i - @@telemetryCpuMetricTimeTracker).abs - timeDifferenceInMinutes = timeDifference/60 - begin - metricInfo = metricJSON - metricInfo['pods'].each do |pod| - podUid = pod['podRef']['uid'] - podName = pod['podRef']['name'] - podNamespace = pod['podRef']['namespace'] - - if (!pod['containers'].nil?) - pod['containers'].each do |container| - #cpu metric - containerName = container['name'] - metricValue = container['cpu'][cpuMetricNameToCollect] - metricTime = container['cpu']['time'] - metricItem = {} - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = hostName - metricProps['ObjectName'] = "K8SContainer" - metricProps['InstanceName'] = clusterId + "/" + podUid + "/" + containerName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - metricItems.push(metricItem) - #Telemetry about agent performance - begin - # we can only do this much now. Ideally would like to use the docker image repository to find our pods/containers - # cadvisor does not have pod/container metadata. 
so would need more work to cache as pv & use - if (podName.downcase.start_with?('omsagent-') && podNamespace.eql?("kube-system") && containerName.downcase.start_with?('omsagent') && metricNametoReturn.eql?("cpuUsageNanoCores")) - - if (timeDifferenceInMinutes >= 10) - telemetryProps = {} - telemetryProps['PodName'] = podName - telemetryProps['ContainerName'] = containerName - ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) - end - end - rescue => errorStr - $log.warn("Exception while generating Telemetry from getcontainerCpuMetricItems failed: #{errorStr} for metric #{cpuMetricNameToCollect}") - end - end - end - end - # reset time outside pod iterator as we use one timer per metric for 2 pods (ds & rs) - if (timeDifferenceInMinutes >= 10 && metricNametoReturn.eql?("cpuUsageNanoCores")) - @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i - end - rescue => error - @Log.warn("getcontainerCpuMetricItems failed: #{error} for metric #{cpuMetricNameToCollect}") - return metricItems - end - return metricItems - end + cpuUsageNanoSecondsRate = getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", "cpuUsageNanoCores", operatingSystem) + if cpuUsageNanoSecondsRate && !cpuUsageNanoSecondsRate.empty? && !cpuUsageNanoSecondsRate.nil? 
+ metricDataItems.push(cpuUsageNanoSecondsRate) + end + metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "workingSetBytes", "memoryWorkingSetBytes")) - def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollect, metricNametoReturn) - metricItems = [] - clusterId = KubernetesApiClient.getClusterId - timeDifference = (DateTime.now.to_time.to_i - @@telemetryMemoryMetricTimeTracker).abs - timeDifferenceInMinutes = timeDifference/60 - begin - metricInfo = metricJSON - metricInfo['pods'].each do |pod| - podUid = pod['podRef']['uid'] - podName = pod['podRef']['name'] - podNamespace = pod['podRef']['namespace'] - if (!pod['containers'].nil?) - pod['containers'].each do |container| - containerName = container['name'] - metricValue = container['memory'][memoryMetricNameToCollect] - metricTime = container['memory']['time'] - - metricItem = {} - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = hostName - metricProps['ObjectName'] = "K8SContainer" - metricProps['InstanceName'] = clusterId + "/" + podUid + "/" + containerName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - metricItems.push(metricItem) - #Telemetry about agent performance - begin - # we can only do this much now. Ideally would like to use the docker image repository to find our pods/containers - # cadvisor does not have pod/container metadata. 
so would need more work to cache as pv & use - if (podName.downcase.start_with?('omsagent-') && podNamespace.eql?("kube-system") && containerName.downcase.start_with?('omsagent') && metricNametoReturn.eql?("memoryRssBytes")) - if (timeDifferenceInMinutes >= 10) - telemetryProps = {} - telemetryProps['PodName'] = podName - telemetryProps['ContainerName'] = containerName - ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) - end - end - rescue => errorStr - $log.warn("Exception while generating Telemetry from getcontainerMemoryMetricItems failed: #{errorStr} for metric #{memoryMetricNameToCollect}") - end - end - end - end - # reset time outside pod iterator as we use one timer per metric for 2 pods (ds & rs) - if (timeDifferenceInMinutes >= 10 && metricNametoReturn.eql?("memoryRssBytes")) - @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i - end - rescue => error - @Log.warn("getcontainerMemoryMetricItems failed: #{error} for metric #{memoryMetricNameToCollect}") - @Log.warn metricJSON - return metricItems - end - return metricItems - end + metricDataItems.push(getNodeLastRebootTimeMetric(metricInfo, hostName, "restartTimeEpoch")) + + # Disabling networkRxRate and networkTxRate since we dont use it as of now. + #metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "rxBytes", "networkRxBytes")) + #metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "txBytes", "networkTxBytes")) + # networkRxRate = getNodeMetricItemRate(metricInfo, hostName, "network", "rxBytes", "networkRxBytesPerSec") + # if networkRxRate && !networkRxRate.empty? && !networkRxRate.nil? + # metricDataItems.push(networkRxRate) + # end + # networkTxRate = getNodeMetricItemRate(metricInfo, hostName, "network", "txBytes", "networkTxBytesPerSec") + # if networkTxRate && !networkTxRate.empty? && !networkTxRate.nil? 
+ # metricDataItems.push(networkTxRate) + # end + else + @Log.warn("Couldn't get metric information for host: #{hostName}") + end + rescue => error + @Log.warn("getContainerMetrics failed: #{error}") + return metricDataItems + end + return metricDataItems + end + + def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn) + metricItems = [] + clusterId = KubernetesApiClient.getClusterId + timeDifference = (DateTime.now.to_time.to_i - @@telemetryCpuMetricTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + begin + metricInfo = metricJSON + metricInfo["pods"].each do |pod| + podUid = pod["podRef"]["uid"] + podName = pod["podRef"]["name"] + podNamespace = pod["podRef"]["namespace"] + + if (!pod["containers"].nil?) + pod["containers"].each do |container| + #cpu metric + containerName = container["name"] + metricValue = container["cpu"][cpuMetricNameToCollect] + metricTime = container["cpu"]["time"] + metricItem = {} + metricItem["DataItems"] = [] - def getNodeMetricItem(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn) - metricItem = {} - clusterId = KubernetesApiClient.getClusterId - begin - metricInfo = metricJSON - node = metricInfo['node'] - nodeName = node['nodeName'] - - - metricValue = node[metricCategory][metricNameToCollect] - metricTime = node[metricCategory]['time'] - - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = hostName - metricProps['ObjectName'] = "K8SNode" - metricProps['InstanceName'] = clusterId + "/" + nodeName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - - rescue => error - @Log.warn("getNodeMetricItem failed: #{error} for metric #{metricNameToCollect}") - @Log.warn metricJSON - return 
metricItem - end - return metricItem + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = hostName + metricProps["ObjectName"] = "K8SContainer" + metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) + #Telemetry about agent performance + begin + # we can only do this much now. Ideally would like to use the docker image repository to find our pods/containers + # cadvisor does not have pod/container metadata. so would need more work to cache as pv & use + if (podName.downcase.start_with?("omsagent-") && podNamespace.eql?("kube-system") && containerName.downcase.start_with?("omsagent") && metricNametoReturn.eql?("cpuUsageNanoCores")) + if (timeDifferenceInMinutes >= 10) + telemetryProps = {} + telemetryProps["PodName"] = podName + telemetryProps["ContainerName"] = containerName + telemetryProps["Computer"] = hostName + ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) + end end + rescue => errorStr + $log.warn("Exception while generating Telemetry from getcontainerCpuMetricItems failed: #{errorStr} for metric #{cpuMetricNameToCollect}") + end + end + end + end + # reset time outside pod iterator as we use one timer per metric for 2 pods (ds & rs) + if (timeDifferenceInMinutes >= 10 && metricNametoReturn.eql?("cpuUsageNanoCores")) + @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i + end + rescue => error + @Log.warn("getcontainerCpuMetricItems failed: #{error} for metric #{cpuMetricNameToCollect}") + return metricItems + end + return metricItems + end + + def clearDeletedWinContainersFromCache() + begin + winCpuUsageNanoSecondsKeys = @@winContainerCpuUsageNanoSecondsLast.keys 
+ winCpuUsageNanoSecondsTimeKeys = @@winContainerCpuUsageNanoSecondsTimeLast.keys + + # Find the container ids to be deleted from cache + winContainersToBeCleared = winCpuUsageNanoSecondsKeys - @@winContainerIdCache + if winContainersToBeCleared.length > 0 + @Log.warn "Stale containers found in cache, clearing...: #{winContainersToBeCleared}" + end + winContainersToBeCleared.each do |containerId| + @@winContainerCpuUsageNanoSecondsLast.delete(containerId) + @@winContainerCpuUsageNanoSecondsTimeLast.delete(containerId) + end + rescue => errorStr + @Log.warn("clearDeletedWinContainersFromCache failed: #{errorStr}") + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def resetWinContainerIdCache + @@winContainerIdCache = [] + end + + # usageNanoCores doesnt exist for windows nodes. Hence need to compute this from usageCoreNanoSeconds + def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn) + metricItems = [] + clusterId = KubernetesApiClient.getClusterId + timeDifference = (DateTime.now.to_time.to_i - @@telemetryCpuMetricTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + @Log.warn "in host: #{hostName}" + begin + metricInfo = metricJSON + containerCount = 0 + metricInfo["pods"].each do |pod| + podUid = pod["podRef"]["uid"] + podName = pod["podRef"]["name"] + podNamespace = pod["podRef"]["namespace"] + + if (!pod["containers"].nil?) 
+ pod["containers"].each do |container| + #cpu metric + containerCount += 1 + containerName = container["name"] + metricValue = container["cpu"][cpuMetricNameToCollect] + metricTime = container["cpu"]["time"] + metricItem = {} + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = hostName + metricProps["ObjectName"] = "K8SContainer" + metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn - def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn) - metricItem = {} - clusterId = KubernetesApiClient.getClusterId - begin - - metricInfo = metricJSON - node = metricInfo['node'] - nodeName = node['nodeName'] - - metricValue = node[metricCategory][metricNameToCollect] - metricTime = node[metricCategory]['time'] - - if !(metricNameToCollect == "rxBytes" || metricNameToCollect == "txBytes" || metricNameToCollect == "usageCoreNanoSeconds" ) - @Log.warn("getNodeMetricItemRate : rateMetric is supported only for rxBytes, txBytes & usageCoreNanoSeconds and not for #{metricNameToCollect}") - return nil - elsif metricNameToCollect == "rxBytes" - if @@rxBytesLast.nil? || @@rxBytesTimeLast.nil? || @@rxBytesLast > metricValue #when kubelet is restarted the last condition will be true - @@rxBytesLast = metricValue - @@rxBytesTimeLast = metricTime - return nil - else - metricRateValue = ((metricValue - @@rxBytesLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@rxBytesTimeLast).to_time) - @@rxBytesLast = metricValue - @@rxBytesTimeLast = metricTime - metricValue = metricRateValue - end - elsif metricNameToCollect == "txBytes" - if @@txBytesLast.nil? || @@txBytesTimeLast.nil? 
|| @@txBytesLast > metricValue #when kubelet is restarted the last condition will be true - @@txBytesLast = metricValue - @@txBytesTimeLast = metricTime - return nil - else - metricRateValue = ((metricValue - @@txBytesLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@txBytesTimeLast).to_time) - @@txBytesLast = metricValue - @@txBytesTimeLast = metricTime - metricValue = metricRateValue - end - else - if @@nodeCpuUsageNanoSecondsLast.nil? || @@nodeCpuUsageNanoSecondsTimeLast.nil? || @@nodeCpuUsageNanoSecondsLast > metricValue #when kubelet is restarted the last condition will be true - @@nodeCpuUsageNanoSecondsLast = metricValue - @@nodeCpuUsageNanoSecondsTimeLast = metricTime - return nil - else - metricRateValue = ((metricValue - @@nodeCpuUsageNanoSecondsLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@nodeCpuUsageNanoSecondsTimeLast).to_time) - @@nodeCpuUsageNanoSecondsLast = metricValue - @@nodeCpuUsageNanoSecondsTimeLast = metricTime - metricValue = metricRateValue - end - end - - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = hostName - metricProps['ObjectName'] = "K8SNode" - metricProps['InstanceName'] = clusterId + "/" + nodeName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - - rescue => error - @Log.warn("getNodeMetricItemRate failed: #{error} for metric #{metricNameToCollect}") - @Log.warn metricJSON - return nil - end - return metricItem + containerId = podUid + "/" + containerName + # Adding the containers to the winContainerIdCache so that it can be used by the cleanup routine + # to clear the delted containers every 5 minutes + @@winContainerIdCache.push(containerId) + if @@winContainerCpuUsageNanoSecondsLast[containerId].nil? 
|| @@winContainerCpuUsageNanoSecondsTimeLast[containerId].nil? || @@winContainerCpuUsageNanoSecondsLast[containerId] > metricValue #when kubelet is restarted the last condition will be true + @@winContainerCpuUsageNanoSecondsLast[containerId] = metricValue + @@winContainerCpuUsageNanoSecondsTimeLast[containerId] = metricTime + next + else + timeDifference = DateTime.parse(metricTime).to_time - DateTime.parse(@@winContainerCpuUsageNanoSecondsTimeLast[containerId]).to_time + containerCpuUsageDifference = metricValue - @@winContainerCpuUsageNanoSecondsLast[containerId] + # containerCpuUsageDifference check is added to make sure we report non zero values when cadvisor returns same values for subsequent calls + if timeDifference != 0 && containerCpuUsageDifference != 0 + metricRateValue = (containerCpuUsageDifference * 1.0) / timeDifference + else + @Log.info "container - cpu usage difference / time difference is 0, hence using previous cached value" + if !@@winContainerPrevMetricRate[containerId].nil? 
+ metricRateValue = @@winContainerPrevMetricRate[containerId] + else + # This can happen when the metric value returns same values for subsequent calls when the plugin first starts + metricRateValue = 0 + end end + @@winContainerCpuUsageNanoSecondsLast[containerId] = metricValue + @@winContainerCpuUsageNanoSecondsTimeLast[containerId] = metricTime + metricValue = metricRateValue + @@winContainerPrevMetricRate[containerId] = metricRateValue + end - def getNodeLastRebootTimeMetric(metricJSON, hostName, metricNametoReturn) - metricItem = {} - clusterId = KubernetesApiClient.getClusterId - - begin - metricInfo = metricJSON - node = metricInfo['node'] - nodeName = node['nodeName'] - - - metricValue = node['startTime'] - metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z - - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = hostName - metricProps['ObjectName'] = "K8SNode" - metricProps['InstanceName'] = clusterId + "/" + nodeName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - #Read it from /proc/uptime - metricCollections['Value'] = DateTime.parse(metricTime).to_time.to_i - IO.read("/proc/uptime").split[0].to_f - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - - rescue => error - @Log.warn("getNodeLastRebootTimeMetric failed: #{error} ") - @Log.warn metricJSON - return metricItem - end - return metricItem + metricCollections["Value"] = metricValue + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) + end + end + end + #Sending ContainerInventoryTelemetry from replicaset for telemetry purposes + if @@nodeTelemetryTimeTracker[hostName].nil? 
+ @@nodeTelemetryTimeTracker[hostName] = DateTime.now.to_time.to_i + else + timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker[hostName]).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= 5) + @@nodeTelemetryTimeTracker[hostName] = DateTime.now.to_time.to_i + telemetryProperties = {} + telemetryProperties["Computer"] = hostName + telemetryProperties["ContainerCount"] = containerCount + # Hardcoding the event to ContainerInventory hearbeat event since the telemetry is pivoted off of this event. + @Log.info "sending container inventory heartbeat telemetry" + ApplicationInsightsUtility.sendCustomEvent("ContainerInventoryHeartBeatEvent", telemetryProperties) + end + end + rescue => error + @Log.warn("getcontainerCpuMetricItemRate failed: #{error} for metric #{cpuMetricNameToCollect}") + return metricItems + end + return metricItems + end + + def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollect, metricNametoReturn) + metricItems = [] + clusterId = KubernetesApiClient.getClusterId + timeDifference = (DateTime.now.to_time.to_i - @@telemetryMemoryMetricTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + begin + metricInfo = metricJSON + metricInfo["pods"].each do |pod| + podUid = pod["podRef"]["uid"] + podName = pod["podRef"]["name"] + podNamespace = pod["podRef"]["namespace"] + if (!pod["containers"].nil?) 
+ pod["containers"].each do |container| + containerName = container["name"] + metricValue = container["memory"][memoryMetricNameToCollect] + metricTime = container["memory"]["time"] + + metricItem = {} + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = hostName + metricProps["ObjectName"] = "K8SContainer" + metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) + #Telemetry about agent performance + begin + # we can only do this much now. Ideally would like to use the docker image repository to find our pods/containers + # cadvisor does not have pod/container metadata. so would need more work to cache as pv & use + if (podName.downcase.start_with?("omsagent-") && podNamespace.eql?("kube-system") && containerName.downcase.start_with?("omsagent") && metricNametoReturn.eql?("memoryRssBytes")) + if (timeDifferenceInMinutes >= 10) + telemetryProps = {} + telemetryProps["PodName"] = podName + telemetryProps["ContainerName"] = containerName + telemetryProps["Computer"] = hostName + ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) + end end + rescue => errorStr + $log.warn("Exception while generating Telemetry from getcontainerMemoryMetricItems failed: #{errorStr} for metric #{memoryMetricNameToCollect}") + end + end + end + end + # reset time outside pod iterator as we use one timer per metric for 2 pods (ds & rs) + if (timeDifferenceInMinutes >= 10 && metricNametoReturn.eql?("memoryRssBytes")) + @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i + end + rescue => error + @Log.warn("getcontainerMemoryMetricItems failed: #{error} for metric 
#{memoryMetricNameToCollect}") + @Log.warn metricJSON + return metricItems + end + return metricItems + end + + def getNodeMetricItem(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn) + metricItem = {} + clusterId = KubernetesApiClient.getClusterId + begin + metricInfo = metricJSON + node = metricInfo["node"] + nodeName = node["nodeName"] + + if !node[metricCategory].nil? + metricValue = node[metricCategory][metricNameToCollect] + metricTime = node[metricCategory]["time"] + + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = hostName + metricProps["ObjectName"] = "K8SNode" + metricProps["InstanceName"] = clusterId + "/" + nodeName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + end + rescue => error + @Log.warn("getNodeMetricItem failed: #{error} for metric #{metricNameToCollect}") + @Log.warn metricJSON + return metricItem + end + return metricItem + end + + def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn, operatingSystem) + metricItem = {} + clusterId = KubernetesApiClient.getClusterId + begin + metricInfo = metricJSON + node = metricInfo["node"] + nodeName = node["nodeName"] - def getContainerStartTimeMetricItems(metricJSON, hostName, metricNametoReturn) - metricItems = [] - clusterId = KubernetesApiClient.getClusterId - currentTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z - begin - metricInfo = metricJSON - metricInfo['pods'].each do |pod| - podUid = pod['podRef']['uid'] - if (!pod['containers'].nil?) 
- pod['containers'].each do |container| - containerName = container['name'] - metricValue = container['startTime'] - metricTime = currentTime - - metricItem = {} - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = hostName - metricProps['ObjectName'] = "K8SContainer" - metricProps['InstanceName'] = clusterId + "/" + podUid + "/" + containerName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = DateTime.parse(metricValue).to_time.to_i - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - metricItems.push(metricItem) - end - end - end - rescue => error - @Log.warn("getContainerStartTimeMetric failed: #{error} for metric #{metricNametoReturn}") - @Log.warn metricJSON - return metricItems - end - return metricItems + if !node[metricCategory].nil? + metricValue = node[metricCategory][metricNameToCollect] + metricTime = node[metricCategory]["time"] + + # if !(metricNameToCollect == "rxBytes" || metricNameToCollect == "txBytes" || metricNameToCollect == "usageCoreNanoSeconds") + # @Log.warn("getNodeMetricItemRate : rateMetric is supported only for rxBytes, txBytes & usageCoreNanoSeconds and not for #{metricNameToCollect}") + if !(metricNameToCollect == "usageCoreNanoSeconds") + @Log.warn("getNodeMetricItemRate : rateMetric is supported only for usageCoreNanoSeconds and not for #{metricNameToCollect}") + return nil + # elsif metricNameToCollect == "rxBytes" + # if @@rxBytesLast.nil? || @@rxBytesTimeLast.nil? 
|| @@rxBytesLast > metricValue #when kubelet is restarted the last condition will be true + # @@rxBytesLast = metricValue + # @@rxBytesTimeLast = metricTime + # return nil + # else + # metricRateValue = ((metricValue - @@rxBytesLast) * 1.0) / (DateTime.parse(metricTime).to_time - DateTime.parse(@@rxBytesTimeLast).to_time) + # @@rxBytesLast = metricValue + # @@rxBytesTimeLast = metricTime + # metricValue = metricRateValue + # end + # elsif metricNameToCollect == "txBytes" + # if @@txBytesLast.nil? || @@txBytesTimeLast.nil? || @@txBytesLast > metricValue #when kubelet is restarted the last condition will be true + # @@txBytesLast = metricValue + # @@txBytesTimeLast = metricTime + # return nil + # else + # metricRateValue = ((metricValue - @@txBytesLast) * 1.0) / (DateTime.parse(metricTime).to_time - DateTime.parse(@@txBytesTimeLast).to_time) + # @@txBytesLast = metricValue + # @@txBytesTimeLast = metricTime + # metricValue = metricRateValue + # end + else + if operatingSystem == "Linux" + if @@nodeCpuUsageNanoSecondsLast.nil? || @@nodeCpuUsageNanoSecondsTimeLast.nil? || @@nodeCpuUsageNanoSecondsLast > metricValue #when kubelet is restarted the last condition will be true + @@nodeCpuUsageNanoSecondsLast = metricValue + @@nodeCpuUsageNanoSecondsTimeLast = metricTime + return nil + else + timeDifference = DateTime.parse(metricTime).to_time - DateTime.parse(@@nodeCpuUsageNanoSecondsTimeLast).to_time + nodeCpuUsageDifference = metricValue - @@nodeCpuUsageNanoSecondsLast + # nodeCpuUsageDifference check is added to make sure we report non zero values when cadvisor returns same values for subsequent calls + if timeDifference != 0 && nodeCpuUsageDifference != 0 + metricRateValue = (nodeCpuUsageDifference * 1.0) / timeDifference + else + @Log.info "linux node - cpu usage difference / time difference is 0, hence using previous cached value" + if !@@linuxNodePrevMetricRate.nil? 
+ metricRateValue = @@linuxNodePrevMetricRate + else + # This can happen when the metric value returns same values for subsequent calls when the plugin first starts + metricRateValue = 0 + end + end + @@nodeCpuUsageNanoSecondsLast = metricValue + @@nodeCpuUsageNanoSecondsTimeLast = metricTime + @@linuxNodePrevMetricRate = metricRateValue + metricValue = metricRateValue + end + elsif operatingSystem == "Windows" + # Using the hash for windows nodes since this is running in replica set and there can be multiple nodes + if @@winNodeCpuUsageNanoSecondsLast[hostName].nil? || @@winNodeCpuUsageNanoSecondsTimeLast[hostName].nil? || @@winNodeCpuUsageNanoSecondsLast[hostName] > metricValue #when kubelet is restarted the last condition will be true + @@winNodeCpuUsageNanoSecondsLast[hostName] = metricValue + @@winNodeCpuUsageNanoSecondsTimeLast[hostName] = metricTime + return nil + else + timeDifference = DateTime.parse(metricTime).to_time - DateTime.parse(@@winNodeCpuUsageNanoSecondsTimeLast[hostName]).to_time + nodeCpuUsageDifference = metricValue - @@winNodeCpuUsageNanoSecondsLast[hostName] + # nodeCpuUsageDifference check is added to make sure we report non zero values when cadvisor returns same values for subsequent calls + if timeDifference != 0 && nodeCpuUsageDifference != 0 + metricRateValue = (nodeCpuUsageDifference * 1.0) / timeDifference + else + @Log.info "windows node - cpu usage difference / time difference is 0, hence using previous cached value" + if !@@winNodePrevMetricRate[hostName].nil? 
+ metricRateValue = @@winNodePrevMetricRate[hostName] + else + # This can happen when the metric value returns same values for subsequent calls when the plugin first starts + metricRateValue = 0 + end end + @@winNodeCpuUsageNanoSecondsLast[hostName] = metricValue + @@winNodeCpuUsageNanoSecondsTimeLast[hostName] = metricTime + @@winNodePrevMetricRate[hostName] = metricRateValue + metricValue = metricRateValue + end + end + end + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = hostName + metricProps["ObjectName"] = "K8SNode" + metricProps["InstanceName"] = clusterId + "/" + nodeName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + end + rescue => error + @Log.warn("getNodeMetricItemRate failed: #{error} for metric #{metricNameToCollect}") + @Log.warn metricJSON + return nil + end + return metricItem + end + + def getNodeLastRebootTimeMetric(metricJSON, hostName, metricNametoReturn) + metricItem = {} + clusterId = KubernetesApiClient.getClusterId + + begin + metricInfo = metricJSON + node = metricInfo["node"] + nodeName = node["nodeName"] + + metricValue = node["startTime"] + metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = hostName + metricProps["ObjectName"] = "K8SNode" + metricProps["InstanceName"] = clusterId + "/" + nodeName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + #Read it from /proc/uptime + metricCollections["Value"] = DateTime.parse(metricTime).to_time.to_i - IO.read("/proc/uptime").split[0].to_f + + metricProps["Collections"].push(metricCollections) + 
metricItem["DataItems"].push(metricProps) + rescue => error + @Log.warn("getNodeLastRebootTimeMetric failed: #{error} ") + @Log.warn metricJSON + return metricItem + end + return metricItem + end + + def getContainerStartTimeMetricItems(metricJSON, hostName, metricNametoReturn) + metricItems = [] + clusterId = KubernetesApiClient.getClusterId + currentTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + begin + metricInfo = metricJSON + metricInfo["pods"].each do |pod| + podUid = pod["podRef"]["uid"] + if (!pod["containers"].nil?) + pod["containers"].each do |container| + containerName = container["name"] + metricValue = container["startTime"] + metricTime = currentTime + + metricItem = {} + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = hostName + metricProps["ObjectName"] = "K8SContainer" + metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = DateTime.parse(metricValue).to_time.to_i + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) end + end end + rescue => error + @Log.warn("getContainerStartTimeMetric failed: #{error} for metric #{metricNametoReturn}") + @Log.warn metricJSON + return metricItems + end + return metricItems + end + end +end diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index a1e143b15..4ed85025f 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -2,474 +2,516 @@ # frozen_string_literal: true class KubernetesApiClient + require "json" + require "logger" + require "net/http" + require "net/https" + require "uri" + require "time" - require 'json' - require 'logger' - require 'net/http' - require 'net/https' - require 'uri' - 
require 'time' - - require_relative 'oms_common' - - @@ApiVersion = "v1" - @@CaFile = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - @@ClusterName = nil - @@ClusterId = nil - @@IsNodeMaster = nil - #@@IsValidRunningNode = nil - #@@IsLinuxCluster = nil - @@KubeSystemNamespace = "kube-system" - @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt" - @Log = Logger.new(@LogPath, 2, 10*1048576) #keep last 2 files, max log file size = 10M - @@TokenFileName = "/var/run/secrets/kubernetes.io/serviceaccount/token" - @@TokenStr = nil - @@NodeMetrics = Hash.new - - def initialize + require_relative "oms_common" + + @@ApiVersion = "v1" + @@CaFile = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + @@ClusterName = nil + @@ClusterId = nil + @@IsNodeMaster = nil + #@@IsValidRunningNode = nil + #@@IsLinuxCluster = nil + @@KubeSystemNamespace = "kube-system" + @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt" + @Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M + @@TokenFileName = "/var/run/secrets/kubernetes.io/serviceaccount/token" + @@TokenStr = nil + @@NodeMetrics = Hash.new + @@WinNodeArray = [] + + def initialize + end + + class << self + def getKubeResourceInfo(resource) + headers = {} + response = nil + @Log.info "Getting Kube resource" + @Log.info resource + begin + resourceUri = getResourceUri(resource) + if !resourceUri.nil? 
+ uri = URI.parse(resourceUri) + http = Net::HTTP.new(uri.host, uri.port) + http.use_ssl = true + if !File.exist?(@@CaFile) + raise "#{@@CaFile} doesnt exist" + else + http.ca_file = @@CaFile if File.exist?(@@CaFile) + end + http.verify_mode = OpenSSL::SSL::VERIFY_PEER + + kubeApiRequest = Net::HTTP::Get.new(uri.request_uri) + kubeApiRequest["Authorization"] = "Bearer " + getTokenStr + @Log.info "KubernetesAPIClient::getKubeResourceInfo : Making request to #{uri.request_uri} @ #{Time.now.utc.iso8601}" + response = http.request(kubeApiRequest) + @Log.info "KubernetesAPIClient::getKubeResourceInfo : Got response of #{response.code} for #{uri.request_uri} @ #{Time.now.utc.iso8601}" end + rescue => error + @Log.warn("kubernetes api request failed: #{error} for #{resource} @ #{Time.now.utc.iso8601}") + end + if (response.body.empty?) + @Log.warn("KubernetesAPIClient::getKubeResourceInfo : Got empty response from Kube API for #{resource} @ #{Time.now.utc.iso8601}") + end + return response + end - class << self - def getKubeResourceInfo(resource) - headers = {} - response = nil - @Log.info 'Getting Kube resource' - @Log.info resource - begin - resourceUri = getResourceUri(resource) - if !resourceUri.nil? 
- uri = URI.parse(resourceUri) - http = Net::HTTP.new(uri.host, uri.port) - http.use_ssl = true - if !File.exist?(@@CaFile) - raise "#{@@CaFile} doesnt exist" - else - http.ca_file = @@CaFile if File.exist?(@@CaFile) - end - http.verify_mode = OpenSSL::SSL::VERIFY_PEER - - kubeApiRequest = Net::HTTP::Get.new(uri.request_uri) - kubeApiRequest['Authorization'] = "Bearer " + getTokenStr - @Log.info "KubernetesAPIClient::getKubeResourceInfo : Making request to #{uri.request_uri} @ #{Time.now.utc.iso8601}" - response = http.request(kubeApiRequest) - @Log.info "KubernetesAPIClient::getKubeResourceInfo : Got response of #{response.code} for #{uri.request_uri} @ #{Time.now.utc.iso8601}" - end - rescue => error - @Log.warn("kubernetes api request failed: #{error} for #{resource} @ #{Time.now.utc.iso8601}") - end - if (response.body.empty?) - @Log.warn("KubernetesAPIClient::getKubeResourceInfo : Got empty response from Kube API for #{resource} @ #{Time.now.utc.iso8601}") - end - return response - end + def getTokenStr + return @@TokenStr if !@@TokenStr.nil? + begin + if File.exist?(@@TokenFileName) && File.readable?(@@TokenFileName) + @@TokenStr = File.read(@@TokenFileName).strip + return @@TokenStr + else + @Log.warn("Unable to read token string from #{@@TokenFileName}: #{error}") + return nil + end + end + end - def getTokenStr - return @@TokenStr if !@@TokenStr.nil? 
- begin - if File.exist?(@@TokenFileName) && File.readable?(@@TokenFileName) - @@TokenStr = File.read(@@TokenFileName).strip - return @@TokenStr - else - @Log.warn("Unable to read token string from #{@@TokenFileName}: #{error}") - return nil - end - end - end + def getResourceUri(resource) + begin + if ENV["KUBERNETES_SERVICE_HOST"] && ENV["KUBERNETES_PORT_443_TCP_PORT"] + return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}/api/" + @@ApiVersion + "/" + resource + else + @Log.warn ("Kubernetes environment variable not set KUBERNETES_SERVICE_HOST: #{ENV["KUBERNETES_SERVICE_HOST"]} KUBERNETES_PORT_443_TCP_PORT: #{ENV["KUBERNETES_PORT_443_TCP_PORT"]}. Unable to form resourceUri") + return nil + end + end + end - def getResourceUri(resource) - begin - if ENV['KUBERNETES_SERVICE_HOST'] && ENV['KUBERNETES_PORT_443_TCP_PORT'] - return "https://#{ENV['KUBERNETES_SERVICE_HOST']}:#{ENV['KUBERNETES_PORT_443_TCP_PORT']}/api/" + @@ApiVersion + "/" + resource - else - @Log.warn ("Kubernetes environment variable not set KUBERNETES_SERVICE_HOST: #{ENV['KUBERNETES_SERVICE_HOST']} KUBERNETES_PORT_443_TCP_PORT: #{ENV['KUBERNETES_PORT_443_TCP_PORT']}. Unable to form resourceUri") - return nil - end + def getClusterName + return @@ClusterName if !@@ClusterName.nil? + @@ClusterName = "None" + begin + #try getting resource ID for aks + cluster = ENV["AKS_RESOURCE_ID"] + if cluster && !cluster.nil? && !cluster.empty? + @@ClusterName = cluster.split("/").last + else + cluster = ENV["ACS_RESOURCE_NAME"] + if cluster && !cluster.nil? && !cluster.empty? 
+ @@ClusterName = cluster + else + kubesystemResourceUri = "namespaces/" + @@KubeSystemNamespace + "/pods" + @Log.info("KubernetesApiClient::getClusterName : Getting pods from Kube API @ #{Time.now.utc.iso8601}") + podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) + @Log.info("KubernetesApiClient::getClusterName : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") + podInfo["items"].each do |items| + if items["metadata"]["name"].include? "kube-controller-manager" + items["spec"]["containers"][0]["command"].each do |command| + if command.include? "--cluster-name" + @@ClusterName = command.split("=")[1] + end end + end end + end + end + rescue => error + @Log.warn("getClusterName failed: #{error}") + end + return @@ClusterName + end - def getClusterName - return @@ClusterName if !@@ClusterName.nil? - @@ClusterName = "None" - begin - #try getting resource ID for aks - cluster = ENV['AKS_RESOURCE_ID'] - if cluster && !cluster.nil? && !cluster.empty? - @@ClusterName = cluster.split("/").last - else - cluster = ENV['ACS_RESOURCE_NAME'] - if cluster && !cluster.nil? && !cluster.empty? - @@ClusterName = cluster - else - kubesystemResourceUri = "namespaces/" + @@KubeSystemNamespace + "/pods" - @Log.info("KubernetesApiClient::getClusterName : Getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) - @Log.info("KubernetesApiClient::getClusterName : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInfo['items'].each do |items| - if items['metadata']['name'].include? "kube-controller-manager" - items['spec']['containers'][0]['command'].each do |command| - if command.include? "--cluster-name" - @@ClusterName = command.split('=')[1] - end - end - end - end - end - end - rescue => error - @Log.warn("getClusterName failed: #{error}") - end - return @@ClusterName - end + def getClusterId + return @@ClusterId if !@@ClusterId.nil? 
+ #By default initialize ClusterId to ClusterName. + # In ACS/On-prem, we need to figure out how we can generate ClusterId + @@ClusterId = getClusterName + begin + cluster = ENV["AKS_RESOURCE_ID"] + if cluster && !cluster.nil? && !cluster.empty? + @@ClusterId = cluster + end + rescue => error + @Log.warn("getClusterId failed: #{error}") + end + return @@ClusterId + end - def getClusterId - return @@ClusterId if !@@ClusterId.nil? - #By default initialize ClusterId to ClusterName. - # In ACS/On-prem, we need to figure out how we can generate ClusterId - @@ClusterId = getClusterName - begin - cluster = ENV['AKS_RESOURCE_ID'] - if cluster && !cluster.nil? && !cluster.empty? - @@ClusterId = cluster - end - rescue => error - @Log.warn("getClusterId failed: #{error}") - end - return @@ClusterId + def isNodeMaster + return @@IsNodeMaster if !@@IsNodeMaster.nil? + @@IsNodeMaster = false + begin + @Log.info("KubernetesApiClient::isNodeMaster : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") + allNodesInfo = JSON.parse(getKubeResourceInfo("nodes").body) + @Log.info("KubernetesApiClient::isNodeMaster : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + if !allNodesInfo.nil? && !allNodesInfo.empty? + thisNodeName = OMS::Common.get_hostname + allNodesInfo["items"].each do |item| + if item["metadata"]["name"].casecmp(thisNodeName) == 0 + if item["metadata"]["labels"]["kubernetes.io/role"].to_s.include?("master") || item["metadata"]["labels"]["role"].to_s.include?("master") + @@IsNodeMaster = true + end + break end + end + end + rescue => error + @Log.warn("KubernetesApiClient::isNodeMaster : node role request failed: #{error}") + end - def isNodeMaster - return @@IsNodeMaster if !@@IsNodeMaster.nil? 
- @@IsNodeMaster = false - begin - @Log.info("KubernetesApiClient::isNodeMaster : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - allNodesInfo = JSON.parse(getKubeResourceInfo('nodes').body) - @Log.info("KubernetesApiClient::isNodeMaster : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") - if !allNodesInfo.nil? && !allNodesInfo.empty? - thisNodeName = OMS::Common.get_hostname - allNodesInfo['items'].each do |item| - if item['metadata']['name'].casecmp(thisNodeName) == 0 - if item['metadata']['labels']["kubernetes.io/role"].to_s.include?("master") || item['metadata']['labels']["role"].to_s.include?("master") - @@IsNodeMaster = true - end - break - end - end - end - rescue => error - @Log.warn("KubernetesApiClient::isNodeMaster : node role request failed: #{error}") - end - - return @@IsNodeMaster - end + return @@IsNodeMaster + end - #def isValidRunningNode - # return @@IsValidRunningNode if !@@IsValidRunningNode.nil? - # @@IsValidRunningNode = false - # begin - # thisNodeName = OMS::Common.get_hostname - # if isLinuxCluster - # # Run on agent node [0] - # @@IsValidRunningNode = !isNodeMaster && thisNodeName.to_s.split('-').last == '0' - # else - # # Run on master node [0] - # @@IsValidRunningNode = isNodeMaster && thisNodeName.to_s.split('-').last == '0' - # end - # rescue => error - # @Log.warn("Checking Node Type failed: #{error}") - # end - # if(@@IsValidRunningNode == true) - # @Log.info("Electing current node to talk to k8 api") - # else - # @Log.info("Not Electing current node to talk to k8 api") - # end - # return @@IsValidRunningNode - #end - - #def isLinuxCluster - # return @@IsLinuxCluster if !@@IsLinuxCluster.nil? 
- # @@IsLinuxCluster = true - # begin - # @Log.info("KubernetesApiClient::isLinuxCluster : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - # allNodesInfo = JSON.parse(getKubeResourceInfo('nodes').body) - # @Log.info("KubernetesApiClient::isLinuxCluster : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") - # if !allNodesInfo.nil? && !allNodesInfo.empty? - # allNodesInfo['items'].each do |item| - # if !(item['status']['nodeInfo']['operatingSystem'].casecmp('linux') == 0) - # @@IsLinuxCluster = false - # break - # end - # end - # end - # rescue => error - # @Log.warn("KubernetesApiClient::isLinuxCluster : node role request failed: #{error}") - # end - # return @@IsLinuxCluster - #end - - # returns an arry of pods (json) - def getPods(namespace) - pods = [] - begin - kubesystemResourceUri = "namespaces/" + namespace + "/pods" - podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) - podInfo['items'].each do |items| - pods.push items - end - rescue => error - @Log.warn("List pods request failed: #{error}") - end - return pods - end + #def isValidRunningNode + # return @@IsValidRunningNode if !@@IsValidRunningNode.nil? + # @@IsValidRunningNode = false + # begin + # thisNodeName = OMS::Common.get_hostname + # if isLinuxCluster + # # Run on agent node [0] + # @@IsValidRunningNode = !isNodeMaster && thisNodeName.to_s.split('-').last == '0' + # else + # # Run on master node [0] + # @@IsValidRunningNode = isNodeMaster && thisNodeName.to_s.split('-').last == '0' + # end + # rescue => error + # @Log.warn("Checking Node Type failed: #{error}") + # end + # if(@@IsValidRunningNode == true) + # @Log.info("Electing current node to talk to k8 api") + # else + # @Log.info("Not Electing current node to talk to k8 api") + # end + # return @@IsValidRunningNode + #end + + #def isLinuxCluster + # return @@IsLinuxCluster if !@@IsLinuxCluster.nil? 
+ # @@IsLinuxCluster = true + # begin + # @Log.info("KubernetesApiClient::isLinuxCluster : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") + # allNodesInfo = JSON.parse(getKubeResourceInfo('nodes').body) + # @Log.info("KubernetesApiClient::isLinuxCluster : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + # if !allNodesInfo.nil? && !allNodesInfo.empty? + # allNodesInfo['items'].each do |item| + # if !(item['status']['nodeInfo']['operatingSystem'].casecmp('linux') == 0) + # @@IsLinuxCluster = false + # break + # end + # end + # end + # rescue => error + # @Log.warn("KubernetesApiClient::isLinuxCluster : node role request failed: #{error}") + # end + # return @@IsLinuxCluster + #end + + # returns an arry of pods (json) + def getPods(namespace) + pods = [] + begin + kubesystemResourceUri = "namespaces/" + namespace + "/pods" + podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) + podInfo["items"].each do |items| + pods.push items + end + rescue => error + @Log.warn("List pods request failed: #{error}") + end + return pods + end - def getContainerIDs(namespace) - containers = Hash.new - begin - kubesystemResourceUri = "namespaces/" + namespace + "/pods" - @Log.info("KubernetesApiClient::getContainerIDs : Getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) - @Log.info("KubernetesApiClient::getContainerIDs : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInfo['items'].each do |item| - if (!item['status'].nil? && !item['status'].empty? && !item['status']['containerStatuses'].nil? && !item['status']['containerStatuses'].empty?) 
- item['status']['containerStatuses'].each do |cntr| - containers[cntr['containerID']] = "kube-system" - end - end - end - rescue => error - @Log.warn("KubernetesApiClient::getContainerIDs : List ContainerIDs request failed: #{error}") + # returns a hash of windows node names and their internal IPs + def getWindowsNodes + winNodes = [] + begin + nodeInventory = JSON.parse(getKubeResourceInfo("nodes").body) + @Log.info "KubernetesAPIClient::getWindowsNodes : Got nodes from kube api" + # Resetting the windows node cache + @@WinNodeArray.clear + if (!nodeInventory.empty?) + nodeInventory["items"].each do |item| + # check for windows operating system in node metadata + winNode = {} + nodeStatus = item["status"] + nodeMetadata = item["metadata"] + if !nodeStatus.nil? && !nodeStatus["nodeInfo"].nil? && !nodeStatus["nodeInfo"]["operatingSystem"].nil? + operatingSystem = nodeStatus["nodeInfo"]["operatingSystem"] + if (operatingSystem.is_a?(String) && operatingSystem.casecmp("windows") == 0) + # Adding windows nodes to winNodeArray so that it can be used in kubepodinventory to send ContainerInventory data + # to get images and image tags for containers in windows nodes + if !nodeMetadata.nil? && !nodeMetadata["name"].nil? + @@WinNodeArray.push(nodeMetadata["name"]) end - return containers + nodeStatusAddresses = nodeStatus["addresses"] + if !nodeStatusAddresses.nil? 
+ nodeStatusAddresses.each do |address| + winNode[address["type"]] = address["address"] + end + winNodes.push(winNode) + end + end end + end + end + return winNodes + rescue => error + @Log.warn("Error in get windows nodes: #{error}") + return nil + end + end - def getContainerLogs(namespace, pod, container, showTimeStamp) - containerLogs = "" - begin - kubesystemResourceUri = "namespaces/" + namespace + "/pods/" + pod + "/log" + "?container=" + container - if showTimeStamp - kubesystemResourceUri += "×tamps=true" - end - @Log.info("KubernetesApiClient::getContainerLogs : Getting logs from Kube API @ #{Time.now.utc.iso8601}") - containerLogs = getKubeResourceInfo(kubesystemResourceUri).body - @Log.info("KubernetesApiClient::getContainerLogs : Done getting logs from Kube API @ #{Time.now.utc.iso8601}") - rescue => error - @Log.warn("Pod logs request failed: #{error}") - end - return containerLogs + def getWindowsNodesArray + return @@WinNodeArray + end + + def getContainerIDs(namespace) + containers = Hash.new + begin + kubesystemResourceUri = "namespaces/" + namespace + "/pods" + @Log.info("KubernetesApiClient::getContainerIDs : Getting pods from Kube API @ #{Time.now.utc.iso8601}") + podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) + @Log.info("KubernetesApiClient::getContainerIDs : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") + podInfo["items"].each do |item| + if (!item["status"].nil? && !item["status"].empty? && !item["status"]["containerStatuses"].nil? && !item["status"]["containerStatuses"].empty?) 
+ item["status"]["containerStatuses"].each do |cntr| + containers[cntr["containerID"]] = "kube-system" end + end + end + rescue => error + @Log.warn("KubernetesApiClient::getContainerIDs : List ContainerIDs request failed: #{error}") + end + return containers + end + + def getContainerLogs(namespace, pod, container, showTimeStamp) + containerLogs = "" + begin + kubesystemResourceUri = "namespaces/" + namespace + "/pods/" + pod + "/log" + "?container=" + container + if showTimeStamp + kubesystemResourceUri += "×tamps=true" + end + @Log.info("KubernetesApiClient::getContainerLogs : Getting logs from Kube API @ #{Time.now.utc.iso8601}") + containerLogs = getKubeResourceInfo(kubesystemResourceUri).body + @Log.info("KubernetesApiClient::getContainerLogs : Done getting logs from Kube API @ #{Time.now.utc.iso8601}") + rescue => error + @Log.warn("Pod logs request failed: #{error}") + end + return containerLogs + end + + def getContainerLogsSinceTime(namespace, pod, container, since, showTimeStamp) + containerLogs = "" + begin + kubesystemResourceUri = "namespaces/" + namespace + "/pods/" + pod + "/log" + "?container=" + container + "&sinceTime=" + since + kubesystemResourceUri = URI.escape(kubesystemResourceUri, ":.+") # HTML URL Encoding for date + + if showTimeStamp + kubesystemResourceUri += "×tamps=true" + end + @Log.info("calling #{kubesystemResourceUri}") + @Log.info("KubernetesApiClient::getContainerLogsSinceTime : Getting logs from Kube API @ #{Time.now.utc.iso8601}") + containerLogs = getKubeResourceInfo(kubesystemResourceUri).body + @Log.info("KubernetesApiClient::getContainerLogsSinceTime : Done getting logs from Kube API @ #{Time.now.utc.iso8601}") + rescue => error + @Log.warn("Pod logs request failed: #{error}") + end + return containerLogs + end - def getContainerLogsSinceTime(namespace, pod, container, since, showTimeStamp) - containerLogs = "" - begin - kubesystemResourceUri = "namespaces/" + namespace + "/pods/" + pod + "/log" + "?container=" + container 
+ "&sinceTime=" + since - kubesystemResourceUri = URI.escape(kubesystemResourceUri, ":.+") # HTML URL Encoding for date - - if showTimeStamp - kubesystemResourceUri += "×tamps=true" - end - @Log.info("calling #{kubesystemResourceUri}") - @Log.info("KubernetesApiClient::getContainerLogsSinceTime : Getting logs from Kube API @ #{Time.now.utc.iso8601}") - containerLogs = getKubeResourceInfo(kubesystemResourceUri).body - @Log.info("KubernetesApiClient::getContainerLogsSinceTime : Done getting logs from Kube API @ #{Time.now.utc.iso8601}") - rescue => error - @Log.warn("Pod logs request failed: #{error}") + def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn) + metricItems = [] + begin + clusterId = getClusterId + metricInfo = metricJSON + metricInfo["items"].each do |pod| + podNameSpace = pod["metadata"]["namespace"] + if podNameSpace.eql?("kube-system") && !pod["metadata"].key?("ownerReferences") + # The above case seems to be the only case where you have horizontal scaling of pods + # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash + # instead of the actual poduid. Since this uid is not being surface into the UX + # its ok to use this. + # Use kubernetes.io/config.hash to be able to correlate with cadvisor data + podUid = pod["metadata"]["annotations"]["kubernetes.io/config.hash"] + else + podUid = pod["metadata"]["uid"] + end + if (!pod["spec"]["containers"].nil? && !pod["spec"]["nodeName"].nil?) + nodeName = pod["spec"]["nodeName"] + pod["spec"]["containers"].each do |container| + containerName = container["name"] + metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?) 
+ metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect]) + + metricItem = {} + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = nodeName + metricProps["ObjectName"] = "K8SContainer" + metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) + #No container level limit for the given metric, so default to node level limit + else + nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect + if (metricCategory == "limits" && @@NodeMetrics.has_key?(nodeMetricsHashKey)) + metricValue = @@NodeMetrics[nodeMetricsHashKey] + #@Log.info("Limits not set for container #{clusterId + "/" + podUid + "/" + containerName} using node level limits: #{nodeMetricsHashKey}=#{metricValue} ") + metricItem = {} + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = nodeName + metricProps["ObjectName"] = "K8SContainer" + metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) end - return containerLogs + end end + end + end + rescue => error + @Log.warn("getcontainerResourceRequestsAndLimits failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") + return metricItems + end + return metricItems + end #getContainerResourceRequestAndLimits - def 
getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn) - metricItems = [] - begin - clusterId = getClusterId - metricInfo = metricJSON - metricInfo['items'].each do |pod| - podNameSpace = pod['metadata']['namespace'] - if podNameSpace.eql?("kube-system") && !pod['metadata'].key?("ownerReferences") - # The above case seems to be the only case where you have horizontal scaling of pods - # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash - # instead of the actual poduid. Since this uid is not being surface into the UX - # its ok to use this. - # Use kubernetes.io/config.hash to be able to correlate with cadvisor data - podUid = pod['metadata']['annotations']['kubernetes.io/config.hash'] - else - podUid = pod['metadata']['uid'] - end - if (!pod['spec']['containers'].nil? && !pod['spec']['nodeName'].nil?) - nodeName = pod['spec']['nodeName'] - pod['spec']['containers'].each do |container| - containerName = container['name'] - metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z - if (!container['resources'].nil? && !container['resources'].empty? && !container['resources'][metricCategory].nil? && !container['resources'][metricCategory][metricNameToCollect].nil?) 
- metricValue = getMetricNumericValue(metricNameToCollect, container['resources'][metricCategory][metricNameToCollect]) - - metricItem = {} - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = nodeName - metricProps['ObjectName'] = "K8SContainer" - metricProps['InstanceName'] = clusterId + "/" + podUid + "/" + containerName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - metricItems.push(metricItem) - #No container level limit for the given metric, so default to node level limit - else - nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect - if (metricCategory == "limits" && @@NodeMetrics.has_key?(nodeMetricsHashKey)) - - metricValue = @@NodeMetrics[nodeMetricsHashKey] - #@Log.info("Limits not set for container #{clusterId + "/" + podUid + "/" + containerName} using node level limits: #{nodeMetricsHashKey}=#{metricValue} ") - metricItem = {} - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = nodeName - metricProps['ObjectName'] = "K8SContainer" - metricProps['InstanceName'] = clusterId + "/" + podUid + "/" + containerName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - metricItems.push(metricItem) - end - end - end - end - end - rescue => error - @Log.warn("getcontainerResourceRequestsAndLimits failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") - return metricItems - end - return metricItems - end #getContainerResourceRequestAndLimits - - def 
parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn) - metricItems = [] - begin - metricInfo = metricJSON - clusterId = getClusterId - #Since we are getting all node data at the same time and kubernetes doesnt specify a timestamp for the capacity and allocation metrics, - #if we are coming up with the time it should be same for all nodes - metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z - metricInfo['items'].each do |node| - if (!node['status'][metricCategory].nil?) - - # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" - metricValue = getMetricNumericValue(metricNameToCollect, node['status'][metricCategory][metricNameToCollect]) - - metricItem = {} - metricItem['DataItems'] = [] - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = node['metadata']['name'] - metricProps['ObjectName'] = "K8SNode" - metricProps['InstanceName'] = clusterId + "/" + node['metadata']['name'] - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - metricItems.push(metricItem) - #push node level metrics to a inmem hash so that we can use it looking up at container level. 
- #Currently if container level cpu & memory limits are not defined we default to node level limits - @@NodeMetrics[clusterId + "/" + node['metadata']['name'] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue - #@Log.info ("Node metric hash: #{@@NodeMetrics}") - end - end - rescue => error - @Log.warn("parseNodeLimits failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") - end - return metricItems - end #parseNodeLimits - - def getMetricNumericValue(metricName, metricVal) - metricValue = metricVal - begin - case metricName - when "memory" #convert to bytes for memory - #https://kubernetes.io/docs/tasks/configure-pod-container/assign-memory-resource/ - if (metricValue.end_with?("Ki")) - metricValue.chomp!("Ki") - metricValue = Float(metricValue) * 1024.0 ** 1 - elsif (metricValue.end_with?("Mi")) - metricValue.chomp!("Mi") - metricValue = Float(metricValue) * 1024.0 ** 2 - elsif (metricValue.end_with?("Gi")) - metricValue.chomp!("Gi") - metricValue = Float(metricValue) * 1024.0 ** 3 - elsif (metricValue.end_with?("Ti")) - metricValue.chomp!("Ti") - metricValue = Float(metricValue) * 1024.0 ** 4 - elsif (metricValue.end_with?("Pi")) - metricValue.chomp!("Pi") - metricValue = Float(metricValue) * 1024.0 ** 5 - elsif (metricValue.end_with?("Ei")) - metricValue.chomp!("Ei") - metricValue = Float(metricValue) * 1024.0 ** 6 - elsif (metricValue.end_with?("Zi")) - metricValue.chomp!("Zi") - metricValue = Float(metricValue) * 1024.0 ** 7 - elsif (metricValue.end_with?("Yi")) - metricValue.chomp!("Yi") - metricValue = Float(metricValue) * 1024.0 ** 8 - elsif (metricValue.end_with?("K")) - metricValue.chomp!("K") - metricValue = Float(metricValue) * 1000.0 ** 1 - elsif (metricValue.end_with?("M")) - metricValue.chomp!("M") - metricValue = Float(metricValue) * 1000.0 ** 2 - elsif (metricValue.end_with?("G")) - metricValue.chomp!("G") - metricValue = Float(metricValue) * 1000.0 ** 3 - elsif (metricValue.end_with?("T")) - metricValue.chomp!("T") 
- metricValue = Float(metricValue) * 1000.0 ** 4 - elsif (metricValue.end_with?("P")) - metricValue.chomp!("P") - metricValue = Float(metricValue) * 1000.0 ** 5 - elsif (metricValue.end_with?("E")) - metricValue.chomp!("E") - metricValue = Float(metricValue) * 1000.0 ** 6 - elsif (metricValue.end_with?("Z")) - metricValue.chomp!("Z") - metricValue = Float(metricValue) * 1000.0 ** 7 - elsif (metricValue.end_with?("Y")) - metricValue.chomp!("Y") - metricValue = Float(metricValue) * 1000.0 ** 8 - else #assuming there are no units specified, it is bytes (the below conversion will fail for other unsupported 'units') - metricValue = Float(metricValue) - end - when "cpu" #convert to nanocores for cpu - #https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/ - if (metricValue.end_with?("m")) - metricValue.chomp!("m") - metricValue = Float(metricValue) * 1000.0 ** 2 - else #assuming no units specified, it is cores that we are converting to nanocores (the below conversion will fail for other unsupported 'units') - metricValue = Float(metricValue) * 1000.0 ** 3 - end - else - @Log.warn("getMetricNumericValue: Unsupported metric #{metricName}. Returning 0 for metric value") - metricValue = 0 - end #case statement - rescue => error - @Log.warn("getMetricNumericValue failed: #{error} for metric #{metricName} with value #{metricVal}. Returning 0 formetric value") - return 0 - end - return metricValue - end # getMetricNumericValue + def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn) + metricItems = [] + begin + metricInfo = metricJSON + clusterId = getClusterId + #Since we are getting all node data at the same time and kubernetes doesnt specify a timestamp for the capacity and allocation metrics, + #if we are coming up with the time it should be same for all nodes + metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + metricInfo["items"].each do |node| + if (!node["status"][metricCategory].nil?) 
+ + # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" + metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect]) + + metricItem = {} + metricItem["DataItems"] = [] + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = node["metadata"]["name"] + metricProps["ObjectName"] = "K8SNode" + metricProps["InstanceName"] = clusterId + "/" + node["metadata"]["name"] + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) + #push node level metrics to a inmem hash so that we can use it looking up at container level. + #Currently if container level cpu & memory limits are not defined we default to node level limits + @@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue + #@Log.info ("Node metric hash: #{@@NodeMetrics}") + end end - end + rescue => error + @Log.warn("parseNodeLimits failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") + end + return metricItems + end #parseNodeLimits + def getMetricNumericValue(metricName, metricVal) + metricValue = metricVal + begin + case metricName + when "memory" #convert to bytes for memory + #https://kubernetes.io/docs/tasks/configure-pod-container/assign-memory-resource/ + if (metricValue.end_with?("Ki")) + metricValue.chomp!("Ki") + metricValue = Float(metricValue) * 1024.0 ** 1 + elsif (metricValue.end_with?("Mi")) + metricValue.chomp!("Mi") + metricValue = Float(metricValue) * 1024.0 ** 2 + elsif (metricValue.end_with?("Gi")) + metricValue.chomp!("Gi") + metricValue = Float(metricValue) * 1024.0 ** 3 + elsif (metricValue.end_with?("Ti")) + metricValue.chomp!("Ti") + metricValue = 
Float(metricValue) * 1024.0 ** 4 + elsif (metricValue.end_with?("Pi")) + metricValue.chomp!("Pi") + metricValue = Float(metricValue) * 1024.0 ** 5 + elsif (metricValue.end_with?("Ei")) + metricValue.chomp!("Ei") + metricValue = Float(metricValue) * 1024.0 ** 6 + elsif (metricValue.end_with?("Zi")) + metricValue.chomp!("Zi") + metricValue = Float(metricValue) * 1024.0 ** 7 + elsif (metricValue.end_with?("Yi")) + metricValue.chomp!("Yi") + metricValue = Float(metricValue) * 1024.0 ** 8 + elsif (metricValue.end_with?("K")) + metricValue.chomp!("K") + metricValue = Float(metricValue) * 1000.0 ** 1 + elsif (metricValue.end_with?("M")) + metricValue.chomp!("M") + metricValue = Float(metricValue) * 1000.0 ** 2 + elsif (metricValue.end_with?("G")) + metricValue.chomp!("G") + metricValue = Float(metricValue) * 1000.0 ** 3 + elsif (metricValue.end_with?("T")) + metricValue.chomp!("T") + metricValue = Float(metricValue) * 1000.0 ** 4 + elsif (metricValue.end_with?("P")) + metricValue.chomp!("P") + metricValue = Float(metricValue) * 1000.0 ** 5 + elsif (metricValue.end_with?("E")) + metricValue.chomp!("E") + metricValue = Float(metricValue) * 1000.0 ** 6 + elsif (metricValue.end_with?("Z")) + metricValue.chomp!("Z") + metricValue = Float(metricValue) * 1000.0 ** 7 + elsif (metricValue.end_with?("Y")) + metricValue.chomp!("Y") + metricValue = Float(metricValue) * 1000.0 ** 8 + else #assuming there are no units specified, it is bytes (the below conversion will fail for other unsupported 'units') + metricValue = Float(metricValue) + end + when "cpu" #convert to nanocores for cpu + #https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/ + if (metricValue.end_with?("m")) + metricValue.chomp!("m") + metricValue = Float(metricValue) * 1000.0 ** 2 + else #assuming no units specified, it is cores that we are converting to nanocores (the below conversion will fail for other unsupported 'units') + metricValue = Float(metricValue) * 1000.0 ** 3 + end + else + 
@Log.warn("getMetricNumericValue: Unsupported metric #{metricName}. Returning 0 for metric value") + metricValue = 0 + end #case statement + rescue => error + @Log.warn("getMetricNumericValue failed: #{error} for metric #{metricName} with value #{metricVal}. Returning 0 formetric value") + return 0 + end + return metricValue + end # getMetricNumericValue + end +end diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index a857aa6b9..f5f65f01b 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -2,90 +2,88 @@ # frozen_string_literal: true module Fluent - - class CAdvisor_Perf_Input < Input - Plugin.register_input('cadvisorperf', self) - - def initialize - super - require 'yaml' - require 'json' - - require_relative 'CAdvisorMetricsAPIClient' - require_relative 'oms_common' - require_relative 'omslog' - end - - config_param :run_interval, :time, :default => '1m' - config_param :tag, :string, :default => "oms.api.cadvisorperf" - config_param :mdmtag, :string, :default => "mdm.cadvisorperf" - - def configure (conf) - super + class CAdvisor_Perf_Input < Input + Plugin.register_input("cadvisorperf", self) + + def initialize + super + require "yaml" + require "json" + + require_relative "CAdvisorMetricsAPIClient" + require_relative "oms_common" + require_relative "omslog" + end + + config_param :run_interval, :time, :default => "1m" + config_param :tag, :string, :default => "oms.api.cadvisorperf" + config_param :mdmtag, :string, :default => "mdm.cadvisorperf" + + def configure(conf) + super + end + + def start + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) end - - def start - if @run_interval - @finished = false - @condition = ConditionVariable.new - @mutex = Mutex.new - @thread = Thread.new(&method(:run_periodic)) - end + end + + def shutdown + if @run_interval + @mutex.synchronize { + 
@finished = true + @condition.signal + } + @thread.join end - - def shutdown - if @run_interval - @mutex.synchronize { - @finished = true - @condition.signal - } - @thread.join + end + + def enumerate() + time = Time.now.to_f + begin + eventStream = MultiEventStream.new + metricData = CAdvisorMetricsAPIClient.getMetrics() + metricData.each do |record| + record["DataType"] = "LINUX_PERF_BLOB" + record["IPName"] = "LogManagement" + eventStream.add(time, record) if record + #router.emit(@tag, time, record) if record end - end - - def enumerate() - time = Time.now.to_f - begin - eventStream = MultiEventStream.new - metricData = CAdvisorMetricsAPIClient.getMetrics() - metricData.each do |record| - record['DataType'] = "LINUX_PERF_BLOB" - record['IPName'] = "LogManagement" - eventStream.add(time, record) if record - #router.emit(@tag, time, record) if record - end - - router.emit_stream(@tag, eventStream) if eventStream - router.emit_stream(@mdmtag, eventStream) if eventStream - @@istestvar = ENV['ISTEST'] - if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) - $log.info("cAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") - end - rescue => errorStr - $log.warn "Failed to retrieve cadvisor metric data: #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) + + router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@mdmtag, eventStream) if eventStream + @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && eventStream.count > 0) + $log.info("cAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") end + rescue => errorStr + $log.warn "Failed to retrieve cadvisor metric data: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) end - - def run_periodic - @mutex.lock + end + + def run_periodic + @mutex.lock + done = @finished + until done + @condition.wait(@mutex, @run_interval) done = @finished - until done - @condition.wait(@mutex, @run_interval) - done = @finished - @mutex.unlock - if !done - begin - $log.info("in_cadvisor_perf::run_periodic @ #{Time.now.utc.iso8601}") - enumerate - rescue => errorStr - $log.warn "in_cadvisor_perf::run_periodic: enumerate Failed to retrieve cadvisor perf metrics: #{errorStr}" - end + @mutex.unlock + if !done + begin + $log.info("in_cadvisor_perf::run_periodic @ #{Time.now.utc.iso8601}") + enumerate + rescue => errorStr + $log.warn "in_cadvisor_perf::run_periodic: enumerate Failed to retrieve cadvisor perf metrics: #{errorStr}" end - @mutex.lock end - @mutex.unlock + @mutex.lock end - end # CAdvisor_Perf_Input + @mutex.unlock + end + end # CAdvisor_Perf_Input end # module - diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb index f501421a2..4d83278a9 100644 --- a/source/code/plugin/in_containerinventory.rb +++ b/source/code/plugin/in_containerinventory.rb @@ -2,29 +2,28 @@ # frozen_string_literal: true module Fluent - class Container_Inventory_Input < Input - Plugin.register_input('containerinventory', self) + Plugin.register_input("containerinventory", self) - @@PluginName = 'ContainerInventory' - @@RunningState = 'Running' - @@FailedState = 'Failed' - @@StoppedState = 'Stopped' - @@PausedState = 'Paused' + @@PluginName = "ContainerInventory" + @@RunningState = "Running" + @@FailedState = "Failed" + @@StoppedState = "Stopped" + @@PausedState = "Paused" def initialize super - require 'json' - require_relative 'DockerApiClient' - 
require_relative 'ContainerInventoryState' - require_relative 'ApplicationInsightsUtility' - require_relative 'omslog' + require "json" + require_relative "DockerApiClient" + require_relative "ContainerInventoryState" + require_relative "ApplicationInsightsUtility" + require_relative "omslog" end - config_param :run_interval, :time, :default => '1m' + config_param :run_interval, :time, :default => "1m" config_param :tag, :string, :default => "oms.containerinsights.containerinventory" - - def configure (conf) + + def configure(conf) super end @@ -50,16 +49,16 @@ def shutdown def obtainContainerConfig(instance, container) begin - configValue = container['Config'] + configValue = container["Config"] if !configValue.nil? - instance['ContainerHostname'] = configValue['Hostname'] + instance["ContainerHostname"] = configValue["Hostname"] - envValue = configValue['Env'] + envValue = configValue["Env"] envValueString = (envValue.nil?) ? "" : envValue.to_s # Skip environment variable processing if it contains the flag AZMON_COLLECT_ENV=FALSE if /AZMON_COLLECT_ENV=FALSE/i.match(envValueString) envValueString = ["AZMON_COLLECT_ENV=FALSE"] - $log.warn("Environment Variable collection for container: #{container['Id']} skipped because AZMON_COLLECT_ENV is set to false") + $log.warn("Environment Variable collection for container: #{container["Id"]} skipped because AZMON_COLLECT_ENV is set to false") end # Restricting the ENV string value to 200kb since the size of this string can go very high if envValueString.length > 200000 @@ -68,88 +67,88 @@ def obtainContainerConfig(instance, container) if !lastIndex.nil? 
envValueStringTruncated = envValueStringTruncated.slice(0..lastIndex) + "]" end - instance['EnvironmentVar'] = envValueStringTruncated + instance["EnvironmentVar"] = envValueStringTruncated else - instance['EnvironmentVar'] = envValueString + instance["EnvironmentVar"] = envValueString end - cmdValue = configValue['Cmd'] + cmdValue = configValue["Cmd"] cmdValueString = (cmdValue.nil?) ? "" : cmdValue.to_s - instance['Command'] = cmdValueString + instance["Command"] = cmdValueString - instance['ComposeGroup'] = "" - labelsValue = configValue['Labels'] + instance["ComposeGroup"] = "" + labelsValue = configValue["Labels"] if !labelsValue.nil? && !labelsValue.empty? - instance['ComposeGroup'] = labelsValue['com.docker.compose.project'] + instance["ComposeGroup"] = labelsValue["com.docker.compose.project"] end else - $log.warn("Attempt in ObtainContainerConfig to get container: #{container['Id']} config information returned null") - end - rescue => errorStr - $log.warn("Exception in obtainContainerConfig: #{errorStr}") + $log.warn("Attempt in ObtainContainerConfig to get container: #{container["Id"]} config information returned null") end + rescue => errorStr + $log.warn("Exception in obtainContainerConfig: #{errorStr}") + end end def obtainContainerState(instance, container) begin - stateValue = container['State'] + stateValue = container["State"] if !stateValue.nil? 
- exitCodeValue = stateValue['ExitCode'] + exitCodeValue = stateValue["ExitCode"] # Exit codes less than 0 are not supported by the engine if exitCodeValue < 0 - exitCodeValue = 128 - $log.info("obtainContainerState::Container: #{container['Id']} returned negative exit code") + exitCodeValue = 128 + $log.info("obtainContainerState::Container: #{container["Id"]} returned negative exit code") end - instance['ExitCode'] = exitCodeValue + instance["ExitCode"] = exitCodeValue if exitCodeValue > 0 - instance['State'] = @@FailedState + instance["State"] = @@FailedState else # Set the Container status : Running/Paused/Stopped - runningValue = stateValue['Running'] + runningValue = stateValue["Running"] if runningValue - pausedValue = stateValue['Paused'] + pausedValue = stateValue["Paused"] # Checking for paused within running is true state because docker returns true for both Running and Paused fields when the container is paused if pausedValue - instance['State'] = @@PausedState + instance["State"] = @@PausedState else - instance['State'] = @@RunningState + instance["State"] = @@RunningState end else - instance['State'] = @@StoppedState + instance["State"] = @@StoppedState end end - instance['StartedTime'] = stateValue['StartedAt'] - instance['FinishedTime'] = stateValue['FinishedAt'] + instance["StartedTime"] = stateValue["StartedAt"] + instance["FinishedTime"] = stateValue["FinishedAt"] else - $log.info("Attempt in ObtainContainerState to get container: #{container['Id']} state information returned null") + $log.info("Attempt in ObtainContainerState to get container: #{container["Id"]} state information returned null") end - rescue => errorStr - $log.warn("Exception in obtainContainerState: #{errorStr}") + rescue => errorStr + $log.warn("Exception in obtainContainerState: #{errorStr}") end end def obtainContainerHostConfig(instance, container) begin - hostConfig = container['HostConfig'] + hostConfig = container["HostConfig"] if !hostConfig.nil? 
- links = hostConfig['Links'] - instance['Links'] = "" + links = hostConfig["Links"] + instance["Links"] = "" if !links.nil? linksString = links.to_s - instance['Links'] = (linksString == "null")? "" : linksString + instance["Links"] = (linksString == "null") ? "" : linksString end - portBindings = hostConfig['PortBindings'] - instance['Ports'] = "" + portBindings = hostConfig["PortBindings"] + instance["Ports"] = "" if !portBindings.nil? portBindingsString = portBindings.to_s - instance['Ports'] = (portBindingsString == "null")? "" : portBindingsString + instance["Ports"] = (portBindingsString == "null") ? "" : portBindingsString end else - $log.info("Attempt in ObtainContainerHostConfig to get container: #{container['Id']} host config information returned null") - end - rescue => errorStr - $log.warn("Exception in obtainContainerHostConfig: #{errorStr}") + $log.info("Attempt in ObtainContainerHostConfig to get container: #{container["Id"]} host config information returned null") end + rescue => errorStr + $log.warn("Exception in obtainContainerHostConfig: #{errorStr}") + end end def inspectContainer(id, nameMap) @@ -157,29 +156,29 @@ def inspectContainer(id, nameMap) begin container = DockerApiClient.dockerInspectContainer(id) if !container.nil? && !container.empty? - containerInstance['InstanceID'] = container['Id'] - containerInstance['CreatedTime'] = container['Created'] - containerName = container['Name'] + containerInstance["InstanceID"] = container["Id"] + containerInstance["CreatedTime"] = container["Created"] + containerName = container["Name"] if !containerName.nil? && !containerName.empty? # Remove the leading / from the name if it exists (this is an API issue) - containerInstance['ElementName'] = (containerName[0] == '/') ? containerName[1..-1] : containerName + containerInstance["ElementName"] = (containerName[0] == "/") ? containerName[1..-1] : containerName end - imageValue = container['Image'] + imageValue = container["Image"] if !imageValue.nil? 
&& !imageValue.empty? - containerInstance['ImageId'] = imageValue + containerInstance["ImageId"] = imageValue repoImageTagArray = nameMap[imageValue] if nameMap.has_key? imageValue - containerInstance['Repository'] = repoImageTagArray[0] - containerInstance['Image'] = repoImageTagArray[1] - containerInstance['ImageTag'] = repoImageTagArray[2] + containerInstance["Repository"] = repoImageTagArray[0] + containerInstance["Image"] = repoImageTagArray[1] + containerInstance["ImageTag"] = repoImageTagArray[2] end end - obtainContainerConfig(containerInstance, container); - obtainContainerState(containerInstance, container); - obtainContainerHostConfig(containerInstance, container); + obtainContainerConfig(containerInstance, container) + obtainContainerState(containerInstance, container) + obtainContainerHostConfig(containerInstance, container) end rescue => errorStr - $log.warn("Exception in inspectContainer: #{errorStr} for container: #{id}") + $log.warn("Exception in inspectContainer: #{errorStr} for container: #{id}") end return containerInstance end @@ -199,8 +198,8 @@ def enumerate containerIds.each do |containerId| inspectedContainer = {} inspectedContainer = inspectContainer(containerId, nameMap) - inspectedContainer['Computer'] = hostname - inspectedContainer['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated + inspectedContainer["Computer"] = hostname + inspectedContainer["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated containerInventory.push inspectedContainer ContainerInventoryState.writeContainerState(inspectedContainer) end @@ -210,8 +209,8 @@ def enumerate deletedContainers.each do |deletedContainer| container = ContainerInventoryState.readContainerState(deletedContainer) if !container.nil? 
- container.each{|k,v| container[k]=v} - container['State'] = "Deleted" + container.each { |k, v| container[k] = v } + container["State"] = "Deleted" containerInventory.push container end end @@ -219,28 +218,28 @@ def enumerate containerInventory.each do |record| wrapper = { - "DataType"=>"CONTAINER_INVENTORY_BLOB", - "IPName"=>"ContainerInsights", - "DataItems"=>[record.each{|k,v| record[k]=v}] + "DataType" => "CONTAINER_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [record.each { |k, v| record[k] = v }], } eventStream.add(emitTime, wrapper) if wrapper end router.emit_stream(@tag, eventStream) if eventStream - @@istestvar = ENV['ISTEST'] - if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) + @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0) $log.info("containerInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end - timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference/60 - if (timeDifferenceInMinutes >= 5) - @@telemetryTimeTracker = DateTime.now.to_time.to_i - telemetryProperties = {} - telemetryProperties['Computer'] = hostname - telemetryProperties['ContainerCount'] = containerInventory.length - ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) - end $log.info("in_container_inventory::enumerate : Processing complete - emitted stream @ #{Time.now.utc.iso8601}") end + timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= 5) + @@telemetryTimeTracker = DateTime.now.to_time.to_i + telemetryProperties = {} + telemetryProperties["Computer"] = hostname + telemetryProperties["ContainerCount"] = containerInventory.length + ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) + end rescue => 
errorStr $log.warn("Exception in enumerate container inventory: #{errorStr}") end @@ -265,7 +264,5 @@ def run_periodic end @mutex.unlock end - end # Container_Inventory_Input - -end # module \ No newline at end of file +end # module diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index ba1dacbe0..aabda441e 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -2,181 +2,176 @@ # frozen_string_literal: true module Fluent + class Kube_nodeInventory_Input < Input + Plugin.register_input("kubenodeinventory", self) - class Kube_nodeInventory_Input < Input - Plugin.register_input('kubenodeinventory', self) - - @@ContainerNodeInventoryTag = 'oms.containerinsights.ContainerNodeInventory' - @@MDMKubeNodeInventoryTag = 'mdm.kubenodeinventory' + @@ContainerNodeInventoryTag = "oms.containerinsights.ContainerNodeInventory" + @@MDMKubeNodeInventoryTag = "mdm.kubenodeinventory" - def initialize - super - require 'yaml' - require 'json' - - require_relative 'KubernetesApiClient' - require_relative 'ApplicationInsightsUtility' - require_relative 'oms_common' - require_relative 'omslog' + def initialize + super + require "yaml" + require "json" + require_relative "KubernetesApiClient" + require_relative "ApplicationInsightsUtility" + require_relative "oms_common" + require_relative "omslog" + end + + config_param :run_interval, :time, :default => "1m" + config_param :tag, :string, :default => "oms.containerinsights.KubeNodeInventory" + + def configure(conf) + super + end + + def start + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) + @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i end - - config_param :run_interval, :time, :default => '1m' - config_param :tag, :string, :default => "oms.containerinsights.KubeNodeInventory" - - def configure (conf) - super - end - - def start - if @run_interval - @finished 
= false - @condition = ConditionVariable.new - @mutex = Mutex.new - @thread = Thread.new(&method(:run_periodic)) - @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i - end - end - - def shutdown - if @run_interval - @mutex.synchronize { - @finished = true - @condition.signal - } - @thread.join - end - end - - def enumerate - currentTime = Time.now - emitTime = currentTime.to_f - batchTime = currentTime.utc.iso8601 - telemetrySent = false - $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo('nodes').body) - $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") - begin - if(!nodeInventory.empty?) - eventStream = MultiEventStream.new - containerNodeInventoryEventStream = MultiEventStream.new - #get node inventory - nodeInventory['items'].each do |items| - record = {} - # Sending records for ContainerNodeInventory - containerNodeInventoryRecord = {} - containerNodeInventoryRecord['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated - containerNodeInventoryRecord['Computer'] = items['metadata']['name'] + end - record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated - record['Computer'] = items['metadata']['name'] - record['ClusterName'] = KubernetesApiClient.getClusterName - record['ClusterId'] = KubernetesApiClient.getClusterId - record['CreationTimeStamp'] = items['metadata']['creationTimestamp'] - record['Labels'] = [items['metadata']['labels']] - record['Status'] = "" + def shutdown + if @run_interval + @mutex.synchronize { + @finished = true + @condition.signal + } + @thread.join + end + end - # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions. - # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . 
Based on this we - # populate the KubeNodeInventory Status field. A possible value for this field could be "Ready OutofDisk" - # implying that the node is ready for hosting pods, however its out of disk. - - if items['status'].key?("conditions") && !items['status']['conditions'].empty? - allNodeConditions="" - items['status']['conditions'].each do |condition| - if condition['status'] == "True" - if !allNodeConditions.empty? - allNodeConditions = allNodeConditions + "," + condition['type'] - else - allNodeConditions = condition['type'] - end - end - #collect last transition to/from ready (no matter ready is true/false) - if condition['type'] == "Ready" && !condition['lastTransitionTime'].nil? - record['LastTransitionTimeReady'] = condition['lastTransitionTime'] - end - end - if !allNodeConditions.empty? - record['Status'] = allNodeConditions - end + def enumerate + currentTime = Time.now + emitTime = currentTime.to_f + batchTime = currentTime.utc.iso8601 + telemetrySent = false + $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") + nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + begin + if (!nodeInventory.empty?) 
+ eventStream = MultiEventStream.new + containerNodeInventoryEventStream = MultiEventStream.new + #get node inventory + nodeInventory["items"].each do |items| + record = {} + # Sending records for ContainerNodeInventory + containerNodeInventoryRecord = {} + containerNodeInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + containerNodeInventoryRecord["Computer"] = items["metadata"]["name"] - end + record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + record["Computer"] = items["metadata"]["name"] + record["ClusterName"] = KubernetesApiClient.getClusterName + record["ClusterId"] = KubernetesApiClient.getClusterId + record["CreationTimeStamp"] = items["metadata"]["creationTimestamp"] + record["Labels"] = [items["metadata"]["labels"]] + record["Status"] = "" - nodeInfo = items['status']['nodeInfo'] - record['KubeletVersion'] = nodeInfo['kubeletVersion'] - record['KubeProxyVersion'] = nodeInfo['kubeProxyVersion'] - containerNodeInventoryRecord['OperatingSystem'] = nodeInfo['osImage'] - dockerVersion = nodeInfo['containerRuntimeVersion'] - dockerVersion.slice! "docker://" - containerNodeInventoryRecord['DockerVersion'] = dockerVersion - # ContainerNodeInventory data for docker version and operating system. - containerNodeInventoryWrapper = { - "DataType"=>"CONTAINER_NODE_INVENTORY_BLOB", - "IPName"=>"ContainerInsights", - "DataItems"=>[containerNodeInventoryRecord.each{|k,v| containerNodeInventoryRecord[k]=v}] - } - containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper + # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions. + # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we + # populate the KubeNodeInventory Status field. 
A possible value for this field could be "Ready OutofDisk" + # implying that the node is ready for hosting pods, however its out of disk. - wrapper = { - "DataType"=>"KUBE_NODE_INVENTORY_BLOB", - "IPName"=>"ContainerInsights", - "DataItems"=>[record.each{|k,v| record[k]=v}] - } - eventStream.add(emitTime, wrapper) if wrapper - # Adding telemetry to send node telemetry every 5 minutes - timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference/60 - if (timeDifferenceInMinutes >= 5) - properties = {} - properties["Computer"] = record["Computer"] - properties["KubeletVersion"] = record["KubeletVersion"] - capacityInfo = items['status']['capacity'] - ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"] , properties) - ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"] , properties) - telemetrySent = true - end - end - router.emit_stream(@tag, eventStream) if eventStream - router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream - router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream - if telemetrySent == true - @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i + if items["status"].key?("conditions") && !items["status"]["conditions"].empty? + allNodeConditions = "" + items["status"]["conditions"].each do |condition| + if condition["status"] == "True" + if !allNodeConditions.empty? + allNodeConditions = allNodeConditions + "," + condition["type"] + else + allNodeConditions = condition["type"] + end end - @@istestvar = ENV['ISTEST'] - if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp('true') == 0 && eventStream.count > 0) - $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + #collect last transition to/from ready (no matter ready is true/false) + if condition["type"] == "Ready" && !condition["lastTransitionTime"].nil? + record["LastTransitionTimeReady"] = condition["lastTransitionTime"] end - end - rescue => errorStr - $log.warn "Failed to retrieve node inventory: #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - end - - def run_periodic - @mutex.lock - done = @finished - until done - @condition.wait(@mutex, @run_interval) - done = @finished - @mutex.unlock - if !done - begin - $log.info("in_kube_nodes::run_periodic @ #{Time.now.utc.iso8601}") - enumerate - rescue => errorStr - $log.warn "in_kube_nodes::run_periodic: enumerate Failed to retrieve node inventory: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + if !allNodeConditions.empty? + record["Status"] = allNodeConditions + end end + + nodeInfo = items["status"]["nodeInfo"] + record["KubeletVersion"] = nodeInfo["kubeletVersion"] + record["KubeProxyVersion"] = nodeInfo["kubeProxyVersion"] + containerNodeInventoryRecord["OperatingSystem"] = nodeInfo["osImage"] + dockerVersion = nodeInfo["containerRuntimeVersion"] + dockerVersion.slice! "docker://" + containerNodeInventoryRecord["DockerVersion"] = dockerVersion + # ContainerNodeInventory data for docker version and operating system. 
+ containerNodeInventoryWrapper = { + "DataType" => "CONTAINER_NODE_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }], + } + containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper + + wrapper = { + "DataType" => "KUBE_NODE_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [record.each { |k, v| record[k] = v }], + } + eventStream.add(emitTime, wrapper) if wrapper + # Adding telemetry to send node telemetry every 5 minutes + timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= 5) + properties = {} + properties["Computer"] = record["Computer"] + properties["KubeletVersion"] = record["KubeletVersion"] + properties["OperatingSystem"] = nodeInfo["operatingSystem"] + properties["DockerVersion"] = dockerVersion + capacityInfo = items["status"]["capacity"] + ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties) + ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties) + telemetrySent = true + end + end + router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream + router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream + if telemetrySent == true + @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i + end + @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && eventStream.count > 0) + $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end - @mutex.lock end + rescue => errorStr + $log.warn "Failed to retrieve node inventory: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def run_periodic + @mutex.lock + done = @finished + until done + @condition.wait(@mutex, @run_interval) + done = @finished @mutex.unlock + if !done + begin + $log.info("in_kube_nodes::run_periodic @ #{Time.now.utc.iso8601}") + enumerate + rescue => errorStr + $log.warn "in_kube_nodes::run_periodic: enumerate Failed to retrieve node inventory: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + @mutex.lock end - - end # Kube_Node_Input - - end # module - - \ No newline at end of file + @mutex.unlock + end + end # Kube_Node_Input +end # module diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 3d026b05f..65573673c 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -2,29 +2,28 @@ # frozen_string_literal: true module Fluent - class Kube_PodInventory_Input < Input - Plugin.register_input('kubepodinventory', self) + Plugin.register_input("kubepodinventory", self) - @@MDMKubePodInventoryTag = 'mdm.kubepodinventory' + @@MDMKubePodInventoryTag = "mdm.kubepodinventory" + @@hostName = (OMS::Common.get_hostname) def initialize super - require 'yaml' - require 'json' - require 'set' - - require_relative 'KubernetesApiClient' - require_relative 'ApplicationInsightsUtility' - require_relative 'oms_common' - require_relative 'omslog' + require "yaml" + require "json" + require "set" + require_relative "KubernetesApiClient" + require_relative "ApplicationInsightsUtility" + require_relative "oms_common" + require_relative "omslog" end - config_param :run_interval, :time, :default 
=> '1m' + config_param :run_interval, :time, :default => "1m" config_param :tag, :string, :default => "oms.containerinsights.KubePodInventory" - def configure (conf) + def configure(conf) super end @@ -48,29 +47,126 @@ def shutdown end end - def enumerate(podList = nil) - if podList.nil? - $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo('pods').body) - $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") + def enumerate(podList = nil) + if podList.nil? + $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}") + podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods").body) + $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") + else + podInventory = podList + end + begin + if (!podInventory.empty? && podInventory.key?("items") && !podInventory["items"].empty?) 
+ #get pod inventory & services + $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") + serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo("services").body) + $log.info("in_kube_podinventory::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}") + parse_and_emit_records(podInventory, serviceList) else - podInventory = podList + $log.warn "Received empty podInventory" + end + rescue => errorStr + $log.warn "Failed in enumerate pod inventory: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def populateWindowsContainerInventoryRecord(container, record, containerEnvVariableHash, batchTime) + begin + containerInventoryRecord = {} + containerName = container["name"] + containerInventoryRecord["InstanceID"] = record["ContainerID"] + containerInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + containerInventoryRecord["Computer"] = record["Computer"] + containerInventoryRecord["ContainerHostname"] = record["Computer"] + containerInventoryRecord["ElementName"] = containerName + image = container["image"] + repoInfo = image.split("/") + if !repoInfo.nil? + containerInventoryRecord["Repository"] = repoInfo[0] + if !repoInfo[1].nil? + imageInfo = repoInfo[1].split(":") + if !imageInfo.nil? + containerInventoryRecord["Image"] = imageInfo[0] + containerInventoryRecord["ImageTag"] = imageInfo[1] + end + end + end + imageIdInfo = container["imageID"] + imageIdSplitInfo = imageIdInfo.split("@") + if !imageIdSplitInfo.nil? 
+ containerInventoryRecord["ImageId"] = imageIdSplitInfo[1] + end + # Get container state + containerStatus = container["state"] + if containerStatus.keys[0] == "running" + containerInventoryRecord["State"] = "Running" + containerInventoryRecord["StartedTime"] = container["state"]["running"]["startedAt"] + elsif containerStatus.keys[0] == "terminated" + containerExitCode = container["state"]["terminated"]["exitCode"] + containerStartTime = container["state"]["terminated"]["startedAt"] + containerFinishTime = container["state"]["terminated"]["finishedAt"] + if containerExitCode < 0 + # Exit codes less than 0 are not supported by the engine + containerExitCode = 128 + end + if containerExitCode > 0 + containerInventoryRecord["State"] = "Failed" + else + containerInventoryRecord["State"] = "Stopped" + end + containerInventoryRecord["ExitCode"] = containerExitCode + containerInventoryRecord["StartedTime"] = containerStartTime + containerInventoryRecord["FinishedTime"] = containerFinishTime + elsif containerStatus.keys[0] == "waiting" + containerInventoryRecord["State"] = "Waiting" + end + if !containerEnvVariableHash.nil? && !containerEnvVariableHash.empty? + containerInventoryRecord["EnvironmentVar"] = containerEnvVariableHash[containerName] end - begin - if(!podInventory.empty? && podInventory.key?("items") && !podInventory['items'].empty?) 
- #get pod inventory & services - $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") - serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo('services').body) - $log.info("in_kube_podinventory::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}") - parse_and_emit_records(podInventory, serviceList) - else - $log.warn "Received empty podInventory" - end - rescue => errorStr - $log.warn "Failed in enumerate pod inventory: #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end + return containerInventoryRecord + rescue => errorStr + $log.warn "Failed in populateWindowsContainerInventoryRecord: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def getContainerEnvironmentVariables(pod) + begin + podSpec = pod["spec"] + containerEnvHash = {} + if !podSpec.nil? && !podSpec["containers"].nil? + podSpec["containers"].each do |container| + envVarsArray = [] + containerEnvArray = container["env"] + # Parsing the environment variable array of hashes to a string value + # since that is format being sent by container inventory workflow in daemonset + # Keeping it in the same format because the workflow expects it in this format + # and the UX expects an array of string for environment variables + if !containerEnvArray.nil? && !containerEnvArray.empty? 
+ containerEnvArray.each do |envVarHash| + envName = envVarHash["name"] + envValue = envVarHash["value"] + envArrayElement = envName + "=" + envValue + envVarsArray.push(envArrayElement) + end + end + # Skip environment variable processing if it contains the flag AZMON_COLLECT_ENV=FALSE + envValueString = envVarsArray.to_s + if /AZMON_COLLECT_ENV=FALSE/i.match(envValueString) + envValueString = ["AZMON_COLLECT_ENV=FALSE"] + end + containerEnvHash[container["name"]] = envValueString + end + end + return containerEnvHash + rescue => errorStr + $log.warn "Failed in getContainerEnvironmentVariables: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end end def parse_and_emit_records(podInventory, serviceList) @@ -80,100 +176,116 @@ def parse_and_emit_records(podInventory, serviceList) eventStream = MultiEventStream.new controllerSet = Set.new [] telemetryFlush = false + winContainerCount = 0 begin #begin block start - podInventory['items'].each do |items| #podInventory block start + # Getting windows nodes from kubeapi + winNodes = KubernetesApiClient.getWindowsNodesArray + + podInventory["items"].each do |items| #podInventory block start + sendWindowsContainerInventoryRecord = false + containerInventoryRecords = [] records = [] record = {} - record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated - record['Name'] = items['metadata']['name'] - podNameSpace = items['metadata']['namespace'] - - if podNameSpace.eql?("kube-system") && !items['metadata'].key?("ownerReferences") + record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + record["Name"] = items["metadata"]["name"] + podNameSpace = items["metadata"]["namespace"] + + if podNameSpace.eql?("kube-system") && !items["metadata"].key?("ownerReferences") # The above case seems to be the only case where you have horizontal scaling of pods # but no controller, in which case 
cAdvisor picks up kubernetes.io/config.hash # instead of the actual poduid. Since this uid is not being surface into the UX # its ok to use this. # Use kubernetes.io/config.hash to be able to correlate with cadvisor data - podUid = items['metadata']['annotations']['kubernetes.io/config.hash'] + podUid = items["metadata"]["annotations"]["kubernetes.io/config.hash"] else - podUid = items['metadata']['uid'] + podUid = items["metadata"]["uid"] end - record['PodUid'] = podUid - record['PodLabel'] = [items['metadata']['labels']] - record['Namespace'] = podNameSpace - record['PodCreationTimeStamp'] = items['metadata']['creationTimestamp'] + record["PodUid"] = podUid + record["PodLabel"] = [items["metadata"]["labels"]] + record["Namespace"] = podNameSpace + record["PodCreationTimeStamp"] = items["metadata"]["creationTimestamp"] #for unscheduled (non-started) pods startTime does NOT exist - if !items['status']['startTime'].nil? - record['PodStartTime'] = items['status']['startTime'] + if !items["status"]["startTime"].nil? + record["PodStartTime"] = items["status"]["startTime"] else - record['PodStartTime'] = "" + record["PodStartTime"] = "" end #podStatus # the below is for accounting 'NodeLost' scenario, where-in the pod(s) in the lost node is still being reported as running podReadyCondition = true - if !items['status']['reason'].nil? && items['status']['reason'] == "NodeLost" && !items['status']['conditions'].nil? - items['status']['conditions'].each do |condition| - if condition['type'] == "Ready" && condition['status'] == "False" + if !items["status"]["reason"].nil? && items["status"]["reason"] == "NodeLost" && !items["status"]["conditions"].nil? 
+ items["status"]["conditions"].each do |condition| + if condition["type"] == "Ready" && condition["status"] == "False" podReadyCondition = false break end end end if podReadyCondition == false - record['PodStatus'] = "Unknown" + record["PodStatus"] = "Unknown" else - record['PodStatus'] = items['status']['phase'] + record["PodStatus"] = items["status"]["phase"] end #for unscheduled (non-started) pods podIP does NOT exist - if !items['status']['podIP'].nil? - record['PodIp'] =items['status']['podIP'] + if !items["status"]["podIP"].nil? + record["PodIp"] = items["status"]["podIP"] else - record['PodIp'] = "" + record["PodIp"] = "" end #for unscheduled (non-started) pods nodeName does NOT exist - if !items['spec']['nodeName'].nil? - record['Computer'] = items['spec']['nodeName'] + if !items["spec"]["nodeName"].nil? + record["Computer"] = items["spec"]["nodeName"] else - record['Computer'] = "" - end - record['ClusterId'] = KubernetesApiClient.getClusterId - record['ClusterName'] = KubernetesApiClient.getClusterName - record['ServiceName'] = getServiceNameFromLabels(items['metadata']['namespace'], items['metadata']['labels'], serviceList) - # Adding telemetry to send pod telemetry every 5 minutes - timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference/60 - if (timeDifferenceInMinutes >= 5) - telemetryFlush = true - end - if !items['metadata']['ownerReferences'].nil? - record['ControllerKind'] = items['metadata']['ownerReferences'][0]['kind'] - record['ControllerName'] = items['metadata']['ownerReferences'][0]['name'] + record["Computer"] = "" + end + + # Setting this flag to true so that we can send ContainerInventory records for containers + # on windows nodes and parse environment variables for these containers + if winNodes.length > 0 + if (!record["Computer"].empty? && (winNodes.include? 
record["Computer"])) + sendWindowsContainerInventoryRecord = true + containerEnvVariableHash = getContainerEnvironmentVariables(items) + end + end + + record["ClusterId"] = KubernetesApiClient.getClusterId + record["ClusterName"] = KubernetesApiClient.getClusterName + record["ServiceName"] = getServiceNameFromLabels(items["metadata"]["namespace"], items["metadata"]["labels"], serviceList) + # Adding telemetry to send pod telemetry every 5 minutes + timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= 5) + telemetryFlush = true + end + if !items["metadata"]["ownerReferences"].nil? + record["ControllerKind"] = items["metadata"]["ownerReferences"][0]["kind"] + record["ControllerName"] = items["metadata"]["ownerReferences"][0]["name"] if telemetryFlush == true - controllerSet.add(record['ControllerKind'] + record['ControllerName']) + controllerSet.add(record["ControllerKind"] + record["ControllerName"]) end end podRestartCount = 0 - record['PodRestartCount'] = 0 - if items['status'].key?("containerStatuses") && !items['status']['containerStatuses'].empty? #container status block start - items['status']['containerStatuses'].each do |container| - containerRestartCount = 0 - #container Id is of the form - #docker://dfd9da983f1fd27432fb2c1fe3049c0a1d25b1c697b2dc1a530c986e58b16527 - if !container['containerID'].nil? - record['ContainerID'] = container['containerID'].split("//")[1] - else + record["PodRestartCount"] = 0 + if items["status"].key?("containerStatuses") && !items["status"]["containerStatuses"].empty? #container status block start + items["status"]["containerStatuses"].each do |container| + containerRestartCount = 0 + #container Id is of the form + #docker://dfd9da983f1fd27432fb2c1fe3049c0a1d25b1c697b2dc1a530c986e58b16527 + if !container["containerID"].nil? 
+ record["ContainerID"] = container["containerID"].split("//")[1] + else # for containers that have image issues (like invalid image/tag etc..) this will be empty. do not make it all 0 - record['ContainerID'] = "" + record["ContainerID"] = "" end - #keeping this as which is same as InstanceName in perf table - record['ContainerName'] = podUid + "/" +container['name'] - #Pod restart count is a sumtotal of restart counts of individual containers - #within the pod. The restart count of a container is maintained by kubernetes - #itself in the form of a container label. - containerRestartCount = container['restartCount'] - record['ContainerRestartCount'] = containerRestartCount - containerStatus = container['state'] - record['ContainerStatusReason'] = '' + #keeping this as which is same as InstanceName in perf table + record["ContainerName"] = podUid + "/" + container["name"] + #Pod restart count is a sumtotal of restart counts of individual containers + #within the pod. The restart count of a container is maintained by kubernetes + #itself in the form of a container label. 
+ containerRestartCount = container["restartCount"] + record["ContainerRestartCount"] = containerRestartCount + containerStatus = container["state"] + record["ContainerStatusReason"] = "" # state is of the following form , so just picking up the first key name # "state": { # "waiting": { @@ -183,55 +295,80 @@ def parse_and_emit_records(podInventory, serviceList) # }, # the below is for accounting 'NodeLost' scenario, where-in the containers in the lost node/pod(s) is still being reported as running if podReadyCondition == false - record['ContainerStatus'] = "Unknown" + record["ContainerStatus"] = "Unknown" else - record['ContainerStatus'] = containerStatus.keys[0] + record["ContainerStatus"] = containerStatus.keys[0] end #TODO : Remove ContainerCreationTimeStamp from here since we are sending it as a metric #Picking up both container and node start time from cAdvisor to be consistent if containerStatus.keys[0] == "running" - record['ContainerCreationTimeStamp'] = container['state']['running']['startedAt'] + record["ContainerCreationTimeStamp"] = container["state"]["running"]["startedAt"] else - if !containerStatus[containerStatus.keys[0]]['reason'].nil? && !containerStatus[containerStatus.keys[0]]['reason'].empty? - record['ContainerStatusReason'] = containerStatus[containerStatus.keys[0]]['reason'] + if !containerStatus[containerStatus.keys[0]]["reason"].nil? && !containerStatus[containerStatus.keys[0]]["reason"].empty? 
+ record["ContainerStatusReason"] = containerStatus[containerStatus.keys[0]]["reason"] end end - podRestartCount += containerRestartCount - records.push(record.dup) - end + podRestartCount += containerRestartCount + records.push(record.dup) + + #Generate ContainerInventory records for windows nodes so that we can get image and image tag in property panel + if sendWindowsContainerInventoryRecord == true + containerInventoryRecord = populateWindowsContainerInventoryRecord(container, record, containerEnvVariableHash, batchTime) + containerInventoryRecords.push(containerInventoryRecord) + end + end else # for unscheduled pods there are no status.containerStatuses, in this case we still want the pod - records.push(record) + records.push(record) end #container status block end records.each do |record| if !record.nil? - record['PodRestartCount'] = podRestartCount + record["PodRestartCount"] = podRestartCount wrapper = { - "DataType"=>"KUBE_POD_INVENTORY_BLOB", - "IPName"=>"ContainerInsights", - "DataItems"=>[record.each{|k,v| record[k]=v}] + "DataType" => "KUBE_POD_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [record.each { |k, v| record[k] = v }], } eventStream.add(emitTime, wrapper) if wrapper - end - end + end + end + # Send container inventory records for containers on windows nodes + winContainerCount += containerInventoryRecords.length + containerInventoryRecords.each do |cirecord| + if !cirecord.nil? 
+ ciwrapper = { + "DataType" => "CONTAINER_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [cirecord.each { |k, v| cirecord[k] = v }], + } + eventStream.add(emitTime, ciwrapper) if ciwrapper + end + end end #podInventory block end + router.emit_stream(@tag, eventStream) if eventStream router.emit_stream(@@MDMKubePodInventoryTag, eventStream) if eventStream if telemetryFlush == true - ApplicationInsightsUtility.sendHeartBeatEvent("KubePodInventory") - ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory['items'].length , {}) - ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length , {}) + telemetryProperties = {} + telemetryProperties["Computer"] = @@hostName + ApplicationInsightsUtility.sendCustomEvent("KubePodInventoryHeartBeatEvent", telemetryProperties) + ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory["items"].length, {}) + ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length, {}) + if winContainerCount > 0 + telemetryProperties["ClusterWideWindowsContainersCount"] = winContainerCount + ApplicationInsightsUtility.sendCustomEvent("WindowsContainerInventoryEvent", telemetryProperties) + end @@podTelemetryTimeTracker = DateTime.now.to_time.to_i end - @@istestvar = ENV['ISTEST'] - if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) + @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && eventStream.count > 0) $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end - rescue => errorStr + rescue => errorStr $log.warn "Failed in parse_and_emit_record pod inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end #begin block end - end + end #begin block end + end def run_periodic @mutex.lock @@ -257,37 +394,33 @@ def run_periodic def getServiceNameFromLabels(namespace, labels, serviceList) serviceName = "" begin - if !labels.nil? && !labels.empty? - if( !serviceList.nil? && !serviceList.empty? && serviceList.key?("items") && !serviceList['items'].empty?) - serviceList['items'].each do |item| + if !labels.nil? && !labels.empty? + if (!serviceList.nil? && !serviceList.empty? && serviceList.key?("items") && !serviceList["items"].empty?) + serviceList["items"].each do |item| found = 0 - if !item['spec'].nil? && !item['spec']['selector'].nil? && item['metadata']['namespace'] == namespace - selectorLabels = item['spec']['selector'] + if !item["spec"].nil? && !item["spec"]["selector"].nil? && item["metadata"]["namespace"] == namespace + selectorLabels = item["spec"]["selector"] if !selectorLabels.empty? 
- selectorLabels.each do |key,value| - if !(labels.select {|k,v| k==key && v==value}.length > 0) + selectorLabels.each do |key, value| + if !(labels.select { |k, v| k == key && v == value }.length > 0) break end found = found + 1 end - end + end if found == selectorLabels.length - return item['metadata']['name'] + return item["metadata"]["name"] end - end + end end - end + end end - rescue => errorStr + rescue => errorStr $log.warn "Failed to retrieve service name from labels: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end return serviceName end - end # Kube_Pod_Input - end # module - - diff --git a/source/code/plugin/in_win_cadvisor_perf.rb b/source/code/plugin/in_win_cadvisor_perf.rb new file mode 100644 index 000000000..2e5f839e6 --- /dev/null +++ b/source/code/plugin/in_win_cadvisor_perf.rb @@ -0,0 +1,120 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +module Fluent + class Win_CAdvisor_Perf_Input < Input + Plugin.register_input("wincadvisorperf", self) + + @@winNodes = [] + + def initialize + super + require "yaml" + require "json" + + require_relative "CAdvisorMetricsAPIClient" + require_relative "KubernetesApiClient" + require_relative "oms_common" + require_relative "omslog" + end + + config_param :run_interval, :time, :default => "1m" + config_param :tag, :string, :default => "oms.api.wincadvisorperf" + config_param :mdmtag, :string, :default => "mdm.cadvisorperf" + + def configure(conf) + super + end + + def start + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) + @@winNodeQueryTimeTracker = DateTime.now.to_time.to_i + @@cleanupRoutineTimeTracker = DateTime.now.to_time.to_i + end + end + + def shutdown + if @run_interval + @mutex.synchronize { + @finished = true + @condition.signal + } + @thread.join + end + end + + def enumerate() + time = Time.now.to_f + begin + eventStream = 
MultiEventStream.new + timeDifference = (DateTime.now.to_time.to_i - @@winNodeQueryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + + #Resetting this cache so that it is populated with the current set of containers with every call + CAdvisorMetricsAPIClient.resetWinContainerIdCache() + if (timeDifferenceInMinutes >= 5) + $log.info "in_win_cadvisor_perf: Getting windows nodes" + nodes = KubernetesApiClient.getWindowsNodes() + if !nodes.nil? + @@winNodes = KubernetesApiClient.getWindowsNodes() + end + $log.info "in_win_cadvisor_perf : Successuly got windows nodes after 5 minute interval" + @@winNodeQueryTimeTracker = DateTime.now.to_time.to_i + end + @@winNodes.each do |winNode| + metricData = CAdvisorMetricsAPIClient.getMetrics(winNode) + metricData.each do |record| + if !record.empty? + record["DataType"] = "LINUX_PERF_BLOB" + record["IPName"] = "LogManagement" + eventStream.add(time, record) if record + end + end + router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@mdmtag, eventStream) if eventStream + + @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && eventStream.count > 0) + $log.info("winCAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + end + + # Cleanup routine to clear deleted containers from cache + cleanupTimeDifference = (DateTime.now.to_time.to_i - @@cleanupRoutineTimeTracker).abs + cleanupTimeDifferenceInMinutes = cleanupTimeDifference / 60 + if (cleanupTimeDifferenceInMinutes >= 5) + $log.info "in_win_cadvisor_perf : Cleanup routine kicking in to clear deleted containers from cache" + CAdvisorMetricsAPIClient.clearDeletedWinContainersFromCache() + @@cleanupRoutineTimeTracker = DateTime.now.to_time.to_i + end + rescue => errorStr + $log.warn "Failed to retrieve cadvisor metric data for windows nodes: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + end + end + + def run_periodic + @mutex.lock + done = @finished + until done + @condition.wait(@mutex, @run_interval) + done = @finished + @mutex.unlock + if !done + begin + $log.info("in_win_cadvisor_perf::run_periodic @ #{Time.now.utc.iso8601}") + enumerate + rescue => errorStr + $log.warn "in_win_cadvisor_perf::run_periodic: enumerate Failed to retrieve cadvisor perf metrics for windows nodes: #{errorStr}" + end + end + @mutex.lock + end + @mutex.unlock + end + end # Win_CAdvisor_Perf_Input +end # module diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 93b32ef50..963069858 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -2,29 +2,27 @@ # frozen_string_literal: true module Fluent - class OutputMDM < BufferedOutput - config_param :retry_mdm_post_wait_minutes, :integer - Plugin.register_output('out_mdm', self) + Plugin.register_output("out_mdm", self) def initialize super - require 'net/http' - require 'net/https' - require 'uri' - require 'json' - require_relative 'KubernetesApiClient' - require_relative 'ApplicationInsightsUtility' + require "net/http" + require "net/https" + require "uri" + require "json" + require_relative 
"KubernetesApiClient" + require_relative "ApplicationInsightsUtility" - @@token_resource_url = 'https://monitoring.azure.com/' - @@grant_type = 'client_credentials' - @@azure_json_path = '/etc/kubernetes/host/azure.json' + @@token_resource_url = "https://monitoring.azure.com/" + @@grant_type = "client_credentials" + @@azure_json_path = "/etc/kubernetes/host/azure.json" @@post_request_url_template = "https://%{aks_region}.monitoring.azure.com%{aks_resource_id}/metrics" @@token_url_template = "https://login.microsoftonline.com/%{tenant_id}/oauth2/token" @@plugin_name = "AKSCustomMetricsMDM" - + @data_hash = {} @token_url = nil @http_client = nil @@ -50,12 +48,13 @@ def start @can_send_data_to_mdm = false return end - # Handle the case where the file read fails. Send Telemetry and exit the plugin? + # Handle the case where the file read fails. Send Telemetry and exit the plugin? @data_hash = JSON.parse(file) - @token_url = @@token_url_template % {tenant_id: @data_hash['tenantId']} + @token_url = @@token_url_template % {tenant_id: @data_hash["tenantId"]} @cached_access_token = get_access_token - aks_resource_id = ENV['AKS_RESOURCE_ID'] - aks_region = ENV['AKS_REGION'] + aks_resource_id = ENV["AKS_RESOURCE_ID"] + aks_region = ENV["AKS_REGION"] + if aks_resource_id.to_s.empty? @log.info "Environment Variable AKS_RESOURCE_ID is not set.. " @can_send_data_to_mdm = false @@ -77,7 +76,7 @@ def start # get the access token only if the time to expiry is less than 5 minutes def get_access_token - if @cached_access_token.to_s.empty? || (Time.now + 5*60 > @token_expiry_time) # token is valid for 60 minutes. Refresh token 5 minutes from expiration + if @cached_access_token.to_s.empty? || (Time.now + 5 * 60 > @token_expiry_time) # token is valid for 60 minutes. Refresh token 5 minutes from expiration @log.info "Refreshing access token for out_mdm plugin.." 
token_uri = URI.parse(@token_url) http_access_token = Net::HTTP.new(token_uri.host, token_uri.port) @@ -85,27 +84,27 @@ def get_access_token token_request = Net::HTTP::Post.new(token_uri.request_uri) token_request.set_form_data( { - 'grant_type' => @@grant_type, - 'client_id' => @data_hash['aadClientId'], - 'client_secret' => @data_hash['aadClientSecret'], - 'resource' => @@token_resource_url - } + "grant_type" => @@grant_type, + "client_id" => @data_hash["aadClientId"], + "client_secret" => @data_hash["aadClientSecret"], + "resource" => @@token_resource_url, + } ) - + token_response = http_access_token.request(token_request) - # Handle the case where the response is not 200 + # Handle the case where the response is not 200 parsed_json = JSON.parse(token_response.body) - @token_expiry_time = Time.now + 59*60 # set the expiry time to be ~one hour from current time - @cached_access_token = parsed_json['access_token'] + @token_expiry_time = Time.now + 59 * 60 # set the expiry time to be ~one hour from current time + @cached_access_token = parsed_json["access_token"] end @cached_access_token - end + end def write_status_file(success, message) - fn = '/var/opt/microsoft/omsagent/log/MDMIngestion.status' + fn = "/var/opt/microsoft/omsagent/log/MDMIngestion.status" status = '{ "operation": "MDMIngestion", "success": "%s", "message": "%s" }' % [success, message] begin - File.open(fn,'w') { |file| file.write(status) } + File.open(fn, "w") { |file| file.write(status) } rescue => e @log.debug "Error:'#{e}'" ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) @@ -123,13 +122,13 @@ def format(tag, time, record) end end - # This method is called every flush interval. Send the buffer chunk to MDM. + # This method is called every flush interval. Send the buffer chunk to MDM. 
# 'chunk' is a buffer chunk that includes multiple formatted records def write(chunk) begin - if (!@first_post_attempt_made || (Time.now > @last_post_attempt_time + retry_mdm_post_wait_minutes*60)) && @can_send_data_to_mdm + if (!@first_post_attempt_made || (Time.now > @last_post_attempt_time + retry_mdm_post_wait_minutes * 60)) && @can_send_data_to_mdm post_body = [] - chunk.msgpack_each {|(tag, record)| + chunk.msgpack_each { |(tag, record)| post_body.push(record.to_json) } send_to_mdm post_body @@ -137,7 +136,7 @@ def write(chunk) if !@can_send_data_to_mdm @log.info "Cannot send data to MDM since all required conditions were not met" else - @log.info "Last Failed POST attempt to MDM was made #{((Time.now - @last_post_attempt_time)/60).round(1)} min ago. This is less than the current retry threshold of #{@retry_mdm_post_wait_minutes} min. NO-OP" + @log.info "Last Failed POST attempt to MDM was made #{((Time.now - @last_post_attempt_time) / 60).round(1)} min ago. This is less than the current retry threshold of #{@retry_mdm_post_wait_minutes} min. NO-OP" end end rescue Exception => e @@ -146,12 +145,12 @@ def write(chunk) end end - def send_to_mdm(post_body) + def send_to_mdm(post_body) begin access_token = get_access_token request = Net::HTTP::Post.new(@post_request_uri.request_uri) - request['Content-Type'] = "application/x-ndjson" - request['Authorization'] = "Bearer #{access_token}" + request["Content-Type"] = "application/x-ndjson" + request["Authorization"] = "Bearer #{access_token}" request.body = post_body.join("\n") response = @http_client.request(request) response.value # this throws for non 200 HTTP response code @@ -166,10 +165,10 @@ def send_to_mdm(post_body) @first_post_attempt_made = true ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) # Not raising exception, as that will cause retries to happen - elsif !response.code.empty? && response.code.start_with?('4') + elsif !response.code.empty? 
&& response.code.start_with?("4") # Log 400 errors and continue @log.info "Non-retryable HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}" - else + else # raise if the response code is non-400 @log.info "HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}" raise e @@ -186,7 +185,8 @@ def send_to_mdm(post_body) raise e end end - private + + private class ChunkErrorHandler include Configurable @@ -218,20 +218,20 @@ def router=(r) end def write(chunk) - chunk.msgpack_each {|(tag, record)| + chunk.msgpack_each { |(tag, record)| @error_handlers[tag].emit(record) } end - - private + + private def create_error_handlers(router) nop_handler = NopErrorHandler.new Hash.new() { |hash, tag| etag = OMS::Common.create_error_tag tag hash[tag] = router.match?(etag) ? - ErrorHandler.new(router, etag) : - nop_handler + ErrorHandler.new(router, etag) : + nop_handler } end @@ -251,10 +251,6 @@ def emit(record) # NOP end end - end - end # class OutputMDM - end # module Fluent - From ebdd8cc119a77752fd543225878f36e055812d14 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Mon, 8 Apr 2019 11:55:52 -0700 Subject: [PATCH 084/160] adding os to container inventory for windows nodes (#210) --- source/code/plugin/CAdvisorMetricsAPIClient.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index 8b4fd9fcf..35cf727cf 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -318,6 +318,7 @@ def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect, telemetryProperties = {} telemetryProperties["Computer"] = hostName telemetryProperties["ContainerCount"] = containerCount + telemetryProperties["OS"] = "Windows" # Hardcoding the event to ContainerInventory hearbeat event since the telemetry is pivoted off of this event. 
@Log.info "sending container inventory heartbeat telemetry" ApplicationInsightsUtility.sendCustomEvent("ContainerInventoryHeartBeatEvent", telemetryProperties) From d7b8cff1d9b20f3894fdd91c0e1cd3b69a465ed9 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Mon, 8 Apr 2019 15:40:31 -0700 Subject: [PATCH 085/160] Fix omsagent crash Error when kube-api returns non-200, send events for HTTP Errors (#211) * Fix omsagent crash Error when kube-api returns non-200, send events for HTTP Errors * Fixing the bug, deferring telemetry changes for later --- source/code/plugin/filter_cadvisor2mdm.rb | 102 +++++++++++----------- source/code/plugin/out_mdm.rb | 2 +- 2 files changed, 54 insertions(+), 50 deletions(-) diff --git a/source/code/plugin/filter_cadvisor2mdm.rb b/source/code/plugin/filter_cadvisor2mdm.rb index 94f2107cc..a6e643e45 100644 --- a/source/code/plugin/filter_cadvisor2mdm.rb +++ b/source/code/plugin/filter_cadvisor2mdm.rb @@ -10,45 +10,45 @@ module Fluent class CAdvisor2MdmFilter < Filter Fluent::Plugin.register_filter('filter_cadvisor2mdm', self) - + config_param :enable_log, :integer, :default => 0 config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log' config_param :custom_metrics_azure_regions, :string config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes' - + @@cpu_usage_milli_cores = 'cpuUsageMillicores' @@cpu_usage_nano_cores = 'cpuusagenanocores' @@object_name_k8s_node = 'K8SNode' @@hostName = (OMS::Common.get_hostname) @@custom_metrics_template = ' - { - "time": "%{timestamp}", - "data": { - "baseData": { - "metric": "%{metricName}", - "namespace": "Insights.Container/nodes", - "dimNames": [ + { + "time": "%{timestamp}", + "data": { + "baseData": { + "metric": "%{metricName}", + "namespace": "Insights.Container/nodes", + "dimNames": [ "host" - ], - "series": [ - { - "dimValues": [ + ], + "series": [ + { + "dimValues": [ "%{hostvalue}" - ], + 
], "min": %{metricminvalue}, - "max": %{metricmaxvalue}, - "sum": %{metricsumvalue}, - "count": 1 - } - ] - } - } + "max": %{metricmaxvalue}, + "sum": %{metricsumvalue}, + "count": 1 + } + ] + } + } }' - + @@metric_name_metric_percentage_name_hash = { - @@cpu_usage_milli_cores => "cpuUsagePercentage", + @@cpu_usage_milli_cores => "cpuUsagePercentage", "memoryRssBytes" => "memoryRssPercentage", - "memoryWorkingSetBytes" => "memoryWorkingSetPercentage" + "memoryWorkingSetBytes" => "memoryWorkingSetPercentage" } @process_incoming_stream = true @@ -61,7 +61,7 @@ def initialize def configure(conf) super @log = nil - + if @enable_log @log = Logger.new(@log_path, 1, 5000000) @log.debug {'Starting filter_cadvisor2mdm plugin'} @@ -70,15 +70,19 @@ def configure(conf) def start super - @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability(@custom_metrics_azure_regions) - @metrics_to_collect_hash = build_metrics_hash - @log.debug "After check_custom_metrics_availability process_incoming_stream #{@process_incoming_stream}" - - # initialize cpu and memory limit - if @process_incoming_stream - @cpu_capacity = 0.0 - @memory_capacity = 0.0 - ensure_cpu_memory_capacity_set + begin + @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability(@custom_metrics_azure_regions) + @metrics_to_collect_hash = build_metrics_hash + @log.debug "After check_custom_metrics_availability process_incoming_stream #{@process_incoming_stream}" + + # initialize cpu and memory limit + if @process_incoming_stream + @cpu_capacity = 0.0 + @memory_capacity = 0.0 + ensure_cpu_memory_capacity_set + end + rescue => e + @log.info "Error initializing plugin #{e}" end end @@ -117,9 +121,9 @@ def filter(tag, time, record) if @memory_capacity != 0.0 percentage_metric_value = metric_value*100/@memory_capacity end - end + end return get_metric_records(record, metric_name, metric_value, percentage_metric_value) - else + else return [] end else @@ -140,13 +144,13 @@ def 
ensure_cpu_memory_capacity_set return end - begin + begin nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes?fieldSelector=metadata.name%3D#{@@hostName}").body) rescue Exception => e @log.info "Error when getting nodeInventory from kube API. Exception: #{e.class} Message: #{e.message} " ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) end - if !nodeInventory.nil? + if !nodeInventory.nil? cpu_capacity_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") if !cpu_capacity_json.nil? && !cpu_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'].to_s.nil? @cpu_capacity = cpu_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'] @@ -163,7 +167,7 @@ def ensure_cpu_memory_capacity_set end end end - + def get_metric_records(record, metric_name, metric_value, percentage_metric_value) records = [] custommetricrecord = @@custom_metrics_template % { @@ -194,20 +198,20 @@ def get_metric_records(record, metric_name, metric_value, percentage_metric_valu return records end - + def filter_stream(tag, es) new_es = MultiEventStream.new - ensure_cpu_memory_capacity_set - es.each { |time, record| - begin + begin + ensure_cpu_memory_capacity_set + es.each { |time, record| filtered_records = filter(tag, time, record) - filtered_records.each {|filtered_record| + filtered_records.each {|filtered_record| new_es.add(time, filtered_record) if filtered_record - } if filtered_records - rescue => e - router.emit_error_event(tag, time, record, e) - end - } + } if filtered_records + } + rescue => e + @log.info "Error in filter_stream #{e.message}" + end new_es end end diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 963069858..351198afe 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -140,6 +140,7 @@ def write(chunk) end end rescue Exception => e + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) @log.info "Exception when 
writing to MDM: #{e}" raise e end @@ -163,7 +164,6 @@ def send_to_mdm(post_body) @log.info "Response Code #{response.code} Updating @last_post_attempt_time" @last_post_attempt_time = Time.now @first_post_attempt_made = true - ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) # Not raising exception, as that will cause retries to happen elsif !response.code.empty? && response.code.start_with?("4") # Log 400 errors and continue From c9bb623c2c0aa6642e0baab3b0ebcf313c4627eb Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Wed, 10 Apr 2019 16:28:47 -0700 Subject: [PATCH 086/160] updating to lowercase compare for units (#212) --- source/code/plugin/KubernetesApiClient.rb | 66 +++++++++++------------ 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index 4ed85025f..3c6b4f203 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -439,58 +439,58 @@ def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNamet end #parseNodeLimits def getMetricNumericValue(metricName, metricVal) - metricValue = metricVal + metricValue = metricVal.downcase begin case metricName when "memory" #convert to bytes for memory #https://kubernetes.io/docs/tasks/configure-pod-container/assign-memory-resource/ - if (metricValue.end_with?("Ki")) - metricValue.chomp!("Ki") + if (metricValue.end_with?("ki")) + metricValue.chomp!("ki") metricValue = Float(metricValue) * 1024.0 ** 1 - elsif (metricValue.end_with?("Mi")) - metricValue.chomp!("Mi") + elsif (metricValue.end_with?("mi")) + metricValue.chomp!("mi") metricValue = Float(metricValue) * 1024.0 ** 2 - elsif (metricValue.end_with?("Gi")) - metricValue.chomp!("Gi") + elsif (metricValue.end_with?("gi")) + metricValue.chomp!("gi") metricValue = Float(metricValue) * 1024.0 ** 3 - elsif (metricValue.end_with?("Ti")) - metricValue.chomp!("Ti") + elsif 
(metricValue.end_with?("ti")) + metricValue.chomp!("ti") metricValue = Float(metricValue) * 1024.0 ** 4 - elsif (metricValue.end_with?("Pi")) - metricValue.chomp!("Pi") + elsif (metricValue.end_with?("pi")) + metricValue.chomp!("pi") metricValue = Float(metricValue) * 1024.0 ** 5 - elsif (metricValue.end_with?("Ei")) - metricValue.chomp!("Ei") + elsif (metricValue.end_with?("ei")) + metricValue.chomp!("ei") metricValue = Float(metricValue) * 1024.0 ** 6 - elsif (metricValue.end_with?("Zi")) - metricValue.chomp!("Zi") + elsif (metricValue.end_with?("zi")) + metricValue.chomp!("zi") metricValue = Float(metricValue) * 1024.0 ** 7 - elsif (metricValue.end_with?("Yi")) - metricValue.chomp!("Yi") + elsif (metricValue.end_with?("yi")) + metricValue.chomp!("yi") metricValue = Float(metricValue) * 1024.0 ** 8 - elsif (metricValue.end_with?("K")) - metricValue.chomp!("K") + elsif (metricValue.end_with?("k")) + metricValue.chomp!("k") metricValue = Float(metricValue) * 1000.0 ** 1 - elsif (metricValue.end_with?("M")) - metricValue.chomp!("M") + elsif (metricValue.end_with?("m")) + metricValue.chomp!("m") metricValue = Float(metricValue) * 1000.0 ** 2 - elsif (metricValue.end_with?("G")) - metricValue.chomp!("G") + elsif (metricValue.end_with?("g")) + metricValue.chomp!("g") metricValue = Float(metricValue) * 1000.0 ** 3 - elsif (metricValue.end_with?("T")) - metricValue.chomp!("T") + elsif (metricValue.end_with?("t")) + metricValue.chomp!("t") metricValue = Float(metricValue) * 1000.0 ** 4 - elsif (metricValue.end_with?("P")) - metricValue.chomp!("P") + elsif (metricValue.end_with?("p")) + metricValue.chomp!("p") metricValue = Float(metricValue) * 1000.0 ** 5 - elsif (metricValue.end_with?("E")) - metricValue.chomp!("E") + elsif (metricValue.end_with?("e")) + metricValue.chomp!("e") metricValue = Float(metricValue) * 1000.0 ** 6 - elsif (metricValue.end_with?("Z")) - metricValue.chomp!("Z") + elsif (metricValue.end_with?("z")) + metricValue.chomp!("z") metricValue = 
Float(metricValue) * 1000.0 ** 7 - elsif (metricValue.end_with?("Y")) - metricValue.chomp!("Y") + elsif (metricValue.end_with?("y")) + metricValue.chomp!("y") metricValue = Float(metricValue) * 1000.0 ** 8 else #assuming there are no units specified, it is bytes (the below conversion will fail for other unsupported 'units') metricValue = Float(metricValue) From 3a88db8e5b1005564e54625959972e176835f9d4 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Tue, 16 Apr 2019 13:00:00 -0700 Subject: [PATCH 087/160] Merge from vishwa/telegraftcp to ci_feature for telegraf changes (#214) * merge from Vishwa/telegraf to Vishwa/telegraftcp for telegraf changes (#207) * add configuration for telegraf * fix for perms * fix telegraf config. * fix file location & config * update to config * fix namespace * trying different namespace and also debug=true * add placeholder for nodename * change namespace * updated config * fix uri * fix azMon settings * remove aad settings * add custom metrics regions * fix config * add support for replica-set config * fix oomkilled * Add telegraf 403 metric telemetry & non 403 trace telemetry * fix type * fix package * fix package import * fix filename * delete unused file * conf file for rs; fix 403counttotal metric for telegraf, remove host and use nodeName consistently, rename metrics * fix statefulsets * fix typo. * fix another typo. * fix telemetry * fix casing issue * fix comma issue. 
* disable telemetry for rs ; fix stateful set name * worksround for namespace fix * telegraf integration - v1 * telemetry changes for telegraf * telemetry & other changes * remove custom metric regions as we dont need anymore * remove un-needed files * fixes * exclude certain volumes and fix telemetry to not have computer & nodename as dimensions (redundant) * Vishwa/resourcecentric (#208) (#209) * resourceid fix (for AKS only) * fix name * near final metric shape * change from customlog to fixed type (InsightsMetrics) * fix PR feedback * fix pr feedback --- installer/conf/td-agent-bit.conf | 27 +- installer/conf/telegraf.conf | 519 ++++++++++++++++++ installer/datafiles/base_container.data | 3 + .../scripts/TelegrafTCPErrorTelemetry.sh | 3 + source/code/go/src/plugins/oms.go | 241 +++++++- source/code/go/src/plugins/out_oms.go | 22 +- source/code/go/src/plugins/telemetry.go | 26 +- 7 files changed, 821 insertions(+), 20 deletions(-) create mode 100644 installer/conf/telegraf.conf create mode 100644 installer/scripts/TelegrafTCPErrorTelemetry.sh diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 78a7b2dde..88bacaca2 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -23,10 +23,33 @@ Mem_Buf_Limit 2m Path_Key filepath Skip_Long_Lines On + Ignore_Older 5m + +[INPUT] + Name tail + Tag oms.container.log.telegraf.err.* + Path /var/opt/microsoft/docker-cimprov/log/telegraf.log + DB /var/opt/microsoft/docker-cimprov/state/telegraf-log-state.db + Mem_Buf_Limit 2m + Path_Key filepath + Skip_Long_Lines On + Ignore_Older 5m + +[INPUT] + Name tcp + Tag oms.container.perf.telegraf.* + Listen 0.0.0.0 + Port 25226 + Chunk_Size 32 + Buffer_Size 64 + +[FILTER] + Name grep + Match oms.container.log.telegraf.err.* + #Regex log /^(?:(?!\[azure_monitor\]: failed to write batch: \[403\] 403 Forbidden).)*$/ [OUTPUT] Name oms EnableTelemetry true TelemetryPushIntervalSeconds 300 - Match oms.container.log.* - 
AgentVersion ciprod03122019 \ No newline at end of file + Match oms.container.* diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf new file mode 100644 index 000000000..355c88b3d --- /dev/null +++ b/installer/conf/telegraf.conf @@ -0,0 +1,519 @@ +# Telegraf Configuration +# +# Telegraf is entirely plugin driven. All metrics are gathered from the +# declared inputs, and sent to the declared outputs. +# +# Plugins must be declared in here to be active. +# To deactivate a plugin, comment out the name and any variables. +# +# Use 'telegraf -config telegraf.conf -test' to see what metrics a config +# file would generate. +# +# Environment variables can be used anywhere in this config file, simply prepend +# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"), +# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) + + +# Global tags can be specified here in key="value" format. +[global_tags] + #Below are entirely used for telemetry + AgentVersion = "$AGENT_VERSION" + AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" + ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME" + Region = "$TELEMETRY_AKS_REGION" + ClusterName = "$TELEMETRY_CLUSTER_NAME" + ClusterType = "$TELEMETRY_CLUSTER_TYPE" + Computer = "placeholder_hostname" + ControllerType = "$CONTROLLER_TYPE" + + hostName = "placeholder_hostname" + + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "60s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will send metrics to outputs in batches of at most + ## metric_batch_size metrics. + ## This controls the size of writes that Telegraf sends to output plugins. + metric_batch_size = 1000 + + ## For failed writes, telegraf will cache metric_buffer_limit metrics for each + ## output, and will flush this buffer on a successful write. 
Oldest metrics + ## are dropped first when this buffer fills. + ## This buffer only fills when writes fail to output plugin(s). + metric_buffer_limit = 10000 + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + ## Default flushing interval for all outputs. You shouldn't set this below + ## interval. Maximum flush_interval will be flush_interval + flush_jitter + flush_interval = "60s" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. + ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## By default or when set to "0s", precision will be set to the same + ## timestamp order as the collection interval, with the maximum being 1s. + ## ie, when interval = "10s", precision will be "1s" + ## when interval = "250ms", precision will be "1ms" + ## Precision will NOT be used for service inputs. It is up to each individual + ## service input to set the timestamp at the appropriate precision. + ## Valid time units are "ns", "us" (or "µs"), "ms", "s". + precision = "" + + ## Logging configuration: + ## Run telegraf with debug log messages. + debug = false + ## Run telegraf in quiet mode (error log messages only). + quiet = true + ## Specify the log file name. The empty string means to log to stderr. + logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log" + + ## Override default hostname, if empty use os.Hostname() + #hostname = "placeholder_hostname" + ## If set to true, do no set the "host" tag in the telegraf agent. 
+ omit_hostname = true + + +############################################################################### +# OUTPUT PLUGINS # +############################################################################### + +# Generic socket writer capable of handling multiple socket types. +[[outputs.socket_writer]] + ## URL to connect to + address = "tcp://0.0.0.0:25226" + # address = "tcp://example.com:http" + # address = "tcp4://127.0.0.1:8094" + # address = "tcp6://127.0.0.1:8094" + # address = "tcp6://[2001:db8::1]:8094" + # address = "udp://127.0.0.1:8094" + # address = "udp4://127.0.0.1:8094" + # address = "udp6://127.0.0.1:8094" + # address = "unix:///tmp/telegraf.sock" + # address = "unixgram:///tmp/telegraf.sock" + + ## Optional TLS Config + # tls_ca = "/etc/telegraf/ca.pem" + # tls_cert = "/etc/telegraf/cert.pem" + # tls_key = "/etc/telegraf/key.pem" + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false + + ## Period between keep alive probes. + ## Only applies to TCP sockets. + ## 0 disables keep alive probes. + ## Defaults to the OS configuration. + # keep_alive_period = "5m" + + ## Data format to generate. + ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "json" + namedrop = ["telegraf_telemetry"] + tagdrop = ["AgentVersion","AKS_RESOURCE_ID", "ACS_RESOURCE_NAME", "Region","ClusterName","ClusterType", "Computer", "ControllerType"] + +[[outputs.application_insights]] + ## Instrumentation key of the Application Insights resource. + instrumentation_key = "$TELEMETRY_APPLICATIONINSIGHTS_KEY" + + ## Timeout for closing (default: 5s). + # timeout = "5s" + + ## Enable additional diagnostic logging. + # enable_diagnostic_logging = false + + ## Context Tag Sources add Application Insights context tags to a tag value. 
+ ## + ## For list of allowed context tag keys see: + ## https://github.com/Microsoft/ApplicationInsights-Go/blob/master/appinsights/contracts/contexttagkeys.go + # [outputs.application_insights.context_tag_sources] + # "ai.cloud.role" = "kubernetes_container_name" + # "ai.cloud.roleInstance" = "kubernetes_pod_name" + namepass = ["telegraf_telemetry"] + #tagdrop = ["nodeName"] + +############################################################################### +# PROCESSOR PLUGINS # +############################################################################### + +# # Perform string processing on tags, fields, and measurements +#[[processors.rename]] + #[[processors.rename.replace]] + # measurement = "disk" + # dest = "nodes" +# [[processors.rename.replace]] +# field = "free" +# dest = "freeBytes" +# [[processors.rename.replace]] +# field = "used" +# dest = "usedBytes" +# [[processors.rename.replace]] +# field = "used_percent" +# dest = "usedPercentage" + #[[processors.rename.replace]] + # measurement = "net" + # dest = "nodes" + #[[processors.rename.replace]] + # field = "bytes_recv" + # dest = "networkBytesReceivedTotal" + #[[processors.rename.replace]] + # field = "bytes_sent" + # dest = "networkBytesSentTotal" + #[[processors.rename.replace]] + # field = "err_in" + # dest = "networkErrorsInTotal" + #[[processors.rename.replace]] + # field = "err_out" + # dest = "networkErrorsOutTotal" + #[[processors.rename.replace]] + # measurement = "kubernetes_pod_volume" + # dest = "pods" + #[[processors.rename.replace]] + # field = "used_bytes" + # dest = "podVolumeUsedBytes" + #[[processors.rename.replace]] + # field = "available_bytes" + # dest = "podVolumeAvailableBytes" + #[[processors.rename.replace]] + # measurement = "kubernetes_pod_network" + # dest = "pods" + #[[processors.rename.replace]] + # field = "tx_errors" + # dest = "podNetworkTxErrorsTotal" + #[[processors.rename.replace]] + # field = "rx_errors" + # dest = "podNetworkRxErrorsTotal" + 
#[[processors.rename.replace]] + # tag = "volume_name" + # dest = "volumeName" + #[[processors.rename.replace]] + # tag = "pod_name" + # dest = "podName" + #[[processors.rename.replace]] + # measurement = "docker" + # dest = "containers" + #[[processors.rename.replace]] + # measurement = "docker_container_status" + # dest = "containers" + #[[processors.rename.replace]] + # field = "n_containers" + # dest = "numContainers" + #[[processors.rename.replace]] + # field = "n_containers_running" + # dest = "numContainersRunning" + #[[processors.rename.replace]] + # field = "n_containers_stopped" + # dest = "numContainersStopped" + #[[processors.rename.replace]] + # field = "n_containers_paused" + # dest = "numContainersPaused" + #[[processors.rename.replace]] + # field = "n_images" + # dest = "numContainerImages" + +# ## Convert a tag value to uppercase +# # [[processors.strings.uppercase]] +# # tag = "method" +# +# ## Convert a field value to lowercase and store in a new field +# # [[processors.strings.lowercase]] +# # field = "uri_stem" +# # dest = "uri_stem_normalised" +# +# ## Trim leading and trailing whitespace using the default cutset +# # [[processors.strings.trim]] +# # field = "message" +# +# ## Trim leading characters in cutset +# # [[processors.strings.trim_left]] +# # field = "message" +# # cutset = "\t" +# +# ## Trim trailing characters in cutset +# # [[processors.strings.trim_right]] +# # field = "message" +# # cutset = "\r\n" +# +# ## Trim the given prefix from the field +# # [[processors.strings.trim_prefix]] +# # field = "my_value" +# # prefix = "my_" +# +# ## Trim the given suffix from the field +# # [[processors.strings.trim_suffix]] +# # field = "read_count" +# # suffix = "_count" + + +# # Print all metrics that pass through this filter. +# [[processors.topk]] +# ## How many seconds between aggregations +# # period = 10 +# +# ## How many top metrics to return +# # k = 10 +# +# ## Over which tags should the aggregation be done. 
Globs can be specified, in +# ## which case any tag matching the glob will aggregated over. If set to an +# ## empty list is no aggregation over tags is done +# # group_by = ['*'] +# +# ## Over which fields are the top k are calculated +# # fields = ["value"] +# +# ## What aggregation to use. Options: sum, mean, min, max +# # aggregation = "mean" +# +# ## Instead of the top k largest metrics, return the bottom k lowest metrics +# # bottomk = false +# +# ## The plugin assigns each metric a GroupBy tag generated from its name and +# ## tags. If this setting is different than "" the plugin will add a +# ## tag (which name will be the value of this setting) to each metric with +# ## the value of the calculated GroupBy tag. Useful for debugging +# # add_groupby_tag = "" +# +# ## These settings provide a way to know the position of each metric in +# ## the top k. The 'add_rank_field' setting allows to specify for which +# ## fields the position is required. If the list is non empty, then a field +# ## will be added to each and every metric for each string present in this +# ## setting. This field will contain the ranking of the group that +# ## the metric belonged to when aggregated over that field. +# ## The name of the field will be set to the name of the aggregation field, +# ## suffixed with the string '_topk_rank' +# # add_rank_fields = [] +# +# ## These settings provide a way to know what values the plugin is generating +# ## when aggregating metrics. The 'add_agregate_field' setting allows to +# ## specify for which fields the final aggregation value is required. If the +# ## list is non empty, then a field will be added to each every metric for +# ## each field present in this setting. This field will contain +# ## the computed aggregation for the group that the metric belonged to when +# ## aggregated over that field. 
+# ## The name of the field will be set to the name of the aggregation field, +# ## suffixed with the string '_topk_aggregate' +# # add_aggregate_fields = [] + + + +############################################################################### +# AGGREGATOR PLUGINS # +############################################################################### + +# # Keep the aggregate basicstats of each metric passing through. +# [[aggregators.basicstats]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false + + +# # Create aggregate histograms. +# [[aggregators.histogram]] +# ## The period in which to flush the aggregator. +# period = "30s" +# +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false +# +# ## Example config that aggregates all fields of the metric. +# # [[aggregators.histogram.config]] +# # ## The set of buckets. +# # buckets = [0.0, 15.6, 34.5, 49.1, 71.5, 80.5, 94.5, 100.0] +# # ## The name of metric. +# # measurement_name = "cpu" +# +# ## Example config that aggregates only specific fields of the metric. +# # [[aggregators.histogram.config]] +# # ## The set of buckets. +# # buckets = [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] +# # ## The name of metric. +# # measurement_name = "diskio" +# # ## The concrete fields of metric +# # fields = ["io_time", "read_time", "write_time"] + + +# # Keep the aggregate min/max of each metric passing through. +# [[aggregators.minmax]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. 
+# drop_original = false + + +# # Count the occurance of values in fields. +# [[aggregators.valuecounter]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false +# ## The fields for which the values will be counted +# fields = [] + + + +############################################################################### +# INPUT PLUGINS # +############################################################################### + +# Read metrics about cpu usage +#[[inputs.cpu]] + ## Whether to report per-cpu stats or not +# percpu = false + ## Whether to report total system cpu stats or not +# totalcpu = true + ## If true, collect raw CPU time metrics. +# collect_cpu_time = false + ## If true, compute and report the sum of all non-idle CPU states. +# report_active = true +# fieldpass = ["usage_active","cluster","node","host","device"] +# taginclude = ["cluster","cpu","node"] + + + +# Read metrics about disk usage by mount point +[[inputs.disk]] + ## By default stats will be gathered for all mount points. + ## Set mount_points will restrict the stats to only the specified mount points. + # mount_points = ["/"] + + ## Ignore mount points by filesystem type. + ignore_fs = ["tmpfs", "devtmpfs", "devfs", "overlay", "aufs", "squashfs"] + fieldpass = ["free", "used", "used_percent"] + taginclude = ["device","path","hostName"] + # Below due to Bug - https://github.com/influxdata/telegraf/issues/5615 + # ORDER matters here!! 
- i.e the below should be the LAST modifier + [inputs.disk.tagdrop] + path = ["/var/lib/kubelet*", "/dev/termination-log", "/var/log", "/etc/hosts", "/etc/resolv.conf", "/etc/hostname", "/etc/kubernetes/host", "/var/lib/docker/containers"] + + +# Read metrics about memory usage +#[[inputs.mem]] +# fieldpass = ["used_percent", "cluster", "node","host","device"] +# taginclude = ["cluster","node"] + + +# Read metrics about network interface usage +#[[inputs.net]] + ## By default, telegraf gathers stats from any up interface (excluding loopback) + ## Setting interfaces will tell it to gather these explicit interfaces, + ## regardless of status. + ## + # interfaces = ["eth0"] + ## + ## On linux systems telegraf also collects protocol stats. + ## Setting ignore_protocol_stats to true will skip reporting of protocol metrics. + ## +# ignore_protocol_stats = true + ## + #fieldpass = ["bytes_recv", "bytes_sent", "err_in", "err_out"] + #fieldpass = ["err_in", "err_out"] + #taginclude = ["interface","nodeName"] + +# Read metrics from the kubernetes kubelet api +#[[inputs.kubernetes]] + ## URL for the kubelet + #url = "http://1.1.1.1:10255" +# url = "http://placeholder_nodeip:10255" + + ## Use bearer token for authorization + # bearer_token = /path/to/bearer/token + + ## Set response_timeout (default 5 seconds) + # response_timeout = "5s" + + ## Optional TLS Config + # tls_ca = /path/to/cafile + # tls_cert = /path/to/certfile + # tls_key = /path/to/keyfile + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false +# fieldpass = ["used_bytes", "available_bytes", "tx_errors", "rx_errors" ] +# taginclude = ["volume_name","nodeName","namespace","pod_name"] +# Read metrics about docker containers +#[[inputs.docker]] + ## Docker Endpoint + ## To use TCP, set endpoint = "tcp://[ip]:[port]" + ## To use environment variables (ie, docker-machine), set endpoint = "ENV" +# endpoint = "unix:///var/run/host/docker.sock" + + ## Set to true to collect Swarm 
metrics(desired_replicas, running_replicas) +# gather_services = false + + ## Only collect metrics for these containers, collect all if empty +# container_names = [] + + ## Containers to include and exclude. Globs accepted. + ## Note that an empty array for both will include all containers +# container_name_include = [] +# container_name_exclude = [] + + ## Container states to include and exclude. Globs accepted. + ## When empty only containers in the "running" state will be captured. +# container_state_include = ['*'] + # container_state_exclude = [] + + ## Timeout for docker list, info, and stats commands +# timeout = "5s" + + ## Whether to report for each container per-device blkio (8:0, 8:1...) and + ## network (eth0, eth1, ...) stats or not +# perdevice = true + ## Whether to report for each container total blkio and network stats or not +# total = true + ## Which environment variables should we use as a tag + ##tag_env = ["JAVA_HOME", "HEAP_SIZE"] + + ## docker labels to include and exclude as tags. Globs accepted. + ## Note that an empty array for both will include all labels as tags +# docker_label_include = [] +# docker_label_exclude = [] + + ## Optional TLS Config + # tls_ca = "/etc/telegraf/ca.pem" + # tls_cert = "/etc/telegraf/cert.pem" + # tls_key = "/etc/telegraf/key.pem" + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false +# fieldpass = ["n_containers", "n_containers_running", "n_containers_stopped", "n_containers_paused", "n_images"] + #fieldpass = ["numContainers", "numContainersRunning", "numContainersStopped", "numContainersPaused", "numContainerImages"] +# taginclude = ["nodeName"] +[[inputs.exec]] + ## Commands array + interval = "15m" + commands = [ + "/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh" + ] + + ## Timeout for each command to complete. + timeout = "15s" + + ## measurement name suffix (for separating different commands) + name_suffix = "_telemetry" + + ## Data format to consume. 
+ ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "influx" + tagexclude = ["hostName"] + diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 9c4d563f8..996c7501a 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -98,6 +98,8 @@ MAINTAINER: 'Microsoft Corporation' /opt/td-agent-bit/bin/out_oms.so; intermediate/${{BUILD_CONFIGURATION}}/out_oms.so; 755; root; root /etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root +/etc/opt/microsoft/docker-cimprov/telegraf.conf; installer/conf/telegraf.conf; 644; root; root +/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh; installer/scripts/TelegrafTCPErrorTelemetry.sh; 755; root; root %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root @@ -137,6 +139,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/td-agent-bit; 755; root; root;sysdir /opt/td-agent-bit/bin; 755; root; root;sysdir +/etc/telegraf; 755; root; root;sysdir /opt/microsoft/omsagent/plugin/lib; 755; root; root; sysdir /opt/microsoft/omsagent/plugin/lib/application_insights; 755; root; root; sysdir diff --git a/installer/scripts/TelegrafTCPErrorTelemetry.sh b/installer/scripts/TelegrafTCPErrorTelemetry.sh new file mode 100644 index 000000000..637af3969 --- /dev/null +++ b/installer/scripts/TelegrafTCPErrorTelemetry.sh @@ -0,0 +1,3 @@ +#!/bin/sh +countErr=$(grep -iF "socket_writer" /var/opt/microsoft/docker-cimprov/log/telegraf.log | wc -l | tr -d '\n') +echo "telegraf,AKS_RESOURCE_ID=${AKS_RESOURCE_ID} telegrafTCPWriteErrorCountTotal=${countErr}i" \ No newline at end of file diff --git 
a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index a1ca3d6ee..269d16111 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -23,10 +23,31 @@ import ( ) // DataType for Container Log -const DataType = "CONTAINER_LOG_BLOB" +const ContainerLogDataType = "CONTAINER_LOG_BLOB" + +// DataType for Insights metric +const InsightsMetricsDataType = "INSIGHTS_METRICS_BLOB" + +//env varibale which has ResourceId for LA +const ResourceIdEnv = "AKS_RESOURCE_ID" + +//env variable which has ResourceName for NON-AKS +const ResourceNameEnv = "ACS_RESOURCE_NAME" + +// Origin prefix for telegraf Metrics (used as prefix for origin field & prefix for azure monitor specific tags) +const TelegrafMetricOriginPrefix = "container.azm.ms" +// Origin suffix for telegraf Metrics (used as suffix for origin field) +const TelegrafMetricOriginSuffix = "telegraf" +// Namespace prefix for telegraf Metrics (used as prefix for Namespace field) +//const TelegrafMetricNamespacePrefix = "plugin" +// clusterName tag +const TelegrafTagClusterName = "clusterName" +// clusterId tag +const TelegrafTagClusterID = "clusterId" // ContainerLogPluginConfFilePath --> config file path for container log plugin -const ContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" +const DaemonSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" +const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms-rs.conf" // IPName for Container Log const IPName = "Containers" @@ -44,10 +65,12 @@ var ( Computer string // WorkspaceID log analytics workspace id WorkspaceID string - // ResourceID for resource-centric log analytics data + // ResourceID for resource-centric log analytics data ResourceID string // Resource-centric flag (will be true if we determine if above RseourceID is non-empty - default is false) ResourceCentric bool + //ResourceName + ResourceName string ) var ( @@ 
-92,6 +115,26 @@ type DataItem struct { Computer string `json:"Computer"` } +// telegraf metric DataItem represents the object corresponding to the json that is sent by fluentbit tail plugin +type laTelegrafMetric struct { + // 'golden' fields + Origin string `json:"Origin"` + Namespace string `json:"Namespace"` + Name string `json:"Name"` + Value float64 `json:"Value"` + Tags string `json:"Tags"` + // specific required fields for LA + CollectionTime string `json:"CollectionTime"` //mapped to TimeGenerated + Computer string `json:"Computer"` +} + +// ContainerLogBlob represents the object corresponding to the payload that is sent to the ODS end point +type InsightsMetricsBlob struct { + DataType string `json:"DataType"` + IPName string `json:"IPName"` + DataItems []laTelegrafMetric `json:"DataItems"` +} + // ContainerLogBlob represents the object corresponding to the payload that is sent to the ODS end point type ContainerLogBlob struct { DataType string `json:"DataType"` @@ -207,6 +250,174 @@ func updateKubeSystemContainerIDs() { } } +//Azure loganalytics metric values have to be numeric, so string values are dropped +func convert(in interface{}) (float64, bool) { + switch v := in.(type) { + case int64: + return float64(v), true + case uint64: + return float64(v), true + case float64: + return v, true + case bool: + if v { + return float64(1), true + } + return float64(0), true + default: + Log ("returning 0 for %v ", in) + return float64(0), false + } +} + +//Translates telegraf time series to one or more Azure loganalytics metric(s) +func translateTelegrafMetrics(m map[interface{}]interface{}) ([]*laTelegrafMetric, error) { + + var laMetrics []*laTelegrafMetric + var tags map[interface{}]interface{} + tags = m["tags"].(map[interface{}]interface{}) + tagMap := make(map[string]string) + for k, v := range tags { + key := fmt.Sprintf("%s",k) + if key == "" { + continue + } + tagMap[key] = fmt.Sprintf("%s",v) + } + + //add azure monitor tags + 
tagMap[fmt.Sprintf("%s/%s", TelegrafMetricOriginPrefix, TelegrafTagClusterID)] = ResourceID + tagMap[fmt.Sprintf("%s/%s", TelegrafMetricOriginPrefix, TelegrafTagClusterName)] = ResourceName + + var fieldMap map[interface{}]interface{} + fieldMap = m["fields"].(map[interface{}]interface{}) + + tagJson, err := json.Marshal(&tagMap) + + if err != nil { + return nil, err + } + + for k, v := range fieldMap { + fv, ok := convert(v) + if !ok { + continue + } + i := m["timestamp"].(uint64) + laMetric := laTelegrafMetric{ + Origin: fmt.Sprintf("%s/%s", TelegrafMetricOriginPrefix, TelegrafMetricOriginSuffix), + //Namespace: fmt.Sprintf("%s/%s", TelegrafMetricNamespacePrefix, m["name"]), + Namespace: fmt.Sprintf("%s", m["name"]), + Name: fmt.Sprintf("%s",k), + Value: fv, + Tags: fmt.Sprintf("%s", tagJson), + CollectionTime: time.Unix(int64(i),0).Format(time.RFC3339), + Computer: Computer, //this is the collection agent's computer name, not necessarily to which computer the metric applies to + } + + //Log ("la metric:%v", laMetric) + laMetrics = append(laMetrics, &laMetric) + } + return laMetrics, nil +} + +//send metrics from Telegraf to LA. 1) Translate telegraf timeseries to LA metric(s) 2) Send it to LA as 'InsightsMetrics' fixed type +func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int { + var laMetrics []*laTelegrafMetric + + if ( (telegrafRecords== nil) || ! (len(telegrafRecords) > 0) ) { + Log("PostTelegrafMetricsToLA::Error:no timeseries to derive") + return output.FLB_OK + } + + for _, record := range telegrafRecords { + translatedMetrics, err := translateTelegrafMetrics(record) + if err != nil { + message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:when translating telegraf metric to log analytics metric %q", err) + Log(message) + //SendException(message) //This will be too noisy + } + laMetrics = append(laMetrics, translatedMetrics...) 
+ } + + if ( (laMetrics == nil) || !(len(laMetrics) > 0) ) { + Log("PostTelegrafMetricsToLA::Info:no metrics derived from timeseries data") + return output.FLB_OK + } else { + message := fmt.Sprintf("PostTelegrafMetricsToLA::Info:derived %v metrics from %v timeseries", len(laMetrics), len(telegrafRecords)) + Log(message) + } + + var metrics []laTelegrafMetric + var i int + + for i=0; i < len(laMetrics); i++ { + metrics = append(metrics, *laMetrics[i]) + } + + laTelegrafMetrics := InsightsMetricsBlob{ + DataType: InsightsMetricsDataType, + IPName: IPName, + DataItems: metrics} + + jsonBytes, err := json.Marshal(laTelegrafMetrics) + + if err != nil { + message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:when marshalling json %q", err) + Log(message) + SendException(message) + return output.FLB_OK + } + + //Post metrics data to LA + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(jsonBytes)) + + //req.URL.Query().Add("api-version","2016-04-01") + + //set headers + req.Header.Set("x-ms-date", time.Now().Format(time.RFC3339)) + + //expensive to do string len for every request, so use a flag + if ResourceCentric == true { + req.Header.Set("x-ms-AzureResourceId", ResourceID) + } + + start := time.Now() + resp, err := HTTPClient.Do(req) + elapsed := time.Since(start) + + if err != nil { + message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:(retriable) when sending %v metrics. 
duration:%v err:%q \n", len(laMetrics), elapsed, err.Error()) + Log(message) + SendException(message) + UpdateNumTelegrafMetricsSentTelemetry(0, 1) + return output.FLB_RETRY + } + + if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("PostTelegrafMetricsToLA::Error:(retriable) Response Status %v Status Code %v", resp.Status, resp.StatusCode) + } + UpdateNumTelegrafMetricsSentTelemetry(0, 1) + return output.FLB_RETRY + } + + defer resp.Body.Close() + + numMetrics := len(laMetrics) + UpdateNumTelegrafMetricsSentTelemetry(numMetrics, 0) + Log("PostTelegrafMetricsToLA::Info:Successfully flushed %v records in %v", numMetrics, elapsed) + + return output.FLB_OK +} + +func UpdateNumTelegrafMetricsSentTelemetry(numMetricsSent int, numSendErrors int) { + ContainerLogTelemetryMutex.Lock() + TelegrafMetricsSentCount += float64(numMetricsSent) + TelegrafMetricsSendErrorCount += float64(numSendErrors) + ContainerLogTelemetryMutex.Unlock() +} + // PostDataHelper sends data to the OMS endpoint func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { @@ -285,7 +496,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { if len(dataItems) > 0 { logEntry := ContainerLogBlob{ - DataType: DataType, + DataType: ContainerLogDataType, IPName: IPName, DataItems: dataItems} @@ -384,14 +595,30 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { log.Fatalln(message) } OMSEndpoint = omsadminConf["OMS_ENDPOINT"] + Log("OMSEndpoint %s", OMSEndpoint) + WorkspaceID = omsadminConf["WORKSPACE_ID"] ResourceID = os.Getenv("customResourceId") + if len(ResourceID) > 0 { + //AKS Scenario ResourceCentric = true - Log("OMS ResourceId=%s",ResourceID) + splitted := strings.Split(ResourceID, "/") + ResourceName = splitted[len(splitted)-1] + Log("ResourceCentric: True") + Log("ResourceID=%s",ResourceID) + Log("ResourceName=%s",ResourceID) + } + + if ResourceCentric == false { + //AKS-Engine/hybrid scenario + ResourceName = 
os.Getenv(ResourceNameEnv) + ResourceID = ResourceName + Log("ResourceCentric: False") + Log("ResourceID=%s",ResourceID) + Log("ResourceName=%s",ResourceName) } - Log("OMSEndpoint %s", OMSEndpoint) - + // Initialize image,name map refresh ticker containerInventoryRefreshInterval, err := strconv.Atoi(pluginConfig["container_inventory_refresh_interval"]) if err != nil { diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 133e0f039..dccc6774c 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -2,11 +2,13 @@ package main import ( "github.com/fluent/fluent-bit-go/output" + "github.com/Microsoft/ApplicationInsights-Go/appinsights" ) import ( "C" "strings" "unsafe" + "os" ) //export FLBPluginRegister @@ -19,8 +21,14 @@ func FLBPluginRegister(ctx unsafe.Pointer) int { // ctx (context) pointer to fluentbit context (state/ c code) func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") - agentVersion := output.FLBPluginConfigKey(ctx, "AgentVersion") - InitializePlugin(ContainerLogPluginConfFilePath, agentVersion) + agentVersion := os.Getenv("AGENT_VERSION") + if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "replicaset") == 0 { + Log("Using %s for plugin config \n", ReplicaSetContainerLogPluginConfFilePath) + InitializePlugin(ReplicaSetContainerLogPluginConfFilePath, agentVersion) + } else { + Log("Using %s for plugin config \n", DaemonSetContainerLogPluginConfFilePath) + InitializePlugin(DaemonSetContainerLogPluginConfFilePath, agentVersion) + } enableTelemetry := output.FLBPluginConfigKey(ctx, "EnableTelemetry") if strings.Compare(strings.ToLower(enableTelemetry), "true") == 0 { telemetryPushInterval := output.FLBPluginConfigKey(ctx, "TelemetryPushIntervalSeconds") @@ -51,9 +59,13 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { records = append(records, record) } - incomingTag := C.GoString(tag) - 
if strings.Contains(strings.ToLower(incomingTag), "oms.container.log.flbplugin") { - return PushToAppInsightsTraces(records) + incomingTag := strings.ToLower(C.GoString(tag)) + if strings.Contains(incomingTag, "oms.container.log.flbplugin") { + return PushToAppInsightsTraces(records, appinsights.Information, incomingTag) + } else if strings.Contains(incomingTag, "oms.container.perf.telegraf") { + return PostTelegrafMetricsToLA(records) + } else if strings.Contains(incomingTag, "oms.container.log.telegraf.err") { + return PushToAppInsightsTraces(records, appinsights.Error, incomingTag) } return PostDataHelper(records) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index a64ca2218..f507e4ab9 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -9,11 +9,12 @@ import ( "time" "github.com/Microsoft/ApplicationInsights-Go/appinsights" + "github.com/Microsoft/ApplicationInsights-Go/appinsights/contracts" "github.com/fluent/fluent-bit-go/output" ) var ( - // FlushedRecordsCount indicates the number of flushed records in the current period + // FlushedRecordsCount indicates the number of flushed log records in the current period FlushedRecordsCount float64 // FlushedRecordsTimeTaken indicates the cumulative time taken to flush the records for the current period FlushedRecordsTimeTaken float64 @@ -27,19 +28,23 @@ var ( TelemetryClient appinsights.TelemetryClient // ContainerLogTelemetryTicker sends telemetry periodically ContainerLogTelemetryTicker *time.Ticker + //Tracks the number of telegraf metrics sent successfully between telemetry ticker periods (uses ContainerLogTelemetryTicker) + TelegrafMetricsSentCount float64 + //Tracks the number of send errors between telemetry ticker periods (uses ContainerLogTelemetryTicker) + TelegrafMetricsSendErrorCount float64 ) const ( clusterTypeACS = "ACS" clusterTypeAKS = "AKS" - controllerTypeDaemonSet = "DaemonSet" - 
controllerTypeReplicaSet = "ReplicaSet" envAKSResourceID = "AKS_RESOURCE_ID" envACSResourceName = "ACS_RESOURCE_NAME" envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" metricNameAvgLogGenerationRate = "ContainerLogsGeneratedPerSec" metricNameAgentLogProcessingMaxLatencyMs = "ContainerLogsAgentSideLatencyMs" + metricNameNumberofTelegrafMetricsSentSuccessfully = "TelegrafMetricsSentCount" + metricNameNumberofSendErrorsTelegrafMetrics = "TelegrafMetricsSendErrorCount" defaultTelemetryPushIntervalSeconds = 300 @@ -63,9 +68,14 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { for ; true; <-ContainerLogTelemetryTicker.C { SendEvent(eventNameDaemonSetHeartbeat, make(map[string]string)) elapsed := time.Since(start) + ContainerLogTelemetryMutex.Lock() flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 logRate := FlushedRecordsCount / float64(elapsed/time.Second) + telegrafMetricsSentCount := TelegrafMetricsSentCount + telegrafMetricsSendErrorCount := TelegrafMetricsSendErrorCount + TelegrafMetricsSentCount = 0.0 + TelegrafMetricsSendErrorCount = 0.0 FlushedRecordsCount = 0.0 FlushedRecordsTimeTaken = 0.0 logLatencyMs := AgentLogProcessingMaxLatencyMs @@ -81,6 +91,8 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { logLatencyMetric := appinsights.NewMetricTelemetry(metricNameAgentLogProcessingMaxLatencyMs, logLatencyMs) logLatencyMetric.Properties["Container"] = logLatencyMsContainer TelemetryClient.Track(logLatencyMetric) + TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameNumberofTelegrafMetricsSentSuccessfully, telegrafMetricsSentCount)) + TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameNumberofSendErrorsTelegrafMetrics, telegrafMetricsSendErrorCount)) start = time.Now() } } @@ -129,7 +141,7 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { CommonProperties = make(map[string]string) 
CommonProperties["Computer"] = Computer CommonProperties["WorkspaceID"] = WorkspaceID - CommonProperties["ControllerType"] = controllerTypeDaemonSet + CommonProperties["ControllerType"] = os.Getenv("CONTROLLER_TYPE") CommonProperties["AgentVersion"] = agentVersion aksResourceID := os.Getenv(envAKSResourceID) @@ -164,13 +176,15 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { } // PushToAppInsightsTraces sends the log lines as trace messages to the configured App Insights Instance -func PushToAppInsightsTraces(records []map[interface{}]interface{}) int { +func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel contracts.SeverityLevel, tag string) int { var logLines []string for _, record := range records { logLines = append(logLines, ToString(record["log"])) } traceEntry := strings.Join(logLines, "\n") - TelemetryClient.TrackTrace(traceEntry, 1) + traceTelemetryItem := appinsights.NewTraceTelemetry(traceEntry, severityLevel) + traceTelemetryItem.Properties["tag"] = tag + TelemetryClient.Track(traceTelemetryItem) return output.FLB_OK } From 8cdf72437b3af7b49e6931602a2f2218deea8fbe Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 17 Apr 2019 19:20:57 -0700 Subject: [PATCH 088/160] Fix telemetry error for telegraf err count metric (#215) --- installer/scripts/TelegrafTCPErrorTelemetry.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/scripts/TelegrafTCPErrorTelemetry.sh b/installer/scripts/TelegrafTCPErrorTelemetry.sh index 637af3969..2bd58b202 100644 --- a/installer/scripts/TelegrafTCPErrorTelemetry.sh +++ b/installer/scripts/TelegrafTCPErrorTelemetry.sh @@ -1,3 +1,3 @@ #!/bin/sh countErr=$(grep -iF "socket_writer" /var/opt/microsoft/docker-cimprov/log/telegraf.log | wc -l | tr -d '\n') -echo "telegraf,AKS_RESOURCE_ID=${AKS_RESOURCE_ID} telegrafTCPWriteErrorCountTotal=${countErr}i" \ No newline at end of file +echo "telegraf,Source=telegrafErrLog 
telegrafTCPWriteErrorCountTotal=${countErr}i" \ No newline at end of file From 36c8037370bd6b98e36f1e03efdefa8de495d32e Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Thu, 30 May 2019 17:01:01 -0700 Subject: [PATCH 089/160] Fix Unscheduled Pod bug, remove excess telemetry (#218) * Fix Unscheduled Pod bug, remove excess telemetry * Send Success Telemetry only once after startup for a node in a cluster for MDM Post * Sending telemetry for successful push to MDM every hour --- source/code/plugin/filter_inventory2mdm.rb | 128 +++++++++++---------- source/code/plugin/out_mdm.rb | 7 +- 2 files changed, 73 insertions(+), 62 deletions(-) diff --git a/source/code/plugin/filter_inventory2mdm.rb b/source/code/plugin/filter_inventory2mdm.rb index 553c857b7..f98a3224e 100644 --- a/source/code/plugin/filter_inventory2mdm.rb +++ b/source/code/plugin/filter_inventory2mdm.rb @@ -10,11 +10,11 @@ module Fluent class Inventory2MdmFilter < Filter Fluent::Plugin.register_filter('filter_inventory2mdm', self) - + config_param :enable_log, :integer, :default => 0 config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log' config_param :custom_metrics_azure_regions, :string - + @@node_count_metric_name = 'nodesCount' @@pod_count_metric_name = 'podCount' @@pod_inventory_tag = 'mdm.kubepodinventory' @@ -23,63 +23,63 @@ class Inventory2MdmFilter < Filter @@node_status_not_ready = 'NotReady' @@node_inventory_custom_metrics_template = ' - { - "time": "%{timestamp}", - "data": { - "baseData": { - "metric": "%{metricName}", - "namespace": "insights.container/nodes", - "dimNames": [ + { + "time": "%{timestamp}", + "data": { + "baseData": { + "metric": "%{metricName}", + "namespace": "insights.container/nodes", + "dimNames": [ "status" - ], - "series": [ - { - "dimValues": [ + ], + "series": [ + { + "dimValues": [ "%{statusValue}" - ], + ], "min": %{node_status_count}, - "max": %{node_status_count}, - "sum": %{node_status_count}, + "max": 
%{node_status_count}, + "sum": %{node_status_count}, "count": 1 - } - ] - } - } + } + ] + } + } }' @@pod_inventory_custom_metrics_template = ' - { - "time": "%{timestamp}", - "data": { - "baseData": { - "metric": "%{metricName}", - "namespace": "insights.container/pods", - "dimNames": [ - "phase", - "Kubernetes namespace", - "node", + { + "time": "%{timestamp}", + "data": { + "baseData": { + "metric": "%{metricName}", + "namespace": "insights.container/pods", + "dimNames": [ + "phase", + "Kubernetes namespace", + "node", "controllerName" - ], - "series": [ - { - "dimValues": [ - "%{phaseDimValue}", - "%{namespaceDimValue}", - "%{nodeDimValue}", + ], + "series": [ + { + "dimValues": [ + "%{phaseDimValue}", + "%{namespaceDimValue}", + "%{nodeDimValue}", "%{controllerNameDimValue}" - ], + ], "min": %{podCountMetricValue}, - "max": %{podCountMetricValue}, - "sum": %{podCountMetricValue}, - "count": 1 - } - ] - } - } + "max": %{podCountMetricValue}, + "sum": %{podCountMetricValue}, + "count": 1 + } + ] + } + } }' - + @@pod_phase_values = ['Running', 'Pending', 'Succeeded', 'Failed', 'Unknown'] - + @process_incoming_stream = true def initialize @@ -89,7 +89,7 @@ def initialize def configure(conf) super @log = nil - + if @enable_log @log = Logger.new(@log_path, 1, 5000000) @log.debug {'Starting filter_inventory2mdm plugin'} @@ -105,15 +105,15 @@ def start def shutdown super end - + def process_node_inventory_records(es) timestamp = DateTime.now - + begin node_ready_count = 0 node_not_ready_count = 0 records = [] - + es.each{|time,record| begin timestamp = record['DataItems'][0]['CollectionTime'] @@ -129,15 +129,15 @@ def process_node_inventory_records(es) ready_record = @@node_inventory_custom_metrics_template % { timestamp: timestamp, - metricName: @@node_count_metric_name, + metricName: @@node_count_metric_name, statusValue: @@node_status_ready, node_status_count: node_ready_count } records.push(JSON.parse(ready_record)) - + not_ready_record = 
@@node_inventory_custom_metrics_template % { timestamp: timestamp, - metricName: @@node_count_metric_name, + metricName: @@node_count_metric_name, statusValue: @@node_status_not_ready, node_status_count: node_not_ready_count } @@ -164,7 +164,7 @@ def process_pod_inventory_records(es) record_count += 1 timestamp = record['DataItems'][0]['CollectionTime'] podUid = record['DataItems'][0]['PodUid'] - + if podUids.key?(podUid) #@log.info "pod with #{podUid} already counted" next @@ -176,6 +176,12 @@ def process_pod_inventory_records(es) podControllerNameDimValue = record['DataItems'][0]['ControllerName'] podNodeDimValue = record['DataItems'][0]['Computer'] + if podNodeDimValue.empty? && podPhaseDimValue.downcase == 'pending' + podNodeDimValue = 'unscheduled' + elsif podNodeDimValue.empty? + podNodeDimValue = 'unknown' + end + # group by distinct dimension values pod_key = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue, podPhaseDimValue].join('~~') @@ -197,7 +203,7 @@ def process_pod_inventory_records(es) pod_count = 1 pod_count_hash[pod_key] = pod_count end - + # Collect all possible combinations of dimension values other than pod phase key_without_phase_dim_value = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue].join('~~') if no_phase_dim_values_hash.key?(key_without_phase_dim_value) @@ -237,9 +243,9 @@ def process_pod_inventory_records(es) timestamp: timestamp, metricName: @@pod_count_metric_name, phaseDimValue: podPhaseDimValue, - namespaceDimValue: podNamespaceDimValue, - nodeDimValue: podNodeDimValue, - controllerNameDimValue: podControllerNameDimValue, + namespaceDimValue: podNamespaceDimValue, + nodeDimValue: podNodeDimValue, + controllerNameDimValue: podControllerNameDimValue, podCountMetricValue: value } records.push(JSON.parse(record)) @@ -265,11 +271,11 @@ def filter_stream(tag, es) elsif tag.downcase.start_with?(@@pod_inventory_tag) @log.info 'Processing POD inventory records in filter plugin to send to MDM' 
filtered_records, time = process_pod_inventory_records(es) - else + else filtered_records = [] end end - filtered_records.each {|filtered_record| + filtered_records.each {|filtered_record| new_es.add(time, filtered_record) if filtered_record } if filtered_records rescue => e diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 351198afe..68c43d5da 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -31,6 +31,7 @@ def initialize @last_post_attempt_time = Time.now @first_post_attempt_made = false @can_send_data_to_mdm = true + @last_telemetry_sent_time = nil end def configure(conf) @@ -156,7 +157,11 @@ def send_to_mdm(post_body) response = @http_client.request(request) response.value # this throws for non 200 HTTP response code @log.info "HTTP Post Response Code : #{response.code}" - ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMSendSuccessful", {}) + if @last_telemetry_sent_time.nil? || @last_telemetry_sent_time + 60 * 60 < Time.now + ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMSendSuccessful", {}) + @last_telemetry_sent_time = Time.now + end + rescue Net::HTTPServerException => e @log.info "Failed to Post Metrics to MDM : #{e} Response: #{response}" @log.debug_backtrace(e.backtrace) From 803f934cba774bf2abf7594a1025bad88c105e5c Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 5 Jun 2019 17:32:16 -0700 Subject: [PATCH 090/160] Merge from Vishwa/promstandardmetrics into ci_feature (#220) * enable prometheus metrics collection in replica-set * fixing typos * fix config file path for replicaset * fix configuration * config changes --- installer/conf/td-agent-bit-rs.conf | 29 ++ installer/conf/td-agent-bit.conf | 5 - installer/conf/telegraf-rs.conf | 567 ++++++++++++++++++++++++ installer/conf/telegraf.conf | 107 ++++- installer/datafiles/base_container.data | 2 + source/code/go/src/plugins/oms.go | 10 +- source/code/go/src/plugins/telemetry.go | 20 +- 7 files changed, 709 
insertions(+), 31 deletions(-) create mode 100644 installer/conf/td-agent-bit-rs.conf create mode 100644 installer/conf/telegraf-rs.conf diff --git a/installer/conf/td-agent-bit-rs.conf b/installer/conf/td-agent-bit-rs.conf new file mode 100644 index 000000000..740f8a951 --- /dev/null +++ b/installer/conf/td-agent-bit-rs.conf @@ -0,0 +1,29 @@ +[SERVICE] + Flush 30 + Log_Level info + Parsers_File /etc/td-agent-bit/parsers.conf + Log_File /var/opt/microsoft/docker-cimprov/log/fluent-bit.log + +[INPUT] + Name tail + Tag oms.container.log.telegraf.err.* + Path /var/opt/microsoft/docker-cimprov/log/telegraf.log + DB /var/opt/microsoft/docker-cimprov/state/telegraf-log-state.db + Mem_Buf_Limit 2m + Path_Key filepath + Skip_Long_Lines On + Ignore_Older 5m + +[INPUT] + Name tcp + Tag oms.container.perf.telegraf.* + Listen 0.0.0.0 + Port 25226 + Chunk_Size 32 + Buffer_Size 64 + +[OUTPUT] + Name oms + EnableTelemetry true + TelemetryPushIntervalSeconds 300 + Match oms.container.* diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 88bacaca2..50967e61f 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -43,11 +43,6 @@ Chunk_Size 32 Buffer_Size 64 -[FILTER] - Name grep - Match oms.container.log.telegraf.err.* - #Regex log /^(?:(?!\[azure_monitor\]: failed to write batch: \[403\] 403 Forbidden).)*$/ - [OUTPUT] Name oms EnableTelemetry true diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf new file mode 100644 index 000000000..cb9a36685 --- /dev/null +++ b/installer/conf/telegraf-rs.conf @@ -0,0 +1,567 @@ +# Telegraf Configuration +# +# Telegraf is entirely plugin driven. All metrics are gathered from the +# declared inputs, and sent to the declared outputs. +# +# Plugins must be declared in here to be active. +# To deactivate a plugin, comment out the name and any variables. +# +# Use 'telegraf -config telegraf.conf -test' to see what metrics a config +# file would generate. 
+# +# Environment variables can be used anywhere in this config file, simply prepend +# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"), +# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) + + +# Global tags can be specified here in key="value" format. +[global_tags] + #Below are entirely used for telemetry + #AgentVersion = "$AGENT_VERSION" + #AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" + #ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME" + #Region = "$TELEMETRY_AKS_REGION" + #ClusterName = "$TELEMETRY_CLUSTER_NAME" + #ClusterType = "$TELEMETRY_CLUSTER_TYPE" + #Computer = "placeholder_hostname" + #ControllerType = "$CONTROLLER_TYPE" + + #hostName = "placeholder_hostname" + + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "60s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will send metrics to outputs in batches of at most + ## metric_batch_size metrics. + ## This controls the size of writes that Telegraf sends to output plugins. + metric_batch_size = 1000 + + ## For failed writes, telegraf will cache metric_buffer_limit metrics for each + ## output, and will flush this buffer on a successful write. Oldest metrics + ## are dropped first when this buffer fills. + ## This buffer only fills when writes fail to output plugin(s). + metric_buffer_limit = 10000 + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + ## Default flushing interval for all outputs. You shouldn't set this below + ## interval. 
Maximum flush_interval will be flush_interval + flush_jitter + flush_interval = "60s" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. + ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## By default or when set to "0s", precision will be set to the same + ## timestamp order as the collection interval, with the maximum being 1s. + ## ie, when interval = "10s", precision will be "1s" + ## when interval = "250ms", precision will be "1ms" + ## Precision will NOT be used for service inputs. It is up to each individual + ## service input to set the timestamp at the appropriate precision. + ## Valid time units are "ns", "us" (or "µs"), "ms", "s". + precision = "" + + ## Logging configuration: + ## Run telegraf with debug log messages. + debug = false + ## Run telegraf in quiet mode (error log messages only). + quiet = true + ## Specify the log file name. The empty string means to log to stderr. + logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log" + + ## Override default hostname, if empty use os.Hostname() + #hostname = "placeholder_hostname" + ## If set to true, do no set the "host" tag in the telegraf agent. + omit_hostname = true + + +############################################################################### +# OUTPUT PLUGINS # +############################################################################### + +# Generic socket writer capable of handling multiple socket types. 
+[[outputs.socket_writer]] + ## URL to connect to + address = "tcp://0.0.0.0:25226" + # address = "tcp://example.com:http" + # address = "tcp4://127.0.0.1:8094" + # address = "tcp6://127.0.0.1:8094" + # address = "tcp6://[2001:db8::1]:8094" + # address = "udp://127.0.0.1:8094" + # address = "udp4://127.0.0.1:8094" + # address = "udp6://127.0.0.1:8094" + # address = "unix:///tmp/telegraf.sock" + # address = "unixgram:///tmp/telegraf.sock" + + ## Optional TLS Config + # tls_ca = "/etc/telegraf/ca.pem" + # tls_cert = "/etc/telegraf/cert.pem" + # tls_key = "/etc/telegraf/key.pem" + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false + + ## Period between keep alive probes. + ## Only applies to TCP sockets. + ## 0 disables keep alive probes. + ## Defaults to the OS configuration. + # keep_alive_period = "5m" + + ## Data format to generate. + ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "json" + namedrop = ["telegraf_telemetry"] + #tagdrop = ["AgentVersion","AKS_RESOURCE_ID", "ACS_RESOURCE_NAME", "Region","ClusterName","ClusterType", "Computer", "ControllerType"] + +[[outputs.application_insights]] + ## Instrumentation key of the Application Insights resource. + instrumentation_key = "$TELEMETRY_APPLICATIONINSIGHTS_KEY" + + ## Timeout for closing (default: 5s). + # timeout = "5s" + + ## Enable additional diagnostic logging. + # enable_diagnostic_logging = false + + ## Context Tag Sources add Application Insights context tags to a tag value. 
+ ## + ## For list of allowed context tag keys see: + ## https://github.com/Microsoft/ApplicationInsights-Go/blob/master/appinsights/contracts/contexttagkeys.go + # [outputs.application_insights.context_tag_sources] + # "ai.cloud.role" = "kubernetes_container_name" + # "ai.cloud.roleInstance" = "kubernetes_pod_name" + namepass = ["telegraf_telemetry"] + #tagdrop = ["nodeName"] + +############################################################################### +# PROCESSOR PLUGINS # +############################################################################### + +# # Perform string processing on tags, fields, and measurements +#[[processors.rename]] + #[[processors.rename.replace]] + # measurement = "disk" + # dest = "nodes" +# [[processors.rename.replace]] +# field = "free" +# dest = "freeBytes" +# [[processors.rename.replace]] +# field = "used" +# dest = "usedBytes" +# [[processors.rename.replace]] +# field = "used_percent" +# dest = "usedPercentage" + #[[processors.rename.replace]] + # measurement = "net" + # dest = "nodes" + #[[processors.rename.replace]] + # field = "bytes_recv" + # dest = "networkBytesReceivedTotal" + #[[processors.rename.replace]] + # field = "bytes_sent" + # dest = "networkBytesSentTotal" + #[[processors.rename.replace]] + # field = "err_in" + # dest = "networkErrorsInTotal" + #[[processors.rename.replace]] + # field = "err_out" + # dest = "networkErrorsOutTotal" + #[[processors.rename.replace]] + # measurement = "kubernetes_pod_volume" + # dest = "pods" + #[[processors.rename.replace]] + # field = "used_bytes" + # dest = "podVolumeUsedBytes" + #[[processors.rename.replace]] + # field = "available_bytes" + # dest = "podVolumeAvailableBytes" + #[[processors.rename.replace]] + # measurement = "kubernetes_pod_network" + # dest = "pods" + #[[processors.rename.replace]] + # field = "tx_errors" + # dest = "podNetworkTxErrorsTotal" + #[[processors.rename.replace]] + # field = "rx_errors" + # dest = "podNetworkRxErrorsTotal" + 
#[[processors.rename.replace]] + # tag = "volume_name" + # dest = "volumeName" + #[[processors.rename.replace]] + # tag = "pod_name" + # dest = "podName" + #[[processors.rename.replace]] + # measurement = "docker" + # dest = "containers" + #[[processors.rename.replace]] + # measurement = "docker_container_status" + # dest = "containers" + #[[processors.rename.replace]] + # field = "n_containers" + # dest = "numContainers" + #[[processors.rename.replace]] + # field = "n_containers_running" + # dest = "numContainersRunning" + #[[processors.rename.replace]] + # field = "n_containers_stopped" + # dest = "numContainersStopped" + #[[processors.rename.replace]] + # field = "n_containers_paused" + # dest = "numContainersPaused" + #[[processors.rename.replace]] + # field = "n_images" + # dest = "numContainerImages" + +# ## Convert a tag value to uppercase +# # [[processors.strings.uppercase]] +# # tag = "method" +# +# ## Convert a field value to lowercase and store in a new field +# # [[processors.strings.lowercase]] +# # field = "uri_stem" +# # dest = "uri_stem_normalised" +# +# ## Trim leading and trailing whitespace using the default cutset +# # [[processors.strings.trim]] +# # field = "message" +# +# ## Trim leading characters in cutset +# # [[processors.strings.trim_left]] +# # field = "message" +# # cutset = "\t" +# +# ## Trim trailing characters in cutset +# # [[processors.strings.trim_right]] +# # field = "message" +# # cutset = "\r\n" +# +# ## Trim the given prefix from the field +# # [[processors.strings.trim_prefix]] +# # field = "my_value" +# # prefix = "my_" +# +# ## Trim the given suffix from the field +# # [[processors.strings.trim_suffix]] +# # field = "read_count" +# # suffix = "_count" + + +# # Print all metrics that pass through this filter. +# [[processors.topk]] +# ## How many seconds between aggregations +# # period = 10 +# +# ## How many top metrics to return +# # k = 10 +# +# ## Over which tags should the aggregation be done. 
Globs can be specified, in +# ## which case any tag matching the glob will aggregated over. If set to an +# ## empty list is no aggregation over tags is done +# # group_by = ['*'] +# +# ## Over which fields are the top k are calculated +# # fields = ["value"] +# +# ## What aggregation to use. Options: sum, mean, min, max +# # aggregation = "mean" +# +# ## Instead of the top k largest metrics, return the bottom k lowest metrics +# # bottomk = false +# +# ## The plugin assigns each metric a GroupBy tag generated from its name and +# ## tags. If this setting is different than "" the plugin will add a +# ## tag (which name will be the value of this setting) to each metric with +# ## the value of the calculated GroupBy tag. Useful for debugging +# # add_groupby_tag = "" +# +# ## These settings provide a way to know the position of each metric in +# ## the top k. The 'add_rank_field' setting allows to specify for which +# ## fields the position is required. If the list is non empty, then a field +# ## will be added to each and every metric for each string present in this +# ## setting. This field will contain the ranking of the group that +# ## the metric belonged to when aggregated over that field. +# ## The name of the field will be set to the name of the aggregation field, +# ## suffixed with the string '_topk_rank' +# # add_rank_fields = [] +# +# ## These settings provide a way to know what values the plugin is generating +# ## when aggregating metrics. The 'add_agregate_field' setting allows to +# ## specify for which fields the final aggregation value is required. If the +# ## list is non empty, then a field will be added to each every metric for +# ## each field present in this setting. This field will contain +# ## the computed aggregation for the group that the metric belonged to when +# ## aggregated over that field. 
+# ## The name of the field will be set to the name of the aggregation field, +# ## suffixed with the string '_topk_aggregate' +# # add_aggregate_fields = [] + + + +############################################################################### +# AGGREGATOR PLUGINS # +############################################################################### + +# # Keep the aggregate basicstats of each metric passing through. +# [[aggregators.basicstats]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false + + +# # Create aggregate histograms. +# [[aggregators.histogram]] +# ## The period in which to flush the aggregator. +# period = "30s" +# +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false +# +# ## Example config that aggregates all fields of the metric. +# # [[aggregators.histogram.config]] +# # ## The set of buckets. +# # buckets = [0.0, 15.6, 34.5, 49.1, 71.5, 80.5, 94.5, 100.0] +# # ## The name of metric. +# # measurement_name = "cpu" +# +# ## Example config that aggregates only specific fields of the metric. +# # [[aggregators.histogram.config]] +# # ## The set of buckets. +# # buckets = [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] +# # ## The name of metric. +# # measurement_name = "diskio" +# # ## The concrete fields of metric +# # fields = ["io_time", "read_time", "write_time"] + + +# # Keep the aggregate min/max of each metric passing through. +# [[aggregators.minmax]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. 
+# drop_original = false + + +# # Count the occurance of values in fields. +# [[aggregators.valuecounter]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false +# ## The fields for which the values will be counted +# fields = [] + + + +############################################################################### +# INPUT PLUGINS # +############################################################################### + +# Read metrics about cpu usage +#[[inputs.cpu]] + ## Whether to report per-cpu stats or not +# percpu = false + ## Whether to report total system cpu stats or not +# totalcpu = true + ## If true, collect raw CPU time metrics. +# collect_cpu_time = false + ## If true, compute and report the sum of all non-idle CPU states. +# report_active = true +# fieldpass = ["usage_active","cluster","node","host","device"] +# taginclude = ["cluster","cpu","node"] + + + +# Read metrics about disk usage by mount point +#[[inputs.disk]] + ## By default stats will be gathered for all mount points. + ## Set mount_points will restrict the stats to only the specified mount points. + # mount_points = ["/"] + + ## Ignore mount points by filesystem type. +# ignore_fs = ["tmpfs", "devtmpfs", "devfs", "overlay", "aufs", "squashfs"] +# fieldpass = ["free", "used", "used_percent"] +# taginclude = ["device","path","hostName"] + # Below due to Bug - https://github.com/influxdata/telegraf/issues/5615 + # ORDER matters here!! 
- i.e the below should be the LAST modifier +# [inputs.disk.tagdrop] +# path = ["/var/lib/kubelet*", "/dev/termination-log", "/var/log", "/etc/hosts", "/etc/resolv.conf", "/etc/hostname", "/etc/kubernetes/host", "/var/lib/docker/containers"] + + +# Read metrics about memory usage +#[[inputs.mem]] +# fieldpass = ["used_percent", "cluster", "node","host","device"] +# taginclude = ["cluster","node"] + + +# Read metrics about network interface usage +#[[inputs.net]] + ## By default, telegraf gathers stats from any up interface (excluding loopback) + ## Setting interfaces will tell it to gather these explicit interfaces, + ## regardless of status. + ## + # interfaces = ["eth0"] + ## + ## On linux systems telegraf also collects protocol stats. + ## Setting ignore_protocol_stats to true will skip reporting of protocol metrics. + ## +# ignore_protocol_stats = true + ## + #fieldpass = ["bytes_recv", "bytes_sent", "err_in", "err_out"] + #fieldpass = ["err_in", "err_out"] + #taginclude = ["interface","nodeName"] + +# Read metrics from the kubernetes kubelet api +#[[inputs.kubernetes]] + ## URL for the kubelet + #url = "http://1.1.1.1:10255" +# url = "http://placeholder_nodeip:10255" + + ## Use bearer token for authorization + # bearer_token = /path/to/bearer/token + + ## Set response_timeout (default 5 seconds) + # response_timeout = "5s" + + ## Optional TLS Config + # tls_ca = /path/to/cafile + # tls_cert = /path/to/certfile + # tls_key = /path/to/keyfile + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false +# fieldpass = ["used_bytes", "available_bytes", "tx_errors", "rx_errors" ] +# taginclude = ["volume_name","nodeName","namespace","pod_name"] +# Read metrics about docker containers +#[[inputs.docker]] + ## Docker Endpoint + ## To use TCP, set endpoint = "tcp://[ip]:[port]" + ## To use environment variables (ie, docker-machine), set endpoint = "ENV" +# endpoint = "unix:///var/run/host/docker.sock" + + ## Set to true to collect Swarm 
metrics(desired_replicas, running_replicas) +# gather_services = false + + ## Only collect metrics for these containers, collect all if empty +# container_names = [] + + ## Containers to include and exclude. Globs accepted. + ## Note that an empty array for both will include all containers +# container_name_include = [] +# container_name_exclude = [] + + ## Container states to include and exclude. Globs accepted. + ## When empty only containers in the "running" state will be captured. +# container_state_include = ['*'] + # container_state_exclude = [] + + ## Timeout for docker list, info, and stats commands +# timeout = "5s" + + ## Whether to report for each container per-device blkio (8:0, 8:1...) and + ## network (eth0, eth1, ...) stats or not +# perdevice = true + ## Whether to report for each container total blkio and network stats or not +# total = true + ## Which environment variables should we use as a tag + ##tag_env = ["JAVA_HOME", "HEAP_SIZE"] + + ## docker labels to include and exclude as tags. Globs accepted. + ## Note that an empty array for both will include all labels as tags +# docker_label_include = [] +# docker_label_exclude = [] + + ## Optional TLS Config + # tls_ca = "/etc/telegraf/ca.pem" + # tls_cert = "/etc/telegraf/cert.pem" + # tls_key = "/etc/telegraf/key.pem" + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false +# fieldpass = ["n_containers", "n_containers_running", "n_containers_stopped", "n_containers_paused", "n_images"] + #fieldpass = ["numContainers", "numContainersRunning", "numContainersStopped", "numContainersPaused", "numContainerImages"] +# taginclude = ["nodeName"] +#[[inputs.prometheus]] + ## An array of urls to scrape metrics from. +# urls = ["https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT/metrics"] +# fieldpass = ["apiserver_request_count"] + +# metric_version = 2 +# url_tag = "scrapeUrl" + + ## An array of Kubernetes services to scrape metrics from. 
+ # kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"] + + ## Kubernetes config file to create client from. + # kube_config = "/path/to/kubernetes.config" + + ## Scrape Kubernetes pods for the following prometheus annotations: + ## - prometheus.io/scrape: Enable scraping for this pod + ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to + ## set this to `https` & most likely set the tls config. + ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation. + ## - prometheus.io/port: If port is not 9102 use this annotation + # monitor_kubernetes_pods = true + + ## Use bearer token for authorization. ('bearer_token' takes priority) +# bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token" + ## OR + # bearer_token_string = "abc_123" + + ## Specify timeout duration for slower prometheus clients (default is 3s) +# response_timeout = "15s" + + ## Optional TLS Config +# tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + #tls_cert = /path/to/certfile + # tls_key = /path/to/keyfile + ## Use TLS but skip chain & host verification +# insecure_skip_verify = true + #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"] +# [inputs.prometheus.tagpass] + +[[inputs.exec]] + ## Commands array + interval = "15m" + commands = [ + "/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh" + ] + + ## Timeout for each command to complete. + timeout = "15s" + + ## measurement name suffix (for separating different commands) + name_suffix = "_telemetry" + + ## Data format to consume. 
+ ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "influx" + #tagexclude = ["hostName"] + [inputs.exec.tags] + AgentVersion = "$AGENT_VERSION" + AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" + ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME" + Region = "$TELEMETRY_AKS_REGION" + ClusterName = "$TELEMETRY_CLUSTER_NAME" + ClusterType = "$TELEMETRY_CLUSTER_TYPE" + Computer = "placeholder_hostname" + ControllerType = "$CONTROLLER_TYPE" + diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 355c88b3d..e7c0d6509 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -17,14 +17,14 @@ # Global tags can be specified here in key="value" format. [global_tags] #Below are entirely used for telemetry - AgentVersion = "$AGENT_VERSION" - AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" - ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME" - Region = "$TELEMETRY_AKS_REGION" - ClusterName = "$TELEMETRY_CLUSTER_NAME" - ClusterType = "$TELEMETRY_CLUSTER_TYPE" - Computer = "placeholder_hostname" - ControllerType = "$CONTROLLER_TYPE" + #AgentVersion = "$AGENT_VERSION" + #AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" + #ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME" + #Region = "$TELEMETRY_AKS_REGION" + #ClusterName = "$TELEMETRY_CLUSTER_NAME" + #ClusterType = "$TELEMETRY_CLUSTER_TYPE" + #Computer = "placeholder_hostname" + #ControllerType = "$CONTROLLER_TYPE" hostName = "placeholder_hostname" @@ -122,7 +122,7 @@ ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md data_format = "json" namedrop = ["telegraf_telemetry"] - tagdrop = ["AgentVersion","AKS_RESOURCE_ID", "ACS_RESOURCE_NAME", "Region","ClusterName","ClusterType", "Computer", "ControllerType"] + #tagdrop = ["AgentVersion","AKS_RESOURCE_ID", "ACS_RESOURCE_NAME", "Region","ClusterName","ClusterType", 
"Computer", "ControllerType"] [[outputs.application_insights]] ## Instrumentation key of the Application Insights resource. @@ -392,6 +392,7 @@ # Read metrics about disk usage by mount point [[inputs.disk]] + name_prefix="container.azm.ms/" ## By default stats will be gathered for all mount points. ## Set mount_points will restrict the stats to only the specified mount points. # mount_points = ["/"] @@ -411,9 +412,40 @@ # fieldpass = ["used_percent", "cluster", "node","host","device"] # taginclude = ["cluster","node"] +# Read metrics about disk IO by device +[[inputs.diskio]] + name_prefix="container.azm.ms/" + ## By default, telegraf will gather stats for all devices including + ## disk partitions. + ## Setting devices will restrict the stats to the specified devices. + devices = ["sd[a-z][0-9]"] + ## Uncomment the following line if you need disk serial numbers. + # skip_serial_number = false + # + ## On systems which support it, device metadata can be added in the form of + ## tags. + ## Currently only Linux is supported via udev properties. You can view + ## available properties for a device by running: + ## 'udevadm info -q property -n /dev/sda' + ## Note: Most, but not all, udev properties can be accessed this way. Properties + ## that are currently inaccessible include DEVTYPE, DEVNAME, and DEVPATH. + # device_tags = ["ID_FS_TYPE", "ID_FS_USAGE"] + # + ## Using the same metadata source as device_tags, you can also customize the + ## name of the device via templates. + ## The 'name_templates' parameter is a list of templates to try and apply to + ## the device. The template may contain variables in the form of '$PROPERTY' or + ## '${PROPERTY}'. The first template which does not contain any variables not + ## present for the device is used as the device name tag. + ## The typical use case is for LVM volumes, to get the VG/LV name instead of + ## the near-meaningless DM-0 name. 
+ # name_templates = ["$ID_FS_LABEL","$DM_VG_NAME/$DM_LV_NAME"] + fieldpass = ["reads", "read_bytes", "read_time", "writes", "write_bytes", "write_time", "io_time", "iops_in_progress"] + taginclude = ["name","hostName"] # Read metrics about network interface usage -#[[inputs.net]] +[[inputs.net]] + name_prefix="container.azm.ms/" ## By default, telegraf gathers stats from any up interface (excluding loopback) ## Setting interfaces will tell it to gather these explicit interfaces, ## regardless of status. @@ -423,11 +455,10 @@ ## On linux systems telegraf also collects protocol stats. ## Setting ignore_protocol_stats to true will skip reporting of protocol metrics. ## -# ignore_protocol_stats = true + ignore_protocol_stats = true ## - #fieldpass = ["bytes_recv", "bytes_sent", "err_in", "err_out"] - #fieldpass = ["err_in", "err_out"] - #taginclude = ["interface","nodeName"] + fieldpass = ["bytes_recv", "bytes_sent", "err_in", "err_out"] + taginclude = ["interface","hostName"] # Read metrics from the kubernetes kubelet api #[[inputs.kubernetes]] @@ -497,6 +528,45 @@ # fieldpass = ["n_containers", "n_containers_running", "n_containers_stopped", "n_containers_paused", "n_images"] #fieldpass = ["numContainers", "numContainersRunning", "numContainersStopped", "numContainersPaused", "numContainerImages"] # taginclude = ["nodeName"] +[[inputs.prometheus]] + name_prefix="container.azm.ms/" + ## An array of urls to scrape metrics from. + urls = ["http://$NODE_IP:10255/metrics"] + fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"] + + metric_version = 2 + url_tag = "scrapeUrl" + + ## An array of Kubernetes services to scrape metrics from. + # kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"] + + ## Kubernetes config file to create client from. 
+ # kube_config = "/path/to/kubernetes.config" + + ## Scrape Kubernetes pods for the following prometheus annotations: + ## - prometheus.io/scrape: Enable scraping for this pod + ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to + ## set this to `https` & most likely set the tls config. + ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation. + ## - prometheus.io/port: If port is not 9102 use this annotation + # monitor_kubernetes_pods = true + + ## Use bearer token for authorization. ('bearer_token' takes priority) + bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token" + ## OR + # bearer_token_string = "abc_123" + + ## Specify timeout duration for slower prometheus clients (default is 3s) + response_timeout = "15s" + + ## Optional TLS Config + tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + #tls_cert = /path/to/certfile + # tls_key = /path/to/keyfile + ## Use TLS but skip chain & host verification + insecure_skip_verify = true + #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"] + [[inputs.exec]] ## Commands array interval = "15m" @@ -516,4 +586,13 @@ ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md data_format = "influx" tagexclude = ["hostName"] + [inputs.exec.tags] + AgentVersion = "$AGENT_VERSION" + AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" + ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME" + Region = "$TELEMETRY_AKS_REGION" + ClusterName = "$TELEMETRY_CLUSTER_NAME" + ClusterType = "$TELEMETRY_CLUSTER_TYPE" + Computer = "placeholder_hostname" + ControllerType = "$CONTROLLER_TYPE" diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 996c7501a..234785b64 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -97,8 +97,10 @@ MAINTAINER: 
'Microsoft Corporation' /opt/td-agent-bit/bin/out_oms.so; intermediate/${{BUILD_CONFIGURATION}}/out_oms.so; 755; root; root /etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root +/etc/opt/microsoft/docker-cimprov/td-agent-bit-rs.conf; installer/conf/td-agent-bit-rs.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/telegraf.conf; installer/conf/telegraf.conf; 644; root; root +/etc/opt/microsoft/docker-cimprov/telegraf-rs.conf; installer/conf/telegraf-rs.conf; 644; root; root /opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh; installer/scripts/TelegrafTCPErrorTelemetry.sh; 755; root; root %Links diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 269d16111..166f427be 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -47,7 +47,7 @@ const TelegrafTagClusterID = "clusterId" // ContainerLogPluginConfFilePath --> config file path for container log plugin const DaemonSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" -const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms-rs.conf" +const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" // IPName for Container Log const IPName = "Containers" @@ -680,6 +680,10 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { PluginConfiguration = pluginConfig CreateHTTPClient() - go updateKubeSystemContainerIDs() - go updateContainerImageNameMaps() + if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "daemonset") == 0 { + go updateKubeSystemContainerIDs() + go updateContainerImageNameMaps() + } else { + Log("Running in replicaset. 
Disabling kube-system container cache collection & updates \n") + } } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index f507e4ab9..1e3d73fcd 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -66,9 +66,7 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { SendEvent(eventNameContainerLogInit, make(map[string]string)) for ; true; <-ContainerLogTelemetryTicker.C { - SendEvent(eventNameDaemonSetHeartbeat, make(map[string]string)) elapsed := time.Since(start) - ContainerLogTelemetryMutex.Lock() flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 logRate := FlushedRecordsCount / float64(elapsed/time.Second) @@ -84,13 +82,17 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { AgentLogProcessingMaxLatencyMsContainer = "" ContainerLogTelemetryMutex.Unlock() - flushRateMetric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) - TelemetryClient.Track(flushRateMetric) - logRateMetric := appinsights.NewMetricTelemetry(metricNameAvgLogGenerationRate, logRate) - TelemetryClient.Track(logRateMetric) - logLatencyMetric := appinsights.NewMetricTelemetry(metricNameAgentLogProcessingMaxLatencyMs, logLatencyMs) - logLatencyMetric.Properties["Container"] = logLatencyMsContainer - TelemetryClient.Track(logLatencyMetric) + if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "daemonset") == 0 { + SendEvent(eventNameDaemonSetHeartbeat, make(map[string]string)) + flushRateMetric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) + TelemetryClient.Track(flushRateMetric) + logRateMetric := appinsights.NewMetricTelemetry(metricNameAvgLogGenerationRate, logRate) + TelemetryClient.Track(logRateMetric) + logLatencyMetric := appinsights.NewMetricTelemetry(metricNameAgentLogProcessingMaxLatencyMs, logLatencyMs) + logLatencyMetric.Properties["Container"] = logLatencyMsContainer + 
TelemetryClient.Track(logLatencyMetric) + } + TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameNumberofTelegrafMetricsSentSuccessfully, telegrafMetricsSentCount)) TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameNumberofSendErrorsTelegrafMetrics, telegrafMetricsSendErrorCount)) start = time.Now() From afc66b7dcb2a3743bfb507f5a2cc8241d6b51e2b Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 6 Jun 2019 16:32:13 -0700 Subject: [PATCH 091/160] merge config/settings to ci_feature (#221) * updating fluentbit to use LOG_TAIL_PATH * changes * log exclusion pattern * changes * removing comments * adding enviornment varibale collection/disable * disable env var for cluster variable change * changes * toml parser changes * adding directory tomlrb * changes for container inventory * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * Telemetry for config overrides * add schema version telemetry * reduce the number of api calls for namespace filtering add more telemetry for config processing move liveness probe & parser to this repo * optimize for default kube-system namespace log collection exclusion --- installer/conf/out_oms.conf | 3 +- installer/conf/td-agent-bit.conf | 9 +- installer/datafiles/base_container.data | 13 + installer/scripts/livenessprobe.sh | 20 + installer/scripts/tomlparser.rb | 152 +++++ source/code/go/src/plugins/oms.go | 233 +++++--- source/code/go/src/plugins/out_oms.go | 2 +- .../code/plugin/CAdvisorMetricsAPIClient.rb | 17 + source/code/plugin/in_containerinventory.rb | 51 +- source/code/plugin/in_kube_podinventory.rb | 49 +- source/code/toml-parser/tomlrb.rb | 44 ++ .../toml-parser/tomlrb/generated_parser.rb | 542 ++++++++++++++++++ source/code/toml-parser/tomlrb/handler.rb | 73 +++ source/code/toml-parser/tomlrb/parser.rb | 18 + 
source/code/toml-parser/tomlrb/parser.y | 104 ++++ source/code/toml-parser/tomlrb/scanner.rb | 54 ++ .../code/toml-parser/tomlrb/string_utils.rb | 33 ++ source/code/toml-parser/tomlrb/version.rb | 3 + 18 files changed, 1288 insertions(+), 132 deletions(-) create mode 100644 installer/scripts/livenessprobe.sh create mode 100644 installer/scripts/tomlparser.rb create mode 100644 source/code/toml-parser/tomlrb.rb create mode 100644 source/code/toml-parser/tomlrb/generated_parser.rb create mode 100644 source/code/toml-parser/tomlrb/handler.rb create mode 100644 source/code/toml-parser/tomlrb/parser.rb create mode 100644 source/code/toml-parser/tomlrb/parser.y create mode 100644 source/code/toml-parser/tomlrb/scanner.rb create mode 100644 source/code/toml-parser/tomlrb/string_utils.rb create mode 100644 source/code/toml-parser/tomlrb/version.rb diff --git a/installer/conf/out_oms.conf b/installer/conf/out_oms.conf index d4b797757..d6679f982 100644 --- a/installer/conf/out_oms.conf +++ b/installer/conf/out_oms.conf @@ -3,4 +3,5 @@ cert_file_path=/etc/opt/microsoft/omsagent/certs/oms.crt key_file_path=/etc/opt/microsoft/omsagent/certs/oms.key container_host_file_path=/var/opt/microsoft/docker-cimprov/state/containerhostname container_inventory_refresh_interval=60 -kube_system_containers_refresh_interval=300 +#kube_system_containers_refresh_interval=300 +exclude_namespaces_containers_refresh_interval=60 diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 50967e61f..d1a045063 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -7,13 +7,14 @@ [INPUT] Name tail Tag oms.container.log.* - Path /var/log/containers/*.log + Path ${AZMON_LOG_TAIL_PATH} DB /var/log/omsagent-fblogs.db Parser docker Mem_Buf_Limit 5m Path_Key filepath Skip_Long_Lines On Ignore_Older 5m + Exclude_Path ${AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH} [INPUT] Name tail @@ -43,6 +44,12 @@ Chunk_Size 32 Buffer_Size 64 +# Enable/Disable stdout 
stderr logs using configmap +[FILTER] + Name grep + Match oms.container.log.* + Exclude stream ${AZMON_LOG_EXCLUSION_REGEX_PATTERN} + [OUTPUT] Name oms EnableTelemetry true diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 234785b64..fd070426c 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -95,6 +95,15 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/lib/application_insights/channel/event.rb; source/code/plugin/lib/application_insights/channel/event.rb; 644; root; root /opt/microsoft/omsagent/plugin/lib/application_insights.rb; source/code/plugin/lib/application_insights.rb; 644; root; root +/opt/tomlrb.rb; source/code/toml-parser/tomlrb.rb; 644; root; root +/opt/tomlrb/generated_parser.rb; source/code/toml-parser/tomlrb/generated_parser.rb; 644; root; root +/opt/tomlrb/handler.rb; source/code/toml-parser/tomlrb/handler.rb; 644; root; root +/opt/tomlrb/parser.rb; source/code/toml-parser/tomlrb/parser.rb; 644; root; root +/opt/tomlrb/parser.y; source/code/toml-parser/tomlrb/parser.y; 644; root; root +/opt/tomlrb/scanner.rb; source/code/toml-parser/tomlrb/scanner.rb; 644; root; root +/opt/tomlrb/string_utils.rb; source/code/toml-parser/tomlrb/string_utils.rb; 644; root; root +/opt/tomlrb/version.rb; source/code/toml-parser/tomlrb/version.rb; 644; root; root + /opt/td-agent-bit/bin/out_oms.so; intermediate/${{BUILD_CONFIGURATION}}/out_oms.so; 755; root; root /etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/td-agent-bit-rs.conf; installer/conf/td-agent-bit-rs.conf; 644; root; root @@ -102,6 +111,8 @@ MAINTAINER: 'Microsoft Corporation' /etc/opt/microsoft/docker-cimprov/telegraf.conf; installer/conf/telegraf.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/telegraf-rs.conf; installer/conf/telegraf-rs.conf; 644; root; root 
/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh; installer/scripts/TelegrafTCPErrorTelemetry.sh; 755; root; root +/opt/livenessprobe.sh; installer/scripts/livenessprobe.sh; 755; root; root +/opt/tomlparser.rb; installer/scripts/tomlparser.rb 755; root; root %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root @@ -149,6 +160,8 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts; 755; root; root; sysdir /opt/microsoft/omsagent/plugin/lib/application_insights/rack; 755; root; root; sysdir +/opt/tomlrb; 755; root; root; sysdir + %Dependencies %Postinstall_10 diff --git a/installer/scripts/livenessprobe.sh b/installer/scripts/livenessprobe.sh new file mode 100644 index 000000000..cb7e8a0ba --- /dev/null +++ b/installer/scripts/livenessprobe.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +#test to exit non zero value +(ps -ef | grep omsagent | grep -v "grep") && (ps -ef | grep td-agent-bit | grep -v "grep") +if [ $? -eq 0 ] && [ ! 
-s "inotifyoutput.txt" ] +then + # inotifyoutput file is empty and the grep commands for omsagent and td-agent-bit succeeded + exit 0 +else + if [ -s "inotifyoutput.txt" ] + then + # inotifyoutput file has data(config map was applied) + echo "config changed" > /dev/termination-log + exit 1 + else + # grep commands for omsagent or td-agent-bit failed + echo "agent or fluentbit not running" > /dev/termination-log + exit 1 + fi +fi \ No newline at end of file diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb new file mode 100644 index 000000000..52516641a --- /dev/null +++ b/installer/scripts/tomlparser.rb @@ -0,0 +1,152 @@ +#!/usr/local/bin/ruby + +require_relative "tomlrb" + +@configMapMountPath = "/etc/config/settings/log-data-collection-settings" +@configVersion = "" +@configSchemaVersion = "" +# Setting default values which will be used in case they are not set in the configmap or if configmap doesnt exist +@collectStdoutLogs = true +@stdoutExcludeNamespaces = "kube-system" +@collectStderrLogs = true +@stderrExcludeNamespaces = "kube-system" +@collectClusterEnvVariables = true +@logTailPath = "/var/log/containers/*.log" +@logExclusionRegexPattern = "(^((?!stdout|stderr).)*$)" +@excludePath = "*.csv2" + +# Use parser to parse the configmap toml file to a ruby structure +def parseConfigMap + begin + # Check to see if config map is created + if (File.file?(@configMapMountPath)) + puts "config::configmap container-azm-ms-agentconfig for settings mounted, parsing values" + parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true) + puts "config::Successfully parsed mounted config map" + return parsedConfig + else + puts "config::configmap container-azm-ms-agentconfig for settings not mounted, using defaults" + @excludePath = "*_kube-system_*.log" + return nil + end + rescue => errorStr + puts "config::error::Exception while parsing toml config file: #{errorStr}, using defaults" + @excludePath = "*_kube-system_*.log" + 
return nil + end +end + +# Use the ruby structure created after config parsing to set the right values to be used as environment variables +def populateSettingValuesFromConfigMap(parsedConfig) + if !parsedConfig.nil? && !parsedConfig[:log_collection_settings].nil? + #Get stdout log config settings + begin + if !parsedConfig[:log_collection_settings][:stdout].nil? && !parsedConfig[:log_collection_settings][:stdout][:enabled].nil? + @collectStdoutLogs = parsedConfig[:log_collection_settings][:stdout][:enabled] + puts "config::Using config map setting for stdout log collection" + stdoutNamespaces = parsedConfig[:log_collection_settings][:stdout][:exclude_namespaces] + + #Clearing it, so that it can be overridden with the config map settings + @stdoutExcludeNamespaces.clear + if @collectStdoutLogs && !stdoutNamespaces.nil? + if stdoutNamespaces.kind_of?(Array) + # Checking only for the first element to be string because toml enforces the arrays to contain elements of same type + if stdoutNamespaces.length > 0 && stdoutNamespaces[0].kind_of?(String) + #Empty the array to use the values from configmap + stdoutNamespaces.each do |namespace| + if @stdoutExcludeNamespaces.empty? + # To not append , for the first element + @stdoutExcludeNamespaces.concat(namespace) + else + @stdoutExcludeNamespaces.concat("," + namespace) + end + end + puts "config::Using config map setting for stdout log collection to exclude namespace" + end + end + end + end + rescue => errorStr + puts "config::error::Exception while reading config settings for stdout log collection - #{errorStr}, using defaults" + end + + #Get stderr log config settings + begin + if !parsedConfig[:log_collection_settings][:stderr].nil? && !parsedConfig[:log_collection_settings][:stderr][:enabled].nil? 
+ @collectStderrLogs = parsedConfig[:log_collection_settings][:stderr][:enabled] + puts "config::Using config map setting for stderr log collection" + stderrNamespaces = parsedConfig[:log_collection_settings][:stderr][:exclude_namespaces] + + #Clearing it, so that it can be overridden with the config map settings + @stderrExcludeNamespaces.clear + if @collectStderrLogs && !stderrNamespaces.nil? + if stderrNamespaces.kind_of?(Array) + # Checking only for the first element to be string because toml enforces the arrays to contain elements of same type + if stderrNamespaces.length > 0 && stderrNamespaces[0].kind_of?(String) + stderrNamespaces.each do |namespace| + if @stderrExcludeNamespaces.empty? + # To not append , for the first element + @stderrExcludeNamespaces.concat(namespace) + else + @stderrExcludeNamespaces.concat("," + namespace) + end + end + puts "config::Using config map setting for stderr log collection to exclude namespace" + end + end + end + end + rescue => errorStr + puts "config::error:Exception while reading config settings for stderr log collection - #{errorStr}, using defaults" + end + + #Get environment variables log config settings + begin + if !parsedConfig[:log_collection_settings][:env_var].nil? && !parsedConfig[:log_collection_settings][:env_var][:enabled].nil? + @collectClusterEnvVariables = parsedConfig[:log_collection_settings][:env_var][:enabled] + puts "config::Using config map setting for cluster level environment variable collection" + end + rescue => errorStr + puts "config::error::Exception while reading config settings for cluster level environment variable collection - #{errorStr}, using defaults" + end + end +end + + @configSchemaVersion = ENV['AZMON_AGENT_CFG_SCHEMA_VERSION'] + if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @@configSchemaVersion.strip.casecmp('v1') == 0 #note v1 is the only supported schema version , so hardcoding it + configMapSettings = parseConfigMap + if !configMapSettings.nil? 
+ populateSettingValuesFromConfigMap(configMapSettings) + end + else + puts "config::unsupported config schema version - #{@configSchemaVersion}, using defaults" + @excludePath = "*_kube-system_*.log" + end + + # Write the settings to file, so that they can be set as environment variables + file = File.open("config_env_var", "w") + + if !file.nil? + # This will be used in td-agent-bit.conf file to filter out logs + if (!@collectStdoutLogs && !@collectStderrLogs) + #Stop log tailing completely + @logTailPath = "/opt/nolog*.log" + @logExclusionRegexPattern = "stdout|stderr" + elsif !@collectStdoutLogs + @logExclusionRegexPattern = "stdout" + elsif !@collectStderrLogs + @logExclusionRegexPattern = "stderr" + end + file.write("export AZMON_COLLECT_STDOUT_LOGS=#{@collectStdoutLogs}\n") + file.write("export AZMON_LOG_TAIL_PATH=#{@logTailPath}\n") + file.write("export AZMON_LOG_EXCLUSION_REGEX_PATTERN=\"#{@logExclusionRegexPattern}\"\n") + file.write("export AZMON_STDOUT_EXCLUDED_NAMESPACES=#{@stdoutExcludeNamespaces}\n") + file.write("export AZMON_COLLECT_STDERR_LOGS=#{@collectStderrLogs}\n") + file.write("export AZMON_STDERR_EXCLUDED_NAMESPACES=#{@stderrExcludeNamespaces}\n") + file.write("export AZMON_CLUSTER_COLLECT_ENV_VAR=#{@collectClusterEnvVariables}\n") + file.write("export AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH=#{@excludePath}\n") + # Close file after writing all environment variables + file.close + else + puts "config::error::Exception while opening file for writing config environment variables" + end diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 166f427be..0ffaaff63 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -17,6 +17,7 @@ import ( lumberjack "gopkg.in/natefinch/lumberjack.v2" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" @@ -36,12 +37,15 @@ const ResourceNameEnv = "ACS_RESOURCE_NAME" // 
Origin prefix for telegraf Metrics (used as prefix for origin field & prefix for azure monitor specific tags) const TelegrafMetricOriginPrefix = "container.azm.ms" + // Origin suffix for telegraf Metrics (used as suffix for origin field) const TelegrafMetricOriginSuffix = "telegraf" + // Namespace prefix for telegraf Metrics (used as prefix for Namespace field) //const TelegrafMetricNamespacePrefix = "plugin" // clusterName tag const TelegrafTagClusterName = "clusterName" + // clusterId tag const TelegrafTagClusterID = "clusterId" @@ -52,7 +56,9 @@ const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimp // IPName for Container Log const IPName = "Containers" const defaultContainerInventoryRefreshInterval = 60 -const defaultKubeSystemContainersRefreshInterval = 300 + +// const defaultKubeSystemContainersRefreshInterval = 300 +const defaultExcludeNamespacesContainersRefreshInterval = 300 var ( // PluginConfiguration the plugins configuration @@ -65,11 +71,11 @@ var ( Computer string // WorkspaceID log analytics workspace id WorkspaceID string - // ResourceID for resource-centric log analytics data + // ResourceID for resource-centric log analytics data ResourceID string // Resource-centric flag (will be true if we determine if above RseourceID is non-empty - default is false) ResourceCentric bool - //ResourceName + //ResourceName ResourceName string ) @@ -78,8 +84,10 @@ var ( ImageIDMap map[string]string // NameIDMap caches the container it to Name mapping NameIDMap map[string]string - // IgnoreIDSet set of container Ids of kube-system pods - IgnoreIDSet map[string]bool + // StdoutIgnoreIDSet set of container Ids of excluded namespaces for stdout logs + StdoutIgnoreIDSet map[string]bool + // StderrIgnoreIDSet set of container Ids of excluded namespaces for stderr logs + StderrIgnoreIDSet map[string]bool // DataUpdateMutex read and write mutex access to the container id set DataUpdateMutex = &sync.Mutex{} // ContainerLogTelemetryMutex read and 
write mutex access to the Container Log Telemetry @@ -89,8 +97,8 @@ var ( ) var ( - // KubeSystemContainersRefreshTicker updates the kube-system containers - KubeSystemContainersRefreshTicker *time.Ticker + // ExcludeNamespacesContainersRefreshTicker updates the excludenamespace containers + ExcludeNamespacesContainersRefreshTicker *time.Ticker // ContainerImageNameRefreshTicker updates the container image and names periodically ContainerImageNameRefreshTicker *time.Ticker ) @@ -118,21 +126,21 @@ type DataItem struct { // telegraf metric DataItem represents the object corresponding to the json that is sent by fluentbit tail plugin type laTelegrafMetric struct { // 'golden' fields - Origin string `json:"Origin"` - Namespace string `json:"Namespace"` - Name string `json:"Name"` - Value float64 `json:"Value"` - Tags string `json:"Tags"` + Origin string `json:"Origin"` + Namespace string `json:"Namespace"` + Name string `json:"Name"` + Value float64 `json:"Value"` + Tags string `json:"Tags"` // specific required fields for LA - CollectionTime string `json:"CollectionTime"` //mapped to TimeGenerated - Computer string `json:"Computer"` + CollectionTime string `json:"CollectionTime"` //mapped to TimeGenerated + Computer string `json:"Computer"` } // ContainerLogBlob represents the object corresponding to the payload that is sent to the ODS end point type InsightsMetricsBlob struct { - DataType string `json:"DataType"` - IPName string `json:"IPName"` - DataItems []laTelegrafMetric `json:"DataItems"` + DataType string `json:"DataType"` + IPName string `json:"IPName"` + DataItems []laTelegrafMetric `json:"DataItems"` } // ContainerLogBlob represents the object corresponding to the payload that is sent to the ODS end point @@ -187,7 +195,7 @@ func updateContainerImageNameMaps() { listOptions := metav1.ListOptions{} listOptions.FieldSelector = fmt.Sprintf("spec.nodeName=%s", Computer) pods, err := ClientSet.CoreV1().Pods("").List(listOptions) - + if err != nil { message := 
fmt.Sprintf("Error getting pods %s\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) Log(message) @@ -217,36 +225,71 @@ func updateContainerImageNameMaps() { } } -func updateKubeSystemContainerIDs() { - for ; true; <-KubeSystemContainersRefreshTicker.C { - if strings.Compare(os.Getenv("DISABLE_KUBE_SYSTEM_LOG_COLLECTION"), "true") != 0 { - Log("Kube System Log Collection is ENABLED.") - return +func excludeContainerIDPopulator(excludeNamespaceList []string, logStream string) { + var podsToExclude []*corev1.PodList + listOptions := metav1.ListOptions{} + listOptions.FieldSelector = fmt.Sprintf("spec.nodeName=%s", Computer) + + pods, err := ClientSet.CoreV1().Pods("").List(listOptions) + if err != nil { + message := fmt.Sprintf("Error getting pods %s - for node %s . All %s logs might be collected", err.Error(), Computer, logStream) + SendException(message) + Log(message) + return + } + + podsToExclude = append(podsToExclude, pods) + ignoreNamespaceSet := make(map[string]bool) + for _, ns := range excludeNamespaceList { + ignoreNamespaceSet[strings.TrimSpace(ns)] = true + } + + _ignoreIDSet := make(map[string]bool) + for _, pod := range podsToExclude { + for _, pod := range pod.Items { + _, ok := ignoreNamespaceSet[pod.Namespace] + if ok { + Log ("Adding pod %s in namespace %s to %s exclusion list", pod.Name, pod.Namespace, logStream) + for _, status := range pod.Status.ContainerStatuses { + lastSlashIndex := strings.LastIndex(status.ContainerID, "/") + _ignoreIDSet[status.ContainerID[lastSlashIndex+1:len(status.ContainerID)]] = true + } + } } + } - Log("Kube System Log Collection is DISABLED. 
Collecting containerIds to drop their records") + Log("Locking to update excluded container IDs for %s", logStream) + DataUpdateMutex.Lock() + if strings.Compare(logStream, "stdout") == 0 { + StdoutIgnoreIDSet = _ignoreIDSet + } else { + StderrIgnoreIDSet = _ignoreIDSet + } + DataUpdateMutex.Unlock() + Log("Unlocking after updating excluded container IDs for %s", logStream) +} - pods, err := ClientSet.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) - if err != nil { - message := fmt.Sprintf("Error getting pods %s\nIt is ok to log here and continue. Kube-system logs will be collected", err.Error()) - SendException(message) - Log(message) - continue +func updateExcludeStdoutContainerIDs() { + for ; true; <-ExcludeNamespacesContainersRefreshTicker.C { + collectStdoutLogs := os.Getenv("AZMON_COLLECT_STDOUT_LOGS") + var stdoutNSExcludeList []string + excludeList := os.Getenv("AZMON_STDOUT_EXCLUDED_NAMESPACES") + if (strings.Compare(collectStdoutLogs, "true") == 0) && (len(excludeList) > 0) { + stdoutNSExcludeList = strings.Split(excludeList, ",") + excludeContainerIDPopulator(stdoutNSExcludeList, "stdout") } + } +} - _ignoreIDSet := make(map[string]bool) - for _, pod := range pods.Items { - for _, status := range pod.Status.ContainerStatuses { - lastSlashIndex := strings.LastIndex(status.ContainerID, "/") - _ignoreIDSet[status.ContainerID[lastSlashIndex+1:len(status.ContainerID)]] = true - } +func updateExcludeStderrContainerIDs() { + for ; true; <-ExcludeNamespacesContainersRefreshTicker.C { + collectStderrLogs := os.Getenv("AZMON_COLLECT_STDERR_LOGS") + var stderrNSExcludeList []string + excludeList := os.Getenv("AZMON_STDERR_EXCLUDED_NAMESPACES") + if (strings.Compare(collectStderrLogs, "true") == 0) && (len(excludeList) > 0) { + stderrNSExcludeList = strings.Split(excludeList, ",") + excludeContainerIDPopulator(stderrNSExcludeList, "stderr") } - - Log("Locking to update kube-system container IDs") - DataUpdateMutex.Lock() - IgnoreIDSet = _ignoreIDSet - 
DataUpdateMutex.Unlock() - Log("Unlocking after updating kube-system container IDs") } } @@ -265,24 +308,24 @@ func convert(in interface{}) (float64, bool) { } return float64(0), true default: - Log ("returning 0 for %v ", in) + Log("returning 0 for %v ", in) return float64(0), false } } //Translates telegraf time series to one or more Azure loganalytics metric(s) func translateTelegrafMetrics(m map[interface{}]interface{}) ([]*laTelegrafMetric, error) { - + var laMetrics []*laTelegrafMetric var tags map[interface{}]interface{} tags = m["tags"].(map[interface{}]interface{}) tagMap := make(map[string]string) for k, v := range tags { - key := fmt.Sprintf("%s",k) + key := fmt.Sprintf("%s", k) if key == "" { continue } - tagMap[key] = fmt.Sprintf("%s",v) + tagMap[key] = fmt.Sprintf("%s", v) } //add azure monitor tags @@ -305,14 +348,14 @@ func translateTelegrafMetrics(m map[interface{}]interface{}) ([]*laTelegrafMetri } i := m["timestamp"].(uint64) laMetric := laTelegrafMetric{ - Origin: fmt.Sprintf("%s/%s", TelegrafMetricOriginPrefix, TelegrafMetricOriginSuffix), + Origin: fmt.Sprintf("%s/%s", TelegrafMetricOriginPrefix, TelegrafMetricOriginSuffix), //Namespace: fmt.Sprintf("%s/%s", TelegrafMetricNamespacePrefix, m["name"]), - Namespace: fmt.Sprintf("%s", m["name"]), - Name: fmt.Sprintf("%s",k), - Value: fv, - Tags: fmt.Sprintf("%s", tagJson), - CollectionTime: time.Unix(int64(i),0).Format(time.RFC3339), - Computer: Computer, //this is the collection agent's computer name, not necessarily to which computer the metric applies to + Namespace: fmt.Sprintf("%s", m["name"]), + Name: fmt.Sprintf("%s", k), + Value: fv, + Tags: fmt.Sprintf("%s", tagJson), + CollectionTime: time.Unix(int64(i), 0).Format(time.RFC3339), + Computer: Computer, //this is the collection agent's computer name, not necessarily to which computer the metric applies to } //Log ("la metric:%v", laMetric) @@ -325,7 +368,7 @@ func translateTelegrafMetrics(m map[interface{}]interface{}) ([]*laTelegrafMetri 
func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int { var laMetrics []*laTelegrafMetric - if ( (telegrafRecords== nil) || ! (len(telegrafRecords) > 0) ) { + if (telegrafRecords == nil) || !(len(telegrafRecords) > 0) { Log("PostTelegrafMetricsToLA::Error:no timeseries to derive") return output.FLB_OK } @@ -340,7 +383,7 @@ func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int laMetrics = append(laMetrics, translatedMetrics...) } - if ( (laMetrics == nil) || !(len(laMetrics) > 0) ) { + if (laMetrics == nil) || !(len(laMetrics) > 0) { Log("PostTelegrafMetricsToLA::Info:no metrics derived from timeseries data") return output.FLB_OK } else { @@ -351,7 +394,7 @@ func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int var metrics []laTelegrafMetric var i int - for i=0; i < len(laMetrics); i++ { + for i = 0; i < len(laMetrics); i++ { metrics = append(metrics, *laMetrics[i]) } @@ -368,7 +411,7 @@ func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int SendException(message) return output.FLB_OK } - + //Post metrics data to LA req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(jsonBytes)) @@ -376,7 +419,7 @@ func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int //set headers req.Header.Set("x-ms-date", time.Now().Format(time.RFC3339)) - + //expensive to do string len for every request, so use a flag if ResourceCentric == true { req.Header.Set("x-ms-AzureResourceId", ResourceID) @@ -420,20 +463,23 @@ func UpdateNumTelegrafMetricsSentTelemetry(numMetricsSent int, numSendErrors int // PostDataHelper sends data to the OMS endpoint func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { - start := time.Now() var dataItems []DataItem var maxLatency float64 var maxLatencyContainer string - ignoreIDSet := make(map[string]bool) + stdoutIgnoreIDSet := make(map[string]bool) + stderrIgnoreIDSet := make(map[string]bool) imageIDMap 
:= make(map[string]string) nameIDMap := make(map[string]string) DataUpdateMutex.Lock() - for k, v := range IgnoreIDSet { - ignoreIDSet[k] = v + for k, v := range StdoutIgnoreIDSet { + stdoutIgnoreIDSet[k] = v + } + for k, v := range StderrIgnoreIDSet { + stderrIgnoreIDSet[k] = v } for k, v := range ImageIDMap { imageIDMap[k] = v @@ -444,28 +490,34 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { DataUpdateMutex.Unlock() for _, record := range tailPluginRecords { - containerID := GetContainerIDFromFilePath(ToString(record["filepath"])) + logEntrySource := ToString(record["stream"]) - if containerID == "" || containsKey(ignoreIDSet, containerID) { - continue + if strings.EqualFold(logEntrySource, "stdout") { + if containerID == "" || containsKey(stdoutIgnoreIDSet, containerID) { + continue + } + } else if strings.EqualFold(logEntrySource, "stderr") { + if containerID == "" || containsKey(stderrIgnoreIDSet, containerID) { + continue + } } stringMap := make(map[string]string) stringMap["LogEntry"] = ToString(record["log"]) - stringMap["LogEntrySource"] = ToString(record["stream"]) + stringMap["LogEntrySource"] = logEntrySource stringMap["LogEntryTimeStamp"] = ToString(record["time"]) stringMap["SourceSystem"] = "Containers" stringMap["Id"] = containerID if val, ok := imageIDMap[containerID]; ok { stringMap["Image"] = val - } + } if val, ok := nameIDMap[containerID]; ok { stringMap["Name"] = val - } + } dataItem := DataItem{ ID: stringMap["Id"], @@ -534,7 +586,6 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { } defer resp.Body.Close() - numRecords := len(dataItems) Log("Successfully flushed %d records in %s", numRecords, elapsed) ContainerLogTelemetryMutex.Lock() @@ -573,7 +624,8 @@ func GetContainerIDFromFilePath(filepath string) string { // InitializePlugin reads and populates plugin configuration func InitializePlugin(pluginConfPath string, agentVersion string) { - IgnoreIDSet = make(map[string]bool) + 
StdoutIgnoreIDSet = make(map[string]bool) + StderrIgnoreIDSet = make(map[string]bool) ImageIDMap = make(map[string]string) NameIDMap = make(map[string]string) @@ -606,19 +658,19 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { splitted := strings.Split(ResourceID, "/") ResourceName = splitted[len(splitted)-1] Log("ResourceCentric: True") - Log("ResourceID=%s",ResourceID) - Log("ResourceName=%s",ResourceID) - } - + Log("ResourceID=%s", ResourceID) + Log("ResourceName=%s", ResourceID) + } + if ResourceCentric == false { //AKS-Engine/hybrid scenario ResourceName = os.Getenv(ResourceNameEnv) ResourceID = ResourceName Log("ResourceCentric: False") - Log("ResourceID=%s",ResourceID) - Log("ResourceName=%s",ResourceName) + Log("ResourceID=%s", ResourceID) + Log("ResourceName=%s", ResourceName) } - + // Initialize image,name map refresh ticker containerInventoryRefreshInterval, err := strconv.Atoi(pluginConfig["container_inventory_refresh_interval"]) if err != nil { @@ -631,17 +683,16 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { Log("containerInventoryRefreshInterval = %d \n", containerInventoryRefreshInterval) ContainerImageNameRefreshTicker = time.NewTicker(time.Second * time.Duration(containerInventoryRefreshInterval)) - // Initialize Kube System Refresh Ticker - kubeSystemContainersRefreshInterval, err := strconv.Atoi(pluginConfig["kube_system_containers_refresh_interval"]) + excludeNamespacesContainersRefreshInterval, err := strconv.Atoi(pluginConfig["exclude_namespaces_containers_refresh_interval"]) if err != nil { - message := fmt.Sprintf("Error Reading Kube System Container Ids Refresh Interval %s", err.Error()) + message := fmt.Sprintf("Error Reading exclude namespaces Container Ids Refresh Interval %s", err.Error()) Log(message) SendException(message) - Log("Using Default Refresh Interval of %d s\n", defaultKubeSystemContainersRefreshInterval) - kubeSystemContainersRefreshInterval = 
defaultKubeSystemContainersRefreshInterval + Log("Using Default Refresh Interval of %d s\n", defaultExcludeNamespacesContainersRefreshInterval) + excludeNamespacesContainersRefreshInterval = defaultExcludeNamespacesContainersRefreshInterval } - Log("kubeSystemContainersRefreshInterval = %d \n", kubeSystemContainersRefreshInterval) - KubeSystemContainersRefreshTicker = time.NewTicker(time.Second * time.Duration(kubeSystemContainersRefreshInterval)) + Log("excludeNamespacesContainersRefreshInterval = %d \n", excludeNamespacesContainersRefreshInterval) + ExcludeNamespacesContainersRefreshTicker = time.NewTicker(time.Second * time.Duration(excludeNamespacesContainersRefreshInterval)) // Populate Computer field containerHostName, err := ioutil.ReadFile(pluginConfig["container_host_file_path"]) @@ -680,10 +731,16 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { PluginConfiguration = pluginConfig CreateHTTPClient() - if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "daemonset") == 0 { - go updateKubeSystemContainerIDs() - go updateContainerImageNameMaps() - } else { + + if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "daemonset") == 0 { + defaultExcludePath := os.Getenv("AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH") + //further optimization for clusters with default settings. need this cache only when log collection config is overridden with custom config + if ( (strings.Compare(defaultExcludePath, "*_kube-system_*.log") != 0) ) { + go updateExcludeStdoutContainerIDs() + go updateExcludeStderrContainerIDs() + } + go updateContainerImageNameMaps() + } else { Log("Running in replicaset. 
Disabling kube-system container cache collection & updates \n") } } diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index dccc6774c..2ee6f994d 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -74,7 +74,7 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { // FLBPluginExit exits the plugin func FLBPluginExit() int { ContainerLogTelemetryTicker.Stop() - KubeSystemContainersRefreshTicker.Stop() + ExcludeNamespacesContainersRefreshTicker.Stop() ContainerImageNameRefreshTicker.Stop() return output.FLB_OK } diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index 35cf727cf..b842edb29 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -13,6 +13,13 @@ class CAdvisorMetricsAPIClient require_relative "KubernetesApiClient" require_relative "ApplicationInsightsUtility" + @configMapMountPath = "/etc/config/settings/log-data-collection-settings" + @clusterEnvVarCollectionEnabled = ENV["AZMON_CLUSTER_COLLECT_ENV_VAR"] + @clusterStdErrLogCollectionEnabled = ENV["AZMON_COLLECT_STDERR_LOGS"] + @clusterStdOutLogCollectionEnabled = ENV["AZMON_COLLECT_STDOUT_LOGS"] + @clusterLogTailExcludPath = ENV["AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH"] + @clusterLogTailPath = ENV["AZMON_LOG_TAIL_PATH"] + @clusterAgentSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt" @Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M # @@rxBytesLast = nil @@ -192,6 +199,16 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met telemetryProps["PodName"] = podName telemetryProps["ContainerName"] = containerName telemetryProps["Computer"] = hostName + #telemetry about custom log collections setting + if (File.file?(@configMapMountPath)) + 
telemetryProps["clustercustomsettings"] = true + telemetryProps["clusterenvvars"] = @clusterEnvVarCollectionEnabled + telemetryProps["clusterstderrlogs"] = @clusterStdErrLogCollectionEnabled + telemetryProps["clusterstdoutlogs"] = @clusterStdOutLogCollectionEnabled + telemetryProps["clusterlogtailexcludepath"] = @clusterLogTailExcludPath + telemetryProps["clusterLogTailPath"] = @clusterLogTailPath + telemetryProps["clusterAgentSchemaVersion"] = @clusterAgentSchemaVersion + end ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) end end diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb index 4d83278a9..05e5bc9ea 100644 --- a/source/code/plugin/in_containerinventory.rb +++ b/source/code/plugin/in_containerinventory.rb @@ -47,29 +47,34 @@ def shutdown end end - def obtainContainerConfig(instance, container) + def obtainContainerConfig(instance, container, clusterCollectEnvironmentVar) begin configValue = container["Config"] if !configValue.nil? instance["ContainerHostname"] = configValue["Hostname"] - - envValue = configValue["Env"] - envValueString = (envValue.nil?) ? "" : envValue.to_s - # Skip environment variable processing if it contains the flag AZMON_COLLECT_ENV=FALSE - if /AZMON_COLLECT_ENV=FALSE/i.match(envValueString) - envValueString = ["AZMON_COLLECT_ENV=FALSE"] - $log.warn("Environment Variable collection for container: #{container["Id"]} skipped because AZMON_COLLECT_ENV is set to false") - end - # Restricting the ENV string value to 200kb since the size of this string can go very high - if envValueString.length > 200000 - envValueStringTruncated = envValueString.slice(0..200000) - lastIndex = envValueStringTruncated.rindex("\", ") - if !lastIndex.nil? 
- envValueStringTruncated = envValueStringTruncated.slice(0..lastIndex) + "]" - end - instance["EnvironmentVar"] = envValueStringTruncated + # Check to see if the environment variable collection is disabled at the cluster level - This disables env variable collection for all containers. + if !clusterCollectEnvironmentVar.nil? && !clusterCollectEnvironmentVar.empty? && clusterCollectEnvironmentVar.casecmp("false") == 0 + instance["EnvironmentVar"] = ["AZMON_CLUSTER_COLLECT_ENV_VAR=FALSE"] else - instance["EnvironmentVar"] = envValueString + envValue = configValue["Env"] + envValueString = (envValue.nil?) ? "" : envValue.to_s + # Skip environment variable processing if it contains the flag AZMON_COLLECT_ENV=FALSE + # Check to see if the environment variable collection is disabled for this container. + if /AZMON_COLLECT_ENV=FALSE/i.match(envValueString) + envValueString = ["AZMON_COLLECT_ENV=FALSE"] + $log.warn("Environment Variable collection for container: #{container["Id"]} skipped because AZMON_COLLECT_ENV is set to false") + end + # Restricting the ENV string value to 200kb since the size of this string can go very high + if envValueString.length > 200000 + envValueStringTruncated = envValueString.slice(0..200000) + lastIndex = envValueStringTruncated.rindex("\", ") + if !lastIndex.nil? 
+ envValueStringTruncated = envValueStringTruncated.slice(0..lastIndex) + "]" + end + instance["EnvironmentVar"] = envValueStringTruncated + else + instance["EnvironmentVar"] = envValueString + end end cmdValue = configValue["Cmd"] @@ -151,7 +156,7 @@ def obtainContainerHostConfig(instance, container) end end - def inspectContainer(id, nameMap) + def inspectContainer(id, nameMap, clusterCollectEnvironmentVar) containerInstance = {} begin container = DockerApiClient.dockerInspectContainer(id) @@ -173,7 +178,7 @@ def inspectContainer(id, nameMap) containerInstance["ImageTag"] = repoImageTagArray[2] end end - obtainContainerConfig(containerInstance, container) + obtainContainerConfig(containerInstance, container, clusterCollectEnvironmentVar) obtainContainerState(containerInstance, container) obtainContainerHostConfig(containerInstance, container) end @@ -195,9 +200,13 @@ def enumerate if !containerIds.empty? eventStream = MultiEventStream.new nameMap = DockerApiClient.getImageIdMap + clusterCollectEnvironmentVar = ENV['AZMON_CLUSTER_COLLECT_ENV_VAR'] + if !clusterCollectEnvironmentVar.nil? && !clusterCollectEnvironmentVar.empty? 
&& clusterCollectEnvironmentVar.casecmp("false") == 0 + $log.warn("Environment Variable collection disabled for cluster") + end containerIds.each do |containerId| inspectedContainer = {} - inspectedContainer = inspectContainer(containerId, nameMap) + inspectedContainer = inspectContainer(containerId, nameMap, clusterCollectEnvironmentVar) inspectedContainer["Computer"] = hostname inspectedContainer["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated containerInventory.push inspectedContainer diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 65573673c..79490ba7d 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -133,32 +133,37 @@ def populateWindowsContainerInventoryRecord(container, record, containerEnvVaria end end - def getContainerEnvironmentVariables(pod) + def getContainerEnvironmentVariables(pod, clusterCollectEnvironmentVar) begin podSpec = pod["spec"] containerEnvHash = {} if !podSpec.nil? && !podSpec["containers"].nil? podSpec["containers"].each do |container| - envVarsArray = [] - containerEnvArray = container["env"] - # Parsing the environment variable array of hashes to a string value - # since that is format being sent by container inventory workflow in daemonset - # Keeping it in the same format because the workflow expects it in this format - # and the UX expects an array of string for environment variables - if !containerEnvArray.nil? && !containerEnvArray.empty? - containerEnvArray.each do |envVarHash| - envName = envVarHash["name"] - envValue = envVarHash["value"] - envArrayElement = envName + "=" + envValue - envVarsArray.push(envArrayElement) + if !clusterCollectEnvironmentVar.nil? && !clusterCollectEnvironmentVar.empty? 
&& clusterCollectEnvironmentVar.casecmp("false") == 0 + containerEnvHash[container["name"]] = ["AZMON_CLUSTER_COLLECT_ENV_VAR=FALSE"] + else + envVarsArray = [] + containerEnvArray = container["env"] + # Parsing the environment variable array of hashes to a string value + # since that is format being sent by container inventory workflow in daemonset + # Keeping it in the same format because the workflow expects it in this format + # and the UX expects an array of string for environment variables + if !containerEnvArray.nil? && !containerEnvArray.empty? + containerEnvArray.each do |envVarHash| + envName = envVarHash["name"] + envValue = envVarHash["value"] + envArrayElement = envName + "=" + envValue + envVarsArray.push(envArrayElement) + end end + # Skip environment variable processing if it contains the flag AZMON_COLLECT_ENV=FALSE + envValueString = envVarsArray.to_s + if /AZMON_COLLECT_ENV=FALSE/i.match(envValueString) + envValueString = ["AZMON_COLLECT_ENV=FALSE"] + $log.warn("Environment Variable collection for container: #{container["name"]} skipped because AZMON_COLLECT_ENV is set to false") + end + containerEnvHash[container["name"]] = envValueString end - # Skip environment variable processing if it contains the flag AZMON_COLLECT_ENV=FALSE - envValueString = envVarsArray.to_s - if /AZMON_COLLECT_ENV=FALSE/i.match(envValueString) - envValueString = ["AZMON_COLLECT_ENV=FALSE"] - end - containerEnvHash[container["name"]] = envValueString end end return containerEnvHash @@ -243,8 +248,12 @@ def parse_and_emit_records(podInventory, serviceList) # on windows nodes and parse environment variables for these containers if winNodes.length > 0 if (!record["Computer"].empty? && (winNodes.include? record["Computer"])) + clusterCollectEnvironmentVar = ENV["AZMON_CLUSTER_COLLECT_ENV_VAR"] + if !clusterCollectEnvironmentVar.nil? && !clusterCollectEnvironmentVar.empty? 
&& clusterCollectEnvironmentVar.casecmp("false") == 0 + $log.warn("WindowsContainerInventory: Environment Variable collection disabled for cluster") + end sendWindowsContainerInventoryRecord = true - containerEnvVariableHash = getContainerEnvironmentVariables(items) + containerEnvVariableHash = getContainerEnvironmentVariables(items, clusterCollectEnvironmentVar) end end diff --git a/source/code/toml-parser/tomlrb.rb b/source/code/toml-parser/tomlrb.rb new file mode 100644 index 000000000..c0eff9093 --- /dev/null +++ b/source/code/toml-parser/tomlrb.rb @@ -0,0 +1,44 @@ +require "time" +require "stringio" +require_relative "tomlrb/version" +require_relative "tomlrb/string_utils" +require_relative "tomlrb/scanner" +require_relative "tomlrb/parser" +require_relative "tomlrb/handler" + +module Tomlrb + class ParseError < StandardError; end + + # Parses a valid TOML string into its Ruby data structure + # + # @param string_or_io [String, StringIO] the content + # @param options [Hash] the options hash + # @option options [Boolean] :symbolize_keys (false) whether to return the keys as symbols or strings + # @return [Hash] the Ruby data structure represented by the input + def self.parse(string_or_io, **options) + io = string_or_io.is_a?(String) ? StringIO.new(string_or_io) : string_or_io + scanner = Scanner.new(io) + parser = Parser.new(scanner, options) + begin + handler = parser.parse + rescue Racc::ParseError => e + raise ParseError, e.message + end + + handler.output + end + + # Reads a file content and parses it into its Ruby data structure + # + # @param path [String] the path to the file + # @param options [Hash] the options hash + # @option options [Boolean] :symbolize_keys (false) whether to return the keys as symbols or strings + # @return [Hash] the Ruby data structure represented by the input + def self.load_file(path, **options) + # By default Ruby sets the external encoding of an IO object to the + # default external encoding. 
The default external encoding is set by + # locale encoding or the interpreter -E option. + tmp = File.read(path, :encoding => "utf-8") + Tomlrb.parse(tmp, options) + end +end diff --git a/source/code/toml-parser/tomlrb/generated_parser.rb b/source/code/toml-parser/tomlrb/generated_parser.rb new file mode 100644 index 000000000..ebf815e7d --- /dev/null +++ b/source/code/toml-parser/tomlrb/generated_parser.rb @@ -0,0 +1,542 @@ +# +# DO NOT MODIFY!!!! +# This file is automatically generated by Racc 1.4.14 +# from Racc grammer file "". +# + +require 'racc/parser.rb' +module Tomlrb + class GeneratedParser < Racc::Parser +##### State transition tables begin ### + +racc_action_table = [ + 2, 17, 11, 31, 12, 31, 13, 27, 14, 77, + 15, 16, 8, 78, 32, 10, 33, 29, 34, 29, + 57, 58, 59, 60, 56, 53, 52, 54, 55, 46, + 40, 41, 10, 57, 58, 59, 60, 56, 53, 52, + 54, 55, 46, 69, 70, 10, 57, 58, 59, 60, + 56, 53, 52, 54, 55, 46, 35, 36, 10, 57, + 58, 59, 60, 56, 53, 52, 54, 55, 46, 37, + 38, 10, 57, 58, 59, 60, 56, 53, 52, 54, + 55, 46, 43, 66, 10, 57, 58, 59, 60, 56, + 53, 52, 54, 55, 46, nil, nil, 10, 57, 58, + 59, 60, 56, 53, 52, 54, 55, 46, nil, nil, + 10, 57, 58, 59, 60, 56, 53, 52, 54, 55, + 46, 73, nil, 10, 57, 58, 59, 60, 56, 53, + 52, 54, 55, 46, 73, 21, 10, 22, nil, 23, + nil, 24, nil, 25, 26, 21, 19, 22, nil, 23, + nil, 24, nil, 25, 26, nil, 19 ] + +racc_action_check = [ + 1, 2, 1, 9, 1, 70, 1, 8, 1, 74, + 1, 1, 1, 74, 11, 1, 12, 9, 13, 70, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 20, 20, 32, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 42, 42, 33, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 14, 15, 34, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 16, + 19, 35, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 30, 40, 36, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, nil, nil, 37, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, nil, nil, + 43, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, nil, 45, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 7, 78, 7, nil, 7, + nil, 7, nil, 7, 7, 41, 7, 41, 
nil, 41, + nil, 41, nil, 41, 41, nil, 41 ] + +racc_action_pointer = [ + nil, 0, 1, nil, nil, nil, nil, 133, -5, 1, + nil, -4, -2, 0, 38, 39, 51, nil, nil, 57, + 17, nil, nil, nil, nil, nil, nil, nil, nil, nil, + 64, nil, 17, 30, 43, 56, 69, 82, nil, nil, + 70, 143, 27, 95, nil, 108, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + 3, nil, nil, nil, -4, nil, nil, nil, 121, nil ] + +racc_action_default = [ + -1, -56, -56, -2, -3, -4, -5, -56, -8, -56, + -22, -56, -56, -56, -56, -56, -56, 80, -6, -10, + -56, -15, -16, -17, -18, -19, -20, -7, -21, -23, + -56, -27, -46, -46, -46, -46, -46, -46, -9, -11, + -13, -56, -56, -46, -29, -46, -40, -41, -42, -43, + -44, -45, -47, -48, -49, -50, -51, -52, -53, -54, + -55, -30, -31, -32, -33, -34, -12, -14, -24, -25, + -56, -28, -35, -36, -56, -26, -37, -38, -46, -39 ] + +racc_goto_table = [ + 28, 18, 1, 72, 44, 61, 62, 63, 64, 65, + 3, 4, 5, 6, 7, 71, 39, 42, 68, 76, + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, 67, 79, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, 75 ] + +racc_goto_check = [ + 11, 7, 1, 18, 15, 15, 15, 15, 15, 15, + 2, 3, 4, 5, 6, 15, 9, 13, 14, 19, + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, 7, 18, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, 11 ] + +racc_goto_pointer = [ + nil, 2, 9, 10, 11, 12, 13, -6, nil, -4, + nil, -9, nil, -13, -24, -28, nil, nil, -42, -55, + nil, nil, nil ] + +racc_goto_default = [ + nil, nil, nil, nil, nil, 49, nil, nil, 20, nil, + 9, nil, 30, nil, nil, 74, 48, 45, nil, nil, + 47, 50, 51 ] + +racc_reduce_table = [ + 0, 0, :racc_error, + 0, 20, :_reduce_none, + 2, 20, :_reduce_none, + 1, 21, :_reduce_none, + 1, 21, :_reduce_none, + 1, 21, :_reduce_none, + 2, 22, :_reduce_none, + 2, 25, 
:_reduce_7, + 1, 25, :_reduce_8, + 2, 26, :_reduce_9, + 1, 26, :_reduce_10, + 2, 26, :_reduce_none, + 2, 28, :_reduce_12, + 1, 28, :_reduce_13, + 2, 28, :_reduce_none, + 1, 27, :_reduce_15, + 1, 27, :_reduce_16, + 1, 27, :_reduce_17, + 1, 27, :_reduce_18, + 1, 27, :_reduce_19, + 1, 27, :_reduce_20, + 2, 24, :_reduce_none, + 1, 29, :_reduce_22, + 1, 30, :_reduce_23, + 3, 30, :_reduce_none, + 1, 33, :_reduce_25, + 2, 33, :_reduce_none, + 1, 31, :_reduce_27, + 2, 32, :_reduce_none, + 3, 23, :_reduce_29, + 3, 23, :_reduce_30, + 3, 23, :_reduce_31, + 3, 23, :_reduce_32, + 3, 23, :_reduce_33, + 3, 23, :_reduce_34, + 2, 35, :_reduce_none, + 1, 37, :_reduce_36, + 2, 37, :_reduce_none, + 1, 38, :_reduce_38, + 2, 38, :_reduce_none, + 1, 36, :_reduce_40, + 1, 34, :_reduce_41, + 1, 34, :_reduce_none, + 1, 34, :_reduce_none, + 1, 39, :_reduce_none, + 1, 39, :_reduce_none, + 0, 41, :_reduce_none, + 1, 41, :_reduce_47, + 1, 41, :_reduce_48, + 1, 41, :_reduce_49, + 1, 41, :_reduce_50, + 1, 41, :_reduce_51, + 1, 40, :_reduce_52, + 1, 40, :_reduce_53, + 1, 40, :_reduce_54, + 1, 40, :_reduce_55 ] + +racc_reduce_n = 56 + +racc_shift_n = 80 + +racc_token_table = { + false => 0, + :error => 1, + :IDENTIFIER => 2, + :STRING_MULTI => 3, + :STRING_BASIC => 4, + :STRING_LITERAL_MULTI => 5, + :STRING_LITERAL => 6, + :DATETIME => 7, + :INTEGER => 8, + :FLOAT => 9, + :TRUE => 10, + :FALSE => 11, + "[" => 12, + "]" => 13, + "." 
=> 14, + "{" => 15, + "}" => 16, + "," => 17, + "=" => 18 } + +racc_nt_base = 19 + +racc_use_result_var = true + +Racc_arg = [ + racc_action_table, + racc_action_check, + racc_action_default, + racc_action_pointer, + racc_goto_table, + racc_goto_check, + racc_goto_default, + racc_goto_pointer, + racc_nt_base, + racc_reduce_table, + racc_token_table, + racc_shift_n, + racc_reduce_n, + racc_use_result_var ] + +Racc_token_to_s_table = [ + "$end", + "error", + "IDENTIFIER", + "STRING_MULTI", + "STRING_BASIC", + "STRING_LITERAL_MULTI", + "STRING_LITERAL", + "DATETIME", + "INTEGER", + "FLOAT", + "TRUE", + "FALSE", + "\"[\"", + "\"]\"", + "\".\"", + "\"{\"", + "\"}\"", + "\",\"", + "\"=\"", + "$start", + "expressions", + "expression", + "table", + "assignment", + "inline_table", + "table_start", + "table_continued", + "table_identifier", + "table_next", + "inline_table_start", + "inline_continued", + "inline_assignment_key", + "inline_assignment_value", + "inline_next", + "value", + "array", + "start_array", + "array_continued", + "array_next", + "scalar", + "string", + "literal" ] + +Racc_debug_parser = false + +##### State transition tables end ##### + +# reduce 0 omitted + +# reduce 1 omitted + +# reduce 2 omitted + +# reduce 3 omitted + +# reduce 4 omitted + +# reduce 5 omitted + +# reduce 6 omitted + +module_eval(<<'.,.,', 'parser.y', 15) + def _reduce_7(val, _values, result) + @handler.start_(:array_of_tables) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 16) + def _reduce_8(val, _values, result) + @handler.start_(:table) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 19) + def _reduce_9(val, _values, result) + array = @handler.end_(:array_of_tables); @handler.set_context(array, is_array_of_tables: true) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 20) + def _reduce_10(val, _values, result) + array = @handler.end_(:table); @handler.set_context(array) + result + end +.,., + +# reduce 11 omitted + +module_eval(<<'.,.,', 'parser.y', 
24) + def _reduce_12(val, _values, result) + array = @handler.end_(:array_of_tables); @handler.set_context(array, is_array_of_tables: true) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 25) + def _reduce_13(val, _values, result) + array = @handler.end_(:table); @handler.set_context(array) + result + end +.,., + +# reduce 14 omitted + +module_eval(<<'.,.,', 'parser.y', 29) + def _reduce_15(val, _values, result) + @handler.push(val[0]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 30) + def _reduce_16(val, _values, result) + @handler.push(val[0]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 31) + def _reduce_17(val, _values, result) + @handler.push(val[0]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 32) + def _reduce_18(val, _values, result) + @handler.push(val[0]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 33) + def _reduce_19(val, _values, result) + @handler.push(val[0]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 34) + def _reduce_20(val, _values, result) + @handler.push(val[0]) + result + end +.,., + +# reduce 21 omitted + +module_eval(<<'.,.,', 'parser.y', 40) + def _reduce_22(val, _values, result) + @handler.start_(:inline) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 43) + def _reduce_23(val, _values, result) + array = @handler.end_(:inline); @handler.push(Hash[*array]) + result + end +.,., + +# reduce 24 omitted + +module_eval(<<'.,.,', 'parser.y', 48) + def _reduce_25(val, _values, result) + array = @handler.end_(:inline) + array.map!.with_index{ |n,i| i.even? ? 
n.to_sym : n } if @handler.symbolize_keys + @handler.push(Hash[*array]) + + result + end +.,., + +# reduce 26 omitted + +module_eval(<<'.,.,', 'parser.y', 55) + def _reduce_27(val, _values, result) + @handler.push(val[0]) + result + end +.,., + +# reduce 28 omitted + +module_eval(<<'.,.,', 'parser.y', 61) + def _reduce_29(val, _values, result) + @handler.assign(val[0]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 62) + def _reduce_30(val, _values, result) + @handler.assign(val[0]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 63) + def _reduce_31(val, _values, result) + @handler.assign(val[0]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 64) + def _reduce_32(val, _values, result) + @handler.assign(val[0]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 65) + def _reduce_33(val, _values, result) + @handler.assign(val[0]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 66) + def _reduce_34(val, _values, result) + @handler.assign(val[0]) + result + end +.,., + +# reduce 35 omitted + +module_eval(<<'.,.,', 'parser.y', 72) + def _reduce_36(val, _values, result) + array = @handler.end_(:array); @handler.push(array) + result + end +.,., + +# reduce 37 omitted + +module_eval(<<'.,.,', 'parser.y', 76) + def _reduce_38(val, _values, result) + array = @handler.end_(:array); @handler.push(array) + result + end +.,., + +# reduce 39 omitted + +module_eval(<<'.,.,', 'parser.y', 80) + def _reduce_40(val, _values, result) + @handler.start_(:array) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 83) + def _reduce_41(val, _values, result) + @handler.push(val[0]) + result + end +.,., + +# reduce 42 omitted + +# reduce 43 omitted + +# reduce 44 omitted + +# reduce 45 omitted + +# reduce 46 omitted + +module_eval(<<'.,.,', 'parser.y', 92) + def _reduce_47(val, _values, result) + result = val[0].to_f + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 93) + def _reduce_48(val, _values, result) + result = 
val[0].to_i + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 94) + def _reduce_49(val, _values, result) + result = true + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 95) + def _reduce_50(val, _values, result) + result = false + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 96) + def _reduce_51(val, _values, result) + result = Time.new(*val[0]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 99) + def _reduce_52(val, _values, result) + result = StringUtils.replace_escaped_chars(StringUtils.multiline_replacements(val[0])) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 100) + def _reduce_53(val, _values, result) + result = StringUtils.replace_escaped_chars(val[0]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 101) + def _reduce_54(val, _values, result) + result = StringUtils.strip_spaces(val[0]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 102) + def _reduce_55(val, _values, result) + result = val[0] + result + end +.,., + +def _reduce_none(val, _values, result) + val[0] +end + + end # class GeneratedParser + end # module Tomlrb diff --git a/source/code/toml-parser/tomlrb/handler.rb b/source/code/toml-parser/tomlrb/handler.rb new file mode 100644 index 000000000..d60b54bc3 --- /dev/null +++ b/source/code/toml-parser/tomlrb/handler.rb @@ -0,0 +1,73 @@ +module Tomlrb + class Handler + attr_reader :output, :symbolize_keys + + def initialize(**options) + @output = {} + @current = @output + @stack = [] + @array_names = [] + @symbolize_keys = options[:symbolize_keys] + end + + def set_context(identifiers, is_array_of_tables: false) + @current = @output + + deal_with_array_of_tables(identifiers, is_array_of_tables) do |identifierz| + identifierz.each do |k| + k = k.to_sym if @symbolize_keys + if @current[k].is_a?(Array) + @current[k] << {} if @current[k].empty? 
+ @current = @current[k].last + else + @current[k] ||= {} + @current = @current[k] + end + end + end + end + + def deal_with_array_of_tables(identifiers, is_array_of_tables) + identifiers.map!{|n| n.gsub("\"", '')} + stringified_identifier = identifiers.join('.') + + if is_array_of_tables + @array_names << stringified_identifier + last_identifier = identifiers.pop + elsif @array_names.include?(stringified_identifier) + raise ParseError, 'Cannot define a normal table with the same name as an already established array' + end + + yield(identifiers) + + if is_array_of_tables + last_identifier = last_identifier.to_sym if @symbolize_keys + @current[last_identifier] ||= [] + @current[last_identifier] << {} + @current = @current[last_identifier].last + end + end + + def assign(k) + k = k.to_sym if @symbolize_keys + @current[k] = @stack.pop + end + + def push(o) + @stack << o + end + + def start_(type) + push([type]) + end + + def end_(type) + array = [] + while (value = @stack.pop) != [type] + raise ParseError, 'Unclosed table' unless value + array.unshift(value) + end + array + end + end +end diff --git a/source/code/toml-parser/tomlrb/parser.rb b/source/code/toml-parser/tomlrb/parser.rb new file mode 100644 index 000000000..31771a1ca --- /dev/null +++ b/source/code/toml-parser/tomlrb/parser.rb @@ -0,0 +1,18 @@ +require_relative "generated_parser" + +class Tomlrb::Parser < Tomlrb::GeneratedParser + def initialize(tokenizer, **options) + @tokenizer = tokenizer + @handler = Tomlrb::Handler.new(options) + super() + end + + def next_token + @tokenizer.next_token + end + + def parse + do_parse + @handler + end +end diff --git a/source/code/toml-parser/tomlrb/parser.y b/source/code/toml-parser/tomlrb/parser.y new file mode 100644 index 000000000..fcebcac06 --- /dev/null +++ b/source/code/toml-parser/tomlrb/parser.y @@ -0,0 +1,104 @@ +class Tomlrb::GeneratedParser +token IDENTIFIER STRING_MULTI STRING_BASIC STRING_LITERAL_MULTI STRING_LITERAL DATETIME INTEGER FLOAT TRUE FALSE 
+rule + expressions + | expressions expression + ; + expression + : table + | assignment + | inline_table + ; + table + : table_start table_continued + ; + table_start + : '[' '[' { @handler.start_(:array_of_tables) } + | '[' { @handler.start_(:table) } + ; + table_continued + : ']' ']' { array = @handler.end_(:array_of_tables); @handler.set_context(array, is_array_of_tables: true) } + | ']' { array = @handler.end_(:table); @handler.set_context(array) } + | table_identifier table_next + ; + table_next + : ']' ']' { array = @handler.end_(:array_of_tables); @handler.set_context(array, is_array_of_tables: true) } + | ']' { array = @handler.end_(:table); @handler.set_context(array) } + | '.' table_continued + ; + table_identifier + : IDENTIFIER { @handler.push(val[0]) } + | STRING_BASIC { @handler.push(val[0]) } + | STRING_LITERAL { @handler.push(val[0]) } + | INTEGER { @handler.push(val[0]) } + | TRUE { @handler.push(val[0]) } + | FALSE { @handler.push(val[0]) } + ; + inline_table + : inline_table_start inline_continued + ; + inline_table_start + : '{' { @handler.start_(:inline) } + ; + inline_continued + : '}' { array = @handler.end_(:inline); @handler.push(Hash[*array]) } + | inline_assignment_key inline_assignment_value inline_next + ; + inline_next + : '}' { + array = @handler.end_(:inline) + array.map!.with_index{ |n,i| i.even? ? 
n.to_sym : n } if @handler.symbolize_keys + @handler.push(Hash[*array]) + } + | ',' inline_continued + ; + inline_assignment_key + : IDENTIFIER { @handler.push(val[0]) } + ; + inline_assignment_value + : '=' value + ; + assignment + : IDENTIFIER '=' value { @handler.assign(val[0]) } + | STRING_BASIC '=' value { @handler.assign(val[0]) } + | STRING_LITERAL '=' value { @handler.assign(val[0]) } + | INTEGER '=' value { @handler.assign(val[0]) } + | TRUE '=' value { @handler.assign(val[0]) } + | FALSE '=' value { @handler.assign(val[0]) } + ; + array + : start_array array_continued + ; + array_continued + : ']' { array = @handler.end_(:array); @handler.push(array) } + | value array_next + ; + array_next + : ']' { array = @handler.end_(:array); @handler.push(array) } + | ',' array_continued + ; + start_array + : '[' { @handler.start_(:array) } + ; + value + : scalar { @handler.push(val[0]) } + | array + | inline_table + ; + scalar + : string + | literal + ; + literal + | FLOAT { result = val[0].to_f } + | INTEGER { result = val[0].to_i } + | TRUE { result = true } + | FALSE { result = false } + | DATETIME { result = Time.new(*val[0])} + ; + string + : STRING_MULTI { result = StringUtils.replace_escaped_chars(StringUtils.multiline_replacements(val[0])) } + | STRING_BASIC { result = StringUtils.replace_escaped_chars(val[0]) } + | STRING_LITERAL_MULTI { result = StringUtils.strip_spaces(val[0]) } + | STRING_LITERAL { result = val[0] } + ; diff --git a/source/code/toml-parser/tomlrb/scanner.rb b/source/code/toml-parser/tomlrb/scanner.rb new file mode 100644 index 000000000..d0f479eef --- /dev/null +++ b/source/code/toml-parser/tomlrb/scanner.rb @@ -0,0 +1,54 @@ +require 'strscan' + +module Tomlrb + class Scanner + COMMENT = /#.*/ + IDENTIFIER = /[A-Za-z0-9_-]+/ + SPACE = /[ \t\r\n]/ + STRING_BASIC = /(["])(?:\\?.)*?\1/ + STRING_MULTI = /"{3}([\s\S]*?"{3,4})/m + STRING_LITERAL = /(['])(?:\\?.)*?\1/ + STRING_LITERAL_MULTI = /'{3}([\s\S]*?'{3})/m + DATETIME = 
/(-?\d{4})-(\d{2})-(\d{2})(?:(?:t|\s)(\d{2}):(\d{2}):(\d{2}(?:\.\d+)?))?(z|[-+]\d{2}:\d{2})?/i + FLOAT = /[+-]?(?:[0-9_]+\.[0-9_]*|\d+(?=[eE]))(?:[eE][+-]?[0-9_]+)?/ + INTEGER = /[+-]?([1-9](_?\d)*|0)(?![A-Za-z0-9_-]+)/ + TRUE = /true/ + FALSE = /false/ + + def initialize(io) + @ss = StringScanner.new(io.read) + end + + def next_token + return if @ss.eos? + + case + when @ss.scan(SPACE) then next_token + when @ss.scan(COMMENT) then next_token + when @ss.scan(DATETIME) then process_datetime + when text = @ss.scan(STRING_MULTI) then [:STRING_MULTI, text[3..-4]] + when text = @ss.scan(STRING_BASIC) then [:STRING_BASIC, text[1..-2]] + when text = @ss.scan(STRING_LITERAL_MULTI) then [:STRING_LITERAL_MULTI, text[3..-4]] + when text = @ss.scan(STRING_LITERAL) then [:STRING_LITERAL, text[1..-2]] + when text = @ss.scan(FLOAT) then [:FLOAT, text] + when text = @ss.scan(INTEGER) then [:INTEGER, text] + when text = @ss.scan(TRUE) then [:TRUE, text] + when text = @ss.scan(FALSE) then [:FALSE, text] + when text = @ss.scan(IDENTIFIER) then [:IDENTIFIER, text] + else + x = @ss.getch + [x, x] + end + end + + def process_datetime + if @ss[7].nil? 
+ offset = '+00:00' + else + offset = @ss[7].gsub('Z', '+00:00') + end + args = [@ss[1], @ss[2], @ss[3], @ss[4] || 0, @ss[5] || 0, @ss[6].to_f, offset] + [:DATETIME, args] + end + end +end diff --git a/source/code/toml-parser/tomlrb/string_utils.rb b/source/code/toml-parser/tomlrb/string_utils.rb new file mode 100644 index 000000000..53d27e414 --- /dev/null +++ b/source/code/toml-parser/tomlrb/string_utils.rb @@ -0,0 +1,33 @@ +module Tomlrb + class StringUtils + + SPECIAL_CHARS = { + '\\t' => "\t", + '\\b' => "\b", + '\\f' => "\f", + '\\n' => "\n", + '\\r' => "\r", + '\\"' => '"', + '\\\\' => '\\' + }.freeze + + def self.multiline_replacements(str) + strip_spaces(str).gsub(/\\\n\s+/, '') + end + + def self.replace_escaped_chars(str) + str.gsub(/\\(u[\da-fA-F]{4}|U[\da-fA-F]{8}|.)/) do |m| + if m.size == 2 + SPECIAL_CHARS[m] || (raise Tomlrb::ParseError.new "Escape sequence #{m} is reserved") + else + m[2..-1].to_i(16).chr(Encoding::UTF_8) + end + end + end + + def self.strip_spaces(str) + str[0] = '' if str[0] == "\n" + str + end + end +end diff --git a/source/code/toml-parser/tomlrb/version.rb b/source/code/toml-parser/tomlrb/version.rb new file mode 100644 index 000000000..b72a81b60 --- /dev/null +++ b/source/code/toml-parser/tomlrb/version.rb @@ -0,0 +1,3 @@ +module Tomlrb + VERSION = "1.2.8" +end From 727d5bd691b50192d7b6879fe68e438ee7a7fdc1 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Thu, 6 Jun 2019 16:47:55 -0700 Subject: [PATCH 092/160] Fix Scenario when Controller name is empty (#222) --- source/code/plugin/filter_inventory2mdm.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/code/plugin/filter_inventory2mdm.rb b/source/code/plugin/filter_inventory2mdm.rb index f98a3224e..30f6f911a 100644 --- a/source/code/plugin/filter_inventory2mdm.rb +++ b/source/code/plugin/filter_inventory2mdm.rb @@ -176,6 +176,10 @@ def process_pod_inventory_records(es) podControllerNameDimValue = record['DataItems'][0]['ControllerName'] podNodeDimValue = 
record['DataItems'][0]['Computer'] + if podControllerNameDimValue.nil? || podControllerNameDimValue.empty? + podControllerNameDimValue = 'No Controller' + end + if podNodeDimValue.empty? && podPhaseDimValue.downcase == 'pending' podNodeDimValue = 'unscheduled' elsif podNodeDimValue.empty? From 5e4b0f3f817b1d51b9b0830acd71ebae90b7fe2d Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 6 Jun 2019 17:01:51 -0700 Subject: [PATCH 093/160] fix ; --- installer/datafiles/base_container.data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index fd070426c..58a74aa0a 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -112,7 +112,7 @@ MAINTAINER: 'Microsoft Corporation' /etc/opt/microsoft/docker-cimprov/telegraf-rs.conf; installer/conf/telegraf-rs.conf; 644; root; root /opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh; installer/scripts/TelegrafTCPErrorTelemetry.sh; 755; root; root /opt/livenessprobe.sh; installer/scripts/livenessprobe.sh; 755; root; root -/opt/tomlparser.rb; installer/scripts/tomlparser.rb 755; root; root +/opt/tomlparser.rb; installer/scripts/tomlparser.rb; 755; root; root %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root From 6fefcac8db6db2fb97c7480ddb93036b1b65f092 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Sat, 8 Jun 2019 08:26:37 -0700 Subject: [PATCH 094/160] ContainerLog collection optimizations (#223) * * derive k8s namespace from file (rather than making a api call) * optimize perf by not tailing excluded namespaces in stdout & stderr * Tuning fluentbit settings based on Cortana teams findings * making db sync off * buffer chunk and max as 1m so that we dont flush > 1m payloads * increasing rotatte wait from 5 secs to 30 secs * decreasing refresh interval from 60 secs to 30 secs * adding retry limit as 10 
so that items get dropped in 50 secs rather than infinetely trying * changing flush to 5 secs from 30 secs --- installer/conf/out_oms.conf | 2 - installer/conf/td-agent-bit-rs.conf | 1 + installer/conf/td-agent-bit.conf | 8 +- installer/scripts/tomlparser.rb | 21 +++- source/code/go/src/plugins/oms.go | 169 +++++++++----------------- source/code/go/src/plugins/out_oms.go | 1 - 6 files changed, 82 insertions(+), 120 deletions(-) diff --git a/installer/conf/out_oms.conf b/installer/conf/out_oms.conf index d6679f982..7af7b6fdd 100644 --- a/installer/conf/out_oms.conf +++ b/installer/conf/out_oms.conf @@ -3,5 +3,3 @@ cert_file_path=/etc/opt/microsoft/omsagent/certs/oms.crt key_file_path=/etc/opt/microsoft/omsagent/certs/oms.key container_host_file_path=/var/opt/microsoft/docker-cimprov/state/containerhostname container_inventory_refresh_interval=60 -#kube_system_containers_refresh_interval=300 -exclude_namespaces_containers_refresh_interval=60 diff --git a/installer/conf/td-agent-bit-rs.conf b/installer/conf/td-agent-bit-rs.conf index 740f8a951..7945261aa 100644 --- a/installer/conf/td-agent-bit-rs.conf +++ b/installer/conf/td-agent-bit-rs.conf @@ -25,5 +25,6 @@ [OUTPUT] Name oms EnableTelemetry true + Retry_Limit 10 TelemetryPushIntervalSeconds 300 Match oms.container.* diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index d1a045063..14728af5d 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -1,5 +1,5 @@ [SERVICE] - Flush 30 + Flush 5 Log_Level info Parsers_File /etc/td-agent-bit/parsers.conf Log_File /var/opt/microsoft/docker-cimprov/log/fluent-bit.log @@ -9,8 +9,13 @@ Tag oms.container.log.* Path ${AZMON_LOG_TAIL_PATH} DB /var/log/omsagent-fblogs.db + DB.Sync Off Parser docker Mem_Buf_Limit 5m + Buffer_Chunk_Size 1m + Buffer_Max_Size 1m + Rotate_Wait 20 + Refresh_Interval 30 Path_Key filepath Skip_Long_Lines On Ignore_Older 5m @@ -53,5 +58,6 @@ [OUTPUT] Name oms EnableTelemetry true + 
Retry_Limit 10 TelemetryPushIntervalSeconds 300 Match oms.container.* diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index 52516641a..abc8b8e19 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -13,7 +13,7 @@ @collectClusterEnvVariables = true @logTailPath = "/var/log/containers/*.log" @logExclusionRegexPattern = "(^((?!stdout|stderr).)*$)" -@excludePath = "*.csv2" +@excludePath = "*.csv2" #some invalid path # Use parser to parse the configmap toml file to a ruby structure def parseConfigMap @@ -76,11 +76,14 @@ def populateSettingValuesFromConfigMap(parsedConfig) @collectStderrLogs = parsedConfig[:log_collection_settings][:stderr][:enabled] puts "config::Using config map setting for stderr log collection" stderrNamespaces = parsedConfig[:log_collection_settings][:stderr][:exclude_namespaces] - + stdoutNamespaces = Array.new #Clearing it, so that it can be overridden with the config map settings @stderrExcludeNamespaces.clear if @collectStderrLogs && !stderrNamespaces.nil? if stderrNamespaces.kind_of?(Array) + if !@stdoutExcludeNamespaces.nil? && !@stdoutExcludeNamespaces.empty? + stdoutNamespaces = @stdoutExcludeNamespaces.split(',') + end # Checking only for the first element to be string because toml enforces the arrays to contain elements of same type if stderrNamespaces.length > 0 && stderrNamespaces[0].kind_of?(String) stderrNamespaces.each do |namespace| @@ -90,6 +93,10 @@ def populateSettingValuesFromConfigMap(parsedConfig) else @stderrExcludeNamespaces.concat("," + namespace) end + # Add this namespace to excludepath if both stdout & stderr are excluded for this namespace, to ensure are optimized and dont tail these files at all + if stdoutNamespaces.include? 
namespace + @excludePath.concat("," + "*_" + namespace + "_*.log") + end end puts "config::Using config map setting for stderr log collection to exclude namespace" end @@ -113,13 +120,16 @@ def populateSettingValuesFromConfigMap(parsedConfig) end @configSchemaVersion = ENV['AZMON_AGENT_CFG_SCHEMA_VERSION'] - if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @@configSchemaVersion.strip.casecmp('v1') == 0 #note v1 is the only supported schema version , so hardcoding it + if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp('v1') == 0 #note v1 is the only supported schema version , so hardcoding it + puts "****************Start Config Processing********************" configMapSettings = parseConfigMap if !configMapSettings.nil? populateSettingValuesFromConfigMap(configMapSettings) end else - puts "config::unsupported config schema version - #{@configSchemaVersion}, using defaults" + if (File.file?(@configMapMountPath)) + puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults" + end @excludePath = "*_kube-system_*.log" end @@ -147,6 +157,9 @@ def populateSettingValuesFromConfigMap(parsedConfig) file.write("export AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH=#{@excludePath}\n") # Close file after writing all environment variables file.close + puts "Both stdout & stderr log collection are turned off for namespaces: '#{@excludePath}' " + puts "****************End Config Processing********************" else puts "config::error::Exception while opening file for writing config environment variables" + puts "****************End Config Processing********************" end diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 0ffaaff63..ae4a109a6 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -17,7 +17,6 @@ import ( lumberjack "gopkg.in/natefinch/lumberjack.v2" - corev1 "k8s.io/api/core/v1" metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" @@ -57,9 +56,6 @@ const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimp const IPName = "Containers" const defaultContainerInventoryRefreshInterval = 60 -// const defaultKubeSystemContainersRefreshInterval = 300 -const defaultExcludeNamespacesContainersRefreshInterval = 300 - var ( // PluginConfiguration the plugins configuration PluginConfiguration map[string]string @@ -84,10 +80,10 @@ var ( ImageIDMap map[string]string // NameIDMap caches the container it to Name mapping NameIDMap map[string]string - // StdoutIgnoreIDSet set of container Ids of excluded namespaces for stdout logs - StdoutIgnoreIDSet map[string]bool - // StderrIgnoreIDSet set of container Ids of excluded namespaces for stderr logs - StderrIgnoreIDSet map[string]bool + // StdoutIgnoreNamespaceSet set of excluded K8S namespaces for stdout logs + StdoutIgnoreNsSet map[string]bool + // StderrIgnoreNamespaceSet set of excluded K8S namespaces for stderr logs + StderrIgnoreNsSet map[string]bool // DataUpdateMutex read and write mutex access to the container id set DataUpdateMutex = &sync.Mutex{} // ContainerLogTelemetryMutex read and write mutex access to the Container Log Telemetry @@ -97,8 +93,6 @@ var ( ) var ( - // ExcludeNamespacesContainersRefreshTicker updates the excludenamespace containers - ExcludeNamespacesContainersRefreshTicker *time.Ticker // ContainerImageNameRefreshTicker updates the container image and names periodically ContainerImageNameRefreshTicker *time.Ticker ) @@ -225,70 +219,28 @@ func updateContainerImageNameMaps() { } } -func excludeContainerIDPopulator(excludeNamespaceList []string, logStream string) { - var podsToExclude []*corev1.PodList - listOptions := metav1.ListOptions{} - listOptions.FieldSelector = fmt.Sprintf("spec.nodeName=%s", Computer) - - pods, err := ClientSet.CoreV1().Pods("").List(listOptions) - if err != nil { - message := fmt.Sprintf("Error 
getting pods %s - for node %s . All %s logs might be collected", err.Error(), Computer, logStream) - SendException(message) - Log(message) - return - } - - podsToExclude = append(podsToExclude, pods) - ignoreNamespaceSet := make(map[string]bool) - for _, ns := range excludeNamespaceList { - ignoreNamespaceSet[strings.TrimSpace(ns)] = true - } - - _ignoreIDSet := make(map[string]bool) - for _, pod := range podsToExclude { - for _, pod := range pod.Items { - _, ok := ignoreNamespaceSet[pod.Namespace] - if ok { - Log ("Adding pod %s in namespace %s to %s exclusion list", pod.Name, pod.Namespace, logStream) - for _, status := range pod.Status.ContainerStatuses { - lastSlashIndex := strings.LastIndex(status.ContainerID, "/") - _ignoreIDSet[status.ContainerID[lastSlashIndex+1:len(status.ContainerID)]] = true - } - } - } - } - - Log("Locking to update excluded container IDs for %s", logStream) - DataUpdateMutex.Lock() - if strings.Compare(logStream, "stdout") == 0 { - StdoutIgnoreIDSet = _ignoreIDSet - } else { - StderrIgnoreIDSet = _ignoreIDSet - } - DataUpdateMutex.Unlock() - Log("Unlocking after updating excluded container IDs for %s", logStream) -} - -func updateExcludeStdoutContainerIDs() { - for ; true; <-ExcludeNamespacesContainersRefreshTicker.C { - collectStdoutLogs := os.Getenv("AZMON_COLLECT_STDOUT_LOGS") - var stdoutNSExcludeList []string - excludeList := os.Getenv("AZMON_STDOUT_EXCLUDED_NAMESPACES") - if (strings.Compare(collectStdoutLogs, "true") == 0) && (len(excludeList) > 0) { - stdoutNSExcludeList = strings.Split(excludeList, ",") - excludeContainerIDPopulator(stdoutNSExcludeList, "stdout") +func populateExcludedStdoutNamespaces() { + collectStdoutLogs := os.Getenv("AZMON_COLLECT_STDOUT_LOGS") + var stdoutNSExcludeList []string + excludeList := os.Getenv("AZMON_STDOUT_EXCLUDED_NAMESPACES") + if (strings.Compare(collectStdoutLogs, "true") == 0) && (len(excludeList) > 0) { + stdoutNSExcludeList = strings.Split(excludeList, ",") + for _, ns := range 
stdoutNSExcludeList { + Log ("Excluding namespace %s for stdout log collection", ns) + StdoutIgnoreNsSet[strings.TrimSpace(ns)] = true } } } -func updateExcludeStderrContainerIDs() { - for ; true; <-ExcludeNamespacesContainersRefreshTicker.C { - collectStderrLogs := os.Getenv("AZMON_COLLECT_STDERR_LOGS") - var stderrNSExcludeList []string - excludeList := os.Getenv("AZMON_STDERR_EXCLUDED_NAMESPACES") - if (strings.Compare(collectStderrLogs, "true") == 0) && (len(excludeList) > 0) { - stderrNSExcludeList = strings.Split(excludeList, ",") - excludeContainerIDPopulator(stderrNSExcludeList, "stderr") +func populateExcludedStderrNamespaces() { + collectStderrLogs := os.Getenv("AZMON_COLLECT_STDERR_LOGS") + var stderrNSExcludeList []string + excludeList := os.Getenv("AZMON_STDERR_EXCLUDED_NAMESPACES") + if (strings.Compare(collectStderrLogs, "true") == 0) && (len(excludeList) > 0) { + stderrNSExcludeList = strings.Split(excludeList, ",") + for _, ns := range stderrNSExcludeList { + Log ("Excluding namespace %s for stderr log collection", ns) + StderrIgnoreNsSet[strings.TrimSpace(ns)] = true } } } @@ -469,18 +421,11 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { var maxLatency float64 var maxLatencyContainer string - stdoutIgnoreIDSet := make(map[string]bool) - stderrIgnoreIDSet := make(map[string]bool) imageIDMap := make(map[string]string) nameIDMap := make(map[string]string) DataUpdateMutex.Lock() - for k, v := range StdoutIgnoreIDSet { - stdoutIgnoreIDSet[k] = v - } - for k, v := range StderrIgnoreIDSet { - stderrIgnoreIDSet[k] = v - } + for k, v := range ImageIDMap { imageIDMap[k] = v } @@ -490,15 +435,15 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { DataUpdateMutex.Unlock() for _, record := range tailPluginRecords { - containerID := GetContainerIDFromFilePath(ToString(record["filepath"])) + containerID, k8sNamespace := GetContainerIDK8sNamespaceFromFileName(ToString(record["filepath"])) logEntrySource := 
ToString(record["stream"]) if strings.EqualFold(logEntrySource, "stdout") { - if containerID == "" || containsKey(stdoutIgnoreIDSet, containerID) { + if containerID == "" || containsKey(StdoutIgnoreNsSet, k8sNamespace) { continue } } else if strings.EqualFold(logEntrySource, "stderr") { - if containerID == "" || containsKey(stderrIgnoreIDSet, containerID) { + if containerID == "" || containsKey(StderrIgnoreNsSet, k8sNamespace) { continue } } @@ -608,24 +553,38 @@ func containsKey(currentMap map[string]bool, key string) bool { return c } -// GetContainerIDFromFilePath Gets the container ID From the file Path -func GetContainerIDFromFilePath(filepath string) string { - start := strings.LastIndex(filepath, "-") - end := strings.LastIndex(filepath, ".") +// GetContainerIDK8sNamespaceFromFileName Gets the container ID From the file Name +// sample filename kube-proxy-dgcx7_kube-system_kube-proxy-8df7e49e9028b60b5b0d0547f409c455a9567946cf763267b7e6fa053ab8c182.log +func GetContainerIDK8sNamespaceFromFileName(filename string) (string, string) { + id := "" + ns := "" + + start := strings.LastIndex(filename, "-") + end := strings.LastIndex(filename, ".") + + if start >= end || start == -1 || end == -1 { + id = "" + } else { + id = filename[start+1 : end] + } + + start = strings.Index(filename, "_") + end = strings.LastIndex(filename, "_") + if start >= end || start == -1 || end == -1 { - // This means the file is not a managed Kubernetes docker log file. - // Drop all records from the file - Log("File %s is not a Kubernetes managed docker log file. 
Dropping all records from the file", filepath) - return "" + ns = "" + } else { + ns = filename[start+1 : end] } - return filepath[start+1 : end] + + return id, ns } // InitializePlugin reads and populates plugin configuration func InitializePlugin(pluginConfPath string, agentVersion string) { - StdoutIgnoreIDSet = make(map[string]bool) - StderrIgnoreIDSet = make(map[string]bool) + StdoutIgnoreNsSet = make(map[string]bool) + StderrIgnoreNsSet = make(map[string]bool) ImageIDMap = make(map[string]string) NameIDMap = make(map[string]string) @@ -683,16 +642,6 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { Log("containerInventoryRefreshInterval = %d \n", containerInventoryRefreshInterval) ContainerImageNameRefreshTicker = time.NewTicker(time.Second * time.Duration(containerInventoryRefreshInterval)) - excludeNamespacesContainersRefreshInterval, err := strconv.Atoi(pluginConfig["exclude_namespaces_containers_refresh_interval"]) - if err != nil { - message := fmt.Sprintf("Error Reading exclude namespaces Container Ids Refresh Interval %s", err.Error()) - Log(message) - SendException(message) - Log("Using Default Refresh Interval of %d s\n", defaultExcludeNamespacesContainersRefreshInterval) - excludeNamespacesContainersRefreshInterval = defaultExcludeNamespacesContainersRefreshInterval - } - Log("excludeNamespacesContainersRefreshInterval = %d \n", excludeNamespacesContainersRefreshInterval) - ExcludeNamespacesContainersRefreshTicker = time.NewTicker(time.Second * time.Duration(excludeNamespacesContainersRefreshInterval)) // Populate Computer field containerHostName, err := ioutil.ReadFile(pluginConfig["container_host_file_path"]) @@ -732,15 +681,11 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { CreateHTTPClient() - if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "daemonset") == 0 { - defaultExcludePath := os.Getenv("AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH") - //further optimization for clusters with default 
settings. need this cache only when log collection config is overridden with custom config - if ( (strings.Compare(defaultExcludePath, "*_kube-system_*.log") != 0) ) { - go updateExcludeStdoutContainerIDs() - go updateExcludeStderrContainerIDs() - } - go updateContainerImageNameMaps() - } else { - Log("Running in replicaset. Disabling kube-system container cache collection & updates \n") + if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "daemonset") == 0 { + populateExcludedStdoutNamespaces() + populateExcludedStderrNamespaces() + go updateContainerImageNameMaps() + } else { + Log("Running in replicaset. Disabling container enrichment caching & updates \n") } } diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 2ee6f994d..0fa2ddd4b 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -74,7 +74,6 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { // FLBPluginExit exits the plugin func FLBPluginExit() int { ContainerLogTelemetryTicker.Stop() - ExcludeNamespacesContainersRefreshTicker.Stop() ContainerImageNameRefreshTicker.Stop() return output.FLB_OK } From f87349eafa96160a3d3c0bf81f80a8c98064b3e3 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Sun, 9 Jun 2019 20:00:21 -0700 Subject: [PATCH 095/160] merge final changes for release from Vishwa/june2019agentrel to ci_feature (#224) * * derive k8s namespace from file (rather than making a api call) * optimize perf by not tailing excluded namespaces in stdout & stderr * Tuning fluentbit settings based on Cortana teams findings * making db sync off * buffer chunk and max as 1m so that we dont flush > 1m payloads * increasing rotatte wait from 5 secs to 30 secs * decreasing refresh interval from 60 secs to 30 secs * adding retry limit as 10 so that items get dropped in 50 secs rather than infinetely trying * changing flush to 5 secs from 30 secs * fix a minor comment * * change flush from 5 
to 10 secs based on perf findings --- installer/conf/td-agent-bit.conf | 2 +- installer/scripts/tomlparser.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 14728af5d..3d51154e7 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -1,5 +1,5 @@ [SERVICE] - Flush 5 + Flush 10 Log_Level info Parsers_File /etc/td-agent-bit/parsers.conf Log_File /var/opt/microsoft/docker-cimprov/log/fluent-bit.log diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index abc8b8e19..3e7f48045 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -120,8 +120,8 @@ def populateSettingValuesFromConfigMap(parsedConfig) end @configSchemaVersion = ENV['AZMON_AGENT_CFG_SCHEMA_VERSION'] + puts "****************Start Config Processing********************" if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp('v1') == 0 #note v1 is the only supported schema version , so hardcoding it - puts "****************Start Config Processing********************" configMapSettings = parseConfigMap if !configMapSettings.nil? 
populateSettingValuesFromConfigMap(configMapSettings) From 8a412c19c935035a13664eec8672e2af141be37b Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Fri, 14 Jun 2019 09:05:36 -0700 Subject: [PATCH 096/160] fix fluent bit tuning for perf run (#226) * fix fluent bit tuning for perf run * stop collecting our own partition --- installer/conf/td-agent-bit.conf | 14 ++++++++------ installer/conf/telegraf.conf | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index d4a49a385..2dee26234 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -1,5 +1,5 @@ [SERVICE] - Flush 10 + Flush 15 Log_Level info Parsers_File /etc/td-agent-bit/parsers.conf Log_File /var/opt/microsoft/docker-cimprov/log/fluent-bit.log @@ -11,7 +11,7 @@ DB /var/log/omsagent-fblogs.db DB.Sync Off Parser docker - Mem_Buf_Limit 5m + Mem_Buf_Limit 10m Buffer_Chunk_Size 1m Buffer_Max_Size 1m Rotate_Wait 20 @@ -26,20 +26,22 @@ Tag oms.container.log.flbplugin.* Path /var/log/containers/omsagent*.log DB /var/opt/microsoft/docker-cimprov/state/omsagent-ai.db - Mem_Buf_Limit 2m + DB.Sync Off + Mem_Buf_Limit 1m Path_Key filepath Skip_Long_Lines On - Ignore_Older 5m + Ignore_Older 2m [INPUT] Name tail Tag oms.container.log.telegraf.err.* Path /var/opt/microsoft/docker-cimprov/log/telegraf.log DB /var/opt/microsoft/docker-cimprov/state/telegraf-log-state.db - Mem_Buf_Limit 2m + DB.Sync Off + Mem_Buf_Limit 1m Path_Key filepath Skip_Long_Lines On - Ignore_Older 5m + Ignore_Older 2m [INPUT] Name tcp diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 6b3f44929..06b1c55eb 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -404,7 +404,7 @@ # Below due to Bug - https://github.com/influxdata/telegraf/issues/5615 # ORDER matters here!! 
- i.e the below should be the LAST modifier [inputs.disk.tagdrop] - path = ["/var/lib/kubelet*", "/dev/termination-log", "/var/log", "/etc/hosts", "/etc/resolv.conf", "/etc/hostname", "/etc/kubernetes/host", "/var/lib/docker/containers"] + path = ["/var/lib/kubelet*", "/dev/termination-log", "/var/log", "/etc/hosts", "/etc/resolv.conf", "/etc/hostname", "/etc/kubernetes/host", "/var/lib/docker/containers", "/etc/config/settings"] # Read metrics about memory usage From e36b5ab1600fccfb9cad1fe1b07aa95f2f1171d7 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Fri, 14 Jun 2019 09:30:04 -0700 Subject: [PATCH 097/160] fix merge issue --- source/code/go/src/plugins/oms.go | 69 ------------------------------- 1 file changed, 69 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 4e6cd4d88..b925e7145 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -243,75 +243,6 @@ func populateExcludedStderrNamespaces() { StderrIgnoreNsSet[strings.TrimSpace(ns)] = true } } - - var metrics []laTelegrafMetric - var i int - - for i = 0; i < len(laMetrics); i++ { - metrics = append(metrics, *laMetrics[i]) - } - - laTelegrafMetrics := InsightsMetricsBlob{ - DataType: InsightsMetricsDataType, - IPName: IPName, - DataItems: metrics} - - jsonBytes, err := json.Marshal(laTelegrafMetrics) - - if err != nil { - message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:when marshalling json %q", err) - Log(message) - SendException(message) - return output.FLB_OK - } - - //Post metrics data to LA - req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(jsonBytes)) - - //req.URL.Query().Add("api-version","2016-04-01") - - //set headers - req.Header.Set("x-ms-date", time.Now().Format(time.RFC3339)) - - //expensive to do string len for every request, so use a flag - if ResourceCentric == true { - req.Header.Set("x-ms-AzureResourceId", ResourceID) - } - - start := time.Now() - resp, err := HTTPClient.Do(req) - 
elapsed := time.Since(start) - - if err != nil { - message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:(retriable) when sending %v metrics. duration:%v err:%q \n", len(laMetrics), elapsed, err.Error()) - Log(message) - SendException(message) - UpdateNumTelegrafMetricsSentTelemetry(0, 1) - return output.FLB_RETRY - } - - if resp == nil || resp.StatusCode != 200 { - if resp != nil { - Log("PostTelegrafMetricsToLA::Error:(retriable) Response Status %v Status Code %v", resp.Status, resp.StatusCode) - } - UpdateNumTelegrafMetricsSentTelemetry(0, 1) - return output.FLB_RETRY - } - - defer resp.Body.Close() - - numMetrics := len(laMetrics) - UpdateNumTelegrafMetricsSentTelemetry(numMetrics, 0) - Log("PostTelegrafMetricsToLA::Info:Successfully flushed %v records in %v", numMetrics, elapsed) - - return output.FLB_OK -} - -func UpdateNumTelegrafMetricsSentTelemetry(numMetricsSent int, numSendErrors int) { - ContainerLogTelemetryMutex.Lock() - TelegrafMetricsSentCount += float64(numMetricsSent) - TelegrafMetricsSendErrorCount += float64(numSendErrors) - ContainerLogTelemetryMutex.Unlock() } //Azure loganalytics metric values have to be numeric, so string values are dropped From 8ba1f86953ef3c666023564e1ad7ad57fffb584e Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 21 Jun 2019 12:10:34 -0700 Subject: [PATCH 098/160] add release notes for june release in ci_feature branch --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index d6ac07e33..32ed42929 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,26 @@ additional questions or comments. 
Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) +##### Version microsoft/oms:ciprod06142019 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06142019 +- MDM pod metrics bug fixes - MDM rejecting pod metrics due to nodename or controllername dimensions being empty +- Prometheus metrics collection by default in every node for kubelet docker operations and kubelet docker operation errors +- Telegraf metric collection for diskio and networkio metrics +- Agent Configuration/ Settings for data collection + * Cluster level log collection enable/disable option + * Ability to enable/disable stdout and/or stderr logs collection per namespace + * Cluster level environment variable collection enable/disable option + * Config file version & config schema version + * Pod annotation for supported config schema version(s) +- Log collection optimization/tuning for better performance + * Derive k8s namespaces from log file name (instead of making call to k8s api service) + * Do not tail log files for containers in the excluded namespace list (if excluded both in stdout & stderr) + * Limit buffer size to 1M and flush logs more frequently [every 10 secs (instead of 30 secs)] + * Tuning of several other fluent bit settings +- Increase requests + * Replica set memory request by 75M (100M to 175M) + * Daemonset CPU request by 25m (50m to 75m) +- Will be pushing image only to MCR ( no more Docker) starting this release. 
AKS-engine will also start to pull our agent image from MCR + ### 04/23/2019 - ##### Version microsoft/oms:ciprod043232019 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod04232019 - Windows node monitoring (metrics & inventory) From e7e9e6d73808e15566e5d56f00f543f777112678 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 21 Jun 2019 12:12:27 -0700 Subject: [PATCH 099/160] fix title --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 32ed42929..3a12a521b 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ additional questions or comments. Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) -##### Version microsoft/oms:ciprod06142019 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06142019 +### 06/14/2019 - Version microsoft/oms:ciprod06142019 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06142019 - MDM pod metrics bug fixes - MDM rejecting pod metrics due to nodename or controllername dimensions being empty - Prometheus metrics collection by default in every node for kubelet docker operations and kubelet docker operation errors - Telegraf metric collection for diskio and networkio metrics From 3903a9dc972fd6b6323a968cb8a5ede9707af722 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 21 Jun 2019 12:13:51 -0700 Subject: [PATCH 100/160] update --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3a12a521b..5dfc12f28 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,8 @@ additional questions or comments. 
Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) -### 06/14/2019 - Version microsoft/oms:ciprod06142019 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06142019 +### 06/14/2019 +##### Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06142019 - MDM pod metrics bug fixes - MDM rejecting pod metrics due to nodename or controllername dimensions being empty - Prometheus metrics collection by default in every node for kubelet docker operations and kubelet docker operation errors - Telegraf metric collection for diskio and networkio metrics From f5b54fed0b05e4546e310d31c00bb872a3c1cac2 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 21 Jun 2019 12:14:47 -0700 Subject: [PATCH 101/160] fix title --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5dfc12f28..759ec476d 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ additional questions or comments. Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) ### 06/14/2019 -##### Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06142019 +##### Version microsoft/oms:ciprod06142019 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06142019 - MDM pod metrics bug fixes - MDM rejecting pod metrics due to nodename or controllername dimensions being empty - Prometheus metrics collection by default in every node for kubelet docker operations and kubelet docker operation errors - Telegraf metric collection for diskio and networkio metrics From 1d32cec35bf1b4441484080b5f18b0d54d64c49d Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Fri, 5 Jul 2019 10:05:29 -0700 Subject: [PATCH 102/160] Trim spaces in AKS_REGION (#233) This is not an issue for normal AKS Monitoring Addon Onboarding. 
ONLY an issue for backdoor onboarding --- source/code/plugin/out_mdm.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 68c43d5da..a81da0fbc 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -67,7 +67,9 @@ def start return end - @@post_request_url = @@post_request_url_template % {aks_region: aks_region, aks_resource_id: aks_resource_id} + aks_region = aks_region.gsub(" ","") + + @@post_request_url = @@post_request_url_template % {aks_region: aks_region), aks_resource_id: aks_resource_id} @post_request_uri = URI.parse(@@post_request_url) @http_client = Net::HTTP.new(@post_request_uri.host, @post_request_uri.port) @http_client.use_ssl = true From 5b8c52eff693da7eee9f6adcaae567108b38cad0 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 9 Jul 2019 11:17:10 -0700 Subject: [PATCH 103/160] Add Logs Size To Telemetry (#234) * Add Logs to telemetry * Using len instead of unsafe.Sizeof --- source/code/go/src/plugins/oms.go | 17 ++++++++-------- source/code/go/src/plugins/telemetry.go | 26 ++++++++++++++++--------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index b925e7145..a79297189 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -226,7 +226,7 @@ func populateExcludedStdoutNamespaces() { if (strings.Compare(collectStdoutLogs, "true") == 0) && (len(excludeList) > 0) { stdoutNSExcludeList = strings.Split(excludeList, ",") for _, ns := range stdoutNSExcludeList { - Log ("Excluding namespace %s for stdout log collection", ns) + Log("Excluding namespace %s for stdout log collection", ns) StdoutIgnoreNsSet[strings.TrimSpace(ns)] = true } } @@ -239,7 +239,7 @@ func populateExcludedStderrNamespaces() { if (strings.Compare(collectStderrLogs, "true") == 0) && (len(excludeList) > 0) { stderrNSExcludeList = 
strings.Split(excludeList, ",") for _, ns := range stderrNSExcludeList { - Log ("Excluding namespace %s for stderr log collection", ns) + Log("Excluding namespace %s for stderr log collection", ns) StderrIgnoreNsSet[strings.TrimSpace(ns)] = true } } @@ -425,7 +425,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { nameIDMap := make(map[string]string) DataUpdateMutex.Lock() - + for k, v := range ImageIDMap { imageIDMap[k] = v } @@ -476,6 +476,8 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { Name: stringMap["Name"], } + FlushedRecordsSize += float64(len(stringMap["LogEntry"])) + dataItems = append(dataItems, dataItem) loggedTime, e := time.Parse(time.RFC3339, dataItem.LogEntryTimeStamp) if e != nil { @@ -561,7 +563,7 @@ func GetContainerIDK8sNamespaceFromFileName(filename string) (string, string) { start := strings.LastIndex(filename, "-") end := strings.LastIndex(filename, ".") - + if start >= end || start == -1 || end == -1 { id = "" } else { @@ -641,7 +643,6 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { Log("containerInventoryRefreshInterval = %d \n", containerInventoryRefreshInterval) ContainerImageNameRefreshTicker = time.NewTicker(time.Second * time.Duration(containerInventoryRefreshInterval)) - // Populate Computer field containerHostName, err := ioutil.ReadFile(pluginConfig["container_host_file_path"]) if err != nil { @@ -680,11 +681,11 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { CreateHTTPClient() - if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "daemonset") == 0 { + if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "daemonset") == 0 { populateExcludedStdoutNamespaces() populateExcludedStderrNamespaces() - go updateContainerImageNameMaps() - } else { + go updateContainerImageNameMaps() + } else { Log("Running in replicaset. 
Disabling container enrichment caching & updates \n") } } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 956ebf07e..5fc0fa843 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -16,6 +16,8 @@ import ( var ( // FlushedRecordsCount indicates the number of flushed log records in the current period FlushedRecordsCount float64 + // FlushedRecordsSize indicates the size of the flushed records in the current period + FlushedRecordsSize float64 // FlushedRecordsTimeTaken indicates the cumulative time taken to flush the records for the current period FlushedRecordsTimeTaken float64 // This is telemetry for how old/latent logs we are processing in milliseconds (max over a period of time) @@ -35,16 +37,17 @@ var ( ) const ( - clusterTypeACS = "ACS" - clusterTypeAKS = "AKS" - envAKSResourceID = "AKS_RESOURCE_ID" - envACSResourceName = "ACS_RESOURCE_NAME" - envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" - metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" - metricNameAvgLogGenerationRate = "ContainerLogsGeneratedPerSec" - metricNameAgentLogProcessingMaxLatencyMs = "ContainerLogsAgentSideLatencyMs" + clusterTypeACS = "ACS" + clusterTypeAKS = "AKS" + envAKSResourceID = "AKS_RESOURCE_ID" + envACSResourceName = "ACS_RESOURCE_NAME" + envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" + metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" + metricNameAvgLogGenerationRate = "ContainerLogsGeneratedPerSec" + metricNameLogSize = "ContainerLogsSize" + metricNameAgentLogProcessingMaxLatencyMs = "ContainerLogsAgentSideLatencyMs" metricNameNumberofTelegrafMetricsSentSuccessfully = "TelegrafMetricsSentCount" - metricNameNumberofSendErrorsTelegrafMetrics = "TelegrafMetricsSendErrorCount" + metricNameNumberofSendErrorsTelegrafMetrics = "TelegrafMetricsSendErrorCount" defaultTelemetryPushIntervalSeconds = 300 @@ -71,11 +74,13 @@ func 
SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { ContainerLogTelemetryMutex.Lock() flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 logRate := FlushedRecordsCount / float64(elapsed/time.Second) + logSizeRate := FlushedRecordsSize / float64(elapsed/time.Second) telegrafMetricsSentCount := TelegrafMetricsSentCount telegrafMetricsSendErrorCount := TelegrafMetricsSendErrorCount TelegrafMetricsSentCount = 0.0 TelegrafMetricsSendErrorCount = 0.0 FlushedRecordsCount = 0.0 + FlushedRecordsSize = 0.0 FlushedRecordsTimeTaken = 0.0 logLatencyMs := AgentLogProcessingMaxLatencyMs logLatencyMsContainer := AgentLogProcessingMaxLatencyMsContainer @@ -88,7 +93,10 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { flushRateMetric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) TelemetryClient.Track(flushRateMetric) logRateMetric := appinsights.NewMetricTelemetry(metricNameAvgLogGenerationRate, logRate) + logSizeMetric := appinsights.NewMetricTelemetry(metricNameLogSize, logSizeRate) TelemetryClient.Track(logRateMetric) + Log("Log Size Rate: %f\n", logSizeRate) + TelemetryClient.Track(logSizeMetric) logLatencyMetric := appinsights.NewMetricTelemetry(metricNameAgentLogProcessingMaxLatencyMs, logLatencyMs) logLatencyMetric.Properties["Container"] = logLatencyMsContainer TelemetryClient.Track(logLatencyMetric) From 5fc0f1b49b6cdd04b9f8adddadbfa6a6bc1f73f5 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Tue, 9 Jul 2019 15:10:12 -0700 Subject: [PATCH 104/160] Merge Vishwa/promcustommetrics to ci_feature (#237) * hard code config for UST CCP team * fix config * fix config after discussion * fix error log to get errros * fix config * update config * Add telemetry * Rashmi/promcustomconfig (#231) * changes * formatting changes * changes * changes * changes * changes * changes * changes * changes * changes * adding telemetry * changes * changes * changes * changes * changes * changes * changes * 
cahnges * changes * Rashmi/promcustomconfig (#236) * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * fix exceptions * changes to remove some exceptions * exception fixes * changes * changes for poduid nil check --- installer/conf/td-agent-bit-rs.conf | 10 - installer/conf/td-agent-bit.conf | 22 +- installer/conf/telegraf-rs.conf | 95 +++-- installer/conf/telegraf.conf | 88 +++-- installer/datafiles/base_container.data | 3 +- .../scripts/tomlparser-prom-customconfig.rb | 200 +++++++++++ installer/scripts/tomlparser.rb | 82 ++--- source/code/go/src/plugins/oms.go | 9 +- source/code/go/src/plugins/out_oms.go | 2 - .../code/plugin/CAdvisorMetricsAPIClient.rb | 34 +- source/code/plugin/DockerApiClient.rb | 325 +++++++++--------- source/code/plugin/KubernetesApiClient.rb | 2 +- source/code/plugin/in_containerinventory.rb | 5 +- source/code/plugin/in_kube_events.rb | 138 ++++---- source/code/plugin/in_kube_nodes.rb | 134 ++++---- source/code/plugin/in_kube_podinventory.rb | 18 +- source/code/plugin/in_kube_services.rb | 191 +++++----- 17 files changed, 833 insertions(+), 525 deletions(-) create mode 100644 installer/scripts/tomlparser-prom-customconfig.rb diff --git a/installer/conf/td-agent-bit-rs.conf b/installer/conf/td-agent-bit-rs.conf index 7945261aa..7839b0eee 100644 --- a/installer/conf/td-agent-bit-rs.conf +++ b/installer/conf/td-agent-bit-rs.conf @@ -4,16 +4,6 @@ Parsers_File /etc/td-agent-bit/parsers.conf Log_File /var/opt/microsoft/docker-cimprov/log/fluent-bit.log -[INPUT] - Name tail - Tag oms.container.log.telegraf.err.* - Path /var/opt/microsoft/docker-cimprov/log/telegraf.log - DB /var/opt/microsoft/docker-cimprov/state/telegraf-log-state.db - Mem_Buf_Limit 2m - Path_Key filepath - Skip_Long_Lines On - Ignore_Older 5m - [INPUT] Name tcp Tag oms.container.perf.telegraf.* diff --git a/installer/conf/td-agent-bit.conf 
b/installer/conf/td-agent-bit.conf index 2dee26234..e7aabd242 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -6,7 +6,7 @@ [INPUT] Name tail - Tag oms.container.log.* + Tag oms.container.log.la.* Path ${AZMON_LOG_TAIL_PATH} DB /var/log/omsagent-fblogs.db DB.Sync Off @@ -32,17 +32,6 @@ Skip_Long_Lines On Ignore_Older 2m -[INPUT] - Name tail - Tag oms.container.log.telegraf.err.* - Path /var/opt/microsoft/docker-cimprov/log/telegraf.log - DB /var/opt/microsoft/docker-cimprov/state/telegraf-log-state.db - DB.Sync Off - Mem_Buf_Limit 1m - Path_Key filepath - Skip_Long_Lines On - Ignore_Older 2m - [INPUT] Name tcp Tag oms.container.perf.telegraf.* @@ -53,9 +42,16 @@ [FILTER] Name grep - Match oms.container.log.* + Match oms.container.log.la.* Exclude stream ${AZMON_LOG_EXCLUSION_REGEX_PATTERN} +# Exclude prometheus plugin exceptions that might be caused due to invalid config.(Logs which contain - E! [inputs.prometheus]) +# Excluding these logs from being sent to AI since it can result in high volume of data in telemetry due to invalid config. +[FILTER] + Name grep + Match oms.container.log.flbplugin.* + Exclude log E! [\[]inputs.prometheus[\]] + [OUTPUT] Name oms EnableTelemetry true diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf index cb9a36685..ce60bfa04 100644 --- a/installer/conf/telegraf-rs.conf +++ b/installer/conf/telegraf-rs.conf @@ -77,7 +77,7 @@ ## Run telegraf in quiet mode (error log messages only). quiet = true ## Specify the log file name. The empty string means to log to stderr. 
- logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log" + logfile = "" ## Override default hostname, if empty use os.Hostname() #hostname = "placeholder_hostname" @@ -536,32 +536,75 @@ #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"] # [inputs.prometheus.tagpass] -[[inputs.exec]] - ## Commands array - interval = "15m" - commands = [ - "/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh" - ] +#Prometheus Custom Metrics +[[inputs.prometheus]] + interval = "$AZMON_RS_PROM_INTERVAL" - ## Timeout for each command to complete. - timeout = "15s" + ## An array of urls to scrape metrics from. + urls = $AZMON_RS_PROM_URLS + + ## An array of Kubernetes services to scrape metrics from. + kubernetes_services = $AZMON_RS_PROM_K8S_SERVICES + + ## Scrape Kubernetes pods for the following prometheus annotations: + ## - prometheus.io/scrape: Enable scraping for this pod + ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to + ## set this to `https` & most likely set the tls config. + ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation. + ## - prometheus.io/port: If port is not 9102 use this annotation + monitor_kubernetes_pods = $AZMON_RS_PROM_MONITOR_PODS - ## measurement name suffix (for separating different commands) - name_suffix = "_telemetry" + fieldpass = $AZMON_RS_PROM_FIELDPASS + fielddrop = $AZMON_RS_PROM_FIELDDROP - ## Data format to consume. 
- ## Each data format has its own unique set of configuration options, read - ## more about them here: - ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md - data_format = "influx" - #tagexclude = ["hostName"] - [inputs.exec.tags] - AgentVersion = "$AGENT_VERSION" - AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" - ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME" - Region = "$TELEMETRY_AKS_REGION" - ClusterName = "$TELEMETRY_CLUSTER_NAME" - ClusterType = "$TELEMETRY_CLUSTER_TYPE" - Computer = "placeholder_hostname" - ControllerType = "$CONTROLLER_TYPE" + metric_version = 2 + url_tag = "scrapeUrl" + + ## Kubernetes config file to create client from. + # kube_config = "/path/to/kubernetes.config" + + ## Use bearer token for authorization. ('bearer_token' takes priority) + bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token" + ## OR + # bearer_token_string = "abc_123" + + ## Specify timeout duration for slower prometheus clients (default is 3s) + response_timeout = "15s" + + ## Optional TLS Config + tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + #tls_cert = /path/to/certfile + # tls_key = /path/to/keyfile + ## Use TLS but skip chain & host verification + insecure_skip_verify = true + #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"] + +# [[inputs.exec]] +# ## Commands array +# interval = "15m" +# commands = [ +# "/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh" +# ] + +# ## Timeout for each command to complete. +# timeout = "15s" + +# ## measurement name suffix (for separating different commands) +# name_suffix = "_telemetry" + +# ## Data format to consume. 
+# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" +# #tagexclude = ["hostName"] +# [inputs.exec.tags] +# AgentVersion = "$AGENT_VERSION" +# AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" +# ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME" +# Region = "$TELEMETRY_AKS_REGION" +# ClusterName = "$TELEMETRY_CLUSTER_NAME" +# ClusterType = "$TELEMETRY_CLUSTER_TYPE" +# Computer = "placeholder_hostname" +# ControllerType = "$CONTROLLER_TYPE" diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 06b1c55eb..4883de81b 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -77,8 +77,7 @@ ## Run telegraf in quiet mode (error log messages only). quiet = true ## Specify the log file name. The empty string means to log to stderr. - logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log" - + logfile = "" ## Override default hostname, if empty use os.Hostname() #hostname = "placeholder_hostname" ## If set to true, do no set the "host" tag in the telegraf agent. @@ -568,31 +567,66 @@ insecure_skip_verify = true #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"] -[[inputs.exec]] - ## Commands array - interval = "15m" - commands = [ - "/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh" - ] - ## Timeout for each command to complete. - timeout = "15s" +## prometheus custom metrics +[[inputs.prometheus]] - ## measurement name suffix (for separating different commands) - name_suffix = "_telemetry" + interval = "$AZMON_DS_PROM_INTERVAL" - ## Data format to consume. 
- ## Each data format has its own unique set of configuration options, read - ## more about them here: - ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md - data_format = "influx" - tagexclude = ["hostName"] - [inputs.exec.tags] - AgentVersion = "$AGENT_VERSION" - AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" - ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME" - Region = "$TELEMETRY_AKS_REGION" - ClusterName = "$TELEMETRY_CLUSTER_NAME" - ClusterType = "$TELEMETRY_CLUSTER_TYPE" - Computer = "placeholder_hostname" - ControllerType = "$CONTROLLER_TYPE" \ No newline at end of file + ## An array of urls to scrape metrics from. + urls = $AZMON_DS_PROM_URLS + + fieldpass = $AZMON_DS_PROM_FIELDPASS + + fielddrop = $AZMON_DS_PROM_FIELDDROP + + metric_version = 2 + url_tag = "scrapeUrl" + + ## Kubernetes config file to create client from. + # kube_config = "/path/to/kubernetes.config" + + ## Use bearer token for authorization. ('bearer_token' takes priority) + bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token" + ## OR + # bearer_token_string = "abc_123" + + ## Specify timeout duration for slower prometheus clients (default is 3s) + response_timeout = "15s" + + ## Optional TLS Config + tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + #tls_cert = /path/to/certfile + # tls_key = /path/to/keyfile + ## Use TLS but skip chain & host verification + insecure_skip_verify = true + #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"] + +# [[inputs.exec]] +# ## Commands array +# interval = "15m" +# commands = [ +# "/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh" +# ] + +# ## Timeout for each command to complete. +# timeout = "15s" + +# ## measurement name suffix (for separating different commands) +# name_suffix = "_telemetry" + +# ## Data format to consume. 
+# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" +# tagexclude = ["hostName"] +# [inputs.exec.tags] +# AgentVersion = "$AGENT_VERSION" +# AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" +# ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME" +# Region = "$TELEMETRY_AKS_REGION" +# ClusterName = "$TELEMETRY_CLUSTER_NAME" +# ClusterType = "$TELEMETRY_CLUSTER_TYPE" +# Computer = "placeholder_hostname" +# ControllerType = "$CONTROLLER_TYPE" \ No newline at end of file diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 58a74aa0a..fe1635335 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -110,9 +110,10 @@ MAINTAINER: 'Microsoft Corporation' /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/telegraf.conf; installer/conf/telegraf.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/telegraf-rs.conf; installer/conf/telegraf-rs.conf; 644; root; root -/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh; installer/scripts/TelegrafTCPErrorTelemetry.sh; 755; root; root +/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh; installer/scripts/TelegrafTCPErrorTelemetry.sh; 755; root; root /opt/livenessprobe.sh; installer/scripts/livenessprobe.sh; 755; root; root /opt/tomlparser.rb; installer/scripts/tomlparser.rb; 755; root; root +/opt/tomlparser-prom-customconfig.rb; installer/scripts/tomlparser-prom-customconfig.rb; 755; root; root %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root diff --git a/installer/scripts/tomlparser-prom-customconfig.rb b/installer/scripts/tomlparser-prom-customconfig.rb new file mode 100644 index 000000000..d9fdf1cc2 
--- /dev/null +++ b/installer/scripts/tomlparser-prom-customconfig.rb @@ -0,0 +1,200 @@ +#!/usr/local/bin/ruby + +require_relative "tomlrb" +require "fileutils" + +@promConfigMapMountPath = "/etc/config/settings/prometheus-data-collection-settings" +@replicaset = "replicaset" +@daemonset = "daemonset" +@configSchemaVersion = "" +@defaultDsInterval = "1m" +@defaultDsPromUrls = [] +@defaultDsFieldPass = [] +@defaultDsFieldDrop = [] +@defaultRsInterval = "1m" +@defaultRsPromUrls = [] +@defaultRsFieldPass = [] +@defaultRsFieldDrop = [] +@defaultRsK8sServices = [] +@defaultRsMonitorPods = false + +# Use parser to parse the configmap toml file to a ruby structure +def parseConfigMap + begin + # Check to see if config map is created + if (File.file?(@promConfigMapMountPath)) + puts "config::configmap container-azm-ms-agentconfig for settings mounted, parsing values for prometheus config map" + parsedConfig = Tomlrb.load_file(@promConfigMapMountPath, symbolize_keys: true) + puts "config::Successfully parsed mounted prometheus config map" + return parsedConfig + else + puts "config::configmap container-azm-ms-agentconfig for settings not mounted, using defaults for prometheus scraping" + return nil + end + rescue => errorStr + puts "config::error::Exception while parsing toml config file for prometheus config: #{errorStr}, using defaults" + return nil + end +end + +def checkForTypeArray(arrayValue, arrayType) + if (arrayValue.nil? || (arrayValue.kind_of?(Array) && arrayValue.length > 0 && arrayValue[0].kind_of?(arrayType))) + return true + else + return false + end +end + +def checkForType(variable, varType) + if variable.nil? 
|| variable.kind_of?(varType) + return true + else + return false + end +end + +# Use the ruby structure created after config parsing to set the right values to be used as environment variables +def populateSettingValuesFromConfigMap(parsedConfig) + # Checking to see if this is the daemonset or replicaset to parse config accordingly + controller = ENV["CONTROLLER_TYPE"] + if !controller.nil? + if !parsedConfig.nil? && !parsedConfig[:prometheus_data_collection_settings].nil? + if controller.casecmp(@replicaset) == 0 && !parsedConfig[:prometheus_data_collection_settings][:cluster].nil? + #Get prometheus replicaset custom config settings + begin + interval = parsedConfig[:prometheus_data_collection_settings][:cluster][:interval] + fieldPass = parsedConfig[:prometheus_data_collection_settings][:cluster][:fieldpass] + fieldDrop = parsedConfig[:prometheus_data_collection_settings][:cluster][:fielddrop] + urls = parsedConfig[:prometheus_data_collection_settings][:cluster][:urls] + kubernetesServices = parsedConfig[:prometheus_data_collection_settings][:cluster][:kubernetes_services] + monitorKubernetesPods = parsedConfig[:prometheus_data_collection_settings][:cluster][:monitor_kubernetes_pods] + + # Check for the right datattypes to enforce right setting values + if checkForType(interval, String) && + checkForTypeArray(fieldPass, String) && + checkForTypeArray(fieldDrop, String) && + checkForTypeArray(kubernetesServices, String) && + checkForTypeArray(urls, String) && + !monitorKubernetesPods.nil? && (!!monitorKubernetesPods == monitorKubernetesPods) #Checking for Boolean type, since 'Boolean' is not defined as a type in ruby + puts "config::Successfully passed typecheck for config settings for replicaset" + #if setting is nil assign default values + interval = (interval.nil?) ? @defaultRsInterval : interval + fieldPass = (fieldPass.nil?) ? @defaultRsFieldPass : fieldPass + fieldDrop = (fieldDrop.nil?) ? 
@defaultRsFieldDrop : fieldDrop + kubernetesServices = (kubernetesServices.nil?) ? @defaultRsK8sServices : kubernetesServices + urls = (urls.nil?) ? @defaultRsPromUrls : urls + monitorKubernetesPods = (kubernetesServices.nil?) ? @defaultRsMonitorPods : monitorKubernetesPods + + file_name = "/opt/telegraf-test-rs.conf" + # Copy the telegraf config file to a temp file to run telegraf in test mode with this config + FileUtils.cp("/etc/opt/microsoft/docker-cimprov/telegraf-rs.conf", file_name) + + puts "config::Starting to substitute the placeholders in telegraf conf copy file for replicaset" + #Replace the placeholder config values with values from custom config + text = File.read(file_name) + new_contents = text.gsub("$AZMON_RS_PROM_INTERVAL", interval) + new_contents = new_contents.gsub("$AZMON_RS_PROM_FIELDPASS", ((fieldPass.length > 0) ? ("[\"" + fieldPass.join("\",\"") + "\"]") : "[]")) + new_contents = new_contents.gsub("$AZMON_RS_PROM_FIELDDROP", ((fieldDrop.length > 0) ? ("[\"" + fieldDrop.join("\",\"") + "\"]") : "[]")) + new_contents = new_contents.gsub("$AZMON_RS_PROM_URLS", ((urls.length > 0) ? ("[\"" + urls.join("\",\"") + "\"]") : "[]")) + new_contents = new_contents.gsub("$AZMON_RS_PROM_K8S_SERVICES", ((kubernetesServices.length > 0) ? ("[\"" + kubernetesServices.join("\",\"") + "\"]") : "[]")) + new_contents = new_contents.gsub("$AZMON_RS_PROM_MONITOR_PODS", (monitorKubernetesPods ? "true" : "false")) + File.open(file_name, "w") { |file| file.puts new_contents } + puts "config::Successfully substituted the placeholders in telegraf conf file for replicaset" + #Set environment variables for telemetry + file = File.open("telemetry_prom_config_env_var", "w") + if !file.nil? 
+ file.write("export TELEMETRY_RS_PROM_INTERVAL=\"#{interval}\"\n") + #Setting array lengths as environment variables for telemetry purposes + file.write("export TELEMETRY_RS_PROM_FIELDPASS_LENGTH=\"#{fieldPass.length}\"\n") + file.write("export TELEMETRY_RS_PROM_FIELDDROP_LENGTH=\"#{fieldDrop.length}\"\n") + file.write("export TELEMETRY_RS_PROM_K8S_SERVICES_LENGTH=#{kubernetesServices.length}\n") + file.write("export TELEMETRY_RS_PROM_URLS_LENGTH=#{urls.length}\n") + file.write("export TELEMETRY_RS_PROM_MONITOR_PODS=\"#{monitorKubernetesPods}\"\n") + # Close file after writing all environment variables + file.close + puts "config::Successfully created telemetry file for replicaset" + end + else + puts "config::Typecheck failed for prometheus config settings for replicaset, using defaults" + end # end of type check condition + rescue => errorStr + puts "config::error::Exception while parsing config file for prometheus config for replicaset: #{errorStr}, using defaults" + setRsPromDefaults + puts "****************End Prometheus Config Processing********************" + end + elsif controller.casecmp(@daemonset) == 0 && !parsedConfig[:prometheus_data_collection_settings][:node].nil? + #Get prometheus daemonset custom config settings + begin + interval = parsedConfig[:prometheus_data_collection_settings][:node][:interval] + fieldPass = parsedConfig[:prometheus_data_collection_settings][:node][:fieldpass] + fieldDrop = parsedConfig[:prometheus_data_collection_settings][:node][:fielddrop] + urls = parsedConfig[:prometheus_data_collection_settings][:node][:urls] + + # Check for the right datattypes to enforce right setting values + if checkForType(interval, String) && + checkForTypeArray(fieldPass, String) && + checkForTypeArray(fieldDrop, String) && + checkForTypeArray(urls, String) + puts "config::Successfully passed typecheck for config settings for daemonset" + + #if setting is nil assign default values + interval = (interval.nil?) ? 
@defaultDsInterval : interval + fieldPass = (fieldPass.nil?) ? @defaultDsFieldPass : fieldPass + fieldDrop = (fieldDrop.nil?) ? @defaultDsFieldDrop : fieldDrop + urls = (urls.nil?) ? @defaultDsPromUrls : urls + + file_name = "/opt/telegraf-test.conf" + # Copy the telegraf config file to a temp file to run telegraf in test mode with this config + FileUtils.cp("/etc/opt/microsoft/docker-cimprov/telegraf.conf", file_name) + + puts "config::Starting to substitute the placeholders in telegraf conf copy file for daemonset" + #Replace the placeholder config values with values from custom config + text = File.read(file_name) + new_contents = text.gsub("$AZMON_DS_PROM_INTERVAL", interval) + new_contents = new_contents.gsub("$AZMON_DS_PROM_FIELDPASS", ((fieldPass.length > 0) ? ("[\"" + fieldPass.join("\",\"") + "\"]") : "[]")) + new_contents = new_contents.gsub("$AZMON_DS_PROM_FIELDDROP", ((fieldDrop.length > 0) ? ("[\"" + fieldDrop.join("\",\"") + "\"]") : "[]")) + new_contents = new_contents.gsub("$AZMON_DS_PROM_URLS", ((urls.length > 0) ? ("[\"" + urls.join("\",\"") + "\"]") : "[]")) + File.open(file_name, "w") { |file| file.puts new_contents } + puts "config::Successfully substituted the placeholders in telegraf conf file for daemonset" + + #Set environment variables for telemetry + file = File.open("telemetry_prom_config_env_var", "w") + if !file.nil? 
+ file.write("export TELEMETRY_DS_PROM_INTERVAL=\"#{interval}\"\n") + #Setting array lengths as environment variables for telemetry purposes + file.write("export TELEMETRY_DS_PROM_FIELDPASS_LENGTH=\"#{fieldPass.length}\"\n") + file.write("export TELEMETRY_DS_PROM_FIELDDROP_LENGTH=\"#{fieldDrop.length}\"\n") + file.write("export TELEMETRY_DS_PROM_URLS_LENGTH=#{urls.length}\n") + # Close file after writing all environment variables + file.close + puts "config::Successfully created telemetry file for daemonset" + end + else + puts "config::Typecheck failed for prometheus config settings for daemonset, using defaults" + end # end of type check condition + rescue => errorStr + puts "config::error::Exception while parsing config file for prometheus config for daemonset: #{errorStr}, using defaults" + puts "****************End Prometheus Config Processing********************" + end + end # end of controller type check + end + else + puts "config::error:: Controller undefined while processing prometheus config, using defaults" + end +end + +@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] +puts "****************Start Prometheus Config Processing********************" +if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it + configMapSettings = parseConfigMap + if !configMapSettings.nil? 
+ populateSettingValuesFromConfigMap(configMapSettings) + end +else + if (File.file?(@promConfigMapMountPath)) + puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults" + else + puts "config::No configmap mounted for prometheus custom config, using defaults" + end +end +puts "****************End Prometheus Config Processing********************" diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index 3e7f48045..c72e64127 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -82,7 +82,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) if @collectStderrLogs && !stderrNamespaces.nil? if stderrNamespaces.kind_of?(Array) if !@stdoutExcludeNamespaces.nil? && !@stdoutExcludeNamespaces.empty? - stdoutNamespaces = @stdoutExcludeNamespaces.split(',') + stdoutNamespaces = @stdoutExcludeNamespaces.split(",") end # Checking only for the first element to be string because toml enforces the arrays to contain elements of same type if stderrNamespaces.length > 0 && stderrNamespaces[0].kind_of?(String) @@ -119,47 +119,47 @@ def populateSettingValuesFromConfigMap(parsedConfig) end end - @configSchemaVersion = ENV['AZMON_AGENT_CFG_SCHEMA_VERSION'] - puts "****************Start Config Processing********************" - if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp('v1') == 0 #note v1 is the only supported schema version , so hardcoding it - configMapSettings = parseConfigMap - if !configMapSettings.nil? - populateSettingValuesFromConfigMap(configMapSettings) - end - else - if (File.file?(@configMapMountPath)) - puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults" - end - @excludePath = "*_kube-system_*.log" +@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] +puts "****************Start Config Processing********************" +if !@configSchemaVersion.nil? 
&& !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it + configMapSettings = parseConfigMap + if !configMapSettings.nil? + populateSettingValuesFromConfigMap(configMapSettings) + end +else + if (File.file?(@configMapMountPath)) + puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults" end + @excludePath = "*_kube-system_*.log" +end - # Write the settings to file, so that they can be set as environment variables - file = File.open("config_env_var", "w") +# Write the settings to file, so that they can be set as environment variables +file = File.open("config_env_var", "w") - if !file.nil? - # This will be used in td-agent-bit.conf file to filter out logs - if (!@collectStdoutLogs && !@collectStderrLogs) - #Stop log tailing completely - @logTailPath = "/opt/nolog*.log" - @logExclusionRegexPattern = "stdout|stderr" - elsif !@collectStdoutLogs - @logExclusionRegexPattern = "stdout" - elsif !@collectStderrLogs - @logExclusionRegexPattern = "stderr" - end - file.write("export AZMON_COLLECT_STDOUT_LOGS=#{@collectStdoutLogs}\n") - file.write("export AZMON_LOG_TAIL_PATH=#{@logTailPath}\n") - file.write("export AZMON_LOG_EXCLUSION_REGEX_PATTERN=\"#{@logExclusionRegexPattern}\"\n") - file.write("export AZMON_STDOUT_EXCLUDED_NAMESPACES=#{@stdoutExcludeNamespaces}\n") - file.write("export AZMON_COLLECT_STDERR_LOGS=#{@collectStderrLogs}\n") - file.write("export AZMON_STDERR_EXCLUDED_NAMESPACES=#{@stderrExcludeNamespaces}\n") - file.write("export AZMON_CLUSTER_COLLECT_ENV_VAR=#{@collectClusterEnvVariables}\n") - file.write("export AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH=#{@excludePath}\n") - # Close file after writing all environment variables - file.close - puts "Both stdout & stderr log collection are turned off for namespaces: '#{@excludePath}' " - puts "****************End Config Processing********************" - else - puts 
"config::error::Exception while opening file for writing config environment variables" - puts "****************End Config Processing********************" +if !file.nil? + # This will be used in td-agent-bit.conf file to filter out logs + if (!@collectStdoutLogs && !@collectStderrLogs) + #Stop log tailing completely + @logTailPath = "/opt/nolog*.log" + @logExclusionRegexPattern = "stdout|stderr" + elsif !@collectStdoutLogs + @logExclusionRegexPattern = "stdout" + elsif !@collectStderrLogs + @logExclusionRegexPattern = "stderr" end + file.write("export AZMON_COLLECT_STDOUT_LOGS=#{@collectStdoutLogs}\n") + file.write("export AZMON_LOG_TAIL_PATH=#{@logTailPath}\n") + file.write("export AZMON_LOG_EXCLUSION_REGEX_PATTERN=\"#{@logExclusionRegexPattern}\"\n") + file.write("export AZMON_STDOUT_EXCLUDED_NAMESPACES=#{@stdoutExcludeNamespaces}\n") + file.write("export AZMON_COLLECT_STDERR_LOGS=#{@collectStderrLogs}\n") + file.write("export AZMON_STDERR_EXCLUDED_NAMESPACES=#{@stderrExcludeNamespaces}\n") + file.write("export AZMON_CLUSTER_COLLECT_ENV_VAR=#{@collectClusterEnvVariables}\n") + file.write("export AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH=#{@excludePath}\n") + # Close file after writing all environment variables + file.close + puts "Both stdout & stderr log collection are turned off for namespaces: '#{@excludePath}' " + puts "****************End Config Processing********************" +else + puts "config::error::Exception while opening file for writing config environment variables" + puts "****************End Config Processing********************" +end diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index a79297189..c5ad307d8 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -34,14 +34,12 @@ const ResourceIdEnv = "AKS_RESOURCE_ID" //env variable which has ResourceName for NON-AKS const ResourceNameEnv = "ACS_RESOURCE_NAME" -// Origin prefix for telegraf Metrics (used as prefix for origin field & 
prefix for azure monitor specific tags) +// Origin prefix for telegraf Metrics (used as prefix for origin field & prefix for azure monitor specific tags and also for custom-metrics telemetry ) const TelegrafMetricOriginPrefix = "container.azm.ms" // Origin suffix for telegraf Metrics (used as suffix for origin field) const TelegrafMetricOriginSuffix = "telegraf" -// Namespace prefix for telegraf Metrics (used as prefix for Namespace field) -//const TelegrafMetricNamespacePrefix = "plugin" // clusterName tag const TelegrafTagClusterName = "clusterName" @@ -193,7 +191,6 @@ func updateContainerImageNameMaps() { if err != nil { message := fmt.Sprintf("Error getting pods %s\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) Log(message) - SendException(message) continue } @@ -384,7 +381,6 @@ func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int if err != nil { message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:(retriable) when sending %v metrics. duration:%v err:%q \n", len(laMetrics), elapsed, err.Error()) Log(message) - SendException(message) UpdateNumTelegrafMetricsSentTelemetry(0, 1) return output.FLB_RETRY } @@ -519,7 +515,8 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { if err != nil { message := fmt.Sprintf("Error when sending request %s \n", err.Error()) Log(message) - SendException(message) + // Commenting this out for now. 
TODO - Add better telemetry for ods errors using aggregation + //SendException(message) Log("Failed to flush %d records after %s", len(dataItems), elapsed) return output.FLB_RETRY diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 0fa2ddd4b..e9e7124b7 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -64,8 +64,6 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { return PushToAppInsightsTraces(records, appinsights.Information, incomingTag) } else if strings.Contains(incomingTag, "oms.container.perf.telegraf") { return PostTelegrafMetricsToLA(records) - } else if strings.Contains(incomingTag, "oms.container.log.telegraf.err") { - return PushToAppInsightsTraces(records, appinsights.Error, incomingTag) } return PostDataHelper(records) diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index b842edb29..ec38bcbb5 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -14,12 +14,31 @@ class CAdvisorMetricsAPIClient require_relative "ApplicationInsightsUtility" @configMapMountPath = "/etc/config/settings/log-data-collection-settings" + @promConfigMountPath = "/etc/config/settings/prometheus-data-collection-settings" @clusterEnvVarCollectionEnabled = ENV["AZMON_CLUSTER_COLLECT_ENV_VAR"] @clusterStdErrLogCollectionEnabled = ENV["AZMON_COLLECT_STDERR_LOGS"] @clusterStdOutLogCollectionEnabled = ENV["AZMON_COLLECT_STDOUT_LOGS"] @clusterLogTailExcludPath = ENV["AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH"] @clusterLogTailPath = ENV["AZMON_LOG_TAIL_PATH"] @clusterAgentSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] + + @rsPromInterval = ENV["TELEMETRY_RS_PROM_INTERVAL"] + @dsPromInterval = ENV["TELEMETRY_DS_PROM_INTERVAL"] + + @rsPromFieldPassCount = ENV["TELEMETRY_RS_PROM_FIELDPASS_LENGTH"] + @dsPromFieldPassCount = 
ENV["TELEMETRY_DS_PROM_FIELDPASS_LENGTH"] + + @rsPromFieldDropCount = ENV["TELEMETRY_RS_PROM_FIELDDROP_LENGTH"] + @dsPromFieldDropCount = ENV["TELEMETRY_DS_PROM_FIELDDROP_LENGTH"] + + @rsPromK8sServiceCount = ENV["TELEMETRY_RS_PROM_K8S_SERVICES_LENGTH"] + + @rsPromUrlCount = ENV["TELEMETRY_RS_PROM_URLS_LENGTH"] + @dsPromUrlCount = ENV["TELEMETRY_DS_PROM_URLS_LENGTH"] + + @rsPromMonitorPods = ENV["TELEMETRY_RS_PROM_MONITOR_PODS"] + + @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt" @Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M # @@rxBytesLast = nil @@ -199,7 +218,7 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met telemetryProps["PodName"] = podName telemetryProps["ContainerName"] = containerName telemetryProps["Computer"] = hostName - #telemetry about custom log collections setting + #telemetry about log collections settings if (File.file?(@configMapMountPath)) telemetryProps["clustercustomsettings"] = true telemetryProps["clusterenvvars"] = @clusterEnvVarCollectionEnabled @@ -209,6 +228,19 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met telemetryProps["clusterLogTailPath"] = @clusterLogTailPath telemetryProps["clusterAgentSchemaVersion"] = @clusterAgentSchemaVersion end + #telemetry about prometheus metric collections settings + if (File.file?(@promConfigMountPath)) + telemetryProps["rsPromInt"] = @rsPromInterval + telemetryProps["dsPromInt"] = @dsPromInterval + telemetryProps["rsPromFPC"] = @rsPromFieldPassCount + telemetryProps["dsPromFPC"] = @dsPromFieldPassCount + telemetryProps["rsPromFDC"] = @rsPromFieldDropCount + telemetryProps["dsPromFDC"] = @dsPromFieldDropCount + telemetryProps["rsPromServ"] = @rsPromK8sServiceCount + telemetryProps["rsPromUrl"] = @rsPromUrlCount + telemetryProps["dsPromUrl"] = @dsPromUrlCount + telemetryProps["rsPromMonPods"] = @rsPromMonitorPods + end 
ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) end end diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index 5a46b5fdb..eb9d74531 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -2,179 +2,196 @@ # frozen_string_literal: true class DockerApiClient + require "socket" + require "json" + require "timeout" + require_relative "omslog" + require_relative "DockerApiRestHelper" + require_relative "ApplicationInsightsUtility" - require 'socket' - require 'json' - require 'timeout' - require_relative 'omslog' - require_relative 'DockerApiRestHelper' - require_relative 'ApplicationInsightsUtility' + @@SocketPath = "/var/run/host/docker.sock" + @@ChunkSize = 4096 + @@TimeoutInSeconds = 5 + @@PluginName = "ContainerInventory" - @@SocketPath = "/var/run/host/docker.sock" - @@ChunkSize = 4096 - @@TimeoutInSeconds = 5 - @@PluginName = 'ContainerInventory' + def initialize + end - def initialize - end - - class << self - # Make docker socket call for requests - def getResponse(request, isMultiJson, isVersion) - begin - socket = UNIXSocket.new(@@SocketPath) - dockerResponse = "" - isTimeOut = false - socket.write(request) - # iterate through the response until the last chunk is less than the chunk size so that we can read all data in socket. - loop do - begin - responseChunk = "" - timeout(@@TimeoutInSeconds) do - responseChunk = socket.recv(@@ChunkSize) - end - dockerResponse += responseChunk - rescue Timeout::Error - $log.warn("Socket read timedout for request: #{request} @ #{Time.now.utc.iso8601}") - isTimeOut = true - break - end - break if (isVersion)? (responseChunk.length < @@ChunkSize) : (responseChunk.end_with? "0\r\n\r\n") - end - socket.close - return (isTimeOut)? 
nil : parseResponse(dockerResponse, isMultiJson) - rescue => errorStr - $log.warn("Socket call failed for request: #{request} error: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + class << self + # Make docker socket call for requests + def getResponse(request, isMultiJson, isVersion) + begin + socket = UNIXSocket.new(@@SocketPath) + dockerResponse = "" + isTimeOut = false + socket.write(request) + # iterate through the response until the last chunk is less than the chunk size so that we can read all data in socket. + loop do + begin + responseChunk = "" + timeout(@@TimeoutInSeconds) do + responseChunk = socket.recv(@@ChunkSize) end + dockerResponse += responseChunk + rescue Timeout::Error + $log.warn("Socket read timedout for request: #{request} @ #{Time.now.utc.iso8601}") + isTimeOut = true + break + end + break if (isVersion) ? (responseChunk.length < @@ChunkSize) : (responseChunk.end_with? "0\r\n\r\n") end + socket.close + return (isTimeOut) ? nil : parseResponse(dockerResponse, isMultiJson) + rescue => errorStr + $log.warn("Socket call failed for request: #{request} error: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end - def parseResponse(dockerResponse, isMultiJson) - # Doing this because the response is in the raw format and includes headers. - # Need to do a regex match to extract the json part of the response - Anything between [{}] in response - parsedJsonResponse = nil - begin - jsonResponse = isMultiJson ? 
dockerResponse[/\[{.+}\]/] : dockerResponse[/{.+}/] - rescue => errorStr - $log.warn("Regex match for docker response failed: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") - end - begin - if jsonResponse != nil - parsedJsonResponse = JSON.parse(jsonResponse) - end - rescue => errorStr - $log.warn("Json parsing for docker response failed: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - return parsedJsonResponse - end + def parseResponse(dockerResponse, isMultiJson) + # Doing this because the response is in the raw format and includes headers. + # Need to do a regex match to extract the json part of the response - Anything between [{}] in response + parsedJsonResponse = nil + begin + jsonResponse = isMultiJson ? dockerResponse[/\[{.+}\]/] : dockerResponse[/{.+}/] + rescue => errorStr + $log.warn("Regex match for docker response failed: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") + end + begin + if jsonResponse != nil + parsedJsonResponse = JSON.parse(jsonResponse) + end + rescue => errorStr + $log.warn("Json parsing for docker response failed: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + return parsedJsonResponse + end + def getDockerHostName() + dockerHostName = "" + request = DockerApiRestHelper.restDockerInfo + response = getResponse(request, false, false) + if (response != nil) + dockerHostName = response["Name"] + end + return dockerHostName + end - def getDockerHostName() - dockerHostName = "" - request = DockerApiRestHelper.restDockerInfo - response = getResponse(request, false, false) - if (response != nil) - dockerHostName = response['Name'] + def listContainers() + ids = [] + request = DockerApiRestHelper.restDockerPs + containers = getResponse(request, true, false) + if !containers.nil? && !containers.empty? 
+ containers.each do |container| + labels = (!container["Labels"].nil?) ? container["Labels"] : container["labels"] + if !labels.nil? + labelKeys = labels.keys + dockerTypeLabel = labelKeys.find { |k| "io.kubernetes.docker.type".downcase == k.downcase } + if !dockerTypeLabel.nil? + dockerTypeLabelValue = labels[dockerTypeLabel] + # Checking for 'io.kubernetes.docker.type' label for docker containers to exclude the pause-amd64 containers + if !(dockerTypeLabelValue.downcase == "podsandbox".downcase) + # Case insensitive lookup for pod uid label - This is to exclude containers created using docker run and only include containers that + # are created in the pods for ContainerInventory + keyValue = labelKeys.find { |k| "io.kubernetes.pod.uid".downcase == k.downcase } + if !labels[keyValue].nil? + ids.push(container["Id"]) + end + end end - return dockerHostName + end end + end + return ids + end - def listContainers() - ids = [] - request = DockerApiRestHelper.restDockerPs - containers = getResponse(request, true, false) - if !containers.nil? && !containers.empty? - containers.each do |container| - labels = (!container['Labels'].nil?)? container['Labels'] : container['labels'] - if !labels.nil? - labelKeys = labels.keys - dockerTypeLabel = labelKeys.find {|k| 'io.kubernetes.docker.type'.downcase == k.downcase} - if !dockerTypeLabel.nil? - dockerTypeLabelValue = labels[dockerTypeLabel] - # Checking for 'io.kubernetes.docker.type' label for docker containers to exclude the pause-amd64 containers - if !(dockerTypeLabelValue.downcase == "podsandbox".downcase) - # Case insensitive lookup for pod uid label - This is to exclude containers created using docker run and only include containers that - # are created in the pods for ContainerInventory - keyValue = labelKeys.find {|k| 'io.kubernetes.pod.uid'.downcase == k.downcase} - if !labels[keyValue].nil? 
- ids.push(container['Id']) - end - end - end - end - end - end - return ids + # This method splits the tag value into an array - repository, image, tag, repodigest-imageid + def getImageRepositoryImageTag(tagValue, digestValue) + result = ["", "", "", ""] + atLocation = nil + begin + if !digestValue.empty? + # digest is of the format - repo@sha256:imageid + atLocation = digestValue.index("@") + if !atLocation.nil? + result[3] = digestValue[(atLocation + 1)..-1] + end end - # This method splits the tag value into an array - repository, image and tag - def getImageRepositoryImageTag(tagValue) - result = ["", "", ""] - begin - if !tagValue.empty? - # Find delimiters in the string of format repository/image:imagetag - slashLocation = tagValue.index('/') - colonLocation = tagValue.index(':') - if !colonLocation.nil? - if slashLocation.nil? - # image:imagetag - result[1] = tagValue[0..(colonLocation-1)] - else - # repository/image:imagetag - result[0] = tagValue[0..(slashLocation-1)] - result[1] = tagValue[(slashLocation + 1)..(colonLocation - 1)] - end - result[2] = tagValue[(colonLocation + 1)..-1] - end - end - rescue => errorStr - $log.warn("Exception at getImageRepositoryImageTag: #{errorStr} @ #{Time.now.utc.iso8601}") + if !tagValue.empty? + # Find delimiters in the string of format repository/image:imagetag + slashLocation = tagValue.index("/") + colonLocation = tagValue.index(":") + if !colonLocation.nil? + if slashLocation.nil? + # image:imagetag + result[1] = tagValue[0..(colonLocation - 1)] + else + # repository/image:imagetag + result[0] = tagValue[0..(slashLocation - 1)] + result[1] = tagValue[(slashLocation + 1)..(colonLocation - 1)] end - return result + result[2] = tagValue[(colonLocation + 1)..-1] + end + elsif !digestValue.empty? + # Getting repo information from repodigests when repotags is empty + if !atLocation.nil? 
+ result[0] = digestValue[0..(atLocation - 1)] + end end + rescue => errorStr + $log.warn("Exception at getImageRepositoryImageTag: #{errorStr} @ #{Time.now.utc.iso8601}") + end + return result + end - # Image is in the format repository/image:imagetag - This method creates a hash of image id and repository, image and tag - def getImageIdMap() - result = nil - begin - request = DockerApiRestHelper.restDockerImages - images = getResponse(request, true, false) - if !images.nil? && !images.empty? - result = {} - images.each do |image| - tagValue = "" - tags = image['RepoTags'] - if !tags.nil? && tags.kind_of?(Array) && tags.length > 0 - tagValue = tags[0] - end - idValue = image['Id'] - if !idValue.nil? - result[idValue] = getImageRepositoryImageTag(tagValue) - end - end - end - rescue => errorStr - $log.warn("Exception at getImageIdMap: #{errorStr} @ #{Time.now.utc.iso8601}") + # Image is in the format repository/image:imagetag - This method creates a hash of image id and repository, image and tag + def getImageIdMap() + result = nil + begin + request = DockerApiRestHelper.restDockerImages + images = getResponse(request, true, false) + if !images.nil? && !images.empty? + result = {} + images.each do |image| + tagValue = "" + tags = image["RepoTags"] + if !tags.nil? && tags.kind_of?(Array) && tags.length > 0 + tagValue = tags[0] + end + digestValue = "" + digests = image["RepoDigests"] + if !digests.nil? && digests.kind_of?(Array) && digests.length > 0 + digestValue = digests[0] + end + idValue = image["Id"] + if !idValue.nil? 
+ result[idValue] = getImageRepositoryImageTag(tagValue, digestValue) end - return result + end end + rescue => errorStr + $log.warn("Exception at getImageIdMap: #{errorStr} @ #{Time.now.utc.iso8601}") + end + return result + end - def dockerInspectContainer(id) - request = DockerApiRestHelper.restDockerInspect(id) - return getResponse(request, false, false) - end + def dockerInspectContainer(id) + request = DockerApiRestHelper.restDockerInspect(id) + return getResponse(request, false, false) + end - # This method returns docker version and docker api version for telemetry - def dockerInfo() - request = DockerApiRestHelper.restDockerVersion - response = getResponse(request, false, true) - dockerInfo = {} - if (response != nil) - dockerInfo['Version'] = response['Version'] - dockerInfo['ApiVersion'] = response['ApiVersion'] - end - return dockerInfo - end + # This method returns docker version and docker api version for telemetry + def dockerInfo() + request = DockerApiRestHelper.restDockerVersion + response = getResponse(request, false, true) + dockerInfo = {} + if (response != nil) + dockerInfo["Version"] = response["Version"] + dockerInfo["ApiVersion"] = response["ApiVersion"] + end + return dockerInfo end + end end diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index 3c6b4f203..58a276cfd 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -57,7 +57,7 @@ def getKubeResourceInfo(resource) rescue => error @Log.warn("kubernetes api request failed: #{error} for #{resource} @ #{Time.now.utc.iso8601}") end - if (response.body.empty?) + if (!response.nil? && !response.body.nil? && response.body.empty?) 
@Log.warn("KubernetesAPIClient::getKubeResourceInfo : Got empty response from Kube API for #{resource} @ #{Time.now.utc.iso8601}") end return response diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb index 05e5bc9ea..4392de280 100644 --- a/source/code/plugin/in_containerinventory.rb +++ b/source/code/plugin/in_containerinventory.rb @@ -170,12 +170,13 @@ def inspectContainer(id, nameMap, clusterCollectEnvironmentVar) end imageValue = container["Image"] if !imageValue.nil? && !imageValue.empty? - containerInstance["ImageId"] = imageValue repoImageTagArray = nameMap[imageValue] if nameMap.has_key? imageValue containerInstance["Repository"] = repoImageTagArray[0] containerInstance["Image"] = repoImageTagArray[1] containerInstance["ImageTag"] = repoImageTagArray[2] + # Setting the image id to the id in the remote repository + containerInstance["ImageId"] = repoImageTagArray[3] end end obtainContainerConfig(containerInstance, container, clusterCollectEnvironmentVar) @@ -200,7 +201,7 @@ def enumerate if !containerIds.empty? eventStream = MultiEventStream.new nameMap = DockerApiClient.getImageIdMap - clusterCollectEnvironmentVar = ENV['AZMON_CLUSTER_COLLECT_ENV_VAR'] + clusterCollectEnvironmentVar = ENV["AZMON_CLUSTER_COLLECT_ENV_VAR"] if !clusterCollectEnvironmentVar.nil? && !clusterCollectEnvironmentVar.empty? 
&& clusterCollectEnvironmentVar.casecmp("false") == 0 $log.warn("Environment Variable collection disabled for cluster") end diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index 309dd8034..3a0e04c67 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -2,27 +2,25 @@ # frozen_string_literal: true module Fluent - class Kube_Event_Input < Input - Plugin.register_input('kubeevents', self) + Plugin.register_input("kubeevents", self) @@KubeEventsStateFile = "/var/opt/microsoft/docker-cimprov/state/KubeEventQueryState.yaml" def initialize super - require 'json' - - require_relative 'KubernetesApiClient' - require_relative 'oms_common' - require_relative 'omslog' - require_relative 'ApplicationInsightsUtility' + require "json" + require_relative "KubernetesApiClient" + require_relative "oms_common" + require_relative "omslog" + require_relative "ApplicationInsightsUtility" end - config_param :run_interval, :time, :default => '1m' + config_param :run_interval, :time, :default => "1m" config_param :tag, :string, :default => "oms.containerinsights.KubeEvents" - def configure (conf) + def configure(conf) super end @@ -46,63 +44,62 @@ def shutdown end def enumerate(eventList = nil) - currentTime = Time.now - emitTime = currentTime.to_f - batchTime = currentTime.utc.iso8601 - if eventList.nil? - $log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}") - events = JSON.parse(KubernetesApiClient.getKubeResourceInfo('events').body) - $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}") - else - events = eventList + currentTime = Time.now + emitTime = currentTime.to_f + batchTime = currentTime.utc.iso8601 + if eventList.nil? 
+ $log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}") + events = JSON.parse(KubernetesApiClient.getKubeResourceInfo("events").body) + $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}") + else + events = eventList + end + eventQueryState = getEventQueryState + newEventQueryState = [] + begin + if (!events.empty? && !events["items"].nil?) + eventStream = MultiEventStream.new + events["items"].each do |items| + record = {} + # - Not sure if ingestion has the below mapping for this custom type. Fix it as part of fixed type conversion + record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + eventId = items["metadata"]["uid"] + "/" + items["count"].to_s + newEventQueryState.push(eventId) + if !eventQueryState.empty? && eventQueryState.include?(eventId) + next + end + record["ObjectKind"] = items["involvedObject"]["kind"] + record["Namespace"] = items["involvedObject"]["namespace"] + record["Name"] = items["involvedObject"]["name"] + record["Reason"] = items["reason"] + record["Message"] = items["message"] + record["Type"] = items["type"] + record["TimeGenerated"] = items["metadata"]["creationTimestamp"] + record["SourceComponent"] = items["source"]["component"] + record["FirstSeen"] = items["firstTimestamp"] + record["LastSeen"] = items["lastTimestamp"] + record["Count"] = items["count"] + if items["source"].key?("host") + record["Computer"] = items["source"]["host"] + else + record["Computer"] = (OMS::Common.get_hostname) + end + record["ClusterName"] = KubernetesApiClient.getClusterName + record["ClusterId"] = KubernetesApiClient.getClusterId + wrapper = { + "DataType" => "KUBE_EVENTS_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [record.each { |k, v| record[k] = v }], + } + eventStream.add(emitTime, wrapper) if wrapper end - eventQueryState = getEventQueryState - newEventQueryState = [] - begin - if(!events.empty?) 
- eventStream = MultiEventStream.new - events['items'].each do |items| - record = {} - # - Not sure if ingestion has the below mapping for this custom type. Fix it as part of fixed type conversion - record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated - eventId = items['metadata']['uid'] + "/" + items['count'].to_s - newEventQueryState.push(eventId) - if !eventQueryState.empty? && eventQueryState.include?(eventId) - next - end - record['ObjectKind']= items['involvedObject']['kind'] - record['Namespace'] = items['involvedObject']['namespace'] - record['Name'] = items['involvedObject']['name'] - record['Reason'] = items['reason'] - record['Message'] = items['message'] - record['Type'] = items['type'] - record['TimeGenerated'] = items['metadata']['creationTimestamp'] - record['SourceComponent'] = items['source']['component'] - record['FirstSeen'] = items['firstTimestamp'] - record['LastSeen'] = items['lastTimestamp'] - record['Count'] = items['count'] - if items['source'].key?('host') - record['Computer'] = items['source']['host'] - else - record['Computer'] = (OMS::Common.get_hostname) - end - record['ClusterName'] = KubernetesApiClient.getClusterName - record['ClusterId'] = KubernetesApiClient.getClusterId - wrapper = { - "DataType"=>"KUBE_EVENTS_BLOB", - "IPName"=>"ContainerInsights", - "DataItems"=>[record.each{|k,v| record[k]=v}] - } - eventStream.add(emitTime, wrapper) if wrapper - end - router.emit_stream(@tag, eventStream) if eventStream - end - writeEventQueryState(newEventQueryState) - rescue => errorStr - $log.warn line.dump, error: errorStr.to_s - $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end + router.emit_stream(@tag, eventStream) if eventStream + end + writeEventQueryState(newEventQueryState) + rescue => errorStr + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end end def run_periodic @@ -135,7 
+132,7 @@ def getEventQueryState eventQueryState.push(line.chomp) #puts will append newline which needs to be removed end end - rescue => errorStr + rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) @@ -145,20 +142,17 @@ def getEventQueryState def writeEventQueryState(eventQueryState) begin - if(!eventQueryState.nil? && !eventQueryState.empty?) + if (!eventQueryState.nil? && !eventQueryState.empty?) # No need to close file handle (f) due to block scope File.open(@@KubeEventsStateFile, "w") do |f| f.puts(eventQueryState) end end - rescue => errorStr + rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end - end # Kube_Event_Input - end # module - diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index aabda441e..0310fa419 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -58,81 +58,83 @@ def enumerate if (!nodeInventory.empty?) eventStream = MultiEventStream.new containerNodeInventoryEventStream = MultiEventStream.new - #get node inventory - nodeInventory["items"].each do |items| - record = {} - # Sending records for ContainerNodeInventory - containerNodeInventoryRecord = {} - containerNodeInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated - containerNodeInventoryRecord["Computer"] = items["metadata"]["name"] + if !nodeInventory["items"].nil? 
+ #get node inventory + nodeInventory["items"].each do |items| + record = {} + # Sending records for ContainerNodeInventory + containerNodeInventoryRecord = {} + containerNodeInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + containerNodeInventoryRecord["Computer"] = items["metadata"]["name"] - record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated - record["Computer"] = items["metadata"]["name"] - record["ClusterName"] = KubernetesApiClient.getClusterName - record["ClusterId"] = KubernetesApiClient.getClusterId - record["CreationTimeStamp"] = items["metadata"]["creationTimestamp"] - record["Labels"] = [items["metadata"]["labels"]] - record["Status"] = "" + record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + record["Computer"] = items["metadata"]["name"] + record["ClusterName"] = KubernetesApiClient.getClusterName + record["ClusterId"] = KubernetesApiClient.getClusterId + record["CreationTimeStamp"] = items["metadata"]["creationTimestamp"] + record["Labels"] = [items["metadata"]["labels"]] + record["Status"] = "" - # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions. - # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we - # populate the KubeNodeInventory Status field. A possible value for this field could be "Ready OutofDisk" - # implying that the node is ready for hosting pods, however its out of disk. + # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions. + # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we + # populate the KubeNodeInventory Status field. A possible value for this field could be "Ready OutofDisk" + # implying that the node is ready for hosting pods, however its out of disk. 
- if items["status"].key?("conditions") && !items["status"]["conditions"].empty? - allNodeConditions = "" - items["status"]["conditions"].each do |condition| - if condition["status"] == "True" - if !allNodeConditions.empty? - allNodeConditions = allNodeConditions + "," + condition["type"] - else - allNodeConditions = condition["type"] + if items["status"].key?("conditions") && !items["status"]["conditions"].empty? + allNodeConditions = "" + items["status"]["conditions"].each do |condition| + if condition["status"] == "True" + if !allNodeConditions.empty? + allNodeConditions = allNodeConditions + "," + condition["type"] + else + allNodeConditions = condition["type"] + end + end + #collect last transition to/from ready (no matter ready is true/false) + if condition["type"] == "Ready" && !condition["lastTransitionTime"].nil? + record["LastTransitionTimeReady"] = condition["lastTransitionTime"] end end - #collect last transition to/from ready (no matter ready is true/false) - if condition["type"] == "Ready" && !condition["lastTransitionTime"].nil? - record["LastTransitionTimeReady"] = condition["lastTransitionTime"] + if !allNodeConditions.empty? + record["Status"] = allNodeConditions end end - if !allNodeConditions.empty? - record["Status"] = allNodeConditions - end - end - nodeInfo = items["status"]["nodeInfo"] - record["KubeletVersion"] = nodeInfo["kubeletVersion"] - record["KubeProxyVersion"] = nodeInfo["kubeProxyVersion"] - containerNodeInventoryRecord["OperatingSystem"] = nodeInfo["osImage"] - dockerVersion = nodeInfo["containerRuntimeVersion"] - dockerVersion.slice! "docker://" - containerNodeInventoryRecord["DockerVersion"] = dockerVersion - # ContainerNodeInventory data for docker version and operating system. 
- containerNodeInventoryWrapper = { - "DataType" => "CONTAINER_NODE_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }], - } - containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper + nodeInfo = items["status"]["nodeInfo"] + record["KubeletVersion"] = nodeInfo["kubeletVersion"] + record["KubeProxyVersion"] = nodeInfo["kubeProxyVersion"] + containerNodeInventoryRecord["OperatingSystem"] = nodeInfo["osImage"] + dockerVersion = nodeInfo["containerRuntimeVersion"] + dockerVersion.slice! "docker://" + containerNodeInventoryRecord["DockerVersion"] = dockerVersion + # ContainerNodeInventory data for docker version and operating system. + containerNodeInventoryWrapper = { + "DataType" => "CONTAINER_NODE_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }], + } + containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper - wrapper = { - "DataType" => "KUBE_NODE_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [record.each { |k, v| record[k] = v }], - } - eventStream.add(emitTime, wrapper) if wrapper - # Adding telemetry to send node telemetry every 5 minutes - timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference / 60 - if (timeDifferenceInMinutes >= 5) - properties = {} - properties["Computer"] = record["Computer"] - properties["KubeletVersion"] = record["KubeletVersion"] - properties["OperatingSystem"] = nodeInfo["operatingSystem"] - properties["DockerVersion"] = dockerVersion - capacityInfo = items["status"]["capacity"] - ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties) - ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", 
capacityInfo["memory"], properties) - telemetrySent = true + wrapper = { + "DataType" => "KUBE_NODE_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [record.each { |k, v| record[k] = v }], + } + eventStream.add(emitTime, wrapper) if wrapper + # Adding telemetry to send node telemetry every 5 minutes + timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= 5) + properties = {} + properties["Computer"] = record["Computer"] + properties["KubeletVersion"] = record["KubeletVersion"] + properties["OperatingSystem"] = nodeInfo["operatingSystem"] + properties["DockerVersion"] = dockerVersion + capacityInfo = items["status"]["capacity"] + ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties) + ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties) + telemetrySent = true + end end end router.emit_stream(@tag, eventStream) if eventStream diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 79490ba7d..d0056fb14 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -152,8 +152,10 @@ def getContainerEnvironmentVariables(pod, clusterCollectEnvironmentVar) containerEnvArray.each do |envVarHash| envName = envVarHash["name"] envValue = envVarHash["value"] - envArrayElement = envName + "=" + envValue - envVarsArray.push(envArrayElement) + if !envName.nil? && !envValue.nil? + envArrayElement = envName + "=" + envValue + envVarsArray.push(envArrayElement) + end end end # Skip environment variable processing if it contains the flag AZMON_COLLECT_ENV=FALSE @@ -201,7 +203,11 @@ def parse_and_emit_records(podInventory, serviceList) # instead of the actual poduid. Since this uid is not being surface into the UX # its ok to use this. 
# Use kubernetes.io/config.hash to be able to correlate with cadvisor data - podUid = items["metadata"]["annotations"]["kubernetes.io/config.hash"] + if items["metadata"]["annotations"].nil? + next + else + podUid = items["metadata"]["annotations"]["kubernetes.io/config.hash"] + end else podUid = items["metadata"]["uid"] end @@ -287,7 +293,11 @@ def parse_and_emit_records(podInventory, serviceList) record["ContainerID"] = "" end #keeping this as which is same as InstanceName in perf table - record["ContainerName"] = podUid + "/" + container["name"] + if podUid.nil? || container["name"].nil? + next + else + record["ContainerName"] = podUid + "/" + container["name"] + end #Pod restart count is a sumtotal of restart counts of individual containers #within the pod. The restart count of a container is maintained by kubernetes #itself in the form of a container label. diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb index e1bb93f30..8b0a013e4 100644 --- a/source/code/plugin/in_kube_services.rb +++ b/source/code/plugin/in_kube_services.rb @@ -2,108 +2,101 @@ # frozen_string_literal: true module Fluent - - class Kube_Services_Input < Input - Plugin.register_input('kubeservices', self) - - def initialize - super - require 'yaml' - require 'json' - - require_relative 'KubernetesApiClient' - require_relative 'oms_common' - require_relative 'omslog' - require_relative 'ApplicationInsightsUtility' + class Kube_Services_Input < Input + Plugin.register_input("kubeservices", self) - end - - config_param :run_interval, :time, :default => '1m' - config_param :tag, :string, :default => "oms.containerinsights.KubeServices" - - def configure (conf) - super - end - - def start - if @run_interval - @finished = false - @condition = ConditionVariable.new - @mutex = Mutex.new - @thread = Thread.new(&method(:run_periodic)) - end - end - - def shutdown - if @run_interval - @mutex.synchronize { - @finished = true - @condition.signal + def initialize 
+ super + require "yaml" + require "json" + + require_relative "KubernetesApiClient" + require_relative "oms_common" + require_relative "omslog" + require_relative "ApplicationInsightsUtility" + end + + config_param :run_interval, :time, :default => "1m" + config_param :tag, :string, :default => "oms.containerinsights.KubeServices" + + def configure(conf) + super + end + + def start + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) + end + end + + def shutdown + if @run_interval + @mutex.synchronize { + @finished = true + @condition.signal + } + @thread.join + end + end + + def enumerate + currentTime = Time.now + emitTime = currentTime.to_f + batchTime = currentTime.utc.iso8601 + $log.info("in_kube_services::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") + serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo("services").body) + $log.info("in_kube_services::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}") + begin + if (!serviceList.empty?) 
+ eventStream = MultiEventStream.new + serviceList["items"].each do |items| + record = {} + record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + record["ServiceName"] = items["metadata"]["name"] + record["Namespace"] = items["metadata"]["namespace"] + record["SelectorLabels"] = [items["spec"]["selector"]] + record["ClusterId"] = KubernetesApiClient.getClusterId + record["ClusterName"] = KubernetesApiClient.getClusterName + record["ClusterIP"] = items["spec"]["clusterIP"] + record["ServiceType"] = items["spec"]["type"] + # : Add ports and status fields + wrapper = { + "DataType" => "KUBE_SERVICES_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [record.each { |k, v| record[k] = v }], } - @thread.join + eventStream.add(emitTime, wrapper) if wrapper end + router.emit_stream(@tag, eventStream) if eventStream end - - def enumerate - currentTime = Time.now - emitTime = currentTime.to_f - batchTime = currentTime.utc.iso8601 - $log.info("in_kube_services::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") - serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo('services').body) - $log.info("in_kube_services::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}") - begin - if(!serviceList.empty?) 
- eventStream = MultiEventStream.new - serviceList['items'].each do |items| - record = {} - record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated - record['ServiceName'] = items['metadata']['name'] - record['Namespace'] = items['metadata']['namespace'] - record['SelectorLabels'] = [items['spec']['selector']] - record['ClusterId'] = KubernetesApiClient.getClusterId - record['ClusterName'] = KubernetesApiClient.getClusterName - record['ClusterIP'] = items['spec']['clusterIP'] - record['ServiceType'] = items['spec']['type'] - # : Add ports and status fields - wrapper = { - "DataType"=>"KUBE_SERVICES_BLOB", - "IPName"=>"ContainerInsights", - "DataItems"=>[record.each{|k,v| record[k]=v}] - } - eventStream.add(emitTime, wrapper) if wrapper - end - router.emit_stream(@tag, eventStream) if eventStream - end - rescue => errorStr - $log.warn line.dump, error: errorStr.to_s - $log.debug_backtrace(e.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - end - - def run_periodic - @mutex.lock - done = @finished - until done - @condition.wait(@mutex, @run_interval) - done = @finished - @mutex.unlock - if !done - begin - $log.info("in_kube_services::run_periodic @ #{Time.now.utc.iso8601}") - enumerate - rescue => errorStr - $log.warn "in_kube_services::run_periodic: enumerate Failed to kube services: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - end - @mutex.lock + rescue => errorStr + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def run_periodic + @mutex.lock + done = @finished + until done + @condition.wait(@mutex, @run_interval) + done = @finished + @mutex.unlock + if !done + begin + $log.info("in_kube_services::run_periodic @ #{Time.now.utc.iso8601}") + enumerate + rescue => errorStr + $log.warn "in_kube_services::run_periodic: enumerate Failed to kube services: #{errorStr}" + 
ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end - @mutex.unlock end - - end # Kube_Services_Input - - end # module - - \ No newline at end of file + @mutex.lock + end + @mutex.unlock + end + end # Kube_Services_Input +end # module From 4b8708b13c20060794a3ed47262e2383ac56a7f9 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Wed, 10 Jul 2019 10:44:43 -0700 Subject: [PATCH 105/160] Fix Region space error (#239) * Trim spaces in AKS_REGION This is not an issue for normal AKS Monitoring Addon Onboarding. ONLY an issue for backdoor onboarding * Fix out_mdm parsing error --- source/code/plugin/out_mdm.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index a81da0fbc..69ef25580 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -69,7 +69,7 @@ def start aks_region = aks_region.gsub(" ","") - @@post_request_url = @@post_request_url_template % {aks_region: aks_region), aks_resource_id: aks_resource_id} + @@post_request_url = @@post_request_url_template % {aks_region: aks_region, aks_resource_id: aks_resource_id} @post_request_uri = URI.parse(@@post_request_url) @http_client = Net::HTTP.new(@post_request_uri.host, @post_request_uri.port) @http_client.use_ssl = true From 1cd9eee6027fb6c2f131336800caa595f4bbedf0 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Wed, 10 Jul 2019 13:45:04 -0700 Subject: [PATCH 106/160] Removing buffer chunk size and buffer max size from fluentbit conf (#240) * hard code config for UST CCP team * fix config * fix config after discussion * fix error log to get errros * fix config * update config * Add telemetry * Rashmi/promcustomconfig (#231) * changes * formatting changes * changes * changes * changes * changes * changes * changes * changes * changes * adding telemetry * changes * changes * changes * changes * changes * changes * changes * cahnges * changes * Rashmi/promcustomconfig (#236) * changes * 
changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * fix exceptions * changes to remove some exceptions * exception fixes * changes * changes for poduid nil check * removing buffer chunk size and buffer max size from fluentbit conf --- installer/conf/td-agent-bit.conf | 2 -- 1 file changed, 2 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index e7aabd242..ab79710c7 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -12,8 +12,6 @@ DB.Sync Off Parser docker Mem_Buf_Limit 10m - Buffer_Chunk_Size 1m - Buffer_Max_Size 1m Rotate_Wait 20 Refresh_Interval 30 Path_Key filepath From 788ab8bfb5eede90578ad1655883692cf211b349 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Wed, 10 Jul 2019 17:44:53 -0700 Subject: [PATCH 107/160] changes (#243) --- source/code/plugin/KubernetesApiClient.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index 58a276cfd..4cbf8bb40 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -333,7 +333,11 @@ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName # instead of the actual poduid. Since this uid is not being surface into the UX # its ok to use this. # Use kubernetes.io/config.hash to be able to correlate with cadvisor data - podUid = pod["metadata"]["annotations"]["kubernetes.io/config.hash"] + if pod["metadata"]["annotations"].nil? 
+ next + else + podUid = pod["metadata"]["annotations"]["kubernetes.io/config.hash"] + end else podUid = pod["metadata"]["uid"] end From 5ee482b09dfd7311ce2e3f164788d6b13919fe8a Mon Sep 17 00:00:00 2001 From: David Michelman Date: Mon, 15 Jul 2019 11:01:54 -0700 Subject: [PATCH 108/160] Collect container last state (#235) * updating the OMS agent to also collect container last state * changed a comment * git surrounded ContainerLastStatus code in a begin/rescue block * added a lot of error checking and logging --- source/code/plugin/in_kube_podinventory.rb | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index d0056fb14..9991c13e3 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -327,6 +327,38 @@ def parse_and_emit_records(podInventory, serviceList) record["ContainerStatusReason"] = containerStatus[containerStatus.keys[0]]["reason"] end end + + # Record the last state of the container. This may have information on why a container was killed. + begin + if !container["lastState"].nil? && container["lastState"].keys.length == 1 + lastStateName = container["lastState"].keys[0] + lastStateObject = container["lastState"][lastStateName] + if !lastStateObject.is_a?(Hash) + raise "expected a hash object. 
This could signify a bug or a kubernetes API change" + end + + if lastStateObject.key?("reason") && lastStateObject.key?("startedAt") && lastStateObject.key?("finishedAt") + newRecord = Hash.new + newRecord["lastState"] = lastStateName # get the name of the last state (ex: terminated) + newRecord["reason"] = lastStateObject["reason"] # (ex: OOMKilled) + newRecord["startedAt"] = lastStateObject["startedAt"] # (ex: 2019-07-02T14:58:51Z) + newRecord["finishedAt"] = lastStateObject["finishedAt"] # (ex: 2019-07-02T14:58:52Z) + + # only write to the output field if everything previously ran without error + record["ContainerLastStatus"] = newRecord + else + record["ContainerLastStatus"] = Hash.new + end + else + record["ContainerLastStatus"] = Hash.new + end + rescue => errorStr + $log.warn "Failed in parse_and_emit_record pod inventory while processing ContainerLastStatus: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + record["ContainerLastStatus"] = Hash.new + end + podRestartCount += containerRestartCount records.push(record.dup) From 378cc93a1307227cd154f08d6dabe7f6e6bec9fd Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Mon, 12 Aug 2019 11:36:09 -0700 Subject: [PATCH 109/160] Rashmi/fix prom telemetry (#247) * fix prom telemetry * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes --- installer/conf/td-agent-bit.conf | 5 +- installer/datafiles/base_container.data | 1 + .../scripts/td-agent-bit-conf-customizer.rb | 47 +++++++++++++++++++ .../code/plugin/CAdvisorMetricsAPIClient.rb | 34 ++++---------- source/code/plugin/KubernetesApiClient.rb | 6 +++ source/code/plugin/in_kube_nodes.rb | 23 ++++++++- source/code/plugin/in_kube_podinventory.rb | 15 ++++-- 7 files changed, 101 insertions(+), 30 deletions(-) create mode 100644 installer/scripts/td-agent-bit-conf-customizer.rb diff --git 
a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index ab79710c7..4e3de6c46 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -1,5 +1,6 @@ [SERVICE] - Flush 15 + #Default service flush interval is 15 seconds + ${SERVICE_FLUSH_INTERVAL} Log_Level info Parsers_File /etc/td-agent-bit/parsers.conf Log_File /var/opt/microsoft/docker-cimprov/log/fluent-bit.log @@ -12,6 +13,8 @@ DB.Sync Off Parser docker Mem_Buf_Limit 10m + ${TAIL_BUFFER_CHUNK_SIZE} + ${TAIL_BUFFER_MAX_SIZE} Rotate_Wait 20 Refresh_Interval 30 Path_Key filepath diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index fe1635335..62a6f6885 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -114,6 +114,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/livenessprobe.sh; installer/scripts/livenessprobe.sh; 755; root; root /opt/tomlparser.rb; installer/scripts/tomlparser.rb; 755; root; root /opt/tomlparser-prom-customconfig.rb; installer/scripts/tomlparser-prom-customconfig.rb; 755; root; root +/opt/td-agent-bit-conf-customizer.rb; installer/scripts/td-agent-bit-conf-customizer.rb; 755; root; root %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root diff --git a/installer/scripts/td-agent-bit-conf-customizer.rb b/installer/scripts/td-agent-bit-conf-customizer.rb new file mode 100644 index 000000000..1e62e3cc2 --- /dev/null +++ b/installer/scripts/td-agent-bit-conf-customizer.rb @@ -0,0 +1,47 @@ +#!/usr/local/bin/ruby + +@td_agent_bit_conf_path = "/etc/opt/microsoft/docker-cimprov/td-agent-bit.conf" + +@default_service_interval = "15" + +def is_number?(value) + true if Integer(value) rescue false +end + +def substituteFluentBitPlaceHolders + begin + # Replace the fluentbit config file with custom values if present + puts "config::Starting to substitute the placeholders in td-agent-bit.conf 
file for log collection" + + interval = ENV["FBIT_SERVICE_FLUSH_INTERVAL"] + bufferChunkSize = ENV["FBIT_TAIL_BUFFER_CHUNK_SIZE"] + bufferMaxSize = ENV["FBIT_TAIL_BUFFER_MAX_SIZE"] + + serviceInterval = (!interval.nil? && is_number?(interval)) ? interval : @default_service_interval + serviceIntervalSetting = "Flush " + serviceInterval + + tailBufferChunkSize = (!bufferChunkSize.nil? && is_number?(bufferChunkSize)) ? bufferChunkSize : nil + + tailBufferMaxSize = (!bufferMaxSize.nil? && is_number?(bufferMaxSize)) ? bufferMaxSize : nil + + text = File.read(@td_agent_bit_conf_path) + new_contents = text.gsub("${SERVICE_FLUSH_INTERVAL}", serviceIntervalSetting) + if !tailBufferChunkSize.nil? + new_contents = new_contents.gsub("${TAIL_BUFFER_CHUNK_SIZE}", "Buffer_Chunk_Size " + tailBufferChunkSize + "m") + else + new_contents = new_contents.gsub("\n ${TAIL_BUFFER_CHUNK_SIZE}\n", "\n") + end + if !tailBufferMaxSize.nil? + new_contents = new_contents.gsub("${TAIL_BUFFER_MAX_SIZE}", "Buffer_Max_Size " + tailBufferMaxSize + "m") + else + new_contents = new_contents.gsub("\n ${TAIL_BUFFER_MAX_SIZE}\n", "\n") + end + + File.open(@td_agent_bit_conf_path, "w") { |file| file.puts new_contents } + puts "config::Successfully substituted the placeholders in td-agent-bit.conf file" + rescue => errorStr + puts "td-agent-bit-config-customizer: error while substituting values: #{errorStr}" + end +end + +substituteFluentBitPlaceHolders diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index ec38bcbb5..09499b4cf 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -22,23 +22,11 @@ class CAdvisorMetricsAPIClient @clusterLogTailPath = ENV["AZMON_LOG_TAIL_PATH"] @clusterAgentSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] - @rsPromInterval = ENV["TELEMETRY_RS_PROM_INTERVAL"] @dsPromInterval = ENV["TELEMETRY_DS_PROM_INTERVAL"] - - @rsPromFieldPassCount = 
ENV["TELEMETRY_RS_PROM_FIELDPASS_LENGTH"] @dsPromFieldPassCount = ENV["TELEMETRY_DS_PROM_FIELDPASS_LENGTH"] - - @rsPromFieldDropCount = ENV["TELEMETRY_RS_PROM_FIELDDROP_LENGTH"] @dsPromFieldDropCount = ENV["TELEMETRY_DS_PROM_FIELDDROP_LENGTH"] - - @rsPromK8sServiceCount = ENV["TELEMETRY_RS_PROM_K8S_SERVICES_LENGTH"] - - @rsPromUrlCount = ENV["TELEMETRY_RS_PROM_URLS_LENGTH"] @dsPromUrlCount = ENV["TELEMETRY_DS_PROM_URLS_LENGTH"] - @rsPromMonitorPods = ENV["TELEMETRY_RS_PROM_MONITOR_PODS"] - - @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt" @Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M # @@rxBytesLast = nil @@ -118,17 +106,21 @@ def getCAdvisorUri(winNode) def getMetrics(winNode = nil) metricDataItems = [] begin + cAdvisorStats = getSummaryStatsFromCAdvisor(winNode) + if !cAdvisorStats.nil? + metricInfo = JSON.parse(cAdvisorStats.body) + end if !winNode.nil? hostName = winNode["Hostname"] operatingSystem = "Windows" else - hostName = (OMS::Common.get_hostname) + if !metricInfo.nil? && !metricInfo["node"].nil? && !metricInfo["node"]["nodeName"].nil? + hostName = metricInfo["node"]["nodeName"] + else + hostName = (OMS::Common.get_hostname) + end operatingSystem = "Linux" end - cAdvisorStats = getSummaryStatsFromCAdvisor(winNode) - if !cAdvisorStats.nil? - metricInfo = JSON.parse(cAdvisorStats.body) - end if !metricInfo.nil? 
metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "workingSetBytes", "memoryWorkingSetBytes")) metricDataItems.concat(getContainerStartTimeMetricItems(metricInfo, hostName, "restartTimeEpoch")) @@ -228,18 +220,12 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met telemetryProps["clusterLogTailPath"] = @clusterLogTailPath telemetryProps["clusterAgentSchemaVersion"] = @clusterAgentSchemaVersion end - #telemetry about prometheus metric collections settings + #telemetry about prometheus metric collections settings for daemonset if (File.file?(@promConfigMountPath)) - telemetryProps["rsPromInt"] = @rsPromInterval telemetryProps["dsPromInt"] = @dsPromInterval - telemetryProps["rsPromFPC"] = @rsPromFieldPassCount telemetryProps["dsPromFPC"] = @dsPromFieldPassCount - telemetryProps["rsPromFDC"] = @rsPromFieldDropCount telemetryProps["dsPromFDC"] = @dsPromFieldDropCount - telemetryProps["rsPromServ"] = @rsPromK8sServiceCount - telemetryProps["rsPromUrl"] = @rsPromUrlCount telemetryProps["dsPromUrl"] = @dsPromUrlCount - telemetryProps["rsPromMonPods"] = @rsPromMonitorPods end ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) end diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index 4cbf8bb40..61cbaea00 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -355,6 +355,8 @@ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName metricProps = {} metricProps["Timestamp"] = metricTime metricProps["Host"] = nodeName + # Adding this so that it is not set by base omsagent since it was not set earlier and being set by base omsagent + metricProps["Computer"] = nodeName metricProps["ObjectName"] = "K8SContainer" metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName @@ -378,6 +380,8 @@ def 
getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName metricProps = {} metricProps["Timestamp"] = metricTime metricProps["Host"] = nodeName + # Adding this so that it is not set by base omsagent since it was not set earlier and being set by base omsagent + metricProps["Computer"] = nodeName metricProps["ObjectName"] = "K8SContainer" metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName @@ -420,6 +424,8 @@ def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNamet metricProps = {} metricProps["Timestamp"] = metricTime metricProps["Host"] = node["metadata"]["name"] + # Adding this so that it is not set by base omsagent since it was not set earlier and being set by base omsagent + metricProps["Computer"] = node["metadata"]["name"] metricProps["ObjectName"] = "K8SNode" metricProps["InstanceName"] = clusterId + "/" + node["metadata"]["name"] metricProps["Collections"] = [] diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 0310fa419..24ab51d4c 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -7,6 +7,14 @@ class Kube_nodeInventory_Input < Input @@ContainerNodeInventoryTag = "oms.containerinsights.ContainerNodeInventory" @@MDMKubeNodeInventoryTag = "mdm.kubenodeinventory" + @@promConfigMountPath = "/etc/config/settings/prometheus-data-collection-settings" + + @@rsPromInterval = ENV["TELEMETRY_RS_PROM_INTERVAL"] + @@rsPromFieldPassCount = ENV["TELEMETRY_RS_PROM_FIELDPASS_LENGTH"] + @@rsPromFieldDropCount = ENV["TELEMETRY_RS_PROM_FIELDDROP_LENGTH"] + @@rsPromK8sServiceCount = ENV["TELEMETRY_RS_PROM_K8S_SERVICES_LENGTH"] + @@rsPromUrlCount = ENV["TELEMETRY_RS_PROM_URLS_LENGTH"] + @@rsPromMonitorPods = ENV["TELEMETRY_RS_PROM_MONITOR_PODS"] def initialize super @@ -124,15 +132,26 @@ def enumerate # Adding telemetry to send node telemetry every 5 minutes timeDifference = (DateTime.now.to_time.to_i - 
@@nodeTelemetryTimeTracker).abs timeDifferenceInMinutes = timeDifference / 60 - if (timeDifferenceInMinutes >= 5) + if (timeDifferenceInMinutes >= 10) properties = {} properties["Computer"] = record["Computer"] properties["KubeletVersion"] = record["KubeletVersion"] properties["OperatingSystem"] = nodeInfo["operatingSystem"] properties["DockerVersion"] = dockerVersion + capacityInfo = items["status"]["capacity"] - ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties) ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties) + + #telemetry about prometheus metric collections settings for replicaset + if (File.file?(@@promConfigMountPath)) + properties["rsPromInt"] = @@rsPromInterval + properties["rsPromFPC"] = @@rsPromFieldPassCount + properties["rsPromFDC"] = @@rsPromFieldDropCount + properties["rsPromServ"] = @@rsPromK8sServiceCount + properties["rsPromUrl"] = @@rsPromUrlCount + properties["rsPromMonPods"] = @@rsPromMonitorPods + end + ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties) telemetrySent = true end end diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 9991c13e3..f41ce9095 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -182,6 +182,7 @@ def parse_and_emit_records(podInventory, serviceList) batchTime = currentTime.utc.iso8601 eventStream = MultiEventStream.new controllerSet = Set.new [] + controllerData = {} telemetryFlush = false winContainerCount = 0 begin #begin block start @@ -277,6 +278,13 @@ def parse_and_emit_records(podInventory, serviceList) record["ControllerName"] = items["metadata"]["ownerReferences"][0]["name"] if telemetryFlush == true controllerSet.add(record["ControllerKind"] + record["ControllerName"]) + #Adding controller kind to telemetry to get information about customer workload + if 
(controllerData[record["ControllerKind"]].nil?) + controllerData[record["ControllerKind"]] = 1 + else + controllerValue = controllerData[record["ControllerKind"]] + controllerData[record["ControllerKind"]] += 1 + end end end podRestartCount = 0 @@ -329,7 +337,7 @@ def parse_and_emit_records(podInventory, serviceList) end # Record the last state of the container. This may have information on why a container was killed. - begin + begin if !container["lastState"].nil? && container["lastState"].keys.length == 1 lastStateName = container["lastState"].keys[0] lastStateObject = container["lastState"][lastStateName] @@ -338,7 +346,7 @@ def parse_and_emit_records(podInventory, serviceList) end if lastStateObject.key?("reason") && lastStateObject.key?("startedAt") && lastStateObject.key?("finishedAt") - newRecord = Hash.new + newRecord = Hash.new newRecord["lastState"] = lastStateName # get the name of the last state (ex: terminated) newRecord["reason"] = lastStateObject["reason"] # (ex: OOMKilled) newRecord["startedAt"] = lastStateObject["startedAt"] # (ex: 2019-07-02T14:58:51Z) @@ -403,7 +411,8 @@ def parse_and_emit_records(podInventory, serviceList) telemetryProperties["Computer"] = @@hostName ApplicationInsightsUtility.sendCustomEvent("KubePodInventoryHeartBeatEvent", telemetryProperties) ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory["items"].length, {}) - ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length, {}) + telemetryProperties["ControllerData"] = controllerData.to_json + ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length, telemetryProperties) if winContainerCount > 0 telemetryProperties["ClusterWideWindowsContainersCount"] = winContainerCount ApplicationInsightsUtility.sendCustomEvent("WindowsContainerInventoryEvent", telemetryProperties) From df60197b920e4b2641ad2746dc521fe0e643966b Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Wed, 14 Aug 2019 16:55:59 -0700 
Subject: [PATCH 110/160] Merge Health Model work into ci_feature behind a feature flag Pending perf testing (#246) Merge Health to ci_feature --- Rakefile | 9 + build/Makefile | 13 +- installer/conf/container.conf | 33 +- installer/conf/health_model_definition.json | 248 ++++++++++++ installer/conf/healthmonitorconfig.json | 31 ++ installer/conf/kube.conf | 36 +- installer/datafiles/base_container.data | 55 ++- installer/scripts/tomlparser.rb | 40 +- source/code/plugin/KubernetesApiClient.rb | 25 +- .../filter_cadvisor_health_container.rb | 263 +++++++++++++ .../plugin/filter_cadvisor_health_node.rb | 267 +++++++++++++ .../plugin/filter_health_model_builder.rb | 233 +++++++++++ .../plugin/health/agg_monitor_id_labels.rb | 26 ++ .../code/plugin/health/aggregate_monitor.rb | 193 +++++++++ .../aggregate_monitor_state_finalizer.rb | 33 ++ .../plugin/health/cluster_health_state.rb | 115 ++++++ .../plugin/health/health_hierarchy_builder.rb | 76 ++++ .../health/health_kube_api_down_handler.rb | 27 ++ .../health/health_kubernetes_resources.rb | 102 +++++ .../health/health_missing_signal_generator.rb | 142 +++++++ .../code/plugin/health/health_model_buffer.rb | 29 ++ .../plugin/health/health_model_builder.rb | 37 ++ .../plugin/health/health_model_constants.rb | 81 ++++ .../health/health_model_definition_parser.rb | 50 +++ .../plugin/health/health_monitor_helpers.rb | 36 ++ .../plugin/health/health_monitor_optimizer.rb | 52 +++ .../plugin/health/health_monitor_provider.rb | 123 ++++++ .../plugin/health/health_monitor_record.rb | 10 + .../plugin/health/health_monitor_state.rb | 214 ++++++++++ .../plugin/health/health_monitor_utils.rb | 369 ++++++++++++++++++ .../plugin/health/health_signal_reducer.rb | 51 +++ source/code/plugin/health/monitor_factory.rb | 28 ++ source/code/plugin/health/monitor_set.rb | 44 +++ .../health/node_monitor_hierarchy_reducer.rb | 33 ++ .../plugin/health/parent_monitor_provider.rb | 86 ++++ source/code/plugin/health/unit_monitor.rb | 26 ++ 
source/code/plugin/in_cadvisor_perf.rb | 10 +- source/code/plugin/in_kube_events.rb | 6 +- source/code/plugin/in_kube_health.rb | 307 +++++++++++++++ .../filter_health_model_builder_test.rb | 54 +++ .../plugin/health/aggregate_monitor_spec.rb | 256 ++++++++++++ .../aggregate_monitor_state_finalizer_spec.rb | 59 +++ test/code/plugin/health/ca.crt | 1 + .../health/cluster_health_state_spec.rb | 37 ++ .../health/health_hierarchy_builder_spec.rb | 11 + .../health/health_kubernetes_resource_spec.rb | 222 +++++++++++ .../health_missing_signal_generator_spec.rb | 79 ++++ .../plugin/health/health_model_buffer_spec.rb | 25 ++ .../health/health_model_builder_spec.rb | 37 ++ .../health/health_model_builder_test.rb | 337 ++++++++++++++++ .../health_model_definition_parser_spec.rb | 24 ++ .../health/health_monitor_state_spec.rb | 176 +++++++++ .../health/health_signal_reducer_spec.rb | 96 +++++ .../health/kube_api_down_handler_spec.rb | 26 ++ .../plugin/health/monitor_factory_spec.rb | 28 ++ test/code/plugin/health/monitor_set_spec.rb | 58 +++ .../health/parent_monitor_provider_spec.rb | 144 +++++++ .../health/test_health_model_definition.json | 42 ++ test/code/plugin/health/unit_monitor_spec.rb | 20 + test/code/plugin/health/unit_monitor_test.rb | 16 + test/code/plugin/test_helpers.rb | 3 + 61 files changed, 5278 insertions(+), 32 deletions(-) create mode 100644 Rakefile create mode 100644 installer/conf/health_model_definition.json create mode 100644 installer/conf/healthmonitorconfig.json create mode 100644 source/code/plugin/filter_cadvisor_health_container.rb create mode 100644 source/code/plugin/filter_cadvisor_health_node.rb create mode 100644 source/code/plugin/filter_health_model_builder.rb create mode 100644 source/code/plugin/health/agg_monitor_id_labels.rb create mode 100644 source/code/plugin/health/aggregate_monitor.rb create mode 100644 source/code/plugin/health/aggregate_monitor_state_finalizer.rb create mode 100644 
source/code/plugin/health/cluster_health_state.rb create mode 100644 source/code/plugin/health/health_hierarchy_builder.rb create mode 100644 source/code/plugin/health/health_kube_api_down_handler.rb create mode 100644 source/code/plugin/health/health_kubernetes_resources.rb create mode 100644 source/code/plugin/health/health_missing_signal_generator.rb create mode 100644 source/code/plugin/health/health_model_buffer.rb create mode 100644 source/code/plugin/health/health_model_builder.rb create mode 100644 source/code/plugin/health/health_model_constants.rb create mode 100644 source/code/plugin/health/health_model_definition_parser.rb create mode 100644 source/code/plugin/health/health_monitor_helpers.rb create mode 100644 source/code/plugin/health/health_monitor_optimizer.rb create mode 100644 source/code/plugin/health/health_monitor_provider.rb create mode 100644 source/code/plugin/health/health_monitor_record.rb create mode 100644 source/code/plugin/health/health_monitor_state.rb create mode 100644 source/code/plugin/health/health_monitor_utils.rb create mode 100644 source/code/plugin/health/health_signal_reducer.rb create mode 100644 source/code/plugin/health/monitor_factory.rb create mode 100644 source/code/plugin/health/monitor_set.rb create mode 100644 source/code/plugin/health/node_monitor_hierarchy_reducer.rb create mode 100644 source/code/plugin/health/parent_monitor_provider.rb create mode 100644 source/code/plugin/health/unit_monitor.rb create mode 100644 source/code/plugin/in_kube_health.rb create mode 100644 test/code/plugin/filter_health_model_builder_test.rb create mode 100644 test/code/plugin/health/aggregate_monitor_spec.rb create mode 100644 test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb create mode 100644 test/code/plugin/health/ca.crt create mode 100644 test/code/plugin/health/cluster_health_state_spec.rb create mode 100644 test/code/plugin/health/health_hierarchy_builder_spec.rb create mode 100644 
test/code/plugin/health/health_kubernetes_resource_spec.rb create mode 100644 test/code/plugin/health/health_missing_signal_generator_spec.rb create mode 100644 test/code/plugin/health/health_model_buffer_spec.rb create mode 100644 test/code/plugin/health/health_model_builder_spec.rb create mode 100644 test/code/plugin/health/health_model_builder_test.rb create mode 100644 test/code/plugin/health/health_model_definition_parser_spec.rb create mode 100644 test/code/plugin/health/health_monitor_state_spec.rb create mode 100644 test/code/plugin/health/health_signal_reducer_spec.rb create mode 100644 test/code/plugin/health/kube_api_down_handler_spec.rb create mode 100644 test/code/plugin/health/monitor_factory_spec.rb create mode 100644 test/code/plugin/health/monitor_set_spec.rb create mode 100644 test/code/plugin/health/parent_monitor_provider_spec.rb create mode 100644 test/code/plugin/health/test_health_model_definition.json create mode 100644 test/code/plugin/health/unit_monitor_spec.rb create mode 100644 test/code/plugin/health/unit_monitor_test.rb create mode 100644 test/code/plugin/test_helpers.rb diff --git a/Rakefile b/Rakefile new file mode 100644 index 000000000..3733e71a3 --- /dev/null +++ b/Rakefile @@ -0,0 +1,9 @@ +require 'rake/testtask' + +task default: "test" + +Rake::TestTask.new do |task| + task.libs << "test" + task.pattern = './test/code/plugin/health/*_spec.rb' + task.warning = false +end \ No newline at end of file diff --git a/build/Makefile b/build/Makefile index b5312cfe3..257980160 100644 --- a/build/Makefile +++ b/build/Makefile @@ -91,9 +91,9 @@ CXXFLAGS = $(COMPILE_FLAGS) # Build targets ifeq ($(ULINUX),1) -all : $(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS $(PROVIDER_LIBRARY) KIT_STATUS kit fluentbitplugin +all : $(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS $(PROVIDER_LIBRARY) KIT_STATUS kit fluentbitplugin rubypluginstests else -all : $(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS 
$(PROVIDER_LIBRARY) fluentbitplugin +all : $(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS $(PROVIDER_LIBRARY) fluentbitplugin rubypluginstests endif clean : @@ -143,6 +143,15 @@ fluentbitplugin : make -C $(GO_SOURCE_DIR) fbplugin $(COPY) $(GO_SOURCE_DIR)/out_oms.so $(INTERMEDIATE_DIR) +rubypluginstests : + @echo "========================= Installing pre-reqs for running tests" + sudo apt-add-repository ppa:brightbox/ruby-ng -y + sudo apt-get update + sudo apt-get install ruby2.4 rake -y + sudo gem install minitest + @echo "========================= Running tests..." + rake test + #-------------------------------------------------------------------------------- # PAL build # diff --git a/installer/conf/container.conf b/installer/conf/container.conf index f41bd6f98..6d810a0e2 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -17,16 +17,22 @@ #cadvisor perf - type cadvisorperf - tag oms.api.cadvisorperf - run_interval 60s + type cadvisorperf + tag oms.api.cadvisorperf + run_interval 60s log_level debug + + type filter_cadvisor_health_node + log_level debug + + + #custom_metrics_mdm filter plugin type filter_cadvisor2mdm - custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes log_level info @@ -61,6 +67,25 @@ max_retry_wait 9m + + + @type forward + send_timeout 60s + recover_wait 10s + hard_timeout 60s + heartbeat_type tcp + + + host healthmodel-replicaset-service.kube-system + port 25227 + + + + @type file + path /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log + + + type out_mdm log_level debug diff --git a/installer/conf/health_model_definition.json b/installer/conf/health_model_definition.json new file mode 100644 index 000000000..1112fe158 --- /dev/null +++ 
b/installer/conf/health_model_definition.json @@ -0,0 +1,248 @@ +[ + { + "monitor_id": "user_workload_pods_ready", + "parent_monitor_id": "user_workload", + "labels": [ + "container.azm.ms/namespace", + "container.azm.ms/workload-name", + "container.azm.ms/workload-kind", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "user_workload", + "parent_monitor_id": "namespace", + "labels": [ + "container.azm.ms/namespace", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "system_workload_pods_ready", + "parent_monitor_id": "system_workload", + "labels": [ + "container.azm.ms/namespace", + "container.azm.ms/workload-name", + "container.azm.ms/workload-kind", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "system_workload", + "parent_monitor_id": "k8s_infrastructure", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "kube_api_status", + "parent_monitor_id": "k8s_infrastructure", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "namespace", + "labels": [ + "container.azm.ms/namespace", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ], + "parent_monitor_id": "all_namespaces" + }, + { + "monitor_id": "k8s_infrastructure", + 
"parent_monitor_id": "cluster", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "all_namespaces", + "parent_monitor_id": "all_workloads", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "all_workloads", + "parent_monitor_id": "cluster", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "node_cpu_utilization", + "parent_monitor_id": "node", + "labels": [ + "kubernetes.io/hostname", + "agentpool", + "kubernetes.io/role", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "node_memory_utilization", + "parent_monitor_id": "node", + "labels": [ + "kubernetes.io/hostname", + "agentpool", + "kubernetes.io/role", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "node_condition", + "parent_monitor_id": "node", + "labels": [ + "kubernetes.io/hostname", + "agentpool", + "kubernetes.io/role", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "node", + "aggregation_algorithm": "worstOf", + "labels": [ + "kubernetes.io/hostname", + "agentpool", + "kubernetes.io/role", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + 
"container.azm.ms/cluster-name" + ], + "parent_monitor_id": [ + { + "label": "kubernetes.io/role", + "operator": "==", + "value": "master", + "id": "master_node_pool" + }, + { + "label": "kubernetes.io/role", + "operator": "==", + "value": "agent", + "id": "agent_node_pool" + } + ] + }, + { + "monitor_id": "master_node_pool", + "aggregation_algorithm": "percentage", + "aggregation_algorithm_params": { + "critical_threshold": 80.0, + "warning_threshold": 90.0 + }, + "parent_monitor_id": "all_nodes", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "agent_node_pool", + "aggregation_algorithm": "percentage", + "aggregation_algorithm_params": { + "state_threshold": 80.0 + }, + "labels": [ + "agentpool", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ], + "parent_monitor_id": "all_nodes" + }, + { + "monitor_id": "all_nodes", + "aggregation_algorithm": "worstOf", + "parent_monitor_id": "cluster", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "cluster", + "aggregation_algorithm": "worstOf", + "parent_monitor_id": null, + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "subscribed_capacity_cpu", + "parent_monitor_id": "capacity", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "subscribed_capacity_memory", + "parent_monitor_id": "capacity", + 
"labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "capacity", + "parent_monitor_id": "all_workloads", + "labels": [ + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + } +] \ No newline at end of file diff --git a/installer/conf/healthmonitorconfig.json b/installer/conf/healthmonitorconfig.json new file mode 100644 index 000000000..28d562652 --- /dev/null +++ b/installer/conf/healthmonitorconfig.json @@ -0,0 +1,31 @@ +{ + "node_cpu_utilization": { + "WarnThresholdPercentage": 80.0, + "FailThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 + }, + "node_memory_utilization": { + "WarnThresholdPercentage": 80.0, + "FailThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 + }, + "container_cpu_utilization": { + "WarnThresholdPercentage": 80.0, + "FailThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 + }, + "container_memory_utilization": { + "WarnThresholdPercentage": 80.0, + "FailThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 + }, + "user_workload_pods_ready": { + "WarnThresholdPercentage": 0.0, + "FailThresholdPercentage": 10.0, + "ConsecutiveSamplesForStateTransition": 2 + }, + "system_workload_pods_ready": { + "FailThresholdPercentage": 0.0, + "ConsecutiveSamplesForStateTransition": 2 + } +} \ No newline at end of file diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 0dfa3710e..4b4ec09ea 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -1,4 +1,9 @@ # Fluentd config file for OMS Docker - cluster components (kubeAPI) + + type forward + port 25227 + bind 0.0.0.0 + #Kubernetes pod inventory @@ -13,7 +18,7 @@ type kubeevents tag oms.containerinsights.KubeEvents run_interval 60s 
- log_level debug + log_level debug #Kubernetes logs @@ -47,6 +52,14 @@ log_level debug +#Kubernetes health + + type kubehealth + tag oms.api.KubeHealth.ReplicaSet + run_interval 60s + log_level debug + + #cadvisor perf- Windows nodes type wincadvisorperf @@ -69,6 +82,9 @@ log_level info + + type filter_health_model_builder + type out_mdm log_level debug @@ -118,7 +134,7 @@ type out_oms_api log_level debug - buffer_chunk_limit 10m + buffer_chunk_limit 10m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_api_kubernetes_logs*.buffer buffer_queue_limit 10 @@ -127,6 +143,8 @@ retry_wait 30s + + type out_oms log_level debug @@ -170,7 +188,7 @@ max_retry_wait 9m - + type out_oms log_level debug num_threads 5 @@ -214,4 +232,16 @@ retry_limit 10 retry_wait 30s max_retry_wait 9m + + + + type out_oms_api + log_level debug + buffer_chunk_limit 10m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_api_KubeHealth*.buffer + buffer_queue_limit 10 + flush_interval 20s + retry_limit 10 + retry_wait 30s \ No newline at end of file diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 62a6f6885..3dc1a18cd 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -112,10 +112,45 @@ MAINTAINER: 'Microsoft Corporation' /etc/opt/microsoft/docker-cimprov/telegraf-rs.conf; installer/conf/telegraf-rs.conf; 644; root; root /opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh; installer/scripts/TelegrafTCPErrorTelemetry.sh; 755; root; root /opt/livenessprobe.sh; installer/scripts/livenessprobe.sh; 755; root; root -/opt/tomlparser.rb; installer/scripts/tomlparser.rb; 755; root; root -/opt/tomlparser-prom-customconfig.rb; installer/scripts/tomlparser-prom-customconfig.rb; 755; root; root +/opt/tomlparser.rb; installer/scripts/tomlparser.rb; 755; root; root +/opt/tomlparser-prom-customconfig.rb; installer/scripts/tomlparser-prom-customconfig.rb; 755; root; root 
/opt/td-agent-bit-conf-customizer.rb; installer/scripts/td-agent-bit-conf-customizer.rb; 755; root; root + + +/opt/microsoft/omsagent/plugin/filter_cadvisor_health_node.rb; source/code/plugin/filter_cadvisor_health_node.rb; 644; root; root +/opt/microsoft/omsagent/plugin/filter_health_model_builder.rb; source/code/plugin/filter_health_model_builder.rb; 644; root; root +/opt/microsoft/omsagent/plugin/in_kube_health.rb; source/code/plugin/in_kube_health.rb; 644; root; root +/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json; installer/conf/healthmonitorconfig.json; 644; root; root +/etc/opt/microsoft/docker-cimprov/health/health_model_definition.json; installer/conf/health_model_definition.json; 644; root; root + + +/opt/microsoft/omsagent/plugin/health/aggregate_monitor.rb; source/code/plugin/health/aggregate_monitor.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/agg_monitor_id_labels.rb; source/code/plugin/health/agg_monitor_id_labels.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/aggregate_monitor_state_finalizer.rb; source/code/plugin/health/aggregate_monitor_state_finalizer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/cluster_health_state.rb; source/code/plugin/health/cluster_health_state.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_hierarchy_builder.rb; source/code/plugin/health/health_hierarchy_builder.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_kubernetes_resources.rb; source/code/plugin/health/health_kubernetes_resources.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_kube_api_down_handler.rb; source/code/plugin/health/health_kube_api_down_handler.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_missing_signal_generator.rb; source/code/plugin/health/health_missing_signal_generator.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_model_buffer.rb; source/code/plugin/health/health_model_buffer.rb; 644; root; root 
+/opt/microsoft/omsagent/plugin/health/health_model_builder.rb; source/code/plugin/health/health_model_builder.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_model_constants.rb; source/code/plugin/health/health_model_constants.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/parent_monitor_provider.rb; source/code/plugin/health/parent_monitor_provider.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_model_definition_parser.rb; source/code/plugin/health/health_model_definition_parser.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb; source/code/plugin/health/health_monitor_helpers.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_optimizer.rb; source/code/plugin/health/health_monitor_optimizer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb; source/code/plugin/health/health_monitor_helpers.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_provider.rb; source/code/plugin/health/health_monitor_provider.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_record.rb; source/code/plugin/health/health_monitor_record.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_state.rb; source/code/plugin/health/health_monitor_state.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb; source/code/plugin/health/health_monitor_helpers.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_monitor_utils.rb; source/code/plugin/health/health_monitor_utils.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_signal_reducer.rb; source/code/plugin/health/health_signal_reducer.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/monitor_factory.rb; source/code/plugin/health/monitor_factory.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/monitor_set.rb; source/code/plugin/health/monitor_set.rb; 644; root; root 
+/opt/microsoft/omsagent/plugin/health/unit_monitor.rb; source/code/plugin/health/unit_monitor.rb; 644; root; root + %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root @@ -129,6 +164,7 @@ MAINTAINER: 'Microsoft Corporation' /etc/opt/microsoft; 755; root; root; sysdir /etc/opt/microsoft/docker-cimprov; 755; root; root /etc/opt/microsoft/docker-cimprov/conf; 755; root; root +/etc/opt/microsoft/docker-cimprov/health; 755; root; root /etc/opt/omi; 755; root; root; sysdir /etc/opt/omi/conf; 755; root; root; sysdir @@ -142,6 +178,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent; 755; root; root; sysdir /opt/microsoft/omsagent/plugin; 755; root; root; sysdir +/opt/microsoft/omsagent/plugin/health; 755; root; root; sysdir /opt/omi; 755; root; root; sysdir /opt/omi/lib; 755; root; root; sysdir @@ -205,12 +242,24 @@ touch /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log chmod 666 /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log +touch /var/opt/microsoft/docker-cimprov/log/health_monitors.log +chmod 666 /var/opt/microsoft/docker-cimprov/log/health_monitors.log +chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/health_monitors.log + +touch /var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log +chmod 666 /var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log +chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log + +touch /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log +chmod 666 /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log +chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log + mv /etc/opt/microsoft/docker-cimprov/container.conf /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf chown omsagent:omsagent 
/etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf %Postuninstall_10 # If we're an upgrade, skip all of this cleanup -if ${{PERFORMING_UPGRADE_NOT}}; then +if ${{PERFORMING_UPGRADE_NOT}}; then # Clean up installinfo.txt file (registered as "conf" file to pass rpmcheck) rm -f /etc/opt/microsoft/docker-cimprov/conf/installinfo.txt* rm -f /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index c72e64127..067586629 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -1,8 +1,10 @@ #!/usr/local/bin/ruby require_relative "tomlrb" +require 'json' -@configMapMountPath = "/etc/config/settings/log-data-collection-settings" +@log_settings_config_map_mount_path = "/etc/config/settings/log-data-collection-settings" +@agent_settings_config_map_mount_path = "/etc/config/settings/agent-settings" @configVersion = "" @configSchemaVersion = "" # Setting default values which will be used in case they are not set in the configmap or if configmap doesnt exist @@ -16,16 +18,16 @@ @excludePath = "*.csv2" #some invalid path # Use parser to parse the configmap toml file to a ruby structure -def parseConfigMap +def parseConfigMap(path) begin # Check to see if config map is created - if (File.file?(@configMapMountPath)) - puts "config::configmap container-azm-ms-agentconfig for settings mounted, parsing values" - parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true) - puts "config::Successfully parsed mounted config map" + if (File.file?(path)) + puts "config::configmap container-azm-ms-agentconfig for settings mounted, parsing values from #{path}" + parsedConfig = Tomlrb.load_file(path, symbolize_keys: true) + puts "config::Successfully parsed mounted config map from #{path}" return parsedConfig else - puts "config::configmap container-azm-ms-agentconfig for settings not mounted, using defaults" + puts "config::configmap 
container-azm-ms-agentconfig for settings not mounted, using defaults for #{path}" @excludePath = "*_kube-system_*.log" return nil end @@ -117,19 +119,35 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::error::Exception while reading config settings for cluster level environment variable collection - #{errorStr}, using defaults" end end + + begin + if !parsedConfig.nil? && !parsedConfig[:agent_settings][:health_model].nil? && !parsedConfig[:agent_settings][:health_model][:enabled].nil? + @enable_health_model = parsedConfig[:agent_settings][:health_model][:enabled] + puts "enable_health_model = #{@enable_health_model}" + end + rescue => errorStr + puts "config::error:Exception while reading config settings for health_model enabled setting - #{errorStr}, using defaults" + @enable_health_model = false + end end @configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] puts "****************Start Config Processing********************" if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it - configMapSettings = parseConfigMap + configMapSettings = {} + + #iterate over every *settings file and build a hash of settings + Dir["/etc/config/settings/*settings"].each{|file| + puts "Parsing File #{file}" + settings = parseConfigMap(file) + configMapSettings = configMapSettings.merge(settings) + } + if !configMapSettings.nil? 
populateSettingValuesFromConfigMap(configMapSettings) end else - if (File.file?(@configMapMountPath)) puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults" - end @excludePath = "*_kube-system_*.log" end @@ -155,6 +173,8 @@ def populateSettingValuesFromConfigMap(parsedConfig) file.write("export AZMON_STDERR_EXCLUDED_NAMESPACES=#{@stderrExcludeNamespaces}\n") file.write("export AZMON_CLUSTER_COLLECT_ENV_VAR=#{@collectClusterEnvVariables}\n") file.write("export AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH=#{@excludePath}\n") + #health_model settings + file.write("export AZMON_CLUSTER_ENABLE_HEALTH_MODEL=#{@enable_health_model}\n") # Close file after writing all environment variables file.close puts "Both stdout & stderr log collection are turned off for namespaces: '#{@excludePath}' " diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index 61cbaea00..48b25bf14 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -30,13 +30,13 @@ def initialize end class << self - def getKubeResourceInfo(resource) + def getKubeResourceInfo(resource, api_version: nil) headers = {} response = nil - @Log.info "Getting Kube resource" + @Log.info "Getting Kube resource api_version #{api_version}" @Log.info resource begin - resourceUri = getResourceUri(resource) + resourceUri = getResourceUri(resource, api_version: api_version) if !resourceUri.nil? uri = URI.parse(resourceUri) http = Net::HTTP.new(uri.host, uri.port) @@ -76,10 +76,23 @@ def getTokenStr end end - def getResourceUri(resource) + def getClusterRegion + if ENV["AKS_REGION"] + return ENV["AKS_REGION"] + else + @Log.warn ("Kubernetes environment variable not set AKS_REGION. 
Unable to get cluster region.") + return nil + end + end + + def getResourceUri(resource, api_version: nil) begin if ENV["KUBERNETES_SERVICE_HOST"] && ENV["KUBERNETES_PORT_443_TCP_PORT"] - return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}/api/" + @@ApiVersion + "/" + resource + if !api_version.nil? + return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}/apis/" + api_version + "/" + resource + end + api_version = @@ApiVersion + return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}/api/" + api_version + "/" + resource else @Log.warn ("Kubernetes environment variable not set KUBERNETES_SERVICE_HOST: #{ENV["KUBERNETES_SERVICE_HOST"]} KUBERNETES_PORT_443_TCP_PORT: #{ENV["KUBERNETES_PORT_443_TCP_PORT"]}. Unable to form resourceUri") return nil @@ -125,6 +138,8 @@ def getClusterId return @@ClusterId if !@@ClusterId.nil? #By default initialize ClusterId to ClusterName. # In ACS/On-prem, we need to figure out how we can generate ClusterId + # Dilipr: Spoof the subid by generating md5 hash of cluster name, and taking some constant parts of it. + # e.g. md5 digest is 128 bits = 32 character in hex. 
Get first 16 and get a guid, and the next 16 to get resource id @@ClusterId = getClusterName begin cluster = ENV["AKS_RESOURCE_ID"] diff --git a/source/code/plugin/filter_cadvisor_health_container.rb b/source/code/plugin/filter_cadvisor_health_container.rb new file mode 100644 index 000000000..4090092a9 --- /dev/null +++ b/source/code/plugin/filter_cadvisor_health_container.rb @@ -0,0 +1,263 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +module Fluent + require 'logger' + require 'json' + require_relative 'oms_common' + require_relative 'HealthMonitorUtils' + require_relative 'HealthMonitorState' + require_relative "ApplicationInsightsUtility" + + + class CAdvisor2ContainerHealthFilter < Filter + Fluent::Plugin.register_filter('filter_cadvisor_health_container', self) + + config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/health_monitors.log' + config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryRssBytes' + config_param :container_resource_refresh_interval_minutes, :integer, :default => 5 + + @@object_name_k8s_node = 'K8SNode' + @@object_name_k8s_container = 'K8SContainer' + + @@counter_name_cpu = 'cpuusagenanocores' + @@counter_name_memory_rss = 'memoryrssbytes' + + @@health_monitor_config = {} + + @@hostName = (OMS::Common.get_hostname) + @@clusterName = KubernetesApiClient.getClusterName + @@clusterId = KubernetesApiClient.getClusterId + @@clusterRegion = KubernetesApiClient.getClusterRegion + @@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled + + def initialize + super + @cpu_capacity = 0.0 + @memory_capacity = 0.0 + @last_resource_refresh = DateTime.now.to_time.to_i + @metrics_to_collect_hash = {} + end + + def configure(conf) + super + @log = HealthMonitorUtils.getLogHandle + @log.debug {'Starting filter_cadvisor2health plugin'} + end + + def start + super + @metrics_to_collect_hash = HealthMonitorUtils.build_metrics_hash(@metrics_to_collect) + 
@log.debug "Calling ensure_cpu_memory_capacity_set cpu_capacity #{@cpu_capacity} memory_capacity #{@memory_capacity}" + node_capacity = HealthMonitorUtils.ensure_cpu_memory_capacity_set(@@hm_log, @cpu_capacity, @memory_capacity, @@hostName) + @cpu_capacity = node_capacity[0] + @memory_capacity = node_capacity[1] + @log.info "CPU Capacity #{@cpu_capacity} Memory Capacity #{@memory_capacity}" + #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @@hostName) + @@health_monitor_config = HealthMonitorUtils.getHealthMonitorConfig + ApplicationInsightsUtility.sendCustomEvent("filter_cadvisor_health Plugin Start", {}) + end + + def filter_stream(tag, es) + if !@@cluster_health_model_enabled + @log.info "Cluster Health Model disabled in filter_cadvisor_health_container" + return [] + end + new_es = MultiEventStream.new + #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @hostName) + records_count = 0 + es.each { |time, record| + begin + filtered_record = filter(tag, time, record) + if !filtered_record.nil? 
+ new_es.add(time, filtered_record) + records_count += 1 + end + rescue => e + router.emit_error_event(tag, time, record, e) + end + } + @log.debug "Filter Records Count #{records_count}" + new_es + end + + def filter(tag, time, record) + begin + if record.key?("MonitorLabels") + return record + end + object_name = record['DataItems'][0]['ObjectName'] + counter_name = record['DataItems'][0]['Collections'][0]['CounterName'].downcase + if @metrics_to_collect_hash.key?(counter_name.downcase) + metric_value = record['DataItems'][0]['Collections'][0]['Value'] + case object_name + when @@object_name_k8s_container + case counter_name.downcase + when @@counter_name_cpu + # @log.debug "Object Name #{object_name}" + # @log.debug "Counter Name #{counter_name}" + # @log.debug "Metric Value #{metric_value}" + #return process_container_cpu_record(record, metric_value) + when @@counter_name_memory_rss + #return process_container_memory_record(record, metric_value) + end + when @@object_name_k8s_node + case counter_name.downcase + when @@counter_name_cpu + #process_node_cpu_record(record, metric_value) + when @@counter_name_memory_rss + #process_node_memory_record(record, metric_value) + end + end + end + rescue => e + @log.debug "Error in filter #{e}" + @log.debug "record #{record}" + @log.debug "backtrace #{e.backtrace}" + ApplicationInsightsUtility.sendExceptionTelemetry(e) + return nil + end + end + + def process_container_cpu_record(record, metric_value) + monitor_id = HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID + @log.debug "processing container cpu record" + if record.nil? + return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) + container_metadata = HealthMonitorUtils.getContainerMetadata(key) + if !container_metadata.nil? + cpu_limit = container_metadata['cpuLimit'] + end + + if cpu_limit.to_s.empty? 
+ #@log.info "CPU Limit is nil" + cpu_limit = @cpu_capacity + end + + #@log.info "cpu limit #{cpu_limit}" + + percent = (metric_value.to_f/cpu_limit*100).round(2) + #@log.debug "Container #{key} | Percentage of CPU limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID]) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} + #@log.info health_monitor_record + + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(@log, monitor_id, [@@clusterId, @@hostName, key]) + #@log.info "Monitor Instance Id: #{monitor_instance_id}" + temp = record.nil? ? "Nil" : record["MonitorInstanceId"] + @log.info "Processed Container CPU #{temp}" + return record + end + return nil + end + + def process_container_memory_record(record, metric_value) + monitor_id = HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID + #@log.debug "processing container memory record" + if record.nil? + return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) + container_metadata = HealthMonitorUtils.getContainerMetadata(key) + if !container_metadata.nil? + memory_limit = container_metadata['memoryLimit'] + end + + if memory_limit.to_s.empty? 
+ #@log.info "Memory Limit is nil" + memory_limit = @memory_capacity + end + + #@log.info "memory limit #{memory_limit}" + + percent = (metric_value.to_f/memory_limit*100).round(2) + #@log.debug "Container #{key} | Percentage of Memory limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID]) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} + #@log.info health_monitor_record + + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(@log, monitor_id, [@@clusterId, @@hostName, key]) + #@log.info "Monitor Instance Id: #{monitor_instance_id}" + temp = record.nil? ? "Nil" : record["MonitorInstanceId"] + @log.info "Processed Container Memory #{temp}" + return record + end + return nil + end + + def process_node_cpu_record(record, metric_value) + monitor_id = HealthMonitorConstants::NODE_CPU_MONITOR_ID + #@log.debug "processing node cpu record" + if record.nil? 
+ return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + #@log.info "CPU capacity #{@cpu_capacity}" + + percent = (metric_value.to_f/@cpu_capacity*100).round(2) + #@log.debug "Percentage of CPU limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::NODE_CPU_MONITOR_ID]) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} + + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(@log, monitor_id, [@@clusterId, @@hostName]) + # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) + # temp = record.nil? ? "Nil" : record["MonitorInstanceId"] + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName + @log.info "Processed Node CPU" + return health_record + end + return nil + end + + def process_node_memory_record(record, metric_value) + monitor_id = HealthMonitorConstants::NODE_MEMORY_MONITOR_ID + #@log.debug "processing node memory record" + if record.nil? 
+ return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + #@log.info "Memory capacity #{@memory_capacity}" + + percent = (metric_value.to_f/@memory_capacity*100).round(2) + #@log.debug "Percentage of Memory limit: #{percent}" + state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::NODE_MEMORY_MONITOR_ID]) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} + #@log.info health_monitor_record + + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName]) + #@log.info "Monitor Instance Id: #{monitor_instance_id}" + # temp = record.nil? ? "Nil" : record["MonitorInstanceId"] + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName + @log.info "Processed Node Memory" + return health_record + end + return nil + end + end +end diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb new file mode 100644 index 000000000..627a525e7 --- /dev/null +++ b/source/code/plugin/filter_cadvisor_health_node.rb @@ -0,0 +1,267 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +module Fluent + require 'logger' + require 'json' + require_relative 'oms_common' + require_relative "ApplicationInsightsUtility" + require_relative 
"KubernetesApiClient" + Dir[File.join(__dir__, './health', '*.rb')].each { |file| require file } + + class CAdvisor2NodeHealthFilter < Filter + include HealthModel + Fluent::Plugin.register_filter('filter_cadvisor_health_node', self) + + attr_accessor :provider, :resources + + config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryRssBytes' + config_param :container_resource_refresh_interval_minutes, :integer, :default => 5 + config_param :health_monitor_config_path, :default => '/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json' + + @@object_name_k8s_node = 'K8SNode' + @@object_name_k8s_container = 'K8SContainer' + + @@counter_name_cpu = 'cpuusagenanocores' + @@counter_name_memory_rss = 'memoryrssbytes' + + @@hm_log = HealthMonitorUtils.get_log_handle + @@hostName = (OMS::Common.get_hostname) + @@clusterName = KubernetesApiClient.getClusterName + @@clusterId = KubernetesApiClient.getClusterId + @@clusterRegion = KubernetesApiClient.getClusterRegion + @@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled + + def initialize + begin + super + @cpu_capacity = 0.0 + @memory_capacity = 0.0 + @last_resource_refresh = DateTime.now.to_time.to_i + @metrics_to_collect_hash = {} + @resources = HealthKubernetesResources.instance # this doesnt require node and pod inventory. 
So no need to populate them + @provider = HealthMonitorProvider.new(@@clusterId, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) + rescue => e + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + end + end + + def configure(conf) + super + @log = HealthMonitorUtils.get_log_handle + @log.debug {'Starting filter_cadvisor2health plugin'} + end + + def start + super + @metrics_to_collect_hash = HealthMonitorUtils.build_metrics_hash(@metrics_to_collect) + @log.debug "Calling ensure_cpu_memory_capacity_set cpu_capacity #{@cpu_capacity} memory_capacity #{@memory_capacity}" + node_capacity = HealthMonitorUtils.ensure_cpu_memory_capacity_set(@@hm_log, @cpu_capacity, @memory_capacity, @@hostName) + @cpu_capacity = node_capacity[0] + @memory_capacity = node_capacity[1] + @log.info "CPU Capacity #{@cpu_capacity} Memory Capacity #{@memory_capacity}" + #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @@hostName) + ApplicationInsightsUtility.sendCustomEvent("filter_cadvisor_health Plugin Start", {}) + end + + def filter_stream(tag, es) + if !@@cluster_health_model_enabled + @log.info "Cluster Health Model disabled in filter_cadvisor_health_node" + return [] + end + new_es = MultiEventStream.new + #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @hostName) + records_count = 0 + es.each { |time, record| + begin + filtered_record = filter(tag, time, record) + if !filtered_record.nil? 
+ new_es.add(time, filtered_record) + records_count += 1 + end + rescue => e + @log.info "Error in filter_stream for filter_cadvisor_health_node #{e.message}" + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + end + } + @log.debug "Filter Records Count #{records_count}" + new_es + end + + def filter(tag, time, record) + begin + if record.key?("MonitorLabels") + return record + end + object_name = record['DataItems'][0]['ObjectName'] + counter_name = record['DataItems'][0]['Collections'][0]['CounterName'].downcase + if @metrics_to_collect_hash.key?(counter_name.downcase) + metric_value = record['DataItems'][0]['Collections'][0]['Value'] + case object_name + when @@object_name_k8s_container + case counter_name.downcase + when @@counter_name_cpu + # @log.debug "Object Name #{object_name}" + # @log.debug "Counter Name #{counter_name}" + # @log.debug "Metric Value #{metric_value}" + #return process_container_cpu_record(record, metric_value) + when @@counter_name_memory_rss + #return process_container_memory_record(record, metric_value) + end + when @@object_name_k8s_node + case counter_name.downcase + when @@counter_name_cpu + process_node_cpu_record(record, metric_value) + when @@counter_name_memory_rss + process_node_memory_record(record, metric_value) + end + end + end + rescue => e + @log.debug "Error in filter #{e}" + @log.debug "record #{record}" + @log.debug "backtrace #{e.backtrace}" + ApplicationInsightsUtility.sendExceptionTelemetry(e) + return nil + end + end + + def process_container_cpu_record(record, metric_value) + monitor_id = HealthMonitorConstants::CONTAINER_CPU_MONITOR_ID + @log.debug "processing container cpu record" + if record.nil? + return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) + container_metadata = HealthMonitorUtils.getContainerMetadata(key) + if !container_metadata.nil? 
+ cpu_limit = container_metadata['cpuLimit'] + end + + if cpu_limit.to_s.empty? + #@log.info "CPU Limit is nil" + cpu_limit = @cpu_capacity + end + + #@log.info "cpu limit #{cpu_limit}" + + percent = (metric_value.to_f/cpu_limit*100).round(2) + #@log.debug "Container #{key} | Percentage of CPU limit: #{percent}" + state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(monitor_id)) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} + #@log.info health_monitor_record + + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName, key]) + #@log.info "Monitor Instance Id: #{monitor_instance_id}" + temp = record.nil? ? "Nil" : record["MonitorInstanceId"] + @log.info "Processed Container CPU #{temp}" + return record + end + return nil + end + + def process_container_memory_record(record, metric_value) + monitor_id = HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID + #@log.debug "processing container memory record" + if record.nil? + return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) + container_metadata = HealthMonitorUtils.getContainerMetadata(key) + if !container_metadata.nil? + memory_limit = container_metadata['memoryLimit'] + end + + if memory_limit.to_s.empty? 
+ #@log.info "Memory Limit is nil" + memory_limit = @memory_capacity + end + + #@log.info "memory limit #{memory_limit}" + + percent = (metric_value.to_f/memory_limit*100).round(2) + #@log.debug "Container #{key} | Percentage of Memory limit: #{percent}" + state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID)) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} + #@log.info health_monitor_record + + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName, key]) + #@log.info "Monitor Instance Id: #{monitor_instance_id}" + temp = record.nil? ? "Nil" : record["MonitorInstanceId"] + @log.info "Processed Container Memory #{temp}" + return record + end + return nil + end + + def process_node_cpu_record(record, metric_value) + monitor_id = HealthMonitorConstants::NODE_CPU_MONITOR_ID + #@log.debug "processing node cpu record" + if record.nil? + return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + #@log.info "CPU capacity #{@cpu_capacity}" + + percent = (metric_value.to_f/@cpu_capacity*100).round(2) + #@log.debug "Percentage of CPU limit: #{percent}" + state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(HealthMonitorConstants::NODE_CPU_MONITOR_ID)) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} + + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName]) + # temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName + @log.info "Processed Node CPU" + return health_record + end + return nil + end + + def process_node_memory_record(record, metric_value) + monitor_id = HealthMonitorConstants::NODE_MEMORY_MONITOR_ID + #@log.debug "processing node memory record" + if record.nil? + return nil + else + instance_name = record['DataItems'][0]['InstanceName'] + #@log.info "Memory capacity #{@memory_capacity}" + + percent = (metric_value.to_f/@memory_capacity*100).round(2) + #@log.debug "Percentage of Memory limit: #{percent}" + state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(HealthMonitorConstants::NODE_MEMORY_MONITOR_ID)) + #@log.debug "Computed State : #{state}" + timestamp = record['DataItems'][0]['Timestamp'] + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} + #@log.info health_monitor_record + + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName]) + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + 
health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName + @log.info "Processed Node Memory" + return health_record + end + return nil + end + end +end diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb new file mode 100644 index 000000000..0c1b378a0 --- /dev/null +++ b/source/code/plugin/filter_health_model_builder.rb @@ -0,0 +1,233 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. + +# frozen_string_literal: true + +module Fluent + require 'logger' + require 'json' + Dir[File.join(__dir__, './health', '*.rb')].each { |file| require file } + + + class FilterHealthModelBuilder < Filter + Fluent::Plugin.register_filter('filter_health_model_builder', self) + + config_param :enable_log, :integer, :default => 0 + config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_health_model_builder.log' + config_param :model_definition_path, :default => '/etc/opt/microsoft/docker-cimprov/health/health_model_definition.json' + config_param :health_monitor_config_path, :default => '/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json' + config_param :health_state_serialized_path, :default => '/mnt/azure/health_model_state.json' + attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_finalizers, :monitor_set, :model_builder, :hierarchy_builder, :resources, :kube_api_down_handler, :provider, :reducer, :state, :generator + include HealthModel + + @@rewrite_tag = 'oms.api.KubeHealth.AgentCollectionTime' + @@cluster_id = KubernetesApiClient.getClusterId + @@token_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/token" + @@cert_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + @@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled + + def initialize + begin + super + @buffer = 
HealthModel::HealthModelBuffer.new + @cluster_health_state = ClusterHealthState.new(@@token_file_path, @@cert_file_path) + @health_model_definition = HealthModel::ParentMonitorProvider.new(HealthModel::HealthModelDefinitionParser.new(@model_definition_path).parse_file) + @monitor_factory = HealthModel::MonitorFactory.new + @hierarchy_builder = HealthHierarchyBuilder.new(@health_model_definition, @monitor_factory) + # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side + @state_finalizers = [HealthModel::AggregateMonitorStateFinalizer.new] + @monitor_set = HealthModel::MonitorSet.new + @model_builder = HealthModel::HealthModelBuilder.new(@hierarchy_builder, @state_finalizers, @monitor_set) + @kube_api_down_handler = HealthKubeApiDownHandler.new + @resources = HealthKubernetesResources.instance + @reducer = HealthSignalReducer.new + @state = HealthMonitorState.new + @generator = HealthMissingSignalGenerator.new + #TODO: cluster_labels needs to be initialized + @provider = HealthMonitorProvider.new(@@cluster_id, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) + deserialized_state_info = @cluster_health_state.get_state + @state = HealthMonitorState.new + @state.initialize_state(deserialized_state_info) + @cluster_old_state = 'none' + @cluster_new_state = 'none' + rescue => e + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + end + end + + def configure(conf) + begin + super + @log = nil + if @enable_log + @log = Logger.new(@log_path, 'weekly') + @log.info 'Starting filter_health_model_builder plugin' + end + rescue => e + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + end + end + + def start + super + end + + def shutdown + super + end + + def filter_stream(tag, es) + begin + if !@@cluster_health_model_enabled + @log.info "Cluster 
Health Model disabled in filter_health_model_builder" + return [] + end + new_es = MultiEventStream.new + time = Time.now + + if tag.start_with?("oms.api.KubeHealth.DaemonSet") + records = [] + if !es.nil? + es.each{|time, record| + records.push(record) + } + @buffer.add_to_buffer(records) + end + return [] + elsif tag.start_with?("oms.api.KubeHealth.ReplicaSet") + @log.info "TAG #{tag}" + records = [] + es.each{|time, record| + records.push(record) + } + @buffer.add_to_buffer(records) + records_to_process = @buffer.get_buffer + @buffer.reset_buffer + + health_monitor_records = [] + records_to_process.each do |record| + monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] + monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] + #HealthMonitorRecord + health_monitor_record = HealthMonitorRecord.new( + record[HealthMonitorRecordFields::MONITOR_ID], + record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], + record[HealthMonitorRecordFields::DETAILS]["state"], + @provider.get_labels(record), + @provider.get_config(monitor_id), + record[HealthMonitorRecordFields::DETAILS] + ) + + health_monitor_records.push(health_monitor_record) + #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + end + + @log.info "health_monitor_records.size #{health_monitor_records.size}" + # Dedupe daemonset signals + # Remove unit monitor signals for “gone” objects + # update state for the reduced set of signals + reduced_records = @reducer.reduce_signals(health_monitor_records, @resources) + reduced_records.each{|record| + @state.update_state(record, + @provider.get_config(record.monitor_id) + ) + # get the health state based on the monitor's operational state + # update state calls updates the state of the monitor based on configuration and history of the the monitor records + record.state = 
@state.get_state(record.monitor_instance_id).new_state + } + @log.info "after deduping and removing gone objects reduced_records.size #{reduced_records.size}" + + reduced_records = @kube_api_down_handler.handle_kube_api_down(reduced_records) + @log.info "after kube api down handler health_monitor_records.size #{health_monitor_records.size}" + + #get the list of 'none' and 'unknown' signals + missing_signals = @generator.get_missing_signals(@@cluster_id, reduced_records, @resources, @provider) + + @log.info "after getting missing signals missing_signals.size #{missing_signals.size}" + #update state for missing signals + missing_signals.each{|signal| + + @state.update_state(signal, @provider.get_config(signal.monitor_id)) + @log.info "After Updating #{@state.get_state(signal.monitor_instance_id)} #{@state.get_state(signal.monitor_instance_id).new_state}" + # for unknown/none records, update the "monitor state" to be the latest state (new_state) of the monitor instance from the state + signal.state = @state.get_state(signal.monitor_instance_id).new_state + } + + @generator.update_last_received_records(reduced_records) + all_records = reduced_records.clone + all_records.push(*missing_signals) + + @log.info "after Adding missing signals all_records.size #{all_records.size}" + + # build the health model + @model_builder.process_records(all_records) + all_monitors = @model_builder.finalize_model + + @log.info "after building health_model #{all_monitors.size}" + + # update the state for aggregate monitors (unit monitors are updated above) + all_monitors.each{|monitor_instance_id, monitor| + if monitor.is_aggregate_monitor + @state.update_state(monitor, + @provider.get_config(monitor.monitor_id) + ) + end + + instance_state = @state.get_state(monitor_instance_id) + #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + should_send = instance_state.should_send + + # always send cluster monitor as a heartbeat + 
if !should_send && monitor_instance_id != MonitorId::CLUSTER + all_monitors.delete(monitor_instance_id) + end + } + + @log.info "after optimizing health signals all_monitors.size #{all_monitors.size}" + + # for each key in monitor.keys, + # get the state from health_monitor_state + # generate the record to send + all_monitors.keys.each{|key| + record = @provider.get_record(all_monitors[key], state) + if record[HealthMonitorRecordFields::MONITOR_ID] == MonitorId::CLUSTER && all_monitors.size > 1 + old_state = record[HealthMonitorRecordFields::OLD_STATE] + new_state = record[HealthMonitorRecordFields::NEW_STATE] + if old_state != new_state && @cluster_old_state != old_state && @cluster_new_state != new_state + ApplicationInsightsUtility.sendCustomEvent("HealthModel_ClusterStateChanged",{"old_state" => old_state , "new_state" => new_state, "monitor_count" => all_monitors.size}) + @log.info "sent telemetry for cluster state change from #{record['OldState']} to #{record['NewState']}" + @cluster_old_state = old_state + @cluster_new_state = new_state + end + end + #@log.info "#{record["Details"]} #{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" + new_es.add(time, record) + } + + #emit the stream + router.emit_stream(@@rewrite_tag, new_es) + + #initialize monitor_set and model_builder + @monitor_set = HealthModel::MonitorSet.new + @model_builder = HealthModel::HealthModelBuilder.new(@hierarchy_builder, @state_finalizers, @monitor_set) + + #update cluster state custom resource + @cluster_health_state.update_state(@state.to_h) + + # return an empty event stream, else the match will throw a NoMethodError + return [] + elsif tag.start_with?("oms.api.KubeHealth.AgentCollectionTime") + # this filter also acts as a pass through as we are rewriting the tag and emitting to the fluent stream + es + else + raise 'Invalid tag #{tag} received' + end + + rescue => e + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + @log.warn 
"Message: #{e.message} Backtrace: #{e.backtrace}" + return nil + end + end + end +end diff --git a/source/code/plugin/health/agg_monitor_id_labels.rb b/source/code/plugin/health/agg_monitor_id_labels.rb new file mode 100644 index 000000000..48ca46184 --- /dev/null +++ b/source/code/plugin/health/agg_monitor_id_labels.rb @@ -0,0 +1,26 @@ +module HealthModel + class AggregateMonitorInstanceIdLabels + @@id_labels_mapping = { + MonitorId::SYSTEM_WORKLOAD => [HealthMonitorLabels::NAMESPACE, HealthMonitorLabels::WORKLOAD_NAME], + MonitorId::USER_WORKLOAD => [HealthMonitorLabels::NAMESPACE, HealthMonitorLabels::WORKLOAD_NAME], + MonitorId::NODE => [HealthMonitorLabels::AGENTPOOL, HealthMonitorLabels::ROLE, HealthMonitorLabels::HOSTNAME], + MonitorId::NAMESPACE => [HealthMonitorLabels::NAMESPACE], + MonitorId::AGENT_NODE_POOL => [HealthMonitorLabels::AGENTPOOL], + # MonitorId::ALL_AGENT_NODE_POOLS => [], + # MonitorId::ALL_NODE_POOLS => [], + # MonitorId::ALL_NODES => [], + # MonitorId::K8S_INFRASTRUCTURE => [], + # MonitorId::CLUSTER => [], + # MonitorId::WORKLOAD => [] + } + + def self.get_labels_for(monitor_id) + if @@id_labels_mapping.key?(monitor_id) + return @@id_labels_mapping[monitor_id] + else + return [] + end + + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/aggregate_monitor.rb b/source/code/plugin/health/aggregate_monitor.rb new file mode 100644 index 000000000..794f716ce --- /dev/null +++ b/source/code/plugin/health/aggregate_monitor.rb @@ -0,0 +1,193 @@ +# frozen_string_literal: true + +require_relative 'health_model_constants' +require 'json' + +module HealthModel + class AggregateMonitor + attr_accessor :monitor_id, :monitor_instance_id, :state, :transition_date_time, :aggregation_algorithm, :aggregation_algorithm_params, :labels, :is_aggregate_monitor, :details + attr_reader :member_monitors, :member_state_counts + + @@sort_key_order = { + MonitorState::UNKNOWN => 1, + MonitorState::CRITICAL => 2, + 
MonitorState::WARNING => 3, + MonitorState::HEALTHY => 4, + MonitorState::NONE => 5 + } + + # constructor + def initialize( + monitor_id, + monitor_instance_id, + state, + transition_date_time, + aggregation_algorithm, + aggregation_algorithm_params, + labels + ) + @monitor_id = monitor_id + @monitor_instance_id = monitor_instance_id + @state = state + @transition_date_time = transition_date_time + @aggregation_algorithm = aggregation_algorithm || AggregationAlgorithm::WORSTOF + @aggregation_algorithm_params = aggregation_algorithm_params + @labels = labels + @member_monitors = {} + @member_state_counts = {} + @is_aggregate_monitor = true + end + + # adds a member monitor as a child + def add_member_monitor(member_monitor_instance_id) + unless @member_monitors.key?(member_monitor_instance_id) + @member_monitors[member_monitor_instance_id] = true + end + end + + #removes a member monitor + def remove_member_monitor(member_monitor_instance_id) + if @member_monitors.key?(member_monitor_instance_id) + @member_monitors.delete(member_monitor_instance_id) + end + end + + # return the member monitors as an array + def get_member_monitors + @member_monitors.map(&:first) + end + + # calculates the state of the aggregate monitor based on aggregation algorithm and child monitor states + def calculate_state(monitor_set) + case @aggregation_algorithm + when AggregationAlgorithm::WORSTOF + @state = calculate_worst_of_state(monitor_set) + when AggregationAlgorithm::PERCENTAGE + @state = calculate_percentage_state(monitor_set) + else + raise 'No aggregation algorithm specified' + end + end + + def calculate_details(monitor_set) + @details = {} + @details['details'] = {} + @details['state'] = state + @details['timestamp'] = transition_date_time + ids = [] + member_monitor_instance_ids = get_member_monitors + member_monitor_instance_ids.each{|member_monitor_id| + member_monitor = monitor_set.get_monitor(member_monitor_id) + member_state = member_monitor.state + if 
@details['details'].key?(member_state) + ids = @details['details'][member_state] + if !ids.include?(member_monitor.monitor_instance_id) + ids.push(member_monitor.monitor_instance_id) + end + @details['details'][member_state] = ids + else + @details['details'][member_state] = [member_monitor.monitor_instance_id] + end + } + end + + # calculates the worst of state, given the member monitors + def calculate_worst_of_state(monitor_set) + + @member_state_counts = map_member_monitor_states(monitor_set) + + if member_state_counts.length === 0 + return MonitorState::NONE + end + + if member_state_counts.key?(MonitorState::CRITICAL) && member_state_counts[MonitorState::CRITICAL] > 0 + return MonitorState::CRITICAL + end + if member_state_counts.key?(MonitorState::ERROR) && member_state_counts[MonitorState::ERROR] > 0 + return MonitorState::ERROR + end + if member_state_counts.key?(MonitorState::WARNING) && member_state_counts[MonitorState::WARNING] > 0 + return MonitorState::WARNING + end + + if member_state_counts.key?(MonitorState::UNKNOWN) && member_state_counts[MonitorState::UNKNOWN] > 0 + return MonitorState::UNKNOWN + end + + if member_state_counts.key?(MonitorState::HEALTHY) && member_state_counts[MonitorState::HEALTHY] > 0 + return MonitorState::HEALTHY #healthy should win over none in aggregation + end + + return MonitorState::NONE + + end + + # calculates a percentage state, given the aggregation algorithm parameters + def calculate_percentage_state(monitor_set) + + #sort + #TODO: What if sorted_filtered is empty? is that even possible? 
+ sorted_filtered = sort_filter_member_monitors(monitor_set) + + state_threshold = @aggregation_algorithm_params['state_threshold'].to_f + + size = sorted_filtered.size + if size == 1 + @state = sorted_filtered[0].state + else + count = ((state_threshold*size)/100).ceil + index = size - count + @state = sorted_filtered[index].state + end + end + + # maps states of member monitors to counts + def map_member_monitor_states(monitor_set) + member_monitor_instance_ids = get_member_monitors + if member_monitor_instance_ids.nil? || member_monitor_instance_ids.size == 0 + return {} + end + + state_counts = {} + + member_monitor_instance_ids.each {|monitor_instance_id| + + member_monitor = monitor_set.get_monitor(monitor_instance_id) + monitor_state = member_monitor.state + + if !state_counts.key?(monitor_state) + state_counts[monitor_state] = 1 + else + count = state_counts[monitor_state] + state_counts[monitor_state] = count+1 + end + } + + return state_counts; + end + + # Sort the member monitors in the following order +=begin + 1. Error + 2. Unknown + 3. Critical + 4. Warning + 5. 
Healthy + Remove 'none' state monitors +=end + def sort_filter_member_monitors(monitor_set) + member_monitor_instance_ids = get_member_monitors + member_monitors = [] + + member_monitor_instance_ids.each {|monitor_instance_id| + member_monitor = monitor_set.get_monitor(monitor_instance_id) + member_monitors.push(member_monitor) + } + + filtered = member_monitors.select{|monitor| monitor.state != MonitorState::NONE} + sorted = filtered.sort_by{ |monitor| [@@sort_key_order[monitor.state]] } + + return sorted + end + end +end diff --git a/source/code/plugin/health/aggregate_monitor_state_finalizer.rb b/source/code/plugin/health/aggregate_monitor_state_finalizer.rb new file mode 100644 index 000000000..74e780924 --- /dev/null +++ b/source/code/plugin/health/aggregate_monitor_state_finalizer.rb @@ -0,0 +1,33 @@ +module HealthModel + class AggregateMonitorStateFinalizer + + def finalize(monitor_set) + top_level_monitor = monitor_set.get_monitor(MonitorId::CLUSTER) + if !top_level_monitor.nil? + calculate_subtree_state(top_level_monitor, monitor_set) + end + monitor_set.get_map.each{|k,v| + if v.is_aggregate_monitor + v.calculate_details(monitor_set) + end + } + end + + private + def calculate_subtree_state(monitor, monitor_set) + if monitor.nil? || !monitor.is_aggregate_monitor + raise 'AggregateMonitorStateFinalizer:calculateSubtreeState Parameter monitor must be non-null AggregateMonitor' + end + + member_monitor_instance_ids = monitor.get_member_monitors # monitor_instance_ids + member_monitor_instance_ids.each{|member_monitor_instance_id| + member_monitor = monitor_set.get_monitor(member_monitor_instance_id) + + if !member_monitor.nil? 
&& member_monitor.is_aggregate_monitor + calculate_subtree_state(member_monitor, monitor_set) + end + } + monitor.calculate_state(monitor_set) + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/cluster_health_state.rb b/source/code/plugin/health/cluster_health_state.rb new file mode 100644 index 000000000..ac7e05675 --- /dev/null +++ b/source/code/plugin/health/cluster_health_state.rb @@ -0,0 +1,115 @@ +require "net/http" +require "net/https" +require "uri" + +module HealthModel + class ClusterHealthState + + attr_reader :token_file_path, :cert_file_path, :log, :http_client, :uri, :token + @@resource_uri_template = "%{kube_api_server_url}/apis/azmon.container.insights/v1/namespaces/kube-system/healthstates/cluster-health-state" + + def initialize(token_file_path, cert_file_path) + @token_file_path = token_file_path + @cert_file_path = cert_file_path + @log = HealthMonitorHelpers.get_log_handle + @http_client = get_http_client + @token = get_token + end + + def update_state(state) + get_request = Net::HTTP::Get.new(@uri.request_uri) + + get_request["Authorization"] = "Bearer #{@token}" + @log.info "Making GET request to #{@uri.request_uri} @ #{Time.now.utc.iso8601}" + get_response = @http_client.request(get_request) + @log.info "Got response of #{get_response.code} for #{@uri.request_uri} @ #{Time.now.utc.iso8601}" + + if get_response.code.to_i == 404 # NOT found + #POST + update_request = Net::HTTP::Post.new(@uri.request_uri) + update_request["Content-Type"] = "application/json" + + elsif get_response.code.to_i == 200 # Update == Patch + #PATCH + update_request = Net::HTTP::Patch.new(@uri.request_uri) + update_request["Content-Type"] = "application/merge-patch+json" + end + update_request["Authorization"] = "Bearer #{@token}" + + update_request_body = get_update_request_body + update_request_body["state"] = state.to_json + update_request.body = update_request_body.to_json + + update_response = @http_client.request(update_request) + 
@log.info "Got a response of #{update_response.code} for #{update_request.method}" + end + + def get_state + get_request = Net::HTTP::Get.new(@uri.request_uri) + get_request["Authorization"] = "Bearer #{@token}" + @log.info "Making GET request to #{@uri.request_uri} @ #{Time.now.utc.iso8601}" + get_response = @http_client.request(get_request) + @log.info "Got response of #{get_response.code} for #{@uri.request_uri} @ #{Time.now.utc.iso8601}" + + if get_response.code.to_i == 200 + return JSON.parse(JSON.parse(get_response.body)["state"]) + else + return {} + end + end + + private + def get_token() + begin + if File.exist?(@token_file_path) && File.readable?(@token_file_path) + token_str = File.read(@token_file_path).strip + return token_str + else + @log.info ("Unable to read token string from #{@token_file_path}") + return nil + end + end + end + + def get_http_client() + kube_api_server_url = get_kube_api_server_url + resource_uri = @@resource_uri_template % { + kube_api_server_url: kube_api_server_url + } + @uri = URI.parse(resource_uri) + http = Net::HTTP.new(@uri.host, @uri.port) + http.use_ssl = true + if !File.exist?(@cert_file_path) + raise "#{@cert_file_path} doesnt exist" + else + http.ca_file = @cert_file_path + end + http.verify_mode = OpenSSL::SSL::VERIFY_PEER + return http + end + + def get_kube_api_server_url + if ENV["KUBERNETES_SERVICE_HOST"] && ENV["KUBERNETES_PORT_443_TCP_PORT"] + return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}" + else + @log.warn ("Kubernetes environment variable not set KUBERNETES_SERVICE_HOST: #{ENV["KUBERNETES_SERVICE_HOST"]} KUBERNETES_PORT_443_TCP_PORT: #{ENV["KUBERNETES_PORT_443_TCP_PORT"]}. Unable to form resourceUri") + if Gem.win_platform? #unit testing on windows dev machine + value = %x( kubectl -n default get endpoints kubernetes --no-headers) + url = "https://#{value.split(' ')[1]}" + return "https://localhost:8080" # This is NEVER used. 
this is just to return SOME value + end + return nil + end + end + + def get_update_request_body + body = {} + body["apiVersion"] = "azmon.container.insights/v1" + body["kind"] = "HealthState" + body["metadata"] = {} + body["metadata"]["name"] = "cluster-health-state" + body["metadata"]["namespace"] = "kube-system" + return body + end + end +end diff --git a/source/code/plugin/health/health_hierarchy_builder.rb b/source/code/plugin/health/health_hierarchy_builder.rb new file mode 100644 index 000000000..2da0050db --- /dev/null +++ b/source/code/plugin/health/health_hierarchy_builder.rb @@ -0,0 +1,76 @@ +require 'json' +module HealthModel + class HealthHierarchyBuilder + + attr_accessor :health_model_definition, :monitor_factory + + def initialize(health_model_definition, monitor_factory) + + if !health_model_definition.is_a?(ParentMonitorProvider) + raise "Invalid Type Expected: ParentMonitorProvider Actual: #{@health_model_definition.class.name}" + end + @health_model_definition = health_model_definition + + if !monitor_factory.is_a?(MonitorFactory) + raise "Invalid Type Expected: MonitorFactory Actual: #{@monitor_factory.class.name}" + end + @monitor_factory = monitor_factory + end + + def process_record(health_monitor_record, monitor_set) + if !health_monitor_record.is_a?(HealthMonitorRecord) + raise "Unexpected Type #{health_monitor_record.class}" + end + + # monitor state transition will always be on a unit monitor + child_monitor = @monitor_factory.create_unit_monitor(health_monitor_record) + monitor_set.add_or_update(child_monitor) + parent_monitor_id = @health_model_definition.get_parent_monitor_id(child_monitor) + monitor_labels = child_monitor.labels + monitor_id = child_monitor.monitor_id + + # to construct the parent monitor, + # 1. Child's labels + # 2. Parent monitor's config to determine what labels to copy + # 3. Parent Monitor Id + # 4. 
Monitor Id --> Labels to hash Mapping to generate the monitor instance id for aggregate monitors + + while !parent_monitor_id.nil? + #puts "Parent Monitor Id #{parent_monitor_id}" + # get the set of labels to copy to parent monitor + parent_monitor_labels = @health_model_definition.get_parent_monitor_labels(monitor_id, monitor_labels, parent_monitor_id) + # get the parent monitor configuration + parent_monitor_configuration = @health_model_definition.get_parent_monitor_config(parent_monitor_id) + #get monitor instance id for parent monitor. Does this belong in ParentMonitorProvider? + parent_monitor_instance_id = @health_model_definition.get_parent_monitor_instance_id(child_monitor.monitor_instance_id, parent_monitor_id, parent_monitor_labels) + # check if monitor set has the parent monitor id + # if not present, add + # if present, update the state based on the aggregation algorithm + parent_monitor = nil + if !monitor_set.contains?(parent_monitor_instance_id) + parent_monitor = @monitor_factory.create_aggregate_monitor(parent_monitor_id, parent_monitor_instance_id, parent_monitor_labels, parent_monitor_configuration['aggregation_algorithm'], parent_monitor_configuration['aggregation_algorithm_params'], child_monitor) + parent_monitor.add_member_monitor(child_monitor.monitor_instance_id) + else + parent_monitor = monitor_set.get_monitor(parent_monitor_instance_id) + # required to calculate the rollup state + parent_monitor.add_member_monitor(child_monitor.monitor_instance_id) + # update to the earliest of the transition times of child monitors + if child_monitor.transition_date_time < parent_monitor.transition_date_time + parent_monitor.transition_date_time = child_monitor.transition_date_time + end + end + + if parent_monitor.nil? 
+ raise 'Parent_monitor should not be nil for #{monitor_id}' + end + + monitor_set.add_or_update(parent_monitor) + + child_monitor = parent_monitor + parent_monitor_id = @health_model_definition.get_parent_monitor_id(child_monitor) + monitor_labels = child_monitor.labels + monitor_id = child_monitor.monitor_id + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_kube_api_down_handler.rb b/source/code/plugin/health/health_kube_api_down_handler.rb new file mode 100644 index 000000000..7f7ba1bd3 --- /dev/null +++ b/source/code/plugin/health/health_kube_api_down_handler.rb @@ -0,0 +1,27 @@ +module HealthModel + class HealthKubeApiDownHandler + def initialize + @@monitors_to_change = [HealthMonitorConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID, + HealthMonitorConstants::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID, + HealthMonitorConstants::NODE_CONDITION_MONITOR_ID, + HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID, + HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID] + end + + # update kube-api dependent monitors to be 'unknown' if kube-api is down or monitor is unavailable + def handle_kube_api_down(health_monitor_records) + health_monitor_records_map = {} + + health_monitor_records.map{|record| health_monitor_records_map[record.monitor_instance_id] = record} + if !health_monitor_records_map.key?(HealthMonitorConstants::KUBE_API_STATUS) || (health_monitor_records_map.key?(HealthMonitorConstants::KUBE_API_STATUS) && health_monitor_records_map[HealthMonitorConstants::KUBE_API_STATUS].state != 'pass') + #iterate over the map and set the state to unknown for related monitors + health_monitor_records.each{|health_monitor_record| + if @@monitors_to_change.include?(health_monitor_record.monitor_id) + health_monitor_record.state = HealthMonitorStates::UNKNOWN + end + } + end + return health_monitor_records + end + end +end \ No newline at end of file diff --git 
a/source/code/plugin/health/health_kubernetes_resources.rb b/source/code/plugin/health/health_kubernetes_resources.rb new file mode 100644 index 000000000..53f879bf5 --- /dev/null +++ b/source/code/plugin/health/health_kubernetes_resources.rb @@ -0,0 +1,102 @@ +require 'singleton' + +module HealthModel + class HealthKubernetesResources + + include Singleton + attr_accessor :node_inventory, :pod_inventory, :deployment_inventory + attr_reader :nodes, :pods, :workloads + + def initialize + @node_inventory = [] + @pod_inventory = [] + @deployment_inventory = [] + @nodes = [] + @pods = [] + @workloads = [] + @log = HealthMonitorHelpers.get_log_handle + end + + def get_node_inventory + return @node_inventory + end + + def get_nodes + @nodes = [] + @node_inventory['items'].each {|node| + if !@nodes.include?(node['metadata']['name']) + @nodes.push(node['metadata']['name']) + end + + } + return @nodes + end + + def get_pod_inventory + return @pod_inventory + end + + def get_pods + return @pods + end + + def get_workload_names + @pods = [] + workload_names = {} + deployment_lookup = {} + @deployment_inventory['items'].each do |deployment| + match_labels = deployment['spec']['selector']['matchLabels'].to_h + namespace = deployment['metadata']['namespace'] + match_labels.each{|k,v| + deployment_lookup["#{namespace}-#{k}=#{v}"] = "#{deployment['metadata']['namespace']}~~#{deployment['metadata']['name']}" + } + end + @pod_inventory['items'].each do |pod| + begin + has_owner = !pod['metadata']['ownerReferences'].nil? + owner_kind = '' + if has_owner + owner_kind = pod['metadata']['ownerReferences'][0]['kind'] + controller_name = pod['metadata']['ownerReferences'][0]['name'] + else + owner_kind = pod['kind'] + controller_name = pod['metadata']['name'] + end + + namespace = pod['metadata']['namespace'] + + workload_name = '' + if owner_kind.nil? 
+ owner_kind = 'Pod' + end + case owner_kind.downcase + when 'job' + # we are excluding jobs + next + when 'replicaset' + # get the labels, and see if there is a match. If there is, it is the deployment. If not, use replica set name/controller name + labels = pod['metadata']['labels'].to_h + labels.each {|k,v| + lookup_key = "#{namespace}-#{k}=#{v}" + if deployment_lookup.key?(lookup_key) + workload_name = deployment_lookup[lookup_key] + break + end + } + if workload_name.empty? + workload_name = "#{namespace}~~#{controller_name}" + end + when 'daemonset' + workload_name = "#{namespace}~~#{controller_name}" + else + workload_name = "#{namespace}~~#{pod['metadata']['name']}" + end + rescue => e + @log.info "Error when processing pod #{pod['metadata']['name']} #{e.message}" + end + workload_names[workload_name] = true + end + return workload_names.keys + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_missing_signal_generator.rb b/source/code/plugin/health/health_missing_signal_generator.rb new file mode 100644 index 000000000..ff7f6a390 --- /dev/null +++ b/source/code/plugin/health/health_missing_signal_generator.rb @@ -0,0 +1,142 @@ +module HealthModel + class HealthMissingSignalGenerator + attr_accessor :last_received_records, :current_received_records + attr_reader :missing_signals, :unknown_signals_hash + + def initialize() + @last_received_records = {} + @unknown_signals_hash = {} + end + + def get_missing_signals(cluster_id, health_monitor_records, health_k8s_inventory, provider) + missing_monitor_ids = [] + nodes = health_k8s_inventory.get_nodes + workload_names = health_k8s_inventory.get_workload_names + missing_signals_map = {} + missing_signals = [] + health_monitor_records_map = {} + health_monitor_records.map{ + |monitor| health_monitor_records_map[monitor.monitor_instance_id] = monitor + } + + node_signals_hash = {} + nodes.each{|node| + node_signals_hash[node] = [HealthMonitorConstants::NODE_CPU_MONITOR_ID, 
HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID] + } + log = HealthMonitorHelpers.get_log_handle + log.info "last_received_records #{@last_received_records.size} nodes #{nodes}" + @last_received_records.each{|monitor_instance_id, monitor| + if !health_monitor_records_map.key?(monitor_instance_id) + if HealthMonitorHelpers.is_node_monitor(monitor.monitor_id) + node_name = monitor.labels['kubernetes.io/hostname'] + new_monitor = HealthMonitorRecord.new( + monitor.monitor_id, + monitor.monitor_instance_id, + Time.now.utc.iso8601, + monitor.state, + monitor.labels, + monitor.config, + {"timestamp" => Time.now.utc.iso8601, "state" => HealthMonitorStates::UNKNOWN, "details" => ""} + ) + if !node_name.nil? && nodes.include?(node_name) + new_monitor.state = HealthMonitorStates::UNKNOWN + new_monitor.details["state"] = HealthMonitorStates::UNKNOWN + new_monitor.details["details"] = "Node present in inventory but no signal for #{monitor.monitor_id} from node #{node_name}" + @unknown_signals_hash[monitor_instance_id] = new_monitor + elsif !node_name.nil? && !nodes.include?(node_name) + new_monitor.state = HealthMonitorStates::NONE + new_monitor.details["state"] = HealthMonitorStates::NONE + new_monitor.details["details"] = "Node NOT present in inventory. node: #{node_name}" + end + missing_signals_map[monitor_instance_id] = new_monitor + log.info "Added missing signal #{new_monitor.monitor_instance_id} #{new_monitor.state}" + elsif HealthMonitorHelpers.is_pods_ready_monitor(monitor.monitor_id) + lookup = "#{monitor.labels[HealthMonitorLabels::NAMESPACE]}~~#{monitor.labels[HealthMonitorLabels::WORKLOAD_NAME]}" + new_monitor = HealthMonitorRecord.new( + monitor.monitor_id, + monitor.monitor_instance_id, + Time.now.utc.iso8601, + monitor.state, + monitor.labels, + monitor.config, + {"timestamp" => Time.now.utc.iso8601, "state" => HealthMonitorStates::UNKNOWN, "details" => ""} + ) + if !lookup.nil? 
&& workload_names.include?(lookup) + new_monitor.state = HealthMonitorStates::UNKNOWN + new_monitor.details["state"] = HealthMonitorStates::UNKNOWN + new_monitor.details["details"] = "Workload present in inventory. But no signal for #{lookup}" + @unknown_signals_hash[monitor_instance_id] = new_monitor + elsif !lookup.nil? && !workload_names.include?(lookup) + new_monitor.state = HealthMonitorStates::NONE + new_monitor.details["state"] = HealthMonitorStates::NONE + new_monitor.details["details"] = "Workload #{lookup} NOT present in inventory" + end + missing_signals_map[monitor_instance_id] = new_monitor + end + end + } + + + health_monitor_records.each{|health_monitor_record| + # remove signals from the list of expected signals if we see them in the list of current signals + if HealthMonitorHelpers.is_node_monitor(health_monitor_record.monitor_id) + node_name = health_monitor_record.labels['kubernetes.io/hostname'] + if node_signals_hash.key?(node_name) + signals = node_signals_hash[node_name] + signals.delete(health_monitor_record.monitor_id) + if signals.size == 0 + node_signals_hash.delete(node_name) + end + end + end + } + + # if the hash is not empty, means we have missing signals + if node_signals_hash.size > 0 + # these signals were not sent previously + # these signals need to be assigned an unknown state + node_signals_hash.each{|node, monitor_ids| + monitor_ids.each{|monitor_id| + monitor_instance_id = HealthMonitorHelpers.get_monitor_instance_id(monitor_id, [cluster_id, node]) + new_monitor = HealthMonitorRecord.new( + monitor_id, + monitor_instance_id, + Time.now.utc.iso8601, + HealthMonitorStates::UNKNOWN, + provider.get_node_labels(node), + {}, + {"timestamp" => Time.now.utc.iso8601, "state" => HealthMonitorStates::UNKNOWN, "details" => "no signal received from node #{node}"} + ) + missing_signals_map[monitor_instance_id] = new_monitor + log.info "Added missing signal when node_signals_hash was not empty #{new_monitor.monitor_instance_id} 
module HealthModel

  # Accumulates health records between processing cycles; the plugin drains
  # it via get_buffer and clears it with reset_buffer.
  class HealthModelBuffer

    attr_reader :records_buffer, :log

    def initialize
      @records_buffer = []
    end

    # Current contents of the buffer.
    def get_buffer
      @records_buffer
    end

    # Appends a single record or a collection of records (splatted in,
    # so both forms flatten into the buffer).
    def add_to_buffer(records)
      @records_buffer.push(*records)
    end

    # Empties the buffer for the next collection cycle.
    def reset_buffer
      @records_buffer = []
    end
  end
end
require 'time'
begin
  require_relative 'health_model_constants'
rescue LoadError
  # Allow standalone unit testing when the sibling file is not present
  # (same guard pattern as health_monitor_utils.rb).
end

module HealthModel

  # Drives the health model pipeline: feeds raw health records through the
  # hierarchy builder, then runs each state finalizer over the monitor set
  # and returns the finalized monitor map.
  class HealthModelBuilder
    attr_accessor :hierarchy_builder, :state_finalizers, :monitor_set

    # hierarchy_builder: object responding to process_record(record, monitor_set)
    # state_finalizers:  Array of objects responding to finalize(monitor_set)
    # monitor_set:       object responding to get_map
    def initialize(hierarchy_builder, state_finalizers, monitor_set)
      @hierarchy_builder = hierarchy_builder
      @state_finalizers = state_finalizers
      @monitor_set = monitor_set
    end

    # Routes every incoming record through the hierarchy builder.
    def process_records(health_records)
      health_records.each { |health_record|
        @hierarchy_builder.process_record(health_record, @monitor_set)
      }
    end

    # Runs all finalizers in order and returns the finalized monitor map.
    # Raises (RuntimeError) when the finalizer list is not a non-empty array.
    def finalize_model
      raise 'state finalizers should be an array' unless @state_finalizers.is_a?(Array)
      raise '@state_finalizers length should not be zero or empty' if @state_finalizers.empty?

      @state_finalizers.each { |finalizer|
        finalizer.finalize(@monitor_set)
      }

      @monitor_set.get_map
    end
  end
end
module HealthModel

  # Column/field names used when emitting health monitor records.
  # All constants are frozen so shared string literals cannot be mutated.
  class HealthMonitorRecordFields
    CLUSTER_ID = "ClusterId".freeze
    MONITOR_ID = "MonitorId".freeze
    MONITOR_INSTANCE_ID = "MonitorInstanceId".freeze
    MONITOR_LABELS = "MonitorLabels".freeze
    DETAILS = "Details".freeze
    MONITOR_CONFIG = "MonitorConfig".freeze
    OLD_STATE = "OldState".freeze
    NEW_STATE = "NewState".freeze
    AGENT_COLLECTION_TIME = "AgentCollectionTime".freeze
    TIME_FIRST_OBSERVED = "TimeFirstObserved".freeze
    NODE_NAME = "NodeName".freeze
    NAMESPACE = "Namespace".freeze
  end

  # Well-known monitor ids.
  # NOTE(review): the WORKLOAD_CONTAINER_* ids intentionally(?) share values
  # with the CONTAINER_* ids -- confirm before deduplicating.
  class HealthMonitorConstants
    NODE_CPU_MONITOR_ID = "node_cpu_utilization".freeze
    NODE_MEMORY_MONITOR_ID = "node_memory_utilization".freeze
    CONTAINER_CPU_MONITOR_ID = "container_cpu_utilization".freeze
    CONTAINER_MEMORY_MONITOR_ID = "container_memory_utilization".freeze
    NODE_CONDITION_MONITOR_ID = "node_condition".freeze
    WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID = "subscribed_capacity_cpu".freeze
    WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID = "subscribed_capacity_memory".freeze
    WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID = "container_cpu_utilization".freeze
    WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID = "container_memory_utilization".freeze
    KUBE_API_STATUS = "kube_api_status".freeze
    USER_WORKLOAD_PODS_READY_MONITOR_ID = "user_workload_pods_ready".freeze
    SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID = "system_workload_pods_ready".freeze
  end

  # Possible states a health monitor signal can report.
  class HealthMonitorStates
    PASS = "pass".freeze
    FAIL = "fail".freeze
    WARNING = "warn".freeze
    NONE = "none".freeze
    UNKNOWN = "unknown".freeze
  end

  # Label keys attached to monitor records.
  class HealthMonitorLabels
    WORKLOAD_NAME = "container.azm.ms/workload-name".freeze
    WORKLOAD_KIND = "container.azm.ms/workload-kind".freeze
    NAMESPACE = "container.azm.ms/namespace".freeze
    AGENTPOOL = "agentpool".freeze
    ROLE = "kubernetes.io/role".freeze
    HOSTNAME = "kubernetes.io/hostname".freeze
  end
end
require 'json'
require 'time'   # BUG FIX: HealthMonitorOptimizer uses Time.parse, which needs the 'time' extension
require 'logger'
require 'digest'

module HealthModel

  # Parses the health model definition file, which expresses the parent/child
  # relationship between monitors, how child states roll up to an aggregate
  # monitor, and which labels are passed on to the parent monitor.
  class HealthModelDefinitionParser
    attr_accessor :health_model_definition_path, :health_model_definition

    def initialize(path)
      @health_model_definition = {}
      @health_model_definition_path = path
    end

    # Reads the definition file and builds a monitor_id => definition-hash map.
    # Raises (RuntimeError) if the file does not exist.
    def parse_file
      raise "File does not exist in the specified path" unless File.exist?(@health_model_definition_path)

      temp_model = JSON.parse(File.read(@health_model_definition_path))
      temp_model.each { |entry|
        monitor_id = entry['monitor_id']
        parent_monitor_id = entry['parent_monitor_id']
        # plain indexing already yields nil for absent keys; the original's
        # "x = h[k] if h[k]" modifiers were redundant
        labels = entry['labels']
        aggregation_algorithm = entry['aggregation_algorithm']
        aggregation_algorithm_params = entry['aggregation_algorithm_params']

        if parent_monitor_id.is_a?(Array)
          # conditional parent: pick the parent whose label condition matches
          conditions = parent_monitor_id.map { |condition|
            {
              "key" => condition['label'],
              "operator" => condition['operator'],
              "value" => condition['value'],
              "parent_id" => condition['id']
            }
          }
          @health_model_definition[monitor_id] = {"conditions" => conditions, "labels" => labels, "aggregation_algorithm" => aggregation_algorithm, "aggregation_algorithm_params" => aggregation_algorithm_params}
        elsif parent_monitor_id.is_a?(String) || parent_monitor_id.nil?
          # string parent id, or nil for the root monitor
          @health_model_definition[monitor_id] = {"parent_monitor_id" => parent_monitor_id, "labels" => labels, "aggregation_algorithm" => aggregation_algorithm, "aggregation_algorithm_params" => aggregation_algorithm_params}
        end
      }
      @health_model_definition
    end
  end

  # Static utility methods shared by the health model plugins.
  class HealthMonitorHelpers

    # BUG FIX: the original Windows path was double-quoted
    # ("C:\Temp\health_monitors.log"), so the unknown escapes \T and \h
    # silently dropped the backslashes, yielding "C:Temphealth_monitors.log".
    # Single quotes preserve them.
    LOG_PATH = if Gem.win_platform? # unit testing on windows dev machine
                 'C:\Temp\health_monitors.log'
               else
                 "/var/opt/microsoft/docker-cimprov/log/health_monitors.log"
               end

    class << self
      def is_node_monitor(monitor_id)
        return (monitor_id == HealthMonitorConstants::NODE_CPU_MONITOR_ID || monitor_id == HealthMonitorConstants::NODE_MEMORY_MONITOR_ID || monitor_id == HealthMonitorConstants::NODE_CONDITION_MONITOR_ID)
      end

      def is_pods_ready_monitor(monitor_id)
        return (monitor_id == HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID || monitor_id == HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID)
      end

      # BUG FIX: the logger is now created lazily, so merely loading this file
      # no longer raises Errno::ENOENT on machines where the log directory is
      # absent. Keeps last 2 files, max log file size = 10M (as before).
      def get_log_handle
        @log ||= Logger.new(LOG_PATH, 2, 10 * 1048576)
      end

      # Stable instance id: monitor id plus an MD5 of the '/'-joined args.
      def get_monitor_instance_id(monitor_id, args = [])
        string_to_hash = args.join("/")
        return "#{monitor_id}-#{Digest::MD5.hexdigest(string_to_hash)}"
      end
    end
  end

  # Decides whether a monitor signal should be emitted, throttling unchanged
  # states and re-sending stale ones.
  class HealthMonitorOptimizer
    def initialize
      @@health_signal_timeout = 240 # minutes before an unchanged signal is re-sent
      @@first_record_sent = {}
    end

    # True when the signal for monitor_instance_id should be sent now.
    def should_send(monitor_instance_id, health_monitor_state, health_monitor_config)
      health_monitor_instance_state = health_monitor_state.get_state(monitor_instance_id)
      health_monitor_records = health_monitor_instance_state.prev_records
      samples_to_check = health_monitor_config['ConsecutiveSamplesForStateTransition'].nil? ? 1 : health_monitor_config['ConsecutiveSamplesForStateTransition'].to_i

      # newest record is at the end; oldest records are dropped from the front
      latest_record = health_monitor_records[health_monitor_records.size - 1]
      latest_record_state = latest_record["state"]
      latest_record_time = latest_record["timestamp"] # string representation of time

      new_state = health_monitor_instance_state.new_state
      prev_sent_time = health_monitor_instance_state.prev_sent_record_time

      if latest_record_state.downcase == new_state.downcase
        # state unchanged: send only if stale or never sent before
        time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60
        if time_elapsed > @@health_signal_timeout # minutes
          true
        elsif !@@first_record_sent.key?(monitor_instance_id)
          @@first_record_sent[monitor_instance_id] = true
          true
        else
          false
        end
      else
        # state changed: send immediately when one sample decides the state,
        # otherwise wait for a consistent run of samples_to_check samples
        if samples_to_check == 1
          true
        elsif health_monitor_instance_state.prev_records.size == 1 && samples_to_check > 1
          true
        elsif health_monitor_instance_state.prev_records.size < samples_to_check
          false
        else
          health_monitor_instance_state.is_state_change_consistent ? true : false
        end
      end
    end
  end
end
require 'json'
require 'time'

module HealthModel

  # Supplies monitor configuration and label enrichment for health monitor
  # records, backed by cluster-level labels and the kubernetes node inventory.
  class HealthMonitorProvider

    attr_accessor :cluster_labels, :health_kubernetes_resources, :monitor_configuration_path, :cluster_id
    attr_reader :monitor_configuration

    # cluster_id                  -- cluster resource id stamped on every record
    # cluster_labels              -- hash of labels copied onto every record
    # health_kubernetes_resources -- object exposing get_node_inventory
    # monitor_configuration_path  -- path to the JSON monitor configuration file
    def initialize(cluster_id, cluster_labels, health_kubernetes_resources, monitor_configuration_path)
      @cluster_labels = Hash.new
      cluster_labels.each { |k, v| @cluster_labels[k] = v }
      @cluster_id = cluster_id
      @health_kubernetes_resources = health_kubernetes_resources
      @monitor_configuration_path = monitor_configuration_path
      @monitor_configuration = {}
      begin
        # BUG FIX: File.read avoids leaking the file handle that the original
        # File.open left unclosed when JSON.parse raised before file.close.
        @monitor_configuration = JSON.parse(File.read(@monitor_configuration_path))
      rescue => e
        # BUG FIX: the original logged through @log, which was never assigned
        # in this class, so any config error raised NoMethodError on nil and
        # masked the real failure.
        HealthMonitorHelpers.get_log_handle.info "Error when opening health config file #{e}"
      end
    end

    # Builds the flat record hash that is emitted for a monitor, merging the
    # cluster labels with the monitor's own labels.
    def get_record(health_monitor_record, health_monitor_state)
      labels = Hash.new
      @cluster_labels.each { |k, v| labels[k] = v }
      monitor_id = health_monitor_record.monitor_id
      monitor_instance_id = health_monitor_record.monitor_instance_id
      health_monitor_instance_state = health_monitor_state.get_state(monitor_instance_id)

      monitor_labels = health_monitor_record.labels
      if !monitor_labels.empty?
        monitor_labels.keys.each do |key|
          labels[key] = monitor_labels[key]
        end
      end

      prev_records = health_monitor_instance_state.prev_records
      time_first_observed = health_monitor_instance_state.state_change_time # the oldest collection time
      new_state = health_monitor_instance_state.new_state # updated before this method is called
      old_state = health_monitor_instance_state.old_state

      config = get_config(monitor_id)

      # a single sample is emitted as a bare hash, multiple samples as an array
      if prev_records.size == 1
        details = prev_records[0]
      else
        details = prev_records
      end

      monitor_record = {}
      monitor_record[HealthMonitorRecordFields::CLUSTER_ID] = @cluster_id
      monitor_record[HealthMonitorRecordFields::MONITOR_LABELS] = labels.to_json
      monitor_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id
      monitor_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id
      monitor_record[HealthMonitorRecordFields::NEW_STATE] = new_state
      monitor_record[HealthMonitorRecordFields::OLD_STATE] = old_state
      monitor_record[HealthMonitorRecordFields::DETAILS] = details.to_json
      monitor_record[HealthMonitorRecordFields::MONITOR_CONFIG] = config.to_json
      monitor_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = Time.now.utc.iso8601
      monitor_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_first_observed

      monitor_record
    end

    # Configuration hash for a monitor id; {} when none is defined.
    def get_config(monitor_id)
      @monitor_configuration.key?(monitor_id) ? @monitor_configuration[monitor_id] : {}
    end

    # Labels for an incoming raw record (hash keyed by HealthMonitorRecordFields):
    # workload monitors get workload name/kind/namespace labels; node monitors
    # get the node's kubernetes labels merged in.
    def get_labels(health_monitor_record)
      monitor_labels = Hash.new
      @cluster_labels.keys.each { |key|
        monitor_labels[key] = @cluster_labels[key]
      }
      monitor_id = health_monitor_record[HealthMonitorRecordFields::MONITOR_ID]
      case monitor_id
      when HealthMonitorConstants::CONTAINER_CPU_MONITOR_ID, HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID, HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID
        namespace = health_monitor_record[HealthMonitorRecordFields::DETAILS]['details']['namespace']
        workload_name = health_monitor_record[HealthMonitorRecordFields::DETAILS]['details']['workloadName']
        workload_kind = health_monitor_record[HealthMonitorRecordFields::DETAILS]['details']['workloadKind']

        # workload_name arrives as "namespace~~name"; keep only the name part
        monitor_labels[HealthMonitorLabels::WORKLOAD_NAME] = workload_name.split('~~')[1]
        monitor_labels[HealthMonitorLabels::WORKLOAD_KIND] = workload_kind
        monitor_labels[HealthMonitorLabels::NAMESPACE] = namespace
      when HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID
        node_name = health_monitor_record[HealthMonitorRecordFields::NODE_NAME]
        @health_kubernetes_resources.get_node_inventory['items'].each do |node|
          if !node_name.nil? && !node['metadata']['name'].nil? && node_name == node['metadata']['name']
            if !node["metadata"].nil? && !node["metadata"]["labels"].nil?
              monitor_labels = monitor_labels.merge(node["metadata"]["labels"])
            end
          end
        end
      end
      monitor_labels
    end

    # Kubernetes labels of a single node from the cached node inventory.
    def get_node_labels(node_name)
      monitor_labels = {}
      @health_kubernetes_resources.get_node_inventory['items'].each do |node|
        if !node_name.nil? && !node['metadata']['name'].nil? && node_name == node['metadata']['name']
          if !node["metadata"].nil? && !node["metadata"]["labels"].nil?
            monitor_labels = node["metadata"]["labels"]
          end
        end
      end
      monitor_labels
    end
  end
end

# Plain value object describing a single health monitor observation.
HealthMonitorRecord = Struct.new(
  :monitor_id,
  :monitor_instance_id,
  :transition_date_time,
  :state,
  :labels,
  :config,
  :details
)

module HealthModel

  # Mutable per-monitor-instance bookkeeping used by HealthMonitorState.
  HealthMonitorInstanceState = Struct.new(:prev_sent_record_time, :old_state, :new_state, :state_change_time, :prev_records, :is_state_change_consistent, :should_send)

  # Tracks the last sent state and recent history for every monitor instance
  # and decides when a state transition should be emitted.
  # NOTE(review): state lives in class variables, so it is shared across all
  # instances of this class -- presumably intentional (singleton-style use);
  # confirm before converting to instance state.
  class HealthMonitorState

    def initialize
      @@monitor_states = {}
      @@first_record_sent = {}
      @@health_signal_timeout = 240 # minutes before an unchanged signal is re-sent
    end

    # Bookkeeping struct for an instance id, or nil when never seen.
    def get_state(monitor_instance_id)
      if @@monitor_states.key?(monitor_instance_id)
        return @@monitor_states[monitor_instance_id]
      end
    end

    def set_state(monitor_instance_id, health_monitor_instance_state)
      @@monitor_states[monitor_instance_id] = health_monitor_instance_state
    end

    def to_h
      return @@monitor_states
    end

    # Rehydrates state from a hash of instance id => serialized JSON struct
    # (used to survive plugin restarts). Rehydrated signals count as "sent".
    def initialize_state(deserialized_state)
      @@monitor_states = {}
      deserialized_state.each { |k, v|
        health_monitor_instance_state_hash = JSON.parse(v)
        state = HealthMonitorInstanceState.new(*health_monitor_instance_state_hash.values_at(*HealthMonitorInstanceState.members))
        state.prev_sent_record_time = health_monitor_instance_state_hash["prev_sent_record_time"]
        state.old_state = health_monitor_instance_state_hash["old_state"]
        state.new_state = health_monitor_instance_state_hash["new_state"]
        state.state_change_time = health_monitor_instance_state_hash["state_change_time"]
        state.prev_records = health_monitor_instance_state_hash["prev_records"]
        state.is_state_change_consistent = health_monitor_instance_state_hash["is_state_change_consistent"] || false
        state.should_send = health_monitor_instance_state_hash["should_send"]
        @@monitor_states[k] = state
        @@first_record_sent[k] = true
      }
    end

    # When do we send?
    # 1. the signal has never been sent before
    # 2. there is a "consistent" state change across the configured sample window
    # 3. the signal is stale (> @@health_signal_timeout minutes)
    # 4. the latest state is none
    def update_state(monitor,        # UnitMonitor/AggregateMonitor
                     monitor_config  # Hash (may be nil)
                    )
      monitor_instance_id = monitor.monitor_instance_id
      log = HealthMonitorHelpers.get_log_handle
      current_time = Time.now.utc.iso8601
      health_monitor_instance_state = get_state(monitor_instance_id)
      if !health_monitor_instance_state.nil?
        # reset per-cycle flags before recomputing them below
        health_monitor_instance_state.is_state_change_consistent = false
        health_monitor_instance_state.should_send = false
        set_state(monitor_instance_id, health_monitor_instance_state)
      end

      # BUG FIX: the original re-read monitor_config['ConsecutiveSamples...']
      # further down WITHOUT the nil guard it applied here, so a nil
      # monitor_config raised NoMethodError. Compute the window once, guarded.
      samples_to_keep = 1
      if !monitor_config.nil? && !monitor_config['ConsecutiveSamplesForStateTransition'].nil?
        samples_to_keep = monitor_config['ConsecutiveSamplesForStateTransition'].to_i
      end
      samples_to_check = samples_to_keep

      if @@monitor_states.key?(monitor_instance_id)
        health_monitor_instance_state = @@monitor_states[monitor_instance_id]
        health_monitor_records = health_monitor_instance_state.prev_records # sliding window, oldest first
        if health_monitor_records.size == samples_to_keep
          health_monitor_records.delete_at(0)
        end
        health_monitor_records.push(monitor.details)
        health_monitor_instance_state.prev_records = health_monitor_records
        @@monitor_states[monitor_instance_id] = health_monitor_instance_state
      else
        # first observation: if a single sample decides the state, adopt it
        # immediately, otherwise start from "none"
        old_state = HealthMonitorStates::NONE
        new_state = HealthMonitorStates::NONE
        if samples_to_keep == 1
          new_state = monitor.state
        end

        health_monitor_instance_state = HealthMonitorInstanceState.new(
          monitor.transition_date_time,
          old_state,
          new_state,
          monitor.transition_date_time,
          [monitor.details])

        health_monitor_instance_state.should_send = true
        @@monitor_states[monitor_instance_id] = health_monitor_instance_state
      end

      # Decide old/new state based on the history and the latest record.
      health_monitor_records = health_monitor_instance_state.prev_records

      latest_record = health_monitor_records[health_monitor_records.size - 1] # newest is at the end
      latest_record_state = latest_record["state"]
      latest_record_time = latest_record["timestamp"] # string representation of time

      new_state = health_monitor_instance_state.new_state
      prev_sent_time = health_monitor_instance_state.prev_sent_record_time

      # is the last sent state the same as the latest monitor state?
      if latest_record_state.downcase == new_state.downcase
        time_elapsed = (Time.parse(latest_record_time) - Time.parse(prev_sent_time)) / 60
        # check if the health signal has "timed out"
        if time_elapsed > @@health_signal_timeout # minutes
          health_monitor_instance_state.old_state = health_monitor_instance_state.new_state
          health_monitor_instance_state.new_state = latest_record_state
          health_monitor_instance_state.prev_sent_record_time = current_time
          health_monitor_instance_state.should_send = true
          set_state(monitor_instance_id, health_monitor_instance_state)
          log.debug "#{monitor_instance_id} condition: signal timeout should_send #{health_monitor_instance_state.should_send} #{health_monitor_instance_state.old_state} --> #{health_monitor_instance_state.new_state}"
        # check if the first record has been sent
        elsif !@@first_record_sent.key?(monitor_instance_id)
          @@first_record_sent[monitor_instance_id] = true
          health_monitor_instance_state.should_send = true
          set_state(monitor_instance_id, health_monitor_instance_state)
        end
      # latest state differs from the last sent state
      else
        if latest_record_state.downcase == HealthMonitorStates::NONE
          # "none" always notifies immediately
          health_monitor_instance_state.old_state = health_monitor_instance_state.new_state
          health_monitor_instance_state.new_state = latest_record_state
          health_monitor_instance_state.state_change_time = current_time
          health_monitor_instance_state.prev_sent_record_time = current_time
          health_monitor_instance_state.should_send = true
          if !@@first_record_sent.key?(monitor_instance_id)
            @@first_record_sent[monitor_instance_id] = true
          end
          set_state(monitor_instance_id, health_monitor_instance_state)
          log.debug "#{monitor_instance_id} condition: NONE state should_send #{health_monitor_instance_state.should_send} #{health_monitor_instance_state.old_state} --> #{health_monitor_instance_state.new_state}"
        elsif samples_to_check == 1
          # single-sample monitors notify on every state change
          health_monitor_instance_state.old_state = health_monitor_instance_state.new_state
          health_monitor_instance_state.new_state = latest_record_state
          health_monitor_instance_state.state_change_time = current_time
          health_monitor_instance_state.prev_sent_record_time = current_time
          health_monitor_instance_state.should_send = true
          if !@@first_record_sent.key?(monitor_instance_id)
            @@first_record_sent[monitor_instance_id] = true
          end
          set_state(monitor_instance_id, health_monitor_instance_state)
          log.debug "#{monitor_instance_id} condition: state change, samples_to_check = #{samples_to_check} should_send #{health_monitor_instance_state.should_send} #{health_monitor_instance_state.old_state} --> #{health_monitor_instance_state.new_state}"
        else
          # multi-sample monitors notify only when the last N records agree
          if is_state_change_consistent(health_monitor_records, samples_to_keep)
            health_monitor_instance_state.old_state = health_monitor_instance_state.new_state
            health_monitor_instance_state.is_state_change_consistent = true # so it won't be recomputed in the optimizer
            health_monitor_instance_state.should_send = true
            health_monitor_instance_state.new_state = latest_record_state
            health_monitor_instance_state.prev_sent_record_time = current_time
            health_monitor_instance_state.state_change_time = current_time

            set_state(monitor_instance_id, health_monitor_instance_state)

            if !@@first_record_sent.key?(monitor_instance_id)
              @@first_record_sent[monitor_instance_id] = true
            end
            log.debug "#{monitor_instance_id} condition: consistent state change, samples_to_check = #{samples_to_check} should_send #{health_monitor_instance_state.should_send} #{health_monitor_instance_state.old_state} --> #{health_monitor_instance_state.new_state}"
          end
        end
      end
    end

    private

    # True when all of the (at least samples_to_check) records carry the same state.
    def is_state_change_consistent(health_monitor_records, samples_to_check)
      if health_monitor_records.nil? || health_monitor_records.size == 0 || health_monitor_records.size < samples_to_check
        return false
      end
      i = 0
      while i < health_monitor_records.size - 1
        if health_monitor_records[i]["state"] != health_monitor_records[i + 1]["state"]
          return false
        end
        i += 1
      end
      return true
    end
  end
end
&& value > warn_percentage + return HealthMonitorStates::WARNING + else + return HealthMonitorStates::PASS + end + end + + def is_node_monitor(monitor_id) + return (monitor_id == HealthMonitorConstants::NODE_CPU_MONITOR_ID || monitor_id == HealthMonitorConstants::NODE_MEMORY_MONITOR_ID || monitor_id == HealthMonitorConstants::NODE_CONDITION_MONITOR_ID) + end + + def is_pods_ready_monitor(monitor_id) + return (monitor_id == HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID || monitor_id == HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) + end + + def is_cluster_health_model_enabled + enabled = ENV["AZMON_CLUSTER_ENABLE_HEALTH_MODEL"] + if !enabled.nil? && enabled.casecmp("true") == 0 + return true + else + return false + end + end + + def get_pods_ready_hash(pod_inventory, deployment_inventory) + pods_ready_percentage_hash = {} + deployment_lookup = {} + deployment_inventory['items'].each do |deployment| + match_labels = deployment['spec']['selector']['matchLabels'].to_h + namespace = deployment['metadata']['namespace'] + match_labels.each{|k,v| + deployment_lookup["#{namespace}-#{k}=#{v}"] = "#{deployment['metadata']['namespace']}~~#{deployment['metadata']['name']}" + } + end + pod_inventory['items'].each do |pod| + begin + has_owner = !pod['metadata']['ownerReferences'].nil? + owner_kind = '' + if has_owner + owner_kind = pod['metadata']['ownerReferences'][0]['kind'] + controller_name = pod['metadata']['ownerReferences'][0]['name'] + else + owner_kind = pod['kind'] + controller_name = pod['metadata']['name'] + #log.info "#{JSON.pretty_generate(pod)}" + end + + namespace = pod['metadata']['namespace'] + status = pod['status']['phase'] + + workload_name = '' + if owner_kind.nil? + owner_kind = 'Pod' + end + case owner_kind.downcase + when 'job' + # we are excluding jobs + next + when 'replicaset' + # get the labels, and see if there is a match. If there is, it is the deployment. 
If not, use replica set name/controller name + labels = pod['metadata']['labels'].to_h + labels.each {|k,v| + lookup_key = "#{namespace}-#{k}=#{v}" + if deployment_lookup.key?(lookup_key) + workload_name = deployment_lookup[lookup_key] + break + end + } + if workload_name.empty? + workload_name = "#{namespace}~~#{controller_name}" + end + when 'daemonset' + workload_name = "#{namespace}~~#{controller_name}" + else + workload_name = "#{namespace}~~#{pod['metadata']['name']}" + end + + if pods_ready_percentage_hash.key?(workload_name) + total_pods = pods_ready_percentage_hash[workload_name]['totalPods'] + pods_ready = pods_ready_percentage_hash[workload_name]['podsReady'] + else + total_pods = 0 + pods_ready = 0 + end + + total_pods += 1 + if status == 'Running' + pods_ready += 1 + end + + pods_ready_percentage_hash[workload_name] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace, 'workload_name' => workload_name, 'kind' => owner_kind} + rescue => e + log.info "Error when processing pod #{pod['metadata']['name']} #{e.message}" + end + end + return pods_ready_percentage_hash + end + + def get_node_state_from_node_conditions(node_conditions) + pass = false + node_conditions.each do |condition| + type = condition['type'] + status = condition['status'] + + if ((type == "NetworkUnavailable" || type == "OutOfDisk") && (status == 'True' || status == 'Unknown')) + return "fail" + elsif ((type == "DiskPressure" || type == "MemoryPressure" || type == "PIDPressure") && (status == 'True' || status == 'Unknown')) + return "warn" + elsif type == "Ready" && status == 'True' + pass = true + end + end + + if pass + return "pass" + else + return "fail" + end + end + + def get_resource_subscription(pod_inventory, metric_name, metric_capacity) + subscription = 0.0 + if !pod_inventory.empty? + pod_inventory['items'].each do |pod| + pod['spec']['containers'].each do |container| + if !container['resources']['requests'].nil? 
&& !container['resources']['requests'][metric_name].nil? + subscription += KubernetesApiClient.getMetricNumericValue(metric_name, container['resources']['requests'][metric_name]) + end + end + end + end + #log.debug "#{metric_name} Subscription #{subscription}" + return subscription + end + + def get_cluster_cpu_memory_capacity(log) + begin + node_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + cluster_cpu_capacity = 0.0 + cluster_memory_capacity = 0.0 + if !node_inventory.empty? + node_inventory['items'].each do |node| + cpu_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "cpu", "cpuCapacityNanoCores") + if !cpu_capacity_json.nil? + cpu_capacity_json.each do |cpu_capacity_node| + if !cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil? + cluster_cpu_capacity += cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'] + end + end + log.info "Cluster CPU Limit #{cluster_cpu_capacity}" + else + log.info "Error getting cpu_capacity" + end + memory_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "memory", "memoryCapacityBytes") + if !memory_capacity_json.nil? + memory_capacity_json.each do |memory_capacity_node| + if !memory_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil? 
+ cluster_memory_capacity += memory_capacity_node['DataItems'][0]['Collections'][0]['Value'] + end + end + log.info "Cluster Memory Limit #{cluster_memory_capacity}" + else + log.info "Error getting memory_capacity" + end + end + else + log.info "Unable to get cpu and memory capacity" + return [0.0, 0.0] + end + return [cluster_cpu_capacity, cluster_memory_capacity] + rescue => e + log.info e + end + end + + def refresh_kubernetes_api_data(log, hostName, force: false) + #log.debug "refresh_kubernetes_api_data" + if ( ((Time.now.utc - Time.parse(@@last_refresh_time)) / 60 ) < 5.0 && !force) + log.debug "Less than 5 minutes since last refresh at #{@@last_refresh_time}" + return + end + if force + log.debug "Force Refresh" + end + + begin + @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + if !hostName.nil? + podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods?fieldSelector=spec.nodeName%3D#{hostName}").body) + else + podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods").body) + end + podInventory['items'].each do |pod| + has_owner = !pod['metadata']['ownerReferences'].nil? + if !has_owner + workload_name = pod['metadata']['name'] + else + workload_name = pod['metadata']['ownerReferences'][0]['name'] + end + namespace = pod['metadata']['namespace'] + #TODO: Figure this out for container cpu/memory + #@@controllerMapping[workload_name] = namespace + #log.debug "workload_name #{workload_name} namespace #{namespace}" + pod['spec']['containers'].each do |container| + key = [pod['metadata']['uid'], container['name']].join('/') + + if !container['resources'].empty? && !container['resources']['limits'].nil? && !container['resources']['limits']['cpu'].nil? + cpu_limit_value = KubernetesApiClient.getMetricNumericValue('cpu', container['resources']['limits']['cpu']) + else + log.info "CPU limit not set for container : #{container['name']}. 
Using Node Capacity" + #TODO: Send warning health event #bestpractices + cpu_limit_value = @cpu_capacity + end + + if !container['resources'].empty? && !container['resources']['limits'].nil? && !container['resources']['limits']['memory'].nil? + #log.info "Raw Memory Value #{container['resources']['limits']['memory']}" + memory_limit_value = KubernetesApiClient.getMetricNumericValue('memory', container['resources']['limits']['memory']) + else + log.info "Memory limit not set for container : #{container['name']}. Using Node Capacity" + memory_limit_value = @memory_capacity + end + + #TODO: Figure this out for container cpu/memory + #@@containerMetadata[key] = {"cpuLimit" => cpu_limit_value, "memoryLimit" => memory_limit_value, "controllerName" => workload_name, "namespace" => namespace} + end + end + rescue => e + log.info "Error Refreshing Container Resource Limits #{e.backtrace}" + end + # log.info "Controller Mapping #{@@controllerMapping}" + # log.info "Node Inventory #{@@nodeInventory}" + # log.info "Container Metadata #{@@containerMetadata}" + # log.info "------------------------------------" + @@last_refresh_time = Time.now.utc.iso8601 + end + + def get_monitor_instance_id(monitor_id, args = []) + string_to_hash = args.join("/") + return "#{monitor_id}-#{Digest::MD5.hexdigest(string_to_hash)}" + end + + def ensure_cpu_memory_capacity_set(log, cpu_capacity, memory_capacity, hostname) + + log.info "ensure_cpu_memory_capacity_set cpu_capacity #{cpu_capacity} memory_capacity #{memory_capacity}" + if cpu_capacity != 0.0 && memory_capacity != 0.0 + log.info "CPU And Memory Capacity are already set" + return [cpu_capacity, memory_capacity] + end + + begin + @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + rescue Exception => e + log.info "Error when getting nodeInventory from kube API. Exception: #{e.class} Message: #{e.message} " + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + end + if !@@nodeInventory.nil? 
+ cpu_capacity_json = KubernetesApiClient.parseNodeLimits(@@nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") + if !cpu_capacity_json.nil? + cpu_capacity_json.each do |cpu_info_node| + if !cpu_info_node['DataItems'][0]['Host'].nil? && cpu_info_node['DataItems'][0]['Host'] == hostname + if !cpu_info_node['DataItems'][0]['Collections'][0]['Value'].nil? + cpu_capacity = cpu_info_node['DataItems'][0]['Collections'][0]['Value'] + end + end + end + log.info "CPU Limit #{cpu_capacity}" + else + log.info "Error getting cpu_capacity" + end + memory_capacity_json = KubernetesApiClient.parseNodeLimits(@@nodeInventory, "capacity", "memory", "memoryCapacityBytes") + if !memory_capacity_json.nil? + memory_capacity_json.each do |memory_info_node| + if !memory_info_node['DataItems'][0]['Host'].nil? && memory_info_node['DataItems'][0]['Host'] == hostname + if !memory_info_node['DataItems'][0]['Collections'][0]['Value'].nil? + memory_capacity = memory_info_node['DataItems'][0]['Collections'][0]['Value'] + end + end + end + log.info "memory Limit #{memory_capacity}" + else + log.info "Error getting memory_capacity" + end + return [cpu_capacity, memory_capacity] + end + end + + def build_metrics_hash(metrics_to_collect) + metrics_to_collect_arr = metrics_to_collect.split(',').map(&:strip) + metrics_hash = metrics_to_collect_arr.map {|x| [x.downcase,true]}.to_h + return metrics_hash + end + + def get_health_monitor_config + health_monitor_config = {} + begin + file = File.open('/opt/microsoft/omsagent/plugin/healthmonitorconfig.json', "r") + if !file.nil? + fileContents = file.read + health_monitor_config = JSON.parse(fileContents) + file.close + end + rescue => e + log.info "Error when opening health config file #{e}" + end + return health_monitor_config + end + + def get_cluster_labels + labels = {} + cluster_id = KubernetesApiClient.getClusterId + region = KubernetesApiClient.getClusterRegion + labels['container.azm.ms/cluster-region'] = region + if !cluster_id.nil? 
+ cluster_id_elements = cluster_id.split('/') + azure_sub_id = cluster_id_elements[2] + resource_group = cluster_id_elements[4] + cluster_name = cluster_id_elements[8] + labels['container.azm.ms/cluster-subscription-id'] = azure_sub_id + labels['container.azm.ms/cluster-resource-group'] = resource_group + labels['container.azm.ms/cluster-name'] = cluster_name + end + return labels + end + + def get_log_handle + return @log + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_signal_reducer.rb b/source/code/plugin/health/health_signal_reducer.rb new file mode 100644 index 000000000..4cf53e82c --- /dev/null +++ b/source/code/plugin/health/health_signal_reducer.rb @@ -0,0 +1,51 @@ +module HealthModel + # this class + # 1. dedupes daemonset signals and takes only the latest + # 2. removes signals for objects that are no longer in the inventory e.g. node might have sent signal before being scaled down + class HealthSignalReducer + def initialize + + end + + def reduce_signals(health_monitor_records, health_k8s_inventory) + nodes = health_k8s_inventory.get_nodes + workload_names = health_k8s_inventory.get_workload_names + reduced_signals_map = {} + reduced_signals = [] + health_monitor_records.each{|health_monitor_record| + monitor_instance_id = health_monitor_record.monitor_instance_id + monitor_id = health_monitor_record.monitor_id + if reduced_signals_map.key?(monitor_instance_id) + record = reduced_signals_map[monitor_instance_id] + if health_monitor_record.transition_date_time > record.transition_date_time # always take the latest record for a monitor instance id + puts 'Duplicate Daemon Set signal' + reduced_signals_map[monitor_instance_id] = health_monitor_record + end + elsif HealthMonitorHelpers.is_node_monitor(monitor_id) + node_name = health_monitor_record.labels['kubernetes.io/hostname'] + if (node_name.nil? 
|| !nodes.include?(node_name)) # only add daemon set records if node is present in the inventory + next + end + reduced_signals_map[monitor_instance_id] = health_monitor_record + elsif HealthMonitorHelpers.is_pods_ready_monitor(monitor_id) + workload_name = health_monitor_record.labels[HealthMonitorLabels::WORKLOAD_NAME] + namespace = health_monitor_record.labels[HealthMonitorLabels::NAMESPACE] + lookup = "#{namespace}~~#{workload_name}" + if (workload_name.nil? || !workload_names.include?(lookup)) #only add pod record if present in the inventory + next + end + reduced_signals_map[monitor_instance_id] = health_monitor_record + else + reduced_signals_map[monitor_instance_id] = health_monitor_record + end + } + + reduced_signals_map.each{|k,v| + reduced_signals.push(v) + } + + return reduced_signals + end + + end +end \ No newline at end of file diff --git a/source/code/plugin/health/monitor_factory.rb b/source/code/plugin/health/monitor_factory.rb new file mode 100644 index 000000000..e6ec9d2c3 --- /dev/null +++ b/source/code/plugin/health/monitor_factory.rb @@ -0,0 +1,28 @@ +module HealthModel + class MonitorFactory + + def initialize + + end + + def create_unit_monitor(monitor_record) + return UnitMonitor.new(monitor_record.monitor_id, + monitor_record.monitor_instance_id, + monitor_record.state, + monitor_record.transition_date_time, + monitor_record.labels, + monitor_record.config, + monitor_record.details) + end + + def create_aggregate_monitor(monitor_id, monitor_instance_id, labels, aggregation_algorithm, aggregation_algorithm_params, child_monitor) + return AggregateMonitor.new(monitor_id, + monitor_instance_id, + child_monitor.state, + child_monitor.transition_date_time, + aggregation_algorithm, + aggregation_algorithm_params, + labels) + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/monitor_set.rb b/source/code/plugin/health/monitor_set.rb new file mode 100644 index 000000000..8d5994419 --- /dev/null +++ 
b/source/code/plugin/health/monitor_set.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module HealthModel + class MonitorSet + attr_accessor :monitors + + #constructor + def initialize + @monitors = {} + end + + # checks if the monitor is present in the set + def contains?(monitor_instance_id) + @monitors.key?(monitor_instance_id) + end + + # adds or updates the monitor + def add_or_update(monitor) + @monitors[monitor.monitor_instance_id] = monitor + end + + # gets the monitor given the monitor instance id + def get_monitor(monitor_instance_id) + @monitors[monitor_instance_id] if @monitors.key?(monitor_instance_id) + end + + # deletes a monitor from the set + def delete(monitor_instance_id) + if @monitors.key?(monitor_instance_id) + @monitors.delete(monitor_instance_id) + end + end + + # gets the size of the monitor set + def get_size + @monitors.length + end + + # gets the map of monitor instance id to monitors + def get_map + @monitors + end + end +end diff --git a/source/code/plugin/health/node_monitor_hierarchy_reducer.rb b/source/code/plugin/health/node_monitor_hierarchy_reducer.rb new file mode 100644 index 000000000..aafbd07a8 --- /dev/null +++ b/source/code/plugin/health/node_monitor_hierarchy_reducer.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +module HealthModel + class NodeMonitorHierarchyReducer + def initialize + end + + # Finalizes the Node Hierarchy. This removes node pools and node pool set from the hierarchy if they are not present. + def finalize(monitor_set) + monitors_to_reduce = [MonitorId::ALL_AGENT_NODE_POOLS, MonitorId::ALL_NODES] + # for the above monitors, which are constant per cluster, the monitor_id and monitor_instance_id are the same + monitors_to_reduce.each do |monitor_to_reduce| + monitor = monitor_set.get_monitor(monitor_to_reduce) + if !monitor.nil? 
+ if monitor.is_aggregate_monitor && monitor.get_member_monitors.size == 1 + #copy the children of member monitor as children of parent + member_monitor_instance_id = monitor.get_member_monitors[0] #gets the only member monitor instance id + member_monitor = monitor_set.get_monitor(member_monitor_instance_id) + #reduce only if the aggregation algorithms are the same + if !member_monitor.aggregation_algorithm.nil? && member_monitor.aggregation_algorithm == AggregationAlgorithm::WORSTOF && monitor.aggregation_algorithm == member_monitor.aggregation_algorithm + member_monitor.get_member_monitors.each{|grandchild_monitor| + monitor.add_member_monitor(grandchild_monitor) + } + monitor.remove_member_monitor(member_monitor_instance_id) + # delete the member monitor from the monitor_set + monitor_set.delete(member_monitor_instance_id) + end + end + end + end + end + end +end diff --git a/source/code/plugin/health/parent_monitor_provider.rb b/source/code/plugin/health/parent_monitor_provider.rb new file mode 100644 index 000000000..6a27f11d8 --- /dev/null +++ b/source/code/plugin/health/parent_monitor_provider.rb @@ -0,0 +1,86 @@ +module HealthModel + class ParentMonitorProvider + + attr_reader :health_model_definition, :parent_monitor_mapping, :parent_monitor_instance_mapping + + def initialize(definition) + @health_model_definition = definition + @parent_monitor_mapping = {} #monitorId --> parent_monitor_id mapping + @parent_monitor_instance_mapping = {} #child monitor id -- > parent monitor instance mapping. Used in instances when the node no longer exists and impossible to compute from kube api results + end + + # gets the parent monitor id given the state transition. 
It requires the monitor id and labels to determine the parent id + def get_parent_monitor_id(monitor) + monitor_id = monitor.monitor_id + + # cache the parent monitor id so it is not recomputed every time + if @parent_monitor_mapping.key?(monitor.monitor_instance_id) + return @parent_monitor_mapping[monitor.monitor_instance_id] + end + + if @health_model_definition.key?(monitor_id) + parent_monitor_id = @health_model_definition[monitor_id]['parent_monitor_id'] + # check parent_monitor_id is an array, then evaluate the conditions, else return the parent_monitor_id + if parent_monitor_id.is_a?(String) + @parent_monitor_mapping[monitor.monitor_instance_id] = parent_monitor_id + return parent_monitor_id + end + if parent_monitor_id.nil? + conditions = @health_model_definition[monitor_id]['conditions'] + if !conditions.nil? && conditions.is_a?(Array) + labels = monitor.labels + conditions.each{|condition| + left = "#{labels[condition['key']]}" + op = "#{condition['operator']}" + right = "#{condition['value']}" + cond = left.send(op.to_sym, right) + + if cond + @parent_monitor_mapping[monitor.monitor_instance_id] = condition['parent_id'] + return condition['parent_id'] + end + } + end + raise "Conditions were not met to determine the parent monitor id" if monitor_id != MonitorId::CLUSTER + end + else + raise "Invalid Monitor Id #{monitor_id} in get_parent_monitor_id" + end + end + + def get_parent_monitor_labels(monitor_id, monitor_labels, parent_monitor_id) + labels_to_copy = @health_model_definition[monitor_id]['labels'] + if labels_to_copy.nil? 
+ return {} + end + parent_monitor_labels = {} + labels_to_copy.each{|label| + parent_monitor_labels[label] = monitor_labels[label] + } + return parent_monitor_labels + end + + def get_parent_monitor_config(parent_monitor_id) + return @health_model_definition[parent_monitor_id] + end + + def get_parent_monitor_instance_id(monitor_instance_id, parent_monitor_id, parent_monitor_labels) + if @parent_monitor_instance_mapping.key?(monitor_instance_id) + return @parent_monitor_instance_mapping[monitor_instance_id] + end + + labels = AggregateMonitorInstanceIdLabels.get_labels_for(parent_monitor_id) + if !labels.is_a?(Array) + raise "Expected #{labels} to be an Array for #{parent_monitor_id}" + end + values = labels.map{|label| parent_monitor_labels[label]} + if values.nil? || values.empty? || values.size == 0 + @parent_monitor_instance_mapping[monitor_instance_id] = parent_monitor_id + return parent_monitor_id + end + parent_monitor_instance_id = "#{parent_monitor_id}-#{values.join('-')}" + @parent_monitor_instance_mapping[monitor_instance_id] = parent_monitor_instance_id + return parent_monitor_instance_id + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/unit_monitor.rb b/source/code/plugin/health/unit_monitor.rb new file mode 100644 index 000000000..9af599321 --- /dev/null +++ b/source/code/plugin/health/unit_monitor.rb @@ -0,0 +1,26 @@ +require_relative 'health_model_constants' +require 'json' + +module HealthModel + class UnitMonitor + + attr_accessor :monitor_id, :monitor_instance_id, :state, :transition_date_time, :labels, :config, :details, :is_aggregate_monitor + + # constructor + def initialize(monitor_id, monitor_instance_id, state, transition_date_time, labels, config, details) + @monitor_id = monitor_id + @monitor_instance_id = monitor_instance_id + @transition_date_time = transition_date_time + @state = state + @labels = labels + @config = config + @details = details + @is_aggregate_monitor = false + end + + def 
get_member_monitors + return nil + end + + end +end \ No newline at end of file diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index f5f65f01b..1702877a2 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -2,6 +2,7 @@ # frozen_string_literal: true module Fluent + class CAdvisor_Perf_Input < Input Plugin.register_input("cadvisorperf", self) @@ -18,6 +19,8 @@ def initialize config_param :run_interval, :time, :default => "1m" config_param :tag, :string, :default => "oms.api.cadvisorperf" config_param :mdmtag, :string, :default => "mdm.cadvisorperf" + config_param :nodehealthtag, :string, :default => "oms.api.KubeHealth.DaemonSet.Node" + #config_param :containerhealthtag, :string, :default => "oms.api.KubeHealth.DaemonSet.Container" def configure(conf) super @@ -51,11 +54,14 @@ def enumerate() record["DataType"] = "LINUX_PERF_BLOB" record["IPName"] = "LogManagement" eventStream.add(time, record) if record - #router.emit(@tag, time, record) if record - end + #router.emit(@tag, time, record) if record + end router.emit_stream(@tag, eventStream) if eventStream router.emit_stream(@mdmtag, eventStream) if eventStream + #router.emit_stream(@containerhealthtag, eventStream) if eventStream + router.emit_stream(@nodehealthtag, eventStream) if eventStream + @@istestvar = ENV["ISTEST"] if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0) $log.info("cAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index 3a0e04c67..f177b62bf 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -67,7 +67,7 @@ def enumerate(eventList = nil) newEventQueryState.push(eventId) if !eventQueryState.empty? 
&& eventQueryState.include?(eventId) next - end + end record["ObjectKind"] = items["involvedObject"]["kind"] record["Namespace"] = items["involvedObject"]["namespace"] record["Name"] = items["involvedObject"]["name"] @@ -94,12 +94,12 @@ def enumerate(eventList = nil) eventStream.add(emitTime, wrapper) if wrapper end router.emit_stream(@tag, eventStream) if eventStream - end + end writeEventQueryState(newEventQueryState) rescue => errorStr $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end + end end def run_periodic diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb new file mode 100644 index 000000000..d9672da3b --- /dev/null +++ b/source/code/plugin/in_kube_health.rb @@ -0,0 +1,307 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +require_relative "KubernetesApiClient" +require_relative "oms_common" +require_relative "omslog" +require_relative "ApplicationInsightsUtility" + +module Fluent + + Dir[File.join(__dir__, './health', '*.rb')].each { |file| require file } + class KubeHealthInput < Input + Plugin.register_input("kubehealth", self) + + config_param :health_monitor_config_path, :default => '/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json' + + @@clusterCpuCapacity = 0.0 + @@clusterMemoryCapacity = 0.0 + + def initialize + super + require "yaml" + require "json" + + @@cluster_id = KubernetesApiClient.getClusterId + @resources = HealthKubernetesResources.instance + @provider = HealthMonitorProvider.new(@@cluster_id, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) + @@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled + end + + include HealthModel + config_param :run_interval, :time, :default => "1m" + config_param :tag, :string, :default => "oms.api.KubeHealth.ReplicaSet" + + def configure(conf) + super + end + + def start + begin + if @run_interval + @finished = false + 
@condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) + + @@hmlog = HealthMonitorUtils.get_log_handle + @@clusterName = KubernetesApiClient.getClusterName + @@clusterRegion = KubernetesApiClient.getClusterRegion + cluster_capacity = HealthMonitorUtils.get_cluster_cpu_memory_capacity(@@hmlog) + @@clusterCpuCapacity = cluster_capacity[0] + @@clusterMemoryCapacity = cluster_capacity[1] + @@hmlog.info "Cluster CPU Capacity: #{@@clusterCpuCapacity} Memory Capacity: #{@@clusterMemoryCapacity}" + if @@cluster_health_model_enabled + ApplicationInsightsUtility.sendCustomEvent("in_kube_health Plugin Start", {}) + end + end + rescue => e + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + end + end + + def shutdown + if @run_interval + @mutex.synchronize { + @finished = true + @condition.signal + } + @thread.join + end + end + + def enumerate + begin + if !@@cluster_health_model_enabled + @@hmlog.info "Cluster Health Model disabled in in_kube_health" + return + end + + currentTime = Time.now + emitTime = currentTime.to_f + batchTime = currentTime.utc.iso8601 + health_monitor_records = [] + eventStream = MultiEventStream.new + + #HealthMonitorUtils.refresh_kubernetes_api_data(@@hmlog, nil) + # we do this so that if the call fails, we get a response code/header etc. 
+ node_inventory_response = KubernetesApiClient.getKubeResourceInfo("nodes") + node_inventory = JSON.parse(node_inventory_response.body) + pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods") + pod_inventory = JSON.parse(pod_inventory_response.body) + deployment_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("deployments", api_version: "extensions/v1beta1").body) + + @resources.node_inventory = node_inventory + @resources.pod_inventory = pod_inventory + @resources.deployment_inventory = deployment_inventory + + if node_inventory_response.code.to_i != 200 + record = process_kube_api_up_monitor("fail", node_inventory_response) + health_monitor_records.push(record) if record + else + record = process_kube_api_up_monitor("pass", node_inventory_response) + health_monitor_records.push(record) if record + end + + if !pod_inventory.nil? + record = process_cpu_oversubscribed_monitor(pod_inventory) + health_monitor_records.push(record) if record + record = process_memory_oversubscribed_monitor(pod_inventory) + health_monitor_records.push(record) if record + pods_ready_hash = HealthMonitorUtils.get_pods_ready_hash(pod_inventory, deployment_inventory) + + system_pods = pods_ready_hash.select{|k,v| v['namespace'] == 'kube-system'} + workload_pods = pods_ready_hash.select{|k,v| v['namespace'] != 'kube-system'} + + system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) + system_pods_ready_percentage_records.each do |record| + health_monitor_records.push(record) if record + end + + workload_pods_ready_percentage_records = process_pods_ready_percentage(workload_pods, HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID) + workload_pods_ready_percentage_records.each do |record| + health_monitor_records.push(record) if record + end + else + hmlog.info "POD INVENTORY IS NIL" + end + + if !node_inventory.nil? 
+ node_condition_records = process_node_condition_monitor(node_inventory) + node_condition_records.each do |record| + health_monitor_records.push(record) if record + end + else + hmlog.info "NODE INVENTORY IS NIL" + end + + health_monitor_records.each do |record| + eventStream.add(emitTime, record) + end + router.emit_stream(@tag, eventStream) if eventStream + rescue => errorStr + @@hmlog.warn("error in_kube_health: #{errorStr.to_s}") + @@hmlog.debug "backtrace Input #{errorStr.backtrace}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def process_cpu_oversubscribed_monitor(pod_inventory) + timestamp = Time.now.utc.iso8601 + subscription = HealthMonitorUtils.get_resource_subscription(pod_inventory,"cpu", @@clusterCpuCapacity) + state = subscription > @@clusterCpuCapacity ? "fail" : "pass" + #@@hmlog.debug "CPU Oversubscribed Monitor State : #{state}" + + #CPU + monitor_id = HealthMonitorConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}} + # @@hmlog.info health_monitor_record + + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@cluster_id]) + #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id + #@@hmlog.info "Successfully processed process_cpu_oversubscribed_monitor" + return health_record + end + + def 
process_memory_oversubscribed_monitor(pod_inventory) + timestamp = Time.now.utc.iso8601 + subscription = HealthMonitorUtils.get_resource_subscription(pod_inventory,"memory", @@clusterMemoryCapacity) + state = subscription > @@clusterMemoryCapacity ? "fail" : "pass" + #@@hmlog.debug "Memory Oversubscribed Monitor State : #{state}" + + #CPU + monitor_id = HealthMonitorConstants::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}} + hmlog = HealthMonitorUtils.get_log_handle + + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@cluster_id]) + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id + #@@hmlog.info "Successfully processed process_memory_oversubscribed_monitor" + return health_record + end + + def process_kube_api_up_monitor(state, response) + timestamp = Time.now.utc.iso8601 + + monitor_id = HealthMonitorConstants::KUBE_API_STATUS + details = response.each_header.to_h + details['ResponseCode'] = response.code + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} + hmlog = HealthMonitorUtils.get_log_handle + #hmlog.info health_monitor_record + + monitor_instance_id = HealthMonitorConstants::KUBE_API_STATUS + #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" + health_record = {} + time_now = Time.now.utc.iso8601 + 
health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id + #@@hmlog.info "Successfully processed process_kube_api_up_monitor" + return health_record + end + + def process_pods_ready_percentage(pods_hash, config_monitor_id) + monitor_config = @provider.get_config(config_monitor_id) + hmlog = HealthMonitorUtils.get_log_handle + + records = [] + pods_hash.keys.each do |key| + workload_name = key + total_pods = pods_hash[workload_name]['totalPods'] + pods_ready = pods_hash[workload_name]['podsReady'] + namespace = pods_hash[workload_name]['namespace'] + workload_kind = pods_hash[workload_name]['kind'] + percent = pods_ready / total_pods * 100 + timestamp = Time.now.utc.iso8601 + + state = HealthMonitorUtils.compute_percentage_state((100-percent), monitor_config) + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "workloadName" => workload_name, "namespace" => namespace, "workloadKind" => workload_kind}} + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(config_monitor_id, [@@cluster_id, namespace, workload_name]) + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = config_monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + 
health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id + records.push(health_record) + end + #@@hmlog.info "Successfully processed pods_ready_percentage for #{config_monitor_id} #{records.size}" + return records + end + + def process_node_condition_monitor(node_inventory) + monitor_id = HealthMonitorConstants::NODE_CONDITION_MONITOR_ID + timestamp = Time.now.utc.iso8601 + monitor_config = @provider.get_config(monitor_id) + node_condition_monitor_records = [] + if !node_inventory.nil? + node_inventory['items'].each do |node| + node_name = node['metadata']['name'] + conditions = node['status']['conditions'] + state = HealthMonitorUtils.get_node_state_from_node_conditions(conditions) + #hmlog.debug "Node Name = #{node_name} State = #{state}" + details = {} + conditions.each do |condition| + details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message']} + end + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@cluster_id, node_name]) + health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id + health_record[HealthMonitorRecordFields::NODE_NAME] = node_name + node_condition_monitor_records.push(health_record) + end + end + #@@hmlog.info "Successfully processed process_node_condition_monitor #{node_condition_monitor_records.size}" + return node_condition_monitor_records + end + + def run_periodic + @mutex.lock + done = @finished + until done + 
@condition.wait(@mutex, @run_interval) + done = @finished + @mutex.unlock + if !done + begin + @@hmlog.info("in_kube_health::run_periodic @ #{Time.now.utc.iso8601}") + enumerate + rescue => errorStr + @@hmlog.warn "in_kube_health::run_periodic: enumerate Failed for kubeapi sourced data health: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + @mutex.lock + end + @mutex.unlock + end + end +end diff --git a/test/code/plugin/filter_health_model_builder_test.rb b/test/code/plugin/filter_health_model_builder_test.rb new file mode 100644 index 000000000..f4dba11ed --- /dev/null +++ b/test/code/plugin/filter_health_model_builder_test.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +require 'test/unit' +require 'json' +# require_relative '../../../source/code/plugin/health' + +Dir[File.join(__dir__, '../../../source/code/plugin/health', '*.rb')].each { |file| require file } + +class FilterHealthModelBuilderTest < Test::Unit::TestCase + include HealthModel + + def test_event_stream + health_definition_path = 'C:\AzureMonitor\ContainerInsights\Docker-Provider\installer\conf\health_model_definition.json' + health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) + monitor_factory = MonitorFactory.new + hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) + # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. 
For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side + state_finalizers = [AggregateMonitorStateFinalizer.new] + monitor_set = MonitorSet.new + model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) + + i = 1 + loop do + mock_data_path = "C:/AzureMonitor/ContainerInsights/Docker-Provider/source/code/plugin/mock_data-#{i}.json" + file = File.read(mock_data_path) + data = JSON.parse(file) + + health_monitor_records = [] + data.each do |record| + health_monitor_record = HealthMonitorRecord.new( + record[HealthMonitorRecordFields::MONITOR_ID], + record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], + record[HealthMonitorRecordFields::DETAILS]["state"], + record[HealthMonitorRecordFields::MONITOR_LABELS], + record[HealthMonitorRecordFields::MONITOR_CONFIG], + record[HealthMonitorRecordFields::DETAILS] + ) + state_transitions.push(state_transition) + end + + model_builder.process_state_transitions(state_transitions) + changed_monitors = model_builder.finalize_model + changed_monitors.keys.each{|key| + puts key + } + i = i + 1 + if i == 6 + break + end + end + puts "Done" + end +end diff --git a/test/code/plugin/health/aggregate_monitor_spec.rb b/test/code/plugin/health/aggregate_monitor_spec.rb new file mode 100644 index 000000000..729965999 --- /dev/null +++ b/test/code/plugin/health/aggregate_monitor_spec.rb @@ -0,0 +1,256 @@ +require_relative '../test_helpers' + +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } +include HealthModel + +describe "AggregateMonitor Spec" do + it "is_aggregate_monitor is true for AggregateMonitor" do + # Arrange/Act + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "worstOf", [], {}) + # Assert + assert_equal monitor.is_aggregate_monitor, true 
+ end + + it "add_member_monitor tests -- adds a member monitor as a child monitor" do + # Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "worstOf", [], {}) + #Act + monitor.add_member_monitor("child_monitor_1") + #Assert + assert_equal monitor.get_member_monitors.include?("child_monitor_1"), true + + #Act + monitor.add_member_monitor("child_monitor_1") + #Assert + assert_equal monitor.get_member_monitors.size, 1 + end + + it "remove_member_monitor tests -- removes a member monitor as a child monitor" do + # Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "worstOf", [], {}) + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + + #Act + monitor.remove_member_monitor("child_monitor_1") + #Assert + assert_equal monitor.get_member_monitors.size, 1 + + #Act + monitor.remove_member_monitor("unknown_child") + #Assert + assert_equal monitor.get_member_monitors.size, 1 + end + + it "calculate_details tests -- calculates rollup details based on member monitor states" do + # Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "worstOf", [], {}) + + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "fail", "time", {}, {}, {}) + + monitor_set = MonitorSet.new + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + + #Act + monitor.calculate_details(monitor_set) + #Assert + assert_equal monitor.details["details"], {"pass"=>["child_monitor_1"], "fail"=>["child_monitor_2"]} + + #Arrange + child_monitor_3 = UnitMonitor.new("monitor_3", "child_monitor_3", "pass", "time", {}, {}, {}) + monitor_set.add_or_update(child_monitor_3) + monitor.add_member_monitor("child_monitor_3") + + 
#Act + monitor.calculate_details(monitor_set) + #Assert + assert_equal monitor.details["details"], {"pass"=>["child_monitor_1", "child_monitor_3"], "fail"=>["child_monitor_2"]} + end + + it "calculate_state tests -- raises when right aggregation_algorithm NOT specified" do + # Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "", [], {}) + #Assert + assert_raises do + monitor.calculate_state(monitor_set) + end + end + + it "calculate_state tests -- calculate_worst_of_state " do + # Arrange -- pass, fail = fail + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "worstOf", [], {}) + + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "fail", "time", {}, {}, {}) + + monitor_set = MonitorSet.new + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "fail" + + #Arrange -- pass, pass = pass + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "pass", "time", {}, {}, {}) + monitor_set.add_or_update(child_monitor_2) + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "pass" + + #Arrange -- pass, warn = warn + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "warn", "time", {}, {}, {}) + monitor_set.add_or_update(child_monitor_2) + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "warn" + + #Arrange -- warn, fail = fail + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "warn", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "fail", "time", {}, {}, {}) + monitor_set.add_or_update(child_monitor_1) + 
monitor_set.add_or_update(child_monitor_2) + + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "fail" + + #Arrange -- warn, unknown = unknown + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "warn", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "unknown", "time", {}, {}, {}) + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "warn" + + #Arrange -- pass, unknown = unknown + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "unknown", "time", {}, {}, {}) + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "unknown" + end + + it "calculate_state tests -- calculate_percentage_state " do + # Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "percentage", {"state_threshold" => 90.0}, {}) + + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "fail", "time", {}, {}, {}) + + monitor_set = MonitorSet.new + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "fail" + + #Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "percentage", {"state_threshold" => 50.0}, {}) + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", 
"child_monitor_2", "fail", "time", {}, {}, {}) + + monitor_set = MonitorSet.new + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "pass" + + #Arrange -- single child monitor + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "percentage", {"state_threshold" => 33.3}, {}) + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + monitor_set = MonitorSet.new + monitor_set.add_or_update(child_monitor_1) + monitor.add_member_monitor("child_monitor_1") + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "pass" + + + #Arrange -- remove none state + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :none, :time, "percentage", {"state_threshold" => 100.0}, {}) + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "none", "time", {}, {}, {}) + + monitor_set = MonitorSet.new + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "pass" + + + # Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "percentage", {"state_threshold" => 50.0}, {}) + + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "fail", "time", {}, {}, {}) + child_monitor_3 = UnitMonitor.new("monitor_3", "child_monitor_3", "fail", "time", {}, {}, {}) + + monitor_set = MonitorSet.new + 
monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + monitor_set.add_or_update(child_monitor_3) + + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + monitor.add_member_monitor("child_monitor_3") + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "fail" + + + # Arrange + monitor = AggregateMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, "percentage", {"state_threshold" => 90.0}, {}) + + child_monitor_1 = UnitMonitor.new("monitor_1", "child_monitor_1", "pass", "time", {}, {}, {}) + child_monitor_2 = UnitMonitor.new("monitor_2", "child_monitor_2", "pass", "time", {}, {}, {}) + child_monitor_3 = UnitMonitor.new("monitor_3", "child_monitor_3", "pass", "time", {}, {}, {}) + + monitor_set = MonitorSet.new + monitor_set.add_or_update(child_monitor_1) + monitor_set.add_or_update(child_monitor_2) + monitor_set.add_or_update(child_monitor_3) + + monitor.add_member_monitor("child_monitor_1") + monitor.add_member_monitor("child_monitor_2") + monitor.add_member_monitor("child_monitor_3") + #Act + monitor.calculate_state(monitor_set) + #Assert + assert_equal monitor.state, "pass" + end +end \ No newline at end of file diff --git a/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb b/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb new file mode 100644 index 000000000..f1ae0564d --- /dev/null +++ b/test/code/plugin/health/aggregate_monitor_state_finalizer_spec.rb @@ -0,0 +1,59 @@ +require_relative '../test_helpers' +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } +include HealthModel +include Minitest + +describe "AggregateMonitorStateFinalizer spec" do + it 'computes the right state and details' do + #arrange + monitor_set = Mock.new + + #mock unit monitors + child1 = Mock.new + def 
child1.state; "pass"; end + def child1.monitor_id; "child1";end + def child1.monitor_instance_id; "child1"; end + def child1.nil?; false; end + def child1.is_aggregate_monitor; false; end + + child2 = Mock.new + def child2.state; "fail"; end + def child2.monitor_id; "child2";end + def child2.monitor_instance_id; "child2"; end + def child2.nil?; false; end + def child2.is_aggregate_monitor; false; end + + parent_monitor = AggregateMonitor.new("parent_monitor", "parent_monitor", :none, :time, "worstOf", nil, {}) + parent_monitor.add_member_monitor("child1") + parent_monitor.add_member_monitor("child2") + + top_level_monitor = AggregateMonitor.new("cluster", "cluster", :none, :time, "worstOf", nil, {}) + top_level_monitor.add_member_monitor("parent_monitor") + + monitor_set.expect(:get_map, {"cluster" => top_level_monitor, "parent_monitor" => parent_monitor, "child1" => child1, "child2" => child2}) + monitor_set.expect(:get_monitor, top_level_monitor, ["cluster"]) + monitor_set.expect(:get_monitor, parent_monitor, ["parent_monitor"]) + monitor_set.expect(:get_monitor, child1, ["child1"]) + monitor_set.expect(:get_monitor, child2, ["child2"]) + monitor_set.expect(:get_monitor, child1, ["child1"]) + monitor_set.expect(:get_monitor, child2, ["child2"]) + monitor_set.expect(:get_monitor, parent_monitor, ["parent_monitor"]) + + + monitor_set.expect(:get_monitor, parent_monitor, ["parent_monitor"]) + monitor_set.expect(:get_monitor, child1, ["child1"]) + monitor_set.expect(:get_monitor, child2, ["child2"]) + + #act + finalizer = AggregateMonitorStateFinalizer.new + finalizer.finalize(monitor_set) + #assert + + assert_equal parent_monitor.state, "fail" + assert_equal parent_monitor.details, {"details"=>{"pass"=>["child1"], "fail"=>["child2"]}, "state"=>"fail", "timestamp"=>:time} + + assert_equal top_level_monitor.state, "fail" + assert_equal top_level_monitor.details, {"details"=>{"fail"=>["parent_monitor"]}, "state"=>"fail", "timestamp"=>:time} + + end +end \ No newline at 
end of file diff --git a/test/code/plugin/health/ca.crt b/test/code/plugin/health/ca.crt new file mode 100644 index 000000000..9daeafb98 --- /dev/null +++ b/test/code/plugin/health/ca.crt @@ -0,0 +1 @@ +test diff --git a/test/code/plugin/health/cluster_health_state_spec.rb b/test/code/plugin/health/cluster_health_state_spec.rb new file mode 100644 index 000000000..897291fe2 --- /dev/null +++ b/test/code/plugin/health/cluster_health_state_spec.rb @@ -0,0 +1,37 @@ +require_relative '../test_helpers' +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } +require 'time' +include HealthModel +include Minitest + +describe "Cluster Health State Spec" do + + it "ClusterHealthState.new throws if cert file is NOT present" do + state = { + "m1" => { + "state" => "pass", + "time" => Time.now.utc.iso8601 + } + } + + token_file_path = 'token' + cert_file_path = '/var/ca.crt' + + proc {ClusterHealthState.new(token_file_path, cert_file_path)}.must_raise + + end + + it "ClusterHealthState.new returns nil if token is NOT present" do + state = { + "m1" => { + "state" => "pass", + "time" => Time.now.utc.iso8601 + } + } + token_file_path = 'token' + cert_file_path = File.join(File.expand_path(File.dirname(__FILE__)), "ca.crt") + + chs = ClusterHealthState.new(token_file_path, cert_file_path) + chs.token.must_be_nil + end +end diff --git a/test/code/plugin/health/health_hierarchy_builder_spec.rb b/test/code/plugin/health/health_hierarchy_builder_spec.rb new file mode 100644 index 000000000..daafe0312 --- /dev/null +++ b/test/code/plugin/health/health_hierarchy_builder_spec.rb @@ -0,0 +1,11 @@ +require_relative '../test_helpers' +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } +include HealthModel +include Minitest + +describe 
"HealthHierarchyBuilder spec" do + it 'builds right hierarchy given a child monitor and a parent monitor provider' do + + end + +end \ No newline at end of file diff --git a/test/code/plugin/health/health_kubernetes_resource_spec.rb b/test/code/plugin/health/health_kubernetes_resource_spec.rb new file mode 100644 index 000000000..c27d969ec --- /dev/null +++ b/test/code/plugin/health/health_kubernetes_resource_spec.rb @@ -0,0 +1,222 @@ +require_relative '../test_helpers' +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } +include HealthModel + +describe "HealthKubernetesResources spec" do + it "returns the right set of nodes and workloads given node and pod inventory" do + + #arrange + nodes_json = '{ + "items": [ + { + "metadata": { + "name": "aks-nodepool1-19574989-0" + } + }, + { + "metadata": { + "name": "aks-nodepool1-19574989-1" + } + } + ] + }' + + pods_json = '{ + "items": [ + { + "metadata": { + "name": "diliprdeploymentnodeapps-c4fdfb446-mzcsr", + "generateName": "diliprdeploymentnodeapps-c4fdfb446-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/diliprdeploymentnodeapps-c4fdfb446-mzcsr", + "uid": "ee31a9ce-526e-11e9-a899-6a5520730c61", + "resourceVersion": "4597573", + "creationTimestamp": "2019-03-29T22:06:40Z", + "labels": { + "app": "diliprsnodeapppod", + "diliprPodLabel1": "p1", + "diliprPodLabel2": "p2", + "pod-template-hash": "709896002" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "ReplicaSet", + "name": "diliprdeploymentnodeapps-c4fdfb446", + "uid": "ee1e78e0-526e-11e9-a899-6a5520730c61", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "pi-m8ccw", + "generateName": "pi-", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/pi-m8ccw", + "uid": 
"9fb16aaa-7ccc-11e9-8d23-32c49ee6f300", + "resourceVersion": "7940877", + "creationTimestamp": "2019-05-22T20:03:10Z", + "labels": { + "controller-uid": "9fad836f-7ccc-11e9-8d23-32c49ee6f300", + "job-name": "pi" + }, + "ownerReferences": [ + { + "apiVersion": "batch/v1", + "kind": "Job", + "name": "pi", + "uid": "9fad836f-7ccc-11e9-8d23-32c49ee6f300", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "rss-site", + "namespace": "default", + "selfLink": "/api/v1/namespaces/default/pods/rss-site", + "uid": "68a34ea4-7ce4-11e9-8d23-32c49ee6f300", + "resourceVersion": "7954135", + "creationTimestamp": "2019-05-22T22:53:26Z", + "labels": { + "app": "web" + }, + "annotations": { + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"v1\",\"kind\":\"Pod\",\"metadata\":{\"annotations\":{},\"labels\":{\"app\":\"web\"},\"name\":\"rss-site\",\"namespace\":\"default\"},\"spec\":{\"containers\":[{\"image\":\"nginx\",\"name\":\"front-end\",\"ports\":[{\"containerPort\":80}]},{\"image\":\"nickchase/rss-php-nginx:v1\",\"name\":\"rss-reader\",\"ports\":[{\"containerPort\":88}]}]}}\n" + } + }, + "apiVersion": "v1", + "kind": "Pod" + }, + { + "metadata": { + "name": "kube-proxy-4hjws", + "generateName": "kube-proxy-", + "namespace": "kube-system", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-4hjws", + "uid": "8cf7c410-88f4-11e9-b1b0-5eb4a3e9de7d", + "resourceVersion": "9661065", + "creationTimestamp": "2019-06-07T07:19:12Z", + "labels": { + "component": "kube-proxy", + "controller-revision-hash": "1271944371", + "pod-template-generation": "16", + "tier": "node" + }, + "annotations": { + "aks.microsoft.com/release-time": "seconds:1559735217 nanos:797729016 ", + "remediator.aks.microsoft.com/kube-proxy-restart": "7" + }, + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "kind": "DaemonSet", + "name": "kube-proxy", + "uid": 
"45640bf6-44e5-11e9-9920-423525a6b683", + "controller": true, + "blockOwnerDeletion": true + } + ] + }, + "apiVersion": "v1", + "kind": "Pod" + } + ] + }' + deployments_json = '{ + "items": [ + { + "metadata": { + "name": "diliprdeploymentnodeapps", + "namespace": "default", + "selfLink": "/apis/extensions/v1beta1/namespaces/default/deployments/diliprdeploymentnodeapps", + "uid": "ee1b111d-526e-11e9-a899-6a5520730c61", + "resourceVersion": "4597575", + "generation": 1, + "creationTimestamp": "2019-03-29T22:06:40Z", + "labels": { + "diliprdeploymentLabel1": "d1", + "diliprdeploymentLabel2": "d2" + }, + "annotations": { + "deployment.kubernetes.io/revision": "1", + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"apps/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"diliprdeploymentLabel1\":\"d1\",\"diliprdeploymentLabel2\":\"d2\"},\"name\":\"diliprdeploymentnodeapps\",\"namespace\":\"default\"},\"spec\":{\"replicas\":1,\"selector\":{\"matchLabels\":{\"app\":\"diliprsnodeapppod\"}},\"template\":{\"metadata\":{\"labels\":{\"app\":\"diliprsnodeapppod\",\"diliprPodLabel1\":\"p1\",\"diliprPodLabel2\":\"p2\"}},\"spec\":{\"containers\":[{\"image\":\"rdilip83/logeverysecond:v2\",\"name\":\"diliprcontainerhelloapp\"}]}}}}\n" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "app": "diliprsnodeapppod" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "app": "diliprsnodeapppod", + "diliprPodLabel1": "p1", + "diliprPodLabel2": "p2" + } + }, + "spec": { + "containers": [ + { + "name": "diliprcontainerhelloapp", + "image": "rdilip83/logeverysecond:v2", + "resources": {}, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "securityContext": {}, + "schedulerName": "default-scheduler" + } 
+ }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": "25%", + "maxSurge": "25%" + } + }, + "revisionHistoryLimit": 2, + "progressDeadlineSeconds": 600 + }, + "apiVersion": "extensions/v1beta1", + "kind": "Deployment" + } + ] + }' + nodes = JSON.parse(nodes_json) + pods = JSON.parse(pods_json) + deployments = JSON.parse(deployments_json) + resources = HealthKubernetesResources.instance + resources.node_inventory = nodes + resources.pod_inventory = pods + resources.deployment_inventory = deployments + #act + parsed_nodes = resources.get_nodes + parsed_workloads = resources.get_workload_names + + #assert + assert_equal parsed_nodes.size, 2 + assert_equal parsed_workloads.size, 3 + + assert_equal parsed_nodes, ['aks-nodepool1-19574989-0', 'aks-nodepool1-19574989-1'] + assert_equal parsed_workloads, ['default~~diliprdeploymentnodeapps', 'default~~rss-site', 'kube-system~~kube-proxy'] + end +end \ No newline at end of file diff --git a/test/code/plugin/health/health_missing_signal_generator_spec.rb b/test/code/plugin/health/health_missing_signal_generator_spec.rb new file mode 100644 index 000000000..98d65416d --- /dev/null +++ b/test/code/plugin/health/health_missing_signal_generator_spec.rb @@ -0,0 +1,79 @@ +require_relative '../test_helpers' +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each {|file| require file} +include HealthModel +include Minitest + +describe "HealthMissingSignalGenerator spec" do + it 'generates missing node signals' do + #arrange + resources = Mock.new + resources.expect(:get_nodes, ["node1"]) + resources.expect(:get_workload_names, ["default~~workload1"]) + + provider = Mock.new + provider.expect(:get_node_labels, {HealthMonitorLabels::HOSTNAME => "node1"}, ["node1"]) + + node1_cpu_record = Mock.new + def node1_cpu_record.monitor_id; "node_cpu_utilization"; end + def node1_cpu_record.monitor_instance_id; 
"node_cpu_utilization"; end + def node1_cpu_record.labels; {HealthMonitorLabels::HOSTNAME => "node1"}; end + def node1_cpu_record.config; {}; end + def node1_cpu_record.state; "pass"; end + + node1_memory_record = Mock.new + def node1_memory_record.monitor_id; "node_memory_utilization"; end + def node1_memory_record.monitor_instance_id; "node_memory_utilization"; end + def node1_memory_record.labels; {HealthMonitorLabels::HOSTNAME => "node1"}; end + def node1_memory_record.config; {}; end + def node1_memory_record.state; "pass"; end + + node1_condition_record = Mock.new + def node1_condition_record.monitor_id; "node_condition"; end + def node1_condition_record.monitor_instance_id; "node_condition-0c593682737a955dc8e0947ad12754fe"; end + def node1_condition_record.labels; {HealthMonitorLabels::HOSTNAME => "node1"}; end + def node1_condition_record.config; {}; end + def node1_condition_record.state; "pass"; end + + + workload1_pods_ready_record = Mock.new + def workload1_pods_ready_record.monitor_id; "user_workload_pods_ready"; end + def workload1_pods_ready_record.monitor_instance_id; "user_workload_pods_ready-workload1"; end + def workload1_pods_ready_record.labels; {HealthMonitorLabels::NAMESPACE => "default", HealthMonitorLabels::WORKLOAD_NAME => "workload1"}; end + def workload1_pods_ready_record.config; {}; end + def workload1_pods_ready_record.state; "pass"; end + + generator = HealthMissingSignalGenerator.new + generator.update_last_received_records([node1_cpu_record, node1_memory_record, node1_condition_record, workload1_pods_ready_record]) + + #act + missing = generator.get_missing_signals('fake_cluster_id', [node1_cpu_record, node1_memory_record], resources, provider) + + #assert + assert_equal missing.size, 2 + + assert_equal missing[0].monitor_id, "node_condition" + assert_equal missing[0].state, "unknown" + assert_equal missing[0].monitor_instance_id, "node_condition-0c593682737a955dc8e0947ad12754fe" + + assert_equal missing[1].monitor_id, 
"user_workload_pods_ready" + assert_equal missing[1].state, "unknown" + assert_equal missing[1].monitor_instance_id, "user_workload_pods_ready-workload1" + + #arrange + resources.expect(:get_nodes, ["node1"]) + resources.expect(:get_workload_names, ["default~~workload1"]) + provider.expect(:get_node_labels, {HealthMonitorLabels::HOSTNAME => "node1"}, ["node1"]) + generator.update_last_received_records([node1_cpu_record, node1_memory_record]) + #act + missing = generator.get_missing_signals('fake_cluster_id', [node1_cpu_record, node1_memory_record], resources, provider) + #assert + assert_equal missing.size, 2 + assert_equal missing[0].monitor_id, "node_condition" + assert_equal missing[0].state, "unknown" + assert_equal missing[0].monitor_instance_id, "node_condition-0c593682737a955dc8e0947ad12754fe" + + assert_equal missing[1].monitor_id, "user_workload_pods_ready" + assert_equal missing[1].state, "none" + assert_equal missing[1].monitor_instance_id, "user_workload_pods_ready-workload1" + end +end \ No newline at end of file diff --git a/test/code/plugin/health/health_model_buffer_spec.rb b/test/code/plugin/health/health_model_buffer_spec.rb new file mode 100644 index 000000000..259513c08 --- /dev/null +++ b/test/code/plugin/health/health_model_buffer_spec.rb @@ -0,0 +1,25 @@ +require_relative '../../../../source/code/plugin/health/health_model_buffer' +require_relative '../test_helpers' + +include HealthModel + +describe "HealthModelBuffer Spec" do + it "get_buffer returns the correct buffer data" do + # Arrange + buffer = HealthModelBuffer.new + # Act + buffer.add_to_buffer(['mockRecord']) + # Assert + assert_equal buffer.get_buffer.length, 1 + + #Act + buffer.add_to_buffer(['mockRecord1', 'mockRecord2']) + #Assert + assert_equal buffer.get_buffer.length, 3 + + #Act + buffer.reset_buffer + #Assert + assert_equal buffer.get_buffer.length, 0 + end +end \ No newline at end of file diff --git a/test/code/plugin/health/health_model_builder_spec.rb 
b/test/code/plugin/health/health_model_builder_spec.rb new file mode 100644 index 000000000..c49e6c92a --- /dev/null +++ b/test/code/plugin/health/health_model_builder_spec.rb @@ -0,0 +1,37 @@ +require_relative '../test_helpers' +# consider doing this in test_helpers.rb so that this code is common +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } +include HealthModel +include Minitest + +describe "HealthModelBuilder spec" do + it "Verify hierarchy builder and finalizer public methods are called" do + #arrange + mock_hierarchy_builder = Mock::new + health_record = Mock::new + mock_monitor_set = Mock::new + mock_state_finalizer = Mock::new + mock_hierarchy_builder.expect(:process_record, nil, [health_record, mock_monitor_set]) + mock_state_finalizer.expect(:finalize, {}, [mock_monitor_set]) + def mock_monitor_set.get_map; {}; end + + #act + builder = HealthModelBuilder.new(mock_hierarchy_builder, [mock_state_finalizer], mock_monitor_set) + builder.process_records([health_record]) + builder.finalize_model + #assert + assert mock_hierarchy_builder.verify + assert mock_state_finalizer.verify + end + + it "Verify finalize_model raises if state_finalizers is empty" do + #arrange + mock_hierarchy_builder = Mock.new + mock_monitor_set = Mock.new + builder = HealthModelBuilder.new(mock_hierarchy_builder, [], mock_monitor_set) + #act and assert + assert_raises do + builder.finalize_model + end + end +end \ No newline at end of file diff --git a/test/code/plugin/health/health_model_builder_test.rb b/test/code/plugin/health/health_model_builder_test.rb new file mode 100644 index 000000000..df921049c --- /dev/null +++ b/test/code/plugin/health/health_model_builder_test.rb @@ -0,0 +1,337 @@ +require 'test/unit' +require 'json' +# require_relative '../../../source/code/plugin/health' + +Dir[File.join(__dir__, '../../../../source/code/plugin/health', 
'*.rb')].each { |file| require file } + +class FilterHealthModelBuilderTest < Test::Unit::TestCase + include HealthModel + + def test_event_stream + #setup + health_definition_path = File.join(__dir__, '../../../../installer/conf/health_model_definition.json') + health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) + monitor_factory = MonitorFactory.new + hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) + # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side + state_finalizers = [AggregateMonitorStateFinalizer.new] + monitor_set = MonitorSet.new + model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) + + nodes_file_map = { + #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json", + "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + } + + pods_file_map = { + #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json", + "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + "second" => 
"C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + } + + cluster_labels = { + 'container.azm.ms/cluster-region' => 'eastus', + 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', + 'container.azm.ms/cluster-name' => 'dilipr-health-test' + } + + cluster_id = 'fake_cluster_id' + + #test + state = HealthMonitorState.new() + generator = HealthMissingSignalGenerator.new + + for scenario in ["first", "second", "third"] + mock_data_path = File.join(__dir__, "../../../../health_records/#{scenario}_daemon_set_signals.json") + file = File.read(mock_data_path) + records = JSON.parse(file) + + node_inventory = JSON.parse(File.read(nodes_file_map[scenario])) + pod_inventory = JSON.parse(File.read(pods_file_map[scenario])) + deployment_inventory = JSON.parse(File.read(File.join(__dir__, "../../../../inventory/deployments.json"))) + resources = HealthKubernetesResources.instance + resources.node_inventory = node_inventory + resources.pod_inventory = pod_inventory + resources.deployment_inventory = deployment_inventory + + workload_names = resources.get_workload_names + provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../..//installer/conf/healthmonitorconfig.json")) + + health_monitor_records = [] + records.each do |record| + monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] + monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] + health_monitor_record = HealthMonitorRecord.new( + 
record[HealthMonitorRecordFields::MONITOR_ID], + record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], + record[HealthMonitorRecordFields::DETAILS]["state"], + provider.get_labels(record), + provider.get_config(monitor_id), + record[HealthMonitorRecordFields::DETAILS] + ) + + state.update_state(health_monitor_record, + provider.get_config(health_monitor_record.monitor_id) + ) + + # get the health state based on the monitor's operational state + # update state calls updates the state of the monitor based on configuration and history of the the monitor records + health_monitor_record.state = state.get_state(monitor_instance_id).new_state + health_monitor_records.push(health_monitor_record) + instance_state = state.get_state(monitor_instance_id) + #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + end + + + #handle kube api down + kube_api_down_handler = HealthKubeApiDownHandler.new + health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records) + + # Dedupe daemonset signals + # Remove unit monitor signals for “gone” objects + reducer = HealthSignalReducer.new() + reduced_records = reducer.reduce_signals(health_monitor_records, resources) + + cluster_id = 'fake_cluster_id' + + #get the list of 'none' and 'unknown' signals + missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider) + #update state for missing signals + missing_signals.each{|signal| + state.update_state(signal, + provider.get_config(signal.monitor_id) + ) + } + generator.update_last_received_records(reduced_records) + reduced_records.push(*missing_signals) + + # build the health model + all_records = reduced_records + model_builder.process_records(all_records) + all_monitors = model_builder.finalize_model + + # update the state for aggregate monitors (unit monitors are updated above) + 
all_monitors.each{|monitor_instance_id, monitor|
+ if monitor.is_aggregate_monitor
+ state.update_state(monitor,
+ provider.get_config(monitor.monitor_id)
+ )
+ end
+
+ instance_state = state.get_state(monitor_instance_id)
+ #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}"
+ should_send = instance_state.should_send
+
+ # always send cluster monitor as a heartbeat
+ if !should_send && monitor_instance_id != MonitorId::CLUSTER
+ all_monitors.delete(monitor_instance_id)
+ end
+ }
+
+ records_to_send = []
+ all_monitors.keys.each{|key|
+ record = provider.get_record(all_monitors[key], state)
+ #puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}"
+ }
+
+ if scenario == "first"
+ assert_equal 50, all_monitors.size
+ elsif scenario == "second"
+ assert_equal 34, all_monitors.size
+ elsif scenario == "third"
+ assert_equal 5, all_monitors.size
+ end
+ # for each key in monitor.keys,
+ # get the state from health_monitor_state
+ # generate the record to send
+ serializer = HealthStateSerializer.new(File.join(__dir__, '../../../../health_records/health_model_state.json'))
+ serializer.serialize(state)
+
+ deserializer = HealthStateDeserializer.new(File.join(__dir__, '../../../../health_records/health_model_state.json'))
+ deserialized_state = deserializer.deserialize
+
+ after_state = HealthMonitorState.new
+ after_state.initialize_state(deserialized_state)
+ end
+ end
+
+ def test_event_stream_aks_engine
+
+ #setup
+ health_definition_path = File.join(__dir__, '../../../../installer/conf/health_model_definition.json')
+ health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file)
+ monitor_factory = MonitorFactory.new
+ hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory)
+ state_finalizers = [AggregateMonitorStateFinalizer.new]
+ monitor_set = MonitorSet.new
+ model_builder = 
HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) + + nodes_file_map = { + #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json", + #"first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + "aks-engine-1" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", + "aks-engine-2" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", + "aks-engine-3" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", + } + + pods_file_map = { + #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json", + #"first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + "aks-engine-1" => 
"C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", + "aks-engine-2" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", + "aks-engine-3" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", + } + + cluster_labels = { + 'container.azm.ms/cluster-region' => 'eastus', + 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + 'container.azm.ms/cluster-resource-group' => 'aks-engine-health', + 'container.azm.ms/cluster-name' => 'aks-engine-health' + } + + cluster_id = 'fake_cluster_id' + + #test + state = HealthMonitorState.new() + generator = HealthMissingSignalGenerator.new + + for scenario in 1..3 + mock_data_path = File.join(__dir__, "../../../../health_records/aks-engine/aks-engine-#{scenario}.json") + file = File.read(mock_data_path) + records = JSON.parse(file) + + node_inventory = JSON.parse(File.read(nodes_file_map["aks-engine-#{scenario}"])) + pod_inventory = JSON.parse(File.read(pods_file_map["aks-engine-#{scenario}"])) + deployment_inventory = JSON.parse(File.read(File.join(__dir__, "../../../../inventory/aks-engine/deployments.json"))) + resources = HealthKubernetesResources.instance + resources.node_inventory = node_inventory + resources.pod_inventory = pod_inventory + resources.deployment_inventory = deployment_inventory + + workload_names = resources.get_workload_names + provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) + + health_monitor_records = [] + records.each do |record| + monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] + monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] + health_monitor_record = HealthMonitorRecord.new( + record[HealthMonitorRecordFields::MONITOR_ID], + record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + 
record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], + record[HealthMonitorRecordFields::DETAILS]["state"], + provider.get_labels(record), + provider.get_config(monitor_id), + record[HealthMonitorRecordFields::DETAILS] + ) + + state.update_state(health_monitor_record, + provider.get_config(health_monitor_record.monitor_id) + ) + + # get the health state based on the monitor's operational state + # update state calls updates the state of the monitor based on configuration and history of the the monitor records + health_monitor_record.state = state.get_state(monitor_instance_id).new_state + health_monitor_records.push(health_monitor_record) + instance_state = state.get_state(monitor_instance_id) + #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + end + + + #handle kube api down + kube_api_down_handler = HealthKubeApiDownHandler.new + health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records) + + # Dedupe daemonset signals + # Remove unit monitor signals for “gone” objects + reducer = HealthSignalReducer.new() + reduced_records = reducer.reduce_signals(health_monitor_records, resources) + + cluster_id = 'fake_cluster_id' + + #get the list of 'none' and 'unknown' signals + missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider) + #update state for missing signals + missing_signals.each{|signal| + state.update_state(signal, + provider.get_config(signal.monitor_id) + ) + } + generator.update_last_received_records(reduced_records) + reduced_records.push(*missing_signals) + + # build the health model + all_records = reduced_records + model_builder.process_records(all_records) + all_monitors = model_builder.finalize_model + + # update the state for aggregate monitors (unit monitors are updated above) + all_monitors.each{|monitor_instance_id, monitor| + if monitor.is_aggregate_monitor + state.update_state(monitor, + 
provider.get_config(monitor.monitor_id) + ) + end + + instance_state = state.get_state(monitor_instance_id) + #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + should_send = instance_state.should_send + + # always send cluster monitor as a heartbeat + if !should_send && monitor_instance_id != MonitorId::CLUSTER + all_monitors.delete(monitor_instance_id) + end + } + + records_to_send = [] + all_monitors.keys.each{|key| + record = provider.get_record(all_monitors[key], state) + #puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" + } + + if scenario == 1 + assert_equal 58, all_monitors.size + elsif scenario == 2 + assert_equal 37, all_monitors.size + elsif scenario == 3 + assert_equal 6, all_monitors.size + end + # for each key in monitor.keys, + # get the state from health_monitor_state + # generate the record to send + serializer = HealthStateSerializer.new(File.join(__dir__, '../../../../health_records\health_model_state_aks-engine.json')) + serializer.serialize(state) + + deserializer = HealthStateDeserializer.new(File.join(__dir__, '../../../../health_records\health_model_state_aks-engine.json')) + deserialized_state = deserializer.deserialize + + after_state = HealthMonitorState.new + after_state.initialize_state(deserialized_state) + end + end +end \ No newline at end of file diff --git a/test/code/plugin/health/health_model_definition_parser_spec.rb b/test/code/plugin/health/health_model_definition_parser_spec.rb new file mode 100644 index 000000000..56551510b --- /dev/null +++ b/test/code/plugin/health/health_model_definition_parser_spec.rb @@ -0,0 +1,24 @@ +require_relative '../test_helpers' +# consider doing this in test_helpers.rb so that this code is common +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } +include HealthModel + 
+describe "HealthModelDefinitionParser spec " do + it "parses the definition file correctly with the right conditions" do + #arrange + + parser = HealthModelDefinitionParser.new(File.join(File.expand_path(File.dirname(__FILE__)), 'test_health_model_definition.json')) + #act + model_definition = parser.parse_file + + #assert + assert_equal model_definition['conditional_monitor_id'].key?("conditions"), true + assert_equal model_definition['conditional_monitor_id']["conditions"].size, 2 + assert_equal model_definition['conditional_monitor_id'].key?("parent_monitor_id"), false + + #assert + assert_equal model_definition['monitor_id'].key?("conditions"), false + assert_equal model_definition['monitor_id'].key?("parent_monitor_id"), true + end + +end \ No newline at end of file diff --git a/test/code/plugin/health/health_monitor_state_spec.rb b/test/code/plugin/health/health_monitor_state_spec.rb new file mode 100644 index 000000000..5fa8a6c6e --- /dev/null +++ b/test/code/plugin/health/health_monitor_state_spec.rb @@ -0,0 +1,176 @@ +require_relative '../test_helpers' +# consider doing this in test_helpers.rb so that this code is common +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } +include HealthModel +include Minitest + +describe "HealthMonitorState spec" do + it 'updates should_send to true for monitors which hasnt been sent before' do + #arrange + state = HealthMonitorState.new + mock_monitor = Mock.new + def mock_monitor.state; "pass"; end + def mock_monitor.monitor_id; "monitor_id"; end + def mock_monitor.monitor_instance_id; "monitor_instance_id"; end + def mock_monitor.transition_date_time; Time.now.utc.iso8601; end + def mock_monitor.details; {"state" => "pass", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + + #act + state.update_state(mock_monitor, {}) + monitor_state = state.get_state("monitor_instance_id") + 
#assert + monitor_state.should_send.must_equal true + monitor_state.old_state.must_equal "none" + monitor_state.new_state.must_equal "pass" + end + + it 'updates should_send to true for monitors which need no consistent state change' do + #arrange + state = HealthMonitorState.new + mock_monitor = Mock.new + def mock_monitor.state; "pass"; end + def mock_monitor.monitor_id; "monitor_id"; end + def mock_monitor.monitor_instance_id; "monitor_instance_id"; end + def mock_monitor.transition_date_time; Time.now.utc.iso8601; end + def mock_monitor.details; {"state" => "pass", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + + #act + state.update_state(mock_monitor, {}) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal true + monitor_state.old_state.must_equal "none" + monitor_state.new_state.must_equal "pass" + + #arrange + def mock_monitor.state; "fail"; end + def mock_monitor.details; {"state" => "fail", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + #act + state.update_state(mock_monitor, {}) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal true + monitor_state.old_state.must_equal "pass" + monitor_state.new_state.must_equal "fail" + end + + it 'updates should_send to false for monitors which need consistent state change and has no consistent state change' do + #arrange + state = HealthMonitorState.new + mock_monitor = Mock.new + def mock_monitor.state; "pass"; end + def mock_monitor.monitor_id; "monitor_id"; end + def mock_monitor.monitor_instance_id; "monitor_instance_id"; end + def mock_monitor.transition_date_time; Time.now.utc.iso8601; end + def mock_monitor.details; {"state" => "pass", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + + config = JSON.parse('{ + "WarnThresholdPercentage": 80.0, + "FailThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 + }') + #act + 
state.update_state(mock_monitor, config) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal true + + #arrange + def mock_monitor.state; "fail"; end + def mock_monitor.details; {"state" => "fail", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + #act + state.update_state(mock_monitor, config) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal false + end + + it 'updates should_send to true for monitors which need consistent state change and has a consistent state change' do + #arrange + state = HealthMonitorState.new + mock_monitor = Mock.new + def mock_monitor.state; "pass"; end + def mock_monitor.monitor_id; "monitor_id"; end + def mock_monitor.monitor_instance_id; "monitor_instance_id"; end + def mock_monitor.transition_date_time; Time.now.utc.iso8601; end + def mock_monitor.details; {"state" => "pass", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + + config = JSON.parse('{ + "WarnThresholdPercentage": 80.0, + "FailThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 + }') + #act + state.update_state(mock_monitor, config) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal true + + #arrange + def mock_monitor.state; "fail"; end + def mock_monitor.details; {"state" => "fail", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + #act + state.update_state(mock_monitor, config) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal false + + #act + state.update_state(mock_monitor, config) + state.update_state(mock_monitor, config) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal true + monitor_state.old_state.must_equal "none" + monitor_state.new_state.must_equal "fail" + end + + it 'updates should_send to false for monitors which need consistent 
state change and has NO state change' do + #arrange + state = HealthMonitorState.new + mock_monitor = Mock.new + def mock_monitor.state; "pass"; end + def mock_monitor.monitor_id; "monitor_id"; end + def mock_monitor.monitor_instance_id; "monitor_instance_id"; end + def mock_monitor.transition_date_time; Time.now.utc.iso8601; end + def mock_monitor.details; {"state" => "pass", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + + config = JSON.parse('{ + "WarnThresholdPercentage": 80.0, + "FailThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 + }') + #act + state.update_state(mock_monitor, config) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal true + monitor_state.old_state.must_equal "none" + monitor_state.new_state.must_equal "none" + + + #arrange + def mock_monitor.state; "pass"; end + def mock_monitor.details; {"state" => "pass", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end + #act + state.update_state(mock_monitor, config) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal false + + #act + state.update_state(mock_monitor, config) + monitor_state.should_send.must_equal true + monitor_state.old_state.must_equal "none" + monitor_state.new_state.must_equal "pass" + + #act + state.update_state(mock_monitor, config) + monitor_state = state.get_state("monitor_instance_id") + #assert + monitor_state.should_send.must_equal false + monitor_state.old_state.must_equal "none" + monitor_state.new_state.must_equal "pass" + end + +end \ No newline at end of file diff --git a/test/code/plugin/health/health_signal_reducer_spec.rb b/test/code/plugin/health/health_signal_reducer_spec.rb new file mode 100644 index 000000000..f71a5c509 --- /dev/null +++ b/test/code/plugin/health/health_signal_reducer_spec.rb @@ -0,0 +1,96 @@ +require_relative '../test_helpers' +# consider doing this in test_helpers.rb so that this code is 
common +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } +include HealthModel +include Minitest + +describe "HealthSignalReducer spec" do + it "returns the right set of records -- no reduction" do + #arrange + record1 = Mock.new + def record1.monitor_id; "node_cpu_utilization"; end + def record1.monitor_instance_id; "node_cpu_utilization-node1"; end + def record1.labels; {HealthMonitorLabels::HOSTNAME => "node1"}; end + inventory = Mock.new + def inventory.get_nodes; ["node1"]; end + def inventory.get_workload_names; []; end + reducer = HealthSignalReducer.new + #act + reduced = reducer.reduce_signals([record1], inventory) + #Assert + assert_equal reduced.size, 1 + end + + it "returns only the latest record if multiple records are present for the same monitor" do + #arrange + record1 = Mock.new + def record1.monitor_id; "node_cpu_utilization"; end + def record1.monitor_instance_id; "node_cpu_utilization-node1"; end + def record1.labels; {HealthMonitorLabels::HOSTNAME => "node1"}; end + def record1.transition_date_time; Time.now.utc.iso8601 ; end + + + record2 = Mock.new + def record2.monitor_id; "node_cpu_utilization"; end + def record2.monitor_instance_id; "node_cpu_utilization-node1"; end + def record2.labels; {HealthMonitorLabels::HOSTNAME => "node1"}; end + def record2.transition_date_time; "#{Time.now.utc.iso8601}" ; end + + inventory = Mock.new + def inventory.get_nodes; ["node1"]; end + def inventory.get_workload_names; []; end + reducer = HealthSignalReducer.new + #act + reduced = reducer.reduce_signals([record1, record2], inventory) + #Assert + assert_equal reduced.size, 1 + end + + it "returns only those records if the node is present in the inventory" do + #arrange + record1 = Mock.new + def record1.monitor_id; "node_cpu_utilization"; end + def record1.monitor_instance_id; "node_cpu_utilization-node1"; end + def 
record1.labels; {HealthMonitorLabels::HOSTNAME => "node1"}; end
+ inventory = Mock.new
+ def inventory.get_nodes; ["node2"]; end
+ def inventory.get_workload_names; []; end
+
+ #act
+ reducer = HealthSignalReducer.new
+ #assert
+ assert_equal reducer.reduce_signals([record1], inventory).size, 0
+ end
+
+ it "returns only those records if the workload name is present in the inventory" do
+ #arrange
+ record1 = Mock.new
+ def record1.monitor_id; "user_workload_pods_ready"; end
+ def record1.monitor_instance_id; "user_workload_pods_ready-workload1"; end
+ def record1.labels; {HealthMonitorLabels::NAMESPACE => "default", HealthMonitorLabels::WORKLOAD_NAME => "workload1"}; end
+ def record1.transition_date_time; Time.now.utc.iso8601 ; end
+
+ inventory = Mock.new
+ def inventory.get_nodes; ["node2"]; end
+ def inventory.get_workload_names; ["default~~workload1"]; end
+ reducer = HealthSignalReducer.new
+
+ #act
+ reduced = reducer.reduce_signals([record1], inventory)
+
+ #assert
+ assert_equal reduced.size, 1
+
+ #arrange
+ record2 = Mock.new
+ def record2.monitor_id; "user_workload_pods_ready"; end
+ def record2.monitor_instance_id; "user_workload_pods_ready-workload2"; end
+ def record2.labels; {HealthMonitorLabels::NAMESPACE => "default1", HealthMonitorLabels::WORKLOAD_NAME => "workload2"}; end
+ def record2.transition_date_time; Time.now.utc.iso8601 ; end
+ #act
+ reduced = reducer.reduce_signals([record1, record2], inventory)
+ #assert
+ assert_equal reduced.size, 1
+ end
+
+end
diff --git a/test/code/plugin/health/kube_api_down_handler_spec.rb b/test/code/plugin/health/kube_api_down_handler_spec.rb
new file mode 100644
index 000000000..3f3f9b37f
--- /dev/null
+++ b/test/code/plugin/health/kube_api_down_handler_spec.rb
@@ -0,0 +1,26 @@
+require_relative '../test_helpers'
+Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file }
+include 
HealthModel + +describe "KubeApiDownHandler spec" do + it "updates states for monitors in monitors_to_change" do + #arrange + record1 = HealthMonitorRecord.new("node_condition", "node_condition-node1", Time.now.utc.iso8601, "pass", {}, {}, {}) + record2 = HealthMonitorRecord.new("kube_api_status", "kube_api_status", Time.now.utc.iso8601, "fail", {}, {}, {}) + record3 = HealthMonitorRecord.new("user_workload_pods_ready", "user_workload_pods_ready-workload1", Time.now.utc.iso8601, "pass", {}, {}, {}) + record4 = HealthMonitorRecord.new("system_workload_pods_ready", "system_workload_pods_ready-workload2", Time.now.utc.iso8601, "pass", {}, {}, {}) + record5 = HealthMonitorRecord.new("subscribed_capacity_cpu", "subscribed_capacity_cpu", Time.now.utc.iso8601, "pass", {}, {}, {}) + record6 = HealthMonitorRecord.new("subscribed_capacity_memory", "subscribed_capacity_memory", Time.now.utc.iso8601, "pass", {}, {}, {}) + handler = HealthKubeApiDownHandler.new + + #act + handler.handle_kube_api_down([record1, record2, record3, record4, record5, record6]) + #assert + assert_equal record1.state, HealthMonitorStates::UNKNOWN + assert_equal record3.state, HealthMonitorStates::UNKNOWN + assert_equal record4.state, HealthMonitorStates::UNKNOWN + assert_equal record5.state, HealthMonitorStates::UNKNOWN + assert_equal record6.state, HealthMonitorStates::UNKNOWN + + end +end diff --git a/test/code/plugin/health/monitor_factory_spec.rb b/test/code/plugin/health/monitor_factory_spec.rb new file mode 100644 index 000000000..2135808bd --- /dev/null +++ b/test/code/plugin/health/monitor_factory_spec.rb @@ -0,0 +1,28 @@ +require_relative '../test_helpers' +# consider doing this in test_helpers.rb so that this code is common +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } +include HealthModel + +describe "MonitorFactory Spec" do + it "returns UnitMonitor for 
create_unit_monitor" do + #Arrange + factory = MonitorFactory.new() + monitor_record = HealthMonitorRecord.new(:monitor_id, :monitor_instance_id, :time, :pass, {}, {}, {}) + #act + monitor = factory.create_unit_monitor(monitor_record) + # assert + monitor.must_be_kind_of(UnitMonitor) + end + + it "returns AggregateMonitor for create_aggregate_monitor" do + #arrange + factory = MonitorFactory.new() + mock = Minitest::Mock.new + def mock.state; :pass; end + def mock.transition_date_time; :time; end + #act + monitor = factory.create_aggregate_monitor(:monitor_id, :monitor_instance_id, :pass, {}, {}, mock) + #assert + monitor.must_be_kind_of(AggregateMonitor) + end +end \ No newline at end of file diff --git a/test/code/plugin/health/monitor_set_spec.rb b/test/code/plugin/health/monitor_set_spec.rb new file mode 100644 index 000000000..1f4e970be --- /dev/null +++ b/test/code/plugin/health/monitor_set_spec.rb @@ -0,0 +1,58 @@ +require_relative '../test_helpers' +# consider doing this in test_helpers.rb so that this code is common +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } +include HealthModel + +describe "MonitorSet Spec" do + it "add_or_update -- adds a monitor" do + #arrange + set = MonitorSet.new + mock_monitor = MiniTest::Mock.new + def mock_monitor.monitor_instance_id; "monitor_instance_id_1"; end + def mock_monitor.state; :pass;end + #act + set.add_or_update(mock_monitor) + #assert + assert_equal set.get_map.size, 1 + assert_equal set.get_map.key?("monitor_instance_id_1"), true + end + + it "add_or_update -- updates a monitor" do + #arrange + set = MonitorSet.new + mock_monitor = MiniTest::Mock.new + def mock_monitor.monitor_instance_id; "monitor_instance_id_1"; end + def mock_monitor.state; :pass;end + #act + set.add_or_update(mock_monitor) + #assert + assert_equal set.get_map["monitor_instance_id_1"].state, :pass + + #act + def 
mock_monitor.state; :fail;end + set.add_or_update(mock_monitor) + #assert + assert_equal set.get_map["monitor_instance_id_1"].state, :fail + end + + it "delete -- delete a monitor" do + #arrange + set = MonitorSet.new + mock_monitor = MiniTest::Mock.new + def mock_monitor.monitor_instance_id; "monitor_instance_id_1"; end + def mock_monitor.state; :pass;end + set.add_or_update(mock_monitor) + + #act + set.delete("monitor_instance_id_1") + #assert + assert_equal set.get_map.size, 0 + end + + it "get_map -- returns a hash" do + #arrange + set = MonitorSet.new + #act and assert + set.get_map.must_be_kind_of(Hash) + end +end diff --git a/test/code/plugin/health/parent_monitor_provider_spec.rb b/test/code/plugin/health/parent_monitor_provider_spec.rb new file mode 100644 index 000000000..a83db50fc --- /dev/null +++ b/test/code/plugin/health/parent_monitor_provider_spec.rb @@ -0,0 +1,144 @@ +require_relative '../test_helpers' +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } +include HealthModel +include Minitest + +describe "ParentMonitorProvider spec" do + it 'returns correct parent_monitor_id for a non-condition case' do + #arrange + definition = JSON.parse('{ + "monitor_id" : { + "parent_monitor_id": "parent_monitor_id", + "labels": [ + "label_1", + "label_2" + ] + } + }' + ) + health_model_definition = ParentMonitorProvider.new(definition) + + monitor = Mock.new + def monitor.monitor_id; "monitor_id"; end + def monitor.monitor_instance_id; "monitor_instance_id"; end + + #act + parent_id = health_model_definition.get_parent_monitor_id(monitor) + #assert + assert_equal parent_id, "parent_monitor_id" + end + + it 'returns raises for an incorrect monitor id' do + #arrange + definition = JSON.parse('{ + "monitor_id" : { + "parent_monitor_id": "parent_monitor_id", + "labels": [ + "label_1", + "label_2" + ] + } + }' + ) + health_model_definition 
= ParentMonitorProvider.new(definition) + + monitor = Mock.new + def monitor.monitor_id; "monitor_id_!"; end + def monitor.monitor_instance_id; "monitor_instance_id"; end + + #act and assert + assert_raises do + parent_id = health_model_definition.get_parent_monitor_id(monitor) + end + end + + it 'returns correct parent_monitor_id for a conditional case' do + #arrange + definition = JSON.parse('{"conditional_monitor_id": { + "conditions": [ + { + "key": "kubernetes.io/role", + "operator": "==", + "value": "master", + "parent_id": "master_node_pool" + }, + { + "key": "kubernetes.io/role", + "operator": "==", + "value": "agent", + "parent_id": "agent_node_pool" + } + ], + "labels": [ + "kubernetes.io/hostname", + "agentpool", + "kubernetes.io/role", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ], + "aggregation_algorithm": "worstOf", + "aggregation_algorithm_params": null + } + + }' + ) + health_model_definition = ParentMonitorProvider.new(definition) + + monitor = Mock.new + def monitor.monitor_id; "conditional_monitor_id"; end + def monitor.monitor_instance_id; "conditional_monitor_instance_id"; end + def monitor.labels; {HealthMonitorLabels::ROLE => "master"}; end + + #act + parent_id = health_model_definition.get_parent_monitor_id(monitor) + #assert + assert_equal parent_id, "master_node_pool" + end + + it 'raises if conditions are not met' do + #arrange + definition = JSON.parse('{"conditional_monitor_id": { + "conditions": [ + { + "key": "kubernetes.io/role", + "operator": "==", + "value": "master", + "parent_id": "master_node_pool" + }, + { + "key": "kubernetes.io/role", + "operator": "==", + "value": "agent", + "parent_id": "agent_node_pool" + } + ], + "labels": [ + "kubernetes.io/hostname", + "agentpool", + "kubernetes.io/role", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + 
"container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ], + "aggregation_algorithm": "worstOf", + "aggregation_algorithm_params": null + } + + }' + ) + health_model_definition = ParentMonitorProvider.new(definition) + + monitor = Mock.new + def monitor.monitor_id; "conditional_monitor_id"; end + def monitor.monitor_instance_id; "conditional_monitor_instance_id"; end + def monitor.labels; {HealthMonitorLabels::ROLE => "master1"}; end + + #act and assert + assert_raises do + parent_id = health_model_definition.get_parent_monitor_id(monitor) + end + end +end diff --git a/test/code/plugin/health/test_health_model_definition.json b/test/code/plugin/health/test_health_model_definition.json new file mode 100644 index 000000000..31d219705 --- /dev/null +++ b/test/code/plugin/health/test_health_model_definition.json @@ -0,0 +1,42 @@ +[ + { + "monitor_id": "monitor_id", + "parent_monitor_id": "parent_monitor_id", + "labels": [ + "container.azm.ms/namespace", + "container.azm.ms/workload-name", + "container.azm.ms/workload-kind", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "conditional_monitor_id", + "aggregation_algorithm": "worstOf", + "labels": [ + "kubernetes.io/hostname", + "agentpool", + "kubernetes.io/role", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ], + "parent_monitor_id": [ + { + "label": "kubernetes.io/role", + "operator": "==", + "value": "master", + "id": "master_node_pool" + }, + { + "label": "kubernetes.io/role", + "operator": "==", + "value": "agent", + "id": "agent_node_pool" + } + ] + } +] \ No newline at end of file diff --git a/test/code/plugin/health/unit_monitor_spec.rb b/test/code/plugin/health/unit_monitor_spec.rb new file mode 100644 index 000000000..4cbf794db --- 
/dev/null +++ b/test/code/plugin/health/unit_monitor_spec.rb @@ -0,0 +1,20 @@ +require_relative '../../../../source/code/plugin/health/unit_monitor' +require_relative '../test_helpers' + +include HealthModel + +describe "UnitMonitor Spec" do + it "is_aggregate_monitor is false for UnitMonitor" do + # Arrange/Act + monitor = UnitMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, {}, {}, {}) + # Assert + assert_equal monitor.is_aggregate_monitor, false + end + + it "get_member_monitors is nil for UnitMonitor" do + # Arrange/Act + monitor = UnitMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, {}, {}, {}) + #Assert + assert_nil monitor.get_member_monitors + end +end \ No newline at end of file diff --git a/test/code/plugin/health/unit_monitor_test.rb b/test/code/plugin/health/unit_monitor_test.rb new file mode 100644 index 000000000..e53617c99 --- /dev/null +++ b/test/code/plugin/health/unit_monitor_test.rb @@ -0,0 +1,16 @@ +require_relative '../../../../source/code/plugin/health/unit_monitor' +require_relative '../test_helpers' + +class UnitMonitorTest < Minitest::Test + include HealthModel + + def test_is_aggregate_monitor_false + monitor = UnitMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, {}, {}, {}) + assert_equal monitor.is_aggregate_monitor, false + end + + def test_get_member_monitors_nil + monitor = UnitMonitor.new(:monitor_id, :monitor_instance_id, :pass, :time, {}, {}, {}) + assert_nil monitor.get_member_monitors + end +end diff --git a/test/code/plugin/test_helpers.rb b/test/code/plugin/test_helpers.rb new file mode 100644 index 000000000..543f00ac9 --- /dev/null +++ b/test/code/plugin/test_helpers.rb @@ -0,0 +1,3 @@ +gem "minitest" +require "minitest/spec" +require 'minitest/autorun' \ No newline at end of file From 4adcd8bd70f98260e3b6d2b3e5780cbb1d5c71ec Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Thu, 15 Aug 2019 19:03:16 -0700 Subject: [PATCH 111/160] Fix Deserialization Bug (#249) --- 
source/code/plugin/health/cluster_health_state.rb | 8 ++++++-- source/code/plugin/health/health_monitor_state.rb | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/source/code/plugin/health/cluster_health_state.rb b/source/code/plugin/health/cluster_health_state.rb index ac7e05675..3b56dd243 100644 --- a/source/code/plugin/health/cluster_health_state.rb +++ b/source/code/plugin/health/cluster_health_state.rb @@ -16,8 +16,12 @@ def initialize(token_file_path, cert_file_path) @token = get_token end - def update_state(state) + def update_state(state) #state = hash of monitor_instance_id to HealthMonitorInstanceState struct get_request = Net::HTTP::Get.new(@uri.request_uri) + monitor_states_hash = {} + state.each {|monitor_instance_id, health_monitor_instance_state| + monitor_states_hash[monitor_instance_id] = health_monitor_instance_state.to_h + } get_request["Authorization"] = "Bearer #{@token}" @log.info "Making GET request to #{@uri.request_uri} @ #{Time.now.utc.iso8601}" @@ -37,7 +41,7 @@ def update_state(state) update_request["Authorization"] = "Bearer #{@token}" update_request_body = get_update_request_body - update_request_body["state"] = state.to_json + update_request_body["state"] = monitor_states_hash.to_json update_request.body = update_request_body.to_json update_response = @http_client.request(update_request) diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index c3df5e3a9..e6205b481 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -33,7 +33,7 @@ def to_h def initialize_state(deserialized_state) @@monitor_states = {} deserialized_state.each{|k,v| - health_monitor_instance_state_hash = JSON.parse(v) + health_monitor_instance_state_hash = v state = HealthMonitorInstanceState.new(*health_monitor_instance_state_hash.values_at(*HealthMonitorInstanceState.members)) state.prev_sent_record_time = 
health_monitor_instance_state_hash["prev_sent_record_time"] state.old_state = health_monitor_instance_state_hash["old_state"] From 2ee43076e43e117c8376d576e6aa5ee783a57bcc Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Fri, 16 Aug 2019 09:47:02 -0700 Subject: [PATCH 112/160] Fix the bug where capacity is not updated and cached value was being used (#251) * Fix the Capacity computation * fix node cpu and memory limits calculation --- .../plugin/health/health_monitor_utils.rb | 40 +++++++++---------- source/code/plugin/in_kube_health.rb | 13 +++--- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index df47529e6..e9d59941e 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -172,35 +172,33 @@ def get_resource_subscription(pod_inventory, metric_name, metric_capacity) return subscription end - def get_cluster_cpu_memory_capacity(log) + def get_cluster_cpu_memory_capacity(log, node_inventory: nil) begin - node_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + if node_inventory.nil? + node_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + end cluster_cpu_capacity = 0.0 cluster_memory_capacity = 0.0 if !node_inventory.empty? - node_inventory['items'].each do |node| - cpu_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "cpu", "cpuCapacityNanoCores") - if !cpu_capacity_json.nil? - cpu_capacity_json.each do |cpu_capacity_node| - if !cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil? - cluster_cpu_capacity += cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'] - end + cpu_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "cpu", "cpuCapacityNanoCores") + if !cpu_capacity_json.nil? 
+ cpu_capacity_json.each do |cpu_capacity_node| + if !cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil? + cluster_cpu_capacity += cpu_capacity_node['DataItems'][0]['Collections'][0]['Value'] end - log.info "Cluster CPU Limit #{cluster_cpu_capacity}" - else - log.info "Error getting cpu_capacity" end - memory_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "memory", "memoryCapacityBytes") - if !memory_capacity_json.nil? - memory_capacity_json.each do |memory_capacity_node| - if !memory_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil? - cluster_memory_capacity += memory_capacity_node['DataItems'][0]['Collections'][0]['Value'] - end + else + log.info "Error getting cpu_capacity" + end + memory_capacity_json = KubernetesApiClient.parseNodeLimits(node_inventory, "capacity", "memory", "memoryCapacityBytes") + if !memory_capacity_json.nil? + memory_capacity_json.each do |memory_capacity_node| + if !memory_capacity_node['DataItems'][0]['Collections'][0]['Value'].to_s.nil? + cluster_memory_capacity += memory_capacity_node['DataItems'][0]['Collections'][0]['Value'] end - log.info "Cluster Memory Limit #{cluster_memory_capacity}" - else - log.info "Error getting memory_capacity" end + else + log.info "Error getting memory_capacity" end else log.info "Unable to get cpu and memory capacity" diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index d9672da3b..045ddf7c7 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -104,9 +104,9 @@ def enumerate end if !pod_inventory.nil? 
- record = process_cpu_oversubscribed_monitor(pod_inventory) + record = process_cpu_oversubscribed_monitor(pod_inventory, node_inventory) health_monitor_records.push(record) if record - record = process_memory_oversubscribed_monitor(pod_inventory) + record = process_memory_oversubscribed_monitor(pod_inventory, node_inventory) health_monitor_records.push(record) if record pods_ready_hash = HealthMonitorUtils.get_pods_ready_hash(pod_inventory, deployment_inventory) @@ -146,11 +146,12 @@ def enumerate end end - def process_cpu_oversubscribed_monitor(pod_inventory) + def process_cpu_oversubscribed_monitor(pod_inventory, node_inventory) timestamp = Time.now.utc.iso8601 + @@clusterCpuCapacity = HealthMonitorUtils.get_cluster_cpu_memory_capacity(@@hmlog, node_inventory: node_inventory)[0] subscription = HealthMonitorUtils.get_resource_subscription(pod_inventory,"cpu", @@clusterCpuCapacity) + @@hmlog.info "Refreshed Cluster CPU Capacity #{@@clusterCpuCapacity}" state = subscription > @@clusterCpuCapacity ? "fail" : "pass" - #@@hmlog.debug "CPU Oversubscribed Monitor State : #{state}" #CPU monitor_id = HealthMonitorConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID @@ -171,8 +172,10 @@ def process_cpu_oversubscribed_monitor(pod_inventory) return health_record end - def process_memory_oversubscribed_monitor(pod_inventory) + def process_memory_oversubscribed_monitor(pod_inventory, node_inventory) timestamp = Time.now.utc.iso8601 + @@clusterMemoryCapacity = HealthMonitorUtils.get_cluster_cpu_memory_capacity(@@hmlog,node_inventory: node_inventory)[1] + @@hmlog.info "Refreshed Cluster Memory Capacity #{@@clusterMemoryCapacity}" subscription = HealthMonitorUtils.get_resource_subscription(pod_inventory,"memory", @@clusterMemoryCapacity) state = subscription > @@clusterMemoryCapacity ? 
"fail" : "pass" #@@hmlog.debug "Memory Oversubscribed Monitor State : #{state}" From e86f82f4aa0587532201d559d99bce537cb6e837 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 16 Aug 2019 13:58:08 -0700 Subject: [PATCH 113/160] changes (#250) --- source/code/plugin/DockerApiClient.rb | 44 ++++++++++++--------- source/code/plugin/in_containerinventory.rb | 3 +- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index eb9d74531..ee2742dd4 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -44,7 +44,11 @@ def getResponse(request, isMultiJson, isVersion) return (isTimeOut) ? nil : parseResponse(dockerResponse, isMultiJson) rescue => errorStr $log.warn("Socket call failed for request: #{request} error: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + # Adding this check to avoid an infinite loop for the docker info call in exception telemetry + if !request.include? "GET /version " + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + return nil end end @@ -80,28 +84,32 @@ def getDockerHostName() def listContainers() ids = [] - request = DockerApiRestHelper.restDockerPs - containers = getResponse(request, true, false) - if !containers.nil? && !containers.empty? - containers.each do |container| - labels = (!container["Labels"].nil?) ? container["Labels"] : container["labels"] - if !labels.nil? - labelKeys = labels.keys - dockerTypeLabel = labelKeys.find { |k| "io.kubernetes.docker.type".downcase == k.downcase } - if !dockerTypeLabel.nil? 
- dockerTypeLabelValue = labels[dockerTypeLabel] - # Checking for 'io.kubernetes.docker.type' label for docker containers to exclude the pause-amd64 containers - if !(dockerTypeLabelValue.downcase == "podsandbox".downcase) - # Case insensitive lookup for pod uid label - This is to exclude containers created using docker run and only include containers that - # are created in the pods for ContainerInventory - keyValue = labelKeys.find { |k| "io.kubernetes.pod.uid".downcase == k.downcase } - if !labels[keyValue].nil? - ids.push(container["Id"]) + begin + request = DockerApiRestHelper.restDockerPs + containers = getResponse(request, true, false) + if !containers.nil? && !containers.empty? + containers.each do |container| + labels = (!container["Labels"].nil?) ? container["Labels"] : container["labels"] + if !labels.nil? + labelKeys = labels.keys + dockerTypeLabel = labelKeys.find { |k| "io.kubernetes.docker.type".downcase == k.downcase } + if !dockerTypeLabel.nil? + dockerTypeLabelValue = labels[dockerTypeLabel] + # Checking for 'io.kubernetes.docker.type' label for docker containers to exclude the pause-amd64 containers + if !(dockerTypeLabelValue.downcase == "podsandbox".downcase) + # Case insensitive lookup for pod uid label - This is to exclude containers created using docker run and only include containers that + # are created in the pods for ContainerInventory + keyValue = labelKeys.find { |k| "io.kubernetes.pod.uid".downcase == k.downcase } + if !labels[keyValue].nil? 
+ ids.push(container["Id"]) + end end end end end end + rescue => errorStr + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end return ids end diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb index 4392de280..ccf61ab2e 100644 --- a/source/code/plugin/in_containerinventory.rb +++ b/source/code/plugin/in_containerinventory.rb @@ -198,7 +198,7 @@ def enumerate hostname = DockerApiClient.getDockerHostName begin containerIds = DockerApiClient.listContainers - if !containerIds.empty? + if !containerIds.nil? && !containerIds.empty? eventStream = MultiEventStream.new nameMap = DockerApiClient.getImageIdMap clusterCollectEnvironmentVar = ENV["AZMON_CLUSTER_COLLECT_ENV_VAR"] @@ -252,6 +252,7 @@ def enumerate end rescue => errorStr $log.warn("Exception in enumerate container inventory: #{errorStr}") + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end From c76ce47887cd7ac155c1651ebf8db233805481cf Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Fri, 16 Aug 2019 14:32:36 -0700 Subject: [PATCH 114/160] Added new Custom Metrics Regions, fixed MDM plugin crash bug (#253) Added new regions, added handler for MDM plugin start --- installer/conf/container.conf | 2 +- installer/conf/kube.conf | 4 +-- source/code/plugin/out_mdm.rb | 53 ++++++++++++++++++----------------- 3 files changed, 30 insertions(+), 29 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 6d810a0e2..4cb9e6913 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -32,7 +32,7 @@ #custom_metrics_mdm filter plugin type filter_cadvisor2mdm - custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope + custom_metrics_azure_regions 
eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes log_level info diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 4b4ec09ea..3cbc3ff17 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -70,14 +70,14 @@ type filter_inventory2mdm - custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral log_level info #custom_metrics_mdm filter plugin for perf data from windows nodes type filter_cadvisor2mdm - custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes log_level info diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 69ef25580..4b9d50a29 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -44,37 +44,38 @@ def start super begin file = File.read(@@azure_json_path) + @data_hash = JSON.parse(file) + aks_resource_id = ENV["AKS_RESOURCE_ID"] + aks_region = ENV["AKS_REGION"] + + if aks_resource_id.to_s.empty? + @log.info "Environment Variable AKS_RESOURCE_ID is not set.. " + @can_send_data_to_mdm = false + end + if aks_region.to_s.empty? + @log.info "Environment Variable AKS_REGION is not set.. 
" + @can_send_data_to_mdm = false + end + aks_region = aks_region.gsub(" ","") + + if @can_send_data_to_mdm + @log.info "MDM Metrics supported in #{aks_region} region" + @token_url = @@token_url_template % {tenant_id: @data_hash["tenantId"]} + @cached_access_token = get_access_token + @@post_request_url = @@post_request_url_template % {aks_region: aks_region, aks_resource_id: aks_resource_id} + @post_request_uri = URI.parse(@@post_request_url) + @http_client = Net::HTTP.new(@post_request_uri.host, @post_request_uri.port) + @http_client.use_ssl = true + @log.info "POST Request url: #{@@post_request_url}" + ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMPluginStart", {}) + end rescue => e - @log.info "Unable to read file #{@@azure_json_path} #{e}" + @log.info "exception when initializing out_mdm #{e}" + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "MDM"}) @can_send_data_to_mdm = false return end - # Handle the case where the file read fails. Send Telemetry and exit the plugin? - @data_hash = JSON.parse(file) - @token_url = @@token_url_template % {tenant_id: @data_hash["tenantId"]} - @cached_access_token = get_access_token - aks_resource_id = ENV["AKS_RESOURCE_ID"] - aks_region = ENV["AKS_REGION"] - - if aks_resource_id.to_s.empty? - @log.info "Environment Variable AKS_RESOURCE_ID is not set.. " - @can_send_data_to_mdm = false - return - end - if aks_region.to_s.empty? - @log.info "Environment Variable AKS_REGION is not set.. 
" - @can_send_data_to_mdm = false - return - end - - aks_region = aks_region.gsub(" ","") - @@post_request_url = @@post_request_url_template % {aks_region: aks_region, aks_resource_id: aks_resource_id} - @post_request_uri = URI.parse(@@post_request_url) - @http_client = Net::HTTP.new(@post_request_uri.host, @post_request_uri.port) - @http_client.use_ssl = true - @log.info "POST Request url: #{@@post_request_url}" - ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMPluginStart", {}) end # get the access token only if the time to expiry is less than 5 minutes From 10a79c8c5546fcbcf21532594b6d25f4e269e76b Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Mon, 19 Aug 2019 12:54:53 -0700 Subject: [PATCH 115/160] Add Missing Handlers (#254) * Added Missing Handlers --- .../plugin/filter_cadvisor_health_node.rb | 32 ++++++++++++------- .../plugin/health/health_monitor_utils.rb | 2 +- source/code/plugin/in_kube_health.rb | 20 +++++++----- 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb index 627a525e7..ce57c2c62 100644 --- a/source/code/plugin/filter_cadvisor_health_node.rb +++ b/source/code/plugin/filter_cadvisor_health_node.rb @@ -47,21 +47,29 @@ def initialize end def configure(conf) - super - @log = HealthMonitorUtils.get_log_handle - @log.debug {'Starting filter_cadvisor2health plugin'} + begin + super + @log = HealthMonitorUtils.get_log_handle + @log.debug {'Starting filter_cadvisor2health plugin'} + rescue => e + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + end end def start - super - @metrics_to_collect_hash = HealthMonitorUtils.build_metrics_hash(@metrics_to_collect) - @log.debug "Calling ensure_cpu_memory_capacity_set cpu_capacity #{@cpu_capacity} memory_capacity #{@memory_capacity}" - node_capacity = HealthMonitorUtils.ensure_cpu_memory_capacity_set(@@hm_log, @cpu_capacity, 
@memory_capacity, @@hostName) - @cpu_capacity = node_capacity[0] - @memory_capacity = node_capacity[1] - @log.info "CPU Capacity #{@cpu_capacity} Memory Capacity #{@memory_capacity}" - #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @@hostName) - ApplicationInsightsUtility.sendCustomEvent("filter_cadvisor_health Plugin Start", {}) + begin + super + @metrics_to_collect_hash = HealthMonitorUtils.build_metrics_hash(@metrics_to_collect) + @log.debug "Calling ensure_cpu_memory_capacity_set cpu_capacity #{@cpu_capacity} memory_capacity #{@memory_capacity}" + node_capacity = HealthMonitorUtils.ensure_cpu_memory_capacity_set(@@hm_log, @cpu_capacity, @memory_capacity, @@hostName) + @cpu_capacity = node_capacity[0] + @memory_capacity = node_capacity[1] + @log.info "CPU Capacity #{@cpu_capacity} Memory Capacity #{@memory_capacity}" + #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @@hostName) + ApplicationInsightsUtility.sendCustomEvent("filter_cadvisor_health Plugin Start", {}) + rescue => e + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + end end def filter_stream(tag, es) diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index e9d59941e..b1c77a4a1 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -13,7 +13,7 @@ class HealthMonitorUtils $log.info "Error loading KubernetesApiClient #{e.message}" end - @@node_inventory = [] + @@nodeInventory = {} @log_path = "/var/opt/microsoft/docker-cimprov/log/health_monitors.log" diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 045ddf7c7..199e03e56 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -18,14 +18,18 @@ class KubeHealthInput < Input @@clusterMemoryCapacity = 0.0 def initialize - super - require "yaml" - require "json" - - @@cluster_id = 
KubernetesApiClient.getClusterId - @resources = HealthKubernetesResources.instance - @provider = HealthMonitorProvider.new(@@cluster_id, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) - @@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled + begin + super + require "yaml" + require "json" + + @@cluster_id = KubernetesApiClient.getClusterId + @resources = HealthKubernetesResources.instance + @provider = HealthMonitorProvider.new(@@cluster_id, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) + @@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled + rescue => e + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + end end include HealthModel From 851ab4ec7a2062a85c5f58e1642971d8580f16aa Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Wed, 21 Aug 2019 14:03:52 -0700 Subject: [PATCH 116/160] Return MultiEventStream.new instead of empty array (#256) --- source/code/plugin/filter_cadvisor_health_node.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb index ce57c2c62..faa574993 100644 --- a/source/code/plugin/filter_cadvisor_health_node.rb +++ b/source/code/plugin/filter_cadvisor_health_node.rb @@ -75,7 +75,7 @@ def start def filter_stream(tag, es) if !@@cluster_health_model_enabled @log.info "Cluster Health Model disabled in filter_cadvisor_health_node" - return [] + return MultiEventStream.new end new_es = MultiEventStream.new #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @hostName) From f20debb244c5ec2b9eba23e0588520dbe7a4490b Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Thu, 22 Aug 2019 17:58:17 -0700 Subject: [PATCH 117/160] Added explicit require_relative to avoid loading errors (#258) * Adding explicit require_relative --- 
source/code/plugin/health/agg_monitor_id_labels.rb | 2 ++ source/code/plugin/health/health_kube_api_down_handler.rb | 1 + source/code/plugin/health/health_kubernetes_resources.rb | 1 + source/code/plugin/health/health_missing_signal_generator.rb | 3 +++ source/code/plugin/health/health_model_builder.rb | 1 - source/code/plugin/health/health_monitor_helpers.rb | 1 + source/code/plugin/health/health_monitor_provider.rb | 2 ++ source/code/plugin/health/health_monitor_state.rb | 2 ++ source/code/plugin/health/health_monitor_utils.rb | 1 + source/code/plugin/health/health_signal_reducer.rb | 2 ++ source/code/plugin/health/monitor_factory.rb | 3 +++ source/code/plugin/health/node_monitor_hierarchy_reducer.rb | 1 + source/code/plugin/health/parent_monitor_provider.rb | 1 + source/code/plugin/health/unit_monitor.rb | 1 - 14 files changed, 20 insertions(+), 2 deletions(-) diff --git a/source/code/plugin/health/agg_monitor_id_labels.rb b/source/code/plugin/health/agg_monitor_id_labels.rb index 48ca46184..86a3381cd 100644 --- a/source/code/plugin/health/agg_monitor_id_labels.rb +++ b/source/code/plugin/health/agg_monitor_id_labels.rb @@ -1,3 +1,5 @@ +require_relative 'health_model_constants' + module HealthModel class AggregateMonitorInstanceIdLabels @@id_labels_mapping = { diff --git a/source/code/plugin/health/health_kube_api_down_handler.rb b/source/code/plugin/health/health_kube_api_down_handler.rb index 7f7ba1bd3..7f72360f8 100644 --- a/source/code/plugin/health/health_kube_api_down_handler.rb +++ b/source/code/plugin/health/health_kube_api_down_handler.rb @@ -1,3 +1,4 @@ +require_relative 'health_model_constants' module HealthModel class HealthKubeApiDownHandler def initialize diff --git a/source/code/plugin/health/health_kubernetes_resources.rb b/source/code/plugin/health/health_kubernetes_resources.rb index 53f879bf5..2f591722b 100644 --- a/source/code/plugin/health/health_kubernetes_resources.rb +++ b/source/code/plugin/health/health_kubernetes_resources.rb @@ -1,4 
+1,5 @@ require 'singleton' +require_relative 'health_model_constants' module HealthModel class HealthKubernetesResources diff --git a/source/code/plugin/health/health_missing_signal_generator.rb b/source/code/plugin/health/health_missing_signal_generator.rb index ff7f6a390..419680afa 100644 --- a/source/code/plugin/health/health_missing_signal_generator.rb +++ b/source/code/plugin/health/health_missing_signal_generator.rb @@ -1,3 +1,6 @@ +require_relative 'health_model_constants' +require_relative 'health_monitor_record' + module HealthModel class HealthMissingSignalGenerator attr_accessor :last_received_records, :current_received_records diff --git a/source/code/plugin/health/health_model_builder.rb b/source/code/plugin/health/health_model_builder.rb index 4cf802798..13813c8d9 100644 --- a/source/code/plugin/health/health_model_builder.rb +++ b/source/code/plugin/health/health_model_builder.rb @@ -1,4 +1,3 @@ -require_relative 'health_model_constants' require 'time' module HealthModel diff --git a/source/code/plugin/health/health_monitor_helpers.rb b/source/code/plugin/health/health_monitor_helpers.rb index 9e2977a0e..9f0315978 100644 --- a/source/code/plugin/health/health_monitor_helpers.rb +++ b/source/code/plugin/health/health_monitor_helpers.rb @@ -1,5 +1,6 @@ require 'logger' require 'digest' +require_relative 'health_model_constants' module HealthModel # static class that provides a bunch of utility methods diff --git a/source/code/plugin/health/health_monitor_provider.rb b/source/code/plugin/health/health_monitor_provider.rb index 0c1cbf7f2..60ad69d76 100644 --- a/source/code/plugin/health/health_monitor_provider.rb +++ b/source/code/plugin/health/health_monitor_provider.rb @@ -1,3 +1,5 @@ +require_relative 'health_model_constants' + module HealthModel class HealthMonitorProvider diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index e6205b481..498c75ec7 100644 --- 
a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -1,3 +1,5 @@ +require_relative 'health_model_constants' + module HealthModel HealthMonitorInstanceState = Struct.new(:prev_sent_record_time, :old_state, :new_state, :state_change_time, :prev_records, :is_state_change_consistent, :should_send) do diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index b1c77a4a1..e707651dc 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -1,5 +1,6 @@ require 'logger' require 'digest' +require_relative 'health_model_constants' module HealthModel # static class that provides a bunch of utility methods diff --git a/source/code/plugin/health/health_signal_reducer.rb b/source/code/plugin/health/health_signal_reducer.rb index 4cf53e82c..1d520da8d 100644 --- a/source/code/plugin/health/health_signal_reducer.rb +++ b/source/code/plugin/health/health_signal_reducer.rb @@ -1,3 +1,5 @@ +require_relative 'health_model_constants' + module HealthModel # this class # 1. 
dedupes daemonset signals and takes only the latest diff --git a/source/code/plugin/health/monitor_factory.rb b/source/code/plugin/health/monitor_factory.rb index e6ec9d2c3..5f2c3945c 100644 --- a/source/code/plugin/health/monitor_factory.rb +++ b/source/code/plugin/health/monitor_factory.rb @@ -1,3 +1,6 @@ +require_relative 'aggregate_monitor' +require_relative 'unit_monitor' + module HealthModel class MonitorFactory diff --git a/source/code/plugin/health/node_monitor_hierarchy_reducer.rb b/source/code/plugin/health/node_monitor_hierarchy_reducer.rb index aafbd07a8..0bad4517e 100644 --- a/source/code/plugin/health/node_monitor_hierarchy_reducer.rb +++ b/source/code/plugin/health/node_monitor_hierarchy_reducer.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true +require_relative 'health_model_constants' module HealthModel class NodeMonitorHierarchyReducer diff --git a/source/code/plugin/health/parent_monitor_provider.rb b/source/code/plugin/health/parent_monitor_provider.rb index 6a27f11d8..4577abb99 100644 --- a/source/code/plugin/health/parent_monitor_provider.rb +++ b/source/code/plugin/health/parent_monitor_provider.rb @@ -1,3 +1,4 @@ +require_relative 'health_model_constants' module HealthModel class ParentMonitorProvider diff --git a/source/code/plugin/health/unit_monitor.rb b/source/code/plugin/health/unit_monitor.rb index 9af599321..64262aa2e 100644 --- a/source/code/plugin/health/unit_monitor.rb +++ b/source/code/plugin/health/unit_monitor.rb @@ -1,4 +1,3 @@ -require_relative 'health_model_constants' require 'json' module HealthModel From a8804df7c0ccc645dc8f51ea8fbf1f9431c13957 Mon Sep 17 00:00:00 2001 From: ganga1980 Date: Wed, 28 Aug 2019 11:38:36 -0700 Subject: [PATCH 118/160] Gangams/enable ai telemetry in mc (#252) * enable ai telemetry to configure different ikey and endpoint per cloud --- source/code/go/src/plugins/telemetry.go | 11 ++++++++++- source/code/plugin/ApplicationInsightsUtility.rb | 13 ++++++++++++- 2 files changed, 22 insertions(+), 2 
deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 5fc0fa843..4f22b8c03 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -42,6 +42,7 @@ const ( envAKSResourceID = "AKS_RESOURCE_ID" envACSResourceName = "ACS_RESOURCE_NAME" envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" + envAppInsightsEndpoint = "APPLICATIONINSIGHTS_ENDPOINT" metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" metricNameAvgLogGenerationRate = "ContainerLogsGeneratedPerSec" metricNameLogSize = "ContainerLogsSize" @@ -141,7 +142,15 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { return -1, err } - TelemetryClient = appinsights.NewTelemetryClient(string(decIkey)) + appInsightsEndpoint := os.Getenv(envAppInsightsEndpoint) + telemetryClientConfig := appinsights.NewTelemetryConfiguration(string(decIkey)) + // endpoint override required only for sovereign clouds + if appInsightsEndpoint != "" { + Log("Overriding the default AppInsights EndpointUrl with %s", appInsightsEndpoint) + telemetryClientConfig.EndpointUrl = envAppInsightsEndpoint + } + TelemetryClient = appinsights.NewTelemetryClientFromConfig(telemetryClientConfig) + telemetryOffSwitch := os.Getenv("DISABLE_TELEMETRY") if strings.Compare(strings.ToLower(telemetryOffSwitch), "true") == 0 { Log("Appinsights telemetry is disabled \n") diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 5dc2bfab8..bb4831701 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -18,6 +18,7 @@ class ApplicationInsightsUtility @@EnvAksRegion = "AKS_REGION" @@EnvAgentVersion = "AGENT_VERSION" @@EnvApplicationInsightsKey = "APPLICATIONINSIGHTS_AUTH" + @@EnvApplicationInsightsEndpoint = "APPLICATIONINSIGHTS_ENDPOINT" @@EnvControllerType = "CONTROLLER_TYPE" @@CustomProperties = {} @@ -62,6 +63,7 
@@ def initializeUtility() @@CustomProperties["AgentVersion"] = ENV[@@EnvAgentVersion] @@CustomProperties["ControllerType"] = ENV[@@EnvControllerType] encodedAppInsightsKey = ENV[@@EnvApplicationInsightsKey] + appInsightsEndpoint = ENV[@@EnvApplicationInsightsEndpoint] #Check if telemetry is turned off telemetryOffSwitch = ENV["DISABLE_TELEMETRY"] @@ -70,7 +72,16 @@ def initializeUtility() @@Tc = ApplicationInsights::TelemetryClient.new elsif !encodedAppInsightsKey.nil? decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey) - @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey + #override ai endpoint if its available otherwise use default. + if appInsightsEndpoint && !appInsightsEndpoint.nil? && !appInsightsEndpoint.empty? + $log.info("AppInsightsUtility: Telemetry client uses overrided endpoint url : #{appInsightsEndpoint}") + telemetrySynchronousSender = ApplicationInsights::Channel::SynchronousSender.new appInsightsEndpoint + telemetrySynchronousQueue = ApplicationInsights::Channel::SynchronousQueue.new(telemetrySynchronousSender) + telemetryChannel = ApplicationInsights::Channel::TelemetryChannel.new nil, telemetrySynchronousQueue + @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey, telemetryChannel + else + @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey + end end rescue => errorStr $log.warn("Exception in AppInsightsUtility: initilizeUtility - error: #{errorStr}") From 8a5ebb037025fcf9576c9e3f92bc5614638ec548 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 10 Sep 2019 15:18:45 -0700 Subject: [PATCH 119/160] Fixing null check out_mdm bug, tomlparser bug, exposing Replica Set service name as an ENV variable (#261) * Expose replica set service as an env variable * Fixing null check out_mdm bug, and tomlparser bug * Updating the env variable name to be more specific to health model --- installer/conf/container.conf | 2 +- installer/scripts/tomlparser.rb | 11 ++++++++--- 
source/code/plugin/out_mdm.rb | 3 ++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 4cb9e6913..0b26357f0 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -76,7 +76,7 @@ heartbeat_type tcp - host healthmodel-replicaset-service.kube-system + host "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_ENDPOINT']}" port 25227 diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index 067586629..b66e1257e 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -16,6 +16,7 @@ @logTailPath = "/var/log/containers/*.log" @logExclusionRegexPattern = "(^((?!stdout|stderr).)*$)" @excludePath = "*.csv2" #some invalid path +@enable_health_model = false # Use parser to parse the configmap toml file to a ruby structure def parseConfigMap(path) @@ -121,10 +122,12 @@ def populateSettingValuesFromConfigMap(parsedConfig) end begin - if !parsedConfig.nil? && !parsedConfig[:agent_settings][:health_model].nil? && !parsedConfig[:agent_settings][:health_model][:enabled].nil? + if !parsedConfig.nil? && !parsedConfig[:agent_settings].nil? && !parsedConfig[:agent_settings][:health_model].nil? && !parsedConfig[:agent_settings][:health_model][:enabled].nil? @enable_health_model = parsedConfig[:agent_settings][:health_model][:enabled] - puts "enable_health_model = #{@enable_health_model}" + else + @enable_health_model = false end + puts "enable_health_model = #{@enable_health_model}" rescue => errorStr puts "config::error:Exception while reading config settings for health_model enabled setting - #{errorStr}, using defaults" @enable_health_model = false @@ -140,7 +143,9 @@ def populateSettingValuesFromConfigMap(parsedConfig) Dir["/etc/config/settings/*settings"].each{|file| puts "Parsing File #{file}" settings = parseConfigMap(file) - configMapSettings = configMapSettings.merge(settings) + if !settings.nil? 
+ configMapSettings = configMapSettings.merge(settings) + end } if !configMapSettings.nil? diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 4b9d50a29..b8d10090d 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -55,8 +55,9 @@ def start if aks_region.to_s.empty? @log.info "Environment Variable AKS_REGION is not set.. " @can_send_data_to_mdm = false + else + aks_region = aks_region.gsub(" ","") end - aks_region = aks_region.gsub(" ","") if @can_send_data_to_mdm @log.info "MDM Metrics supported in #{aks_region} region" From a939bf796ce2bc420d4862399d1312aa5e572e9e Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Tue, 10 Sep 2019 17:18:48 -0700 Subject: [PATCH 120/160] Changes for creating custom plugins with namespace settings for prometheus scraping (#262) * changes * changes * changes * changes * changes * changes * chnages * changes * telemetry changes * changes --- installer/conf/telegraf-rs.conf | 3 +- .../scripts/tomlparser-prom-customconfig.rb | 76 +++++++++++++++++-- source/code/plugin/in_kube_nodes.rb | 2 + 3 files changed, 75 insertions(+), 6 deletions(-) diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf index ce60bfa04..3450ab88f 100644 --- a/installer/conf/telegraf-rs.conf +++ b/installer/conf/telegraf-rs.conf @@ -552,7 +552,7 @@ ## set this to `https` & most likely set the tls config. ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation. 
## - prometheus.io/port: If port is not 9102 use this annotation - monitor_kubernetes_pods = $AZMON_RS_PROM_MONITOR_PODS + $AZMON_RS_PROM_MONITOR_PODS fieldpass = $AZMON_RS_PROM_FIELDPASS fielddrop = $AZMON_RS_PROM_FIELDDROP @@ -579,6 +579,7 @@ insecure_skip_verify = true #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"] +$AZMON_RS_PROM_PLUGINS_WITH_NAMESPACE_FILTER # [[inputs.exec]] # ## Commands array # interval = "15m" diff --git a/installer/scripts/tomlparser-prom-customconfig.rb b/installer/scripts/tomlparser-prom-customconfig.rb index d9fdf1cc2..d44bf3342 100644 --- a/installer/scripts/tomlparser-prom-customconfig.rb +++ b/installer/scripts/tomlparser-prom-customconfig.rb @@ -18,6 +18,14 @@ @defaultRsK8sServices = [] @defaultRsMonitorPods = false +#Configurations to be used for the auto-generated input prometheus plugins for namespace filtering +@metricVersion = 2 +@urlTag = "scrapeUrl" +@bearerToken = "/var/run/secrets/kubernetes.io/serviceaccount/token" +@responseTimeout = "15s" +@tlsCa = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" +@insecureSkipVerify = true + # Use parser to parse the configmap toml file to a ruby structure def parseConfigMap begin @@ -53,6 +61,48 @@ def checkForType(variable, varType) end end +def replaceDefaultMonitorPodSettings(new_contents, monitorKubernetesPods) + begin + new_contents = new_contents.gsub("$AZMON_RS_PROM_MONITOR_PODS", ("monitor_kubernetes_pods = #{monitorKubernetesPods}")) + new_contents = new_contents.gsub("$AZMON_RS_PROM_PLUGINS_WITH_NAMESPACE_FILTER", "") + rescue => errorStr + puts "config::error::Exception while replacing default pod monitor settings: #{errorStr}" + end + return new_contents +end + +def createPrometheusPluginsWithNamespaceSetting(monitorKubernetesPods, monitorKubernetesPodsNamespaces, new_contents, interval, fieldPassSetting, fieldDropSetting) + begin + new_contents = 
new_contents.gsub("$AZMON_RS_PROM_MONITOR_PODS", "# Commenting this out since new plugins will be created per namespace\n # $AZMON_RS_PROM_MONITOR_PODS") + pluginConfigsWithNamespaces = "" + monitorKubernetesPodsNamespaces.each do |namespace| + if !namespace.nil? + #Stripping namespaces to remove leading and trailing whitespaces + namespace.strip! + if namespace.length > 0 + pluginConfigsWithNamespaces += "\n[[inputs.prometheus]] + interval = \"#{interval}\" + monitor_kubernetes_pods = true + monitor_kubernetes_pods_namespace = \"#{namespace}\" + fieldpass = #{fieldPassSetting} + fielddrop = #{fieldDropSetting} + metric_version = #{@metricVersion} + url_tag = \"#{@urlTag}\" + bearer_token = \"#{@bearerToken}\" + response_timeout = \"#{@responseTimeout}\" + tls_ca = \"#{@tlsCa}\" + insecure_skip_verify = #{@insecureSkipVerify}\n" + end + end + end + new_contents = new_contents.gsub("$AZMON_RS_PROM_PLUGINS_WITH_NAMESPACE_FILTER", pluginConfigsWithNamespaces) + return new_contents + rescue => errorStr + puts "config::error::Exception while creating prometheus input plugins to filter namespaces: #{errorStr}, using defaults" + replaceDefaultMonitorPodSettings(new_contents, monitorKubernetesPods) + end +end + # Use the ruby structure created after config parsing to set the right values to be used as environment variables def populateSettingValuesFromConfigMap(parsedConfig) # Checking to see if this is the daemonset or replicaset to parse config accordingly @@ -68,6 +118,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) urls = parsedConfig[:prometheus_data_collection_settings][:cluster][:urls] kubernetesServices = parsedConfig[:prometheus_data_collection_settings][:cluster][:kubernetes_services] monitorKubernetesPods = parsedConfig[:prometheus_data_collection_settings][:cluster][:monitor_kubernetes_pods] + monitorKubernetesPodsNamespaces = parsedConfig[:prometheus_data_collection_settings][:cluster][:monitor_kubernetes_pods_namespaces] # Check for the right 
datattypes to enforce right setting values if checkForType(interval, String) && @@ -75,7 +126,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) checkForTypeArray(fieldDrop, String) && checkForTypeArray(kubernetesServices, String) && checkForTypeArray(urls, String) && - !monitorKubernetesPods.nil? && (!!monitorKubernetesPods == monitorKubernetesPods) #Checking for Boolean type, since 'Boolean' is not defined as a type in ruby + (monitorKubernetesPods.nil? || (!monitorKubernetesPods.nil? && (!!monitorKubernetesPods == monitorKubernetesPods))) #Checking for Boolean type, since 'Boolean' is not defined as a type in ruby puts "config::Successfully passed typecheck for config settings for replicaset" #if setting is nil assign default values interval = (interval.nil?) ? @defaultRsInterval : interval @@ -83,7 +134,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) fieldDrop = (fieldDrop.nil?) ? @defaultRsFieldDrop : fieldDrop kubernetesServices = (kubernetesServices.nil?) ? @defaultRsK8sServices : kubernetesServices urls = (urls.nil?) ? @defaultRsPromUrls : urls - monitorKubernetesPods = (kubernetesServices.nil?) ? @defaultRsMonitorPods : monitorKubernetesPods + monitorKubernetesPods = (monitorKubernetesPods.nil?) ? @defaultRsMonitorPods : monitorKubernetesPods file_name = "/opt/telegraf-test-rs.conf" # Copy the telegraf config file to a temp file to run telegraf in test mode with this config @@ -93,11 +144,24 @@ def populateSettingValuesFromConfigMap(parsedConfig) #Replace the placeholder config values with values from custom config text = File.read(file_name) new_contents = text.gsub("$AZMON_RS_PROM_INTERVAL", interval) - new_contents = new_contents.gsub("$AZMON_RS_PROM_FIELDPASS", ((fieldPass.length > 0) ? ("[\"" + fieldPass.join("\",\"") + "\"]") : "[]")) - new_contents = new_contents.gsub("$AZMON_RS_PROM_FIELDDROP", ((fieldDrop.length > 0) ? ("[\"" + fieldDrop.join("\",\"") + "\"]") : "[]")) + fieldPassSetting = (fieldPass.length > 0) ? 
("[\"" + fieldPass.join("\",\"") + "\"]") : "[]" + new_contents = new_contents.gsub("$AZMON_RS_PROM_FIELDPASS", fieldPassSetting) + fieldDropSetting = (fieldDrop.length > 0) ? ("[\"" + fieldDrop.join("\",\"") + "\"]") : "[]" + new_contents = new_contents.gsub("$AZMON_RS_PROM_FIELDDROP", fieldDropSetting) new_contents = new_contents.gsub("$AZMON_RS_PROM_URLS", ((urls.length > 0) ? ("[\"" + urls.join("\",\"") + "\"]") : "[]")) new_contents = new_contents.gsub("$AZMON_RS_PROM_K8S_SERVICES", ((kubernetesServices.length > 0) ? ("[\"" + kubernetesServices.join("\",\"") + "\"]") : "[]")) - new_contents = new_contents.gsub("$AZMON_RS_PROM_MONITOR_PODS", (monitorKubernetesPods ? "true" : "false")) + + # Check to see if monitor_kubernetes_pods is set to true with a valid setting for monitor_kubernetes_namespaces to enable scraping for specific namespaces + # Adding nil check here as well since checkForTypeArray returns true even if setting is nil to accomodate for other settings to be able - + # - to use defaults in case of nil settings + if monitorKubernetesPods && !monitorKubernetesPodsNamespaces.nil? 
&& checkForTypeArray(monitorKubernetesPodsNamespaces, String) + new_contents = createPrometheusPluginsWithNamespaceSetting(monitorKubernetesPods, monitorKubernetesPodsNamespaces, new_contents, interval, fieldPassSetting, fieldDropSetting) + monitorKubernetesPodsNamespacesLength = monitorKubernetesPodsNamespaces.length + else + new_contents = replaceDefaultMonitorPodSettings(new_contents, monitorKubernetesPods) + monitorKubernetesPodsNamespacesLength = 0 + end + File.open(file_name, "w") { |file| file.puts new_contents } puts "config::Successfully substituted the placeholders in telegraf conf file for replicaset" #Set environment variables for telemetry @@ -110,6 +174,8 @@ def populateSettingValuesFromConfigMap(parsedConfig) file.write("export TELEMETRY_RS_PROM_K8S_SERVICES_LENGTH=#{kubernetesServices.length}\n") file.write("export TELEMETRY_RS_PROM_URLS_LENGTH=#{urls.length}\n") file.write("export TELEMETRY_RS_PROM_MONITOR_PODS=\"#{monitorKubernetesPods}\"\n") + file.write("export TELEMETRY_RS_PROM_MONITOR_PODS_NS_LENGTH=\"#{monitorKubernetesPodsNamespacesLength}\"\n") + # Close file after writing all environment variables file.close puts "config::Successfully created telemetry file for replicaset" diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 24ab51d4c..7249957ab 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -15,6 +15,7 @@ class Kube_nodeInventory_Input < Input @@rsPromK8sServiceCount = ENV["TELEMETRY_RS_PROM_K8S_SERVICES_LENGTH"] @@rsPromUrlCount = ENV["TELEMETRY_RS_PROM_URLS_LENGTH"] @@rsPromMonitorPods = ENV["TELEMETRY_RS_PROM_MONITOR_PODS"] + @@rsPromMonitorPodsNamespaceLength = ENV["TELEMETRY_RS_PROM_MONITOR_PODS_NS_LENGTH"] def initialize super @@ -150,6 +151,7 @@ def enumerate properties["rsPromServ"] = @@rsPromK8sServiceCount properties["rsPromUrl"] = @@rsPromUrlCount properties["rsPromMonPods"] = @@rsPromMonitorPods + properties["rsPromMonPodsNs"] = 
@@rsPromMonitorPodsNamespaceLength end ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties) telemetrySent = true From 2a072332b105ddb57cfd77cbebd67e9ec7a728fa Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Wed, 11 Sep 2019 22:32:09 -0700 Subject: [PATCH 121/160] Cherry-pick hotfix 09092019 to ci_feature (#265) --- installer/conf/container-health.conf | 103 ++++++++++++++++++++++++ installer/conf/container.conf | 25 ------ installer/datafiles/base_container.data | 5 ++ 3 files changed, 108 insertions(+), 25 deletions(-) create mode 100644 installer/conf/container-health.conf diff --git a/installer/conf/container-health.conf b/installer/conf/container-health.conf new file mode 100644 index 000000000..4cb9e6913 --- /dev/null +++ b/installer/conf/container-health.conf @@ -0,0 +1,103 @@ +# Fluentd config file for OMS Docker - container components (non kubeAPI) + +# Forward port 25225 for container logs + + type forward + port 25225 + bind 127.0.0.1 + + +# Container inventory + + type containerinventory + tag oms.containerinsights.containerinventory + run_interval 60s + log_level debug + + +#cadvisor perf + + type cadvisorperf + tag oms.api.cadvisorperf + run_interval 60s + log_level debug + + + + type filter_cadvisor_health_node + log_level debug + + + +#custom_metrics_mdm filter plugin + + type filter_cadvisor2mdm + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral + metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes + log_level info + + + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_containerinventory*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + 
max_retry_wait 9m + + + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_cadvisorperf*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + + + + + @type forward + send_timeout 60s + recover_wait 10s + hard_timeout 60s + heartbeat_type tcp + + + host healthmodel-replicaset-service.kube-system + port 25227 + + + + @type file + path /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log + + + + + type out_mdm + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + retry_mdm_post_wait_minutes 60 + diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 0b26357f0..e68e4ff64 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -23,12 +23,6 @@ log_level debug - - type filter_cadvisor_health_node - log_level debug - - - #custom_metrics_mdm filter plugin type filter_cadvisor2mdm @@ -67,25 +61,6 @@ max_retry_wait 9m - - - @type forward - send_timeout 60s - recover_wait 10s - hard_timeout 60s - heartbeat_type tcp - - - host "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_ENDPOINT']}" - port 25227 - - - - @type file - path /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log - - - type out_mdm log_level debug diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 3dc1a18cd..0ea3bc984 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -30,6 +30,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/KubernetesApiClient.rb; source/code/plugin/KubernetesApiClient.rb; 644; root; root 
/etc/opt/microsoft/docker-cimprov/container.conf; installer/conf/container.conf; 644; root; root +/etc/opt/microsoft/docker-cimprov/container-health.conf; installer/conf/container-health.conf; 644; root; root /opt/microsoft/omsagent/plugin/CAdvisorMetricsAPIClient.rb; source/code/plugin/CAdvisorMetricsAPIClient.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_perf.rb; source/code/plugin/in_kube_perf.rb; 644; root; root @@ -257,6 +258,9 @@ chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/fluent_forward_fai mv /etc/opt/microsoft/docker-cimprov/container.conf /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf chown omsagent:omsagent /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf +mv /etc/opt/microsoft/docker-cimprov/container-health.conf /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container-health.conf +chown omsagent:omsagent /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container-health.conf + %Postuninstall_10 # If we're an upgrade, skip all of this cleanup if ${{PERFORMING_UPGRADE_NOT}}; then @@ -268,6 +272,7 @@ if ${{PERFORMING_UPGRADE_NOT}}; then rm -f /var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt rm -f /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt rm -f /etc/opt/microsoft/omsagent/conf/omsagent.d/container.conf + rm -f /etc/opt/microsoft/omsagent/conf/omsagent.d/container-health.conf rmdir /var/opt/microsoft/docker-cimprov/log 2> /dev/null rmdir /var/opt/microsoft/docker-cimprov/state/ContainerInventory 2> /dev/null rmdir /var/opt/microsoft/docker-cimprov/state/ImageInventory 2> /dev/null From 2fee9fd3c1cfa31d143cc4b2174d40f426b15b3a Mon Sep 17 00:00:00 2001 From: ganga1980 Date: Mon, 23 Sep 2019 15:03:12 -0700 Subject: [PATCH 122/160] Gangams/add telemetry hybrid (#264) * add telemetry to detect the cloud, distro and kernel version * add null check since providerId optional * detect azurestack cloud * rename to KubernetesProviderID since ProviderID name already used in LA * 
capture workspaceCloud to the telemetry * trim the domain read from file --- .../code/plugin/ApplicationInsightsUtility.rb | 28 +++++++++++++++++++ source/code/plugin/in_kube_nodes.rb | 15 ++++++++++ 2 files changed, 43 insertions(+) diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index bb4831701..85b424e69 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -64,6 +64,7 @@ def initializeUtility() @@CustomProperties["ControllerType"] = ENV[@@EnvControllerType] encodedAppInsightsKey = ENV[@@EnvApplicationInsightsKey] appInsightsEndpoint = ENV[@@EnvApplicationInsightsEndpoint] + @@CustomProperties["WorkspaceCloud"] = getWorkspaceCloud #Check if telemetry is turned off telemetryOffSwitch = ENV["DISABLE_TELEMETRY"] @@ -230,5 +231,32 @@ def getWorkspaceId() $log.warn("Exception in AppInsightsUtility: getWorkspaceId - error: #{errorStr}") end end + + def getWorkspaceCloud() + begin + adminConf = {} + confFile = File.open(@OmsAdminFilePath, "r") + confFile.each_line do |line| + splitStrings = line.split("=") + adminConf[splitStrings[0]] = splitStrings[1] + end + workspaceDomain = adminConf["URL_TLD"].strip + workspaceCloud = "AzureCloud" + if workspaceDomain.casecmp("opinsights.azure.com") == 0 + workspaceCloud = "AzureCloud" + elsif workspaceDomain.casecmp("opinsights.azure.cn") == 0 + workspaceCloud = "AzureChinaCloud" + elsif workspaceDomain.casecmp("opinsights.azure.us") == 0 + workspaceCloud = "AzureUSGovernment" + elsif workspaceDomain.casecmp("opinsights.azure.de") == 0 + workspaceCloud = "AzureGermanCloud" + else + workspaceCloud = "Unknown" + end + return workspaceCloud + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: getWorkspaceCloud - error: #{errorStr}") + end + end end end diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 7249957ab..42bc13b68 100644 --- 
a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -8,6 +8,7 @@ class Kube_nodeInventory_Input < Input @@ContainerNodeInventoryTag = "oms.containerinsights.ContainerNodeInventory" @@MDMKubeNodeInventoryTag = "mdm.kubenodeinventory" @@promConfigMountPath = "/etc/config/settings/prometheus-data-collection-settings" + @@AzStackCloudFileName = "/etc/kubernetes/host/azurestackcloud.json" @@rsPromInterval = ENV["TELEMETRY_RS_PROM_INTERVAL"] @@rsPromFieldPassCount = ENV["TELEMETRY_RS_PROM_FIELDPASS_LENGTH"] @@ -84,6 +85,17 @@ def enumerate record["Labels"] = [items["metadata"]["labels"]] record["Status"] = "" + if !items["spec"]["providerID"].nil? && !items["spec"]["providerID"].empty? + if File.file?(@@AzStackCloudFileName) # existence of this file indicates agent running on azstack + record["KubernetesProviderID"] = "azurestack" + else + record["KubernetesProviderID"] = items["spec"]["providerID"] + end + else + record["KubernetesProviderID"] = "onprem" + end + + # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions. # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we # populate the KubeNodeInventory Status field. 
A possible value for this field could be "Ready OutofDisk" @@ -139,6 +151,9 @@ def enumerate properties["KubeletVersion"] = record["KubeletVersion"] properties["OperatingSystem"] = nodeInfo["operatingSystem"] properties["DockerVersion"] = dockerVersion + properties["KubernetesProviderID"] = record["KubernetesProviderID"] + properties["KernelVersion"] = nodeInfo["kernelVersion"] + properties["OSImage"] = nodeInfo["osImage"] capacityInfo = items["status"]["capacity"] ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties) From 5eea104b5b5fc1b29ab978c2b2a501530efd6b6e Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Tue, 1 Oct 2019 17:00:46 -0700 Subject: [PATCH 123/160] KubeMonAgentEvents changes to collect configuration events (#267) * changes * changes * changes * changes * changes * changes * env changes * changes * changes * changes * reverting * changes * cahnges * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * chnages * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes --- 
installer/conf/td-agent-bit.conf | 2 +- installer/datafiles/base_container.data | 1 + installer/scripts/ConfigParseErrorLogger.rb | 21 ++ .../scripts/td-agent-bit-conf-customizer.rb | 3 +- .../scripts/tomlparser-prom-customconfig.rb | 19 +- installer/scripts/tomlparser.rb | 42 +-- source/code/go/src/plugins/oms.go | 313 +++++++++++++++++- source/code/go/src/plugins/out_oms.go | 5 +- source/code/go/src/plugins/telemetry.go | 10 +- 9 files changed, 378 insertions(+), 38 deletions(-) create mode 100644 installer/scripts/ConfigParseErrorLogger.rb diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 4e3de6c46..6a1bf3e3e 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -28,6 +28,7 @@ Path /var/log/containers/omsagent*.log DB /var/opt/microsoft/docker-cimprov/state/omsagent-ai.db DB.Sync Off + Parser docker Mem_Buf_Limit 1m Path_Key filepath Skip_Long_Lines On @@ -51,7 +52,6 @@ [FILTER] Name grep Match oms.container.log.flbplugin.* - Exclude log E! 
[\[]inputs.prometheus[\]] [OUTPUT] Name oms diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 0ea3bc984..159550a90 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -116,6 +116,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/tomlparser.rb; installer/scripts/tomlparser.rb; 755; root; root /opt/tomlparser-prom-customconfig.rb; installer/scripts/tomlparser-prom-customconfig.rb; 755; root; root /opt/td-agent-bit-conf-customizer.rb; installer/scripts/td-agent-bit-conf-customizer.rb; 755; root; root +/opt/ConfigParseErrorLogger.rb; installer/scripts/ConfigParseErrorLogger.rb; 755; root; root diff --git a/installer/scripts/ConfigParseErrorLogger.rb b/installer/scripts/ConfigParseErrorLogger.rb new file mode 100644 index 000000000..5d6db8016 --- /dev/null +++ b/installer/scripts/ConfigParseErrorLogger.rb @@ -0,0 +1,21 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +class ConfigParseErrorLogger + require "json" + + def initialize + end + + class << self + def logError(message) + begin + errorMessage = "config::error::" + message + jsonMessage = errorMessage.to_json + STDERR.puts jsonMessage + rescue => errorStr + puts "Error in ConfigParserErrorLogger::logError: #{errorStr}" + end + end + end +end diff --git a/installer/scripts/td-agent-bit-conf-customizer.rb b/installer/scripts/td-agent-bit-conf-customizer.rb index 1e62e3cc2..fae3acb36 100644 --- a/installer/scripts/td-agent-bit-conf-customizer.rb +++ b/installer/scripts/td-agent-bit-conf-customizer.rb @@ -1,4 +1,5 @@ #!/usr/local/bin/ruby +require_relative "ConfigParseErrorLogger" @td_agent_bit_conf_path = "/etc/opt/microsoft/docker-cimprov/td-agent-bit.conf" @@ -40,7 +41,7 @@ def substituteFluentBitPlaceHolders File.open(@td_agent_bit_conf_path, "w") { |file| file.puts new_contents } puts "config::Successfully substituted the placeholders in td-agent-bit.conf file" rescue => errorStr - puts 
"td-agent-bit-config-customizer: error while substituting values: #{errorStr}" + ConfigParseErrorLogger.logError("td-agent-bit-config-customizer: error while substituting values in td-agent-bit.conf file: #{errorStr}") end end diff --git a/installer/scripts/tomlparser-prom-customconfig.rb b/installer/scripts/tomlparser-prom-customconfig.rb index d44bf3342..ab868f1a9 100644 --- a/installer/scripts/tomlparser-prom-customconfig.rb +++ b/installer/scripts/tomlparser-prom-customconfig.rb @@ -1,6 +1,7 @@ #!/usr/local/bin/ruby require_relative "tomlrb" +require_relative "ConfigParseErrorLogger" require "fileutils" @promConfigMapMountPath = "/etc/config/settings/prometheus-data-collection-settings" @@ -40,7 +41,7 @@ def parseConfigMap return nil end rescue => errorStr - puts "config::error::Exception while parsing toml config file for prometheus config: #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while parsing config map for prometheus config: #{errorStr}, using defaults, please check config map for errors") return nil end end @@ -66,7 +67,7 @@ def replaceDefaultMonitorPodSettings(new_contents, monitorKubernetesPods) new_contents = new_contents.gsub("$AZMON_RS_PROM_MONITOR_PODS", ("monitor_kubernetes_pods = #{monitorKubernetesPods}")) new_contents = new_contents.gsub("$AZMON_RS_PROM_PLUGINS_WITH_NAMESPACE_FILTER", "") rescue => errorStr - puts "config::error::Exception while replacing default pod monitor settings: #{errorStr}" + puts "Exception while replacing default pod monitor settings: #{errorStr}" end return new_contents end @@ -98,7 +99,7 @@ def createPrometheusPluginsWithNamespaceSetting(monitorKubernetesPods, monitorKu new_contents = new_contents.gsub("$AZMON_RS_PROM_PLUGINS_WITH_NAMESPACE_FILTER", pluginConfigsWithNamespaces) return new_contents rescue => errorStr - puts "config::error::Exception while creating prometheus input plugins to filter namespaces: #{errorStr}, using defaults" + puts "Exception while creating prometheus input 
plugins to filter namespaces: #{errorStr}, using defaults" replaceDefaultMonitorPodSettings(new_contents, monitorKubernetesPods) end end @@ -181,10 +182,10 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Successfully created telemetry file for replicaset" end else - puts "config::Typecheck failed for prometheus config settings for replicaset, using defaults" + ConfigParseErrorLogger.logError("Typecheck failed for prometheus config settings for replicaset, using defaults, please use right types for all settings") end # end of type check condition rescue => errorStr - puts "config::error::Exception while parsing config file for prometheus config for replicaset: #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while parsing config file for prometheus config for replicaset: #{errorStr}, using defaults") setRsPromDefaults puts "****************End Prometheus Config Processing********************" end @@ -236,16 +237,16 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Successfully created telemetry file for daemonset" end else - puts "config::Typecheck failed for prometheus config settings for daemonset, using defaults" + ConfigParseErrorLogger.logError("Typecheck failed for prometheus config settings for daemonset, using defaults, please use right types for all settings") end # end of type check condition rescue => errorStr - puts "config::error::Exception while parsing config file for prometheus config for daemonset: #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while parsing config file for prometheus config for daemonset: #{errorStr}, using defaults, please check correctness of configmap") puts "****************End Prometheus Config Processing********************" end end # end of controller type check end else - puts "config::error:: Controller undefined while processing prometheus config, using defaults" + ConfigParseErrorLogger.logError("Controller undefined while 
processing prometheus config, using defaults") end end @@ -258,7 +259,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) end else if (File.file?(@promConfigMapMountPath)) - puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults" + ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported version") else puts "config::No configmap mounted for prometheus custom config, using defaults" end diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index b66e1257e..523f8c307 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -1,7 +1,8 @@ #!/usr/local/bin/ruby require_relative "tomlrb" -require 'json' +require_relative "ConfigParseErrorLogger" +require "json" @log_settings_config_map_mount_path = "/etc/config/settings/log-data-collection-settings" @agent_settings_config_map_mount_path = "/etc/config/settings/agent-settings" @@ -33,7 +34,7 @@ def parseConfigMap(path) return nil end rescue => errorStr - puts "config::error::Exception while parsing toml config file: #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while parsing config map for log collection/env variable settings: #{errorStr}, using defaults, please check config map for errors") @excludePath = "*_kube-system_*.log" return nil end @@ -70,7 +71,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) end end rescue => errorStr - puts "config::error::Exception while reading config settings for stdout log collection - #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while reading config map settings for stdout log collection - #{errorStr}, using defaults, please check config map for errors") end #Get stderr log config settings @@ -107,7 +108,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) end end rescue => errorStr - puts "config::error:Exception while 
reading config settings for stderr log collection - #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while reading config map settings for stderr log collection - #{errorStr}, using defaults, please check config map for errors") end #Get environment variables log config settings @@ -117,42 +118,43 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Using config map setting for cluster level environment variable collection" end rescue => errorStr - puts "config::error::Exception while reading config settings for cluster level environment variable collection - #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while reading config map settings for cluster level environment variable collection - #{errorStr}, using defaults, please check config map for errors") end end begin if !parsedConfig.nil? && !parsedConfig[:agent_settings].nil? && !parsedConfig[:agent_settings][:health_model].nil? && !parsedConfig[:agent_settings][:health_model][:enabled].nil? - @enable_health_model = parsedConfig[:agent_settings][:health_model][:enabled] + @enable_health_model = parsedConfig[:agent_settings][:health_model][:enabled] else - @enable_health_model = false + @enable_health_model = false end puts "enable_health_model = #{@enable_health_model}" rescue => errorStr - puts "config::error:Exception while reading config settings for health_model enabled setting - #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while reading config map settings for health_model enabled setting - #{errorStr}, using defaults, please check config map for errors") @enable_health_model = false end end @configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] puts "****************Start Config Processing********************" + if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? 
&& @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it - configMapSettings = {} + configMapSettings = {} - #iterate over every *settings file and build a hash of settings - Dir["/etc/config/settings/*settings"].each{|file| - puts "Parsing File #{file}" - settings = parseConfigMap(file) - if !settings.nil? - configMapSettings = configMapSettings.merge(settings) - end - } + #iterate over every *settings file and build a hash of settings + Dir["/etc/config/settings/*settings"].each { |file| + puts "Parsing File #{file}" + settings = parseConfigMap(file) + if !settings.nil? + configMapSettings = configMapSettings.merge(settings) + end + } if !configMapSettings.nil? populateSettingValuesFromConfigMap(configMapSettings) end else - puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults" + ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version") @excludePath = "*_kube-system_*.log" end @@ -178,13 +180,13 @@ def populateSettingValuesFromConfigMap(parsedConfig) file.write("export AZMON_STDERR_EXCLUDED_NAMESPACES=#{@stderrExcludeNamespaces}\n") file.write("export AZMON_CLUSTER_COLLECT_ENV_VAR=#{@collectClusterEnvVariables}\n") file.write("export AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH=#{@excludePath}\n") - #health_model settings + #health_model settings file.write("export AZMON_CLUSTER_ENABLE_HEALTH_MODEL=#{@enable_health_model}\n") # Close file after writing all environment variables file.close puts "Both stdout & stderr log collection are turned off for namespaces: '#{@excludePath}' " puts "****************End Config Processing********************" else - puts "config::error::Exception while opening file for writing config environment variables" + puts "Exception while opening file for writing config environment variables" puts "****************End Config 
Processing********************" end diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index c5ad307d8..6d78455bd 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -28,6 +28,9 @@ const ContainerLogDataType = "CONTAINER_LOG_BLOB" // DataType for Insights metric const InsightsMetricsDataType = "INSIGHTS_METRICS_BLOB" +// DataType for KubeMonAgentEvent +const KubeMonAgentEventDataType = "KUBE_MON_AGENT_EVENTS_BLOB" + //env varibale which has ResourceId for LA const ResourceIdEnv = "AKS_RESOURCE_ID" @@ -46,6 +49,20 @@ const TelegrafTagClusterName = "clusterName" // clusterId tag const TelegrafTagClusterID = "clusterId" +const ConfigErrorEventCategory = "container.azm.ms/configmap" + +const PromScrapingErrorEventCategory = "container.azm.ms/promscraping" + +const NoErrorEventCategory = "container.azm.ms/noerror" + +const KubeMonAgentEventError = "Error" + +const KubeMonAgentEventWarning = "Warning" + +const KubeMonAgentEventInfo = "Info" + +const KubeMonAgentEventsFlushedEvent = "KubeMonAgentEventsFlushed" + // ContainerLogPluginConfFilePath --> config file path for container log plugin const DaemonSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" @@ -54,6 +71,8 @@ const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimp const IPName = "Containers" const defaultContainerInventoryRefreshInterval = 60 +const kubeMonAgentConfigEventFlushInterval = 60 + var ( // PluginConfiguration the plugins configuration PluginConfiguration map[string]string @@ -71,6 +90,8 @@ var ( ResourceCentric bool //ResourceName ResourceName string + //KubeMonAgentEvents skip first flush + skipKubeMonEventsFlush bool ) var ( @@ -88,11 +109,19 @@ var ( ContainerLogTelemetryMutex = &sync.Mutex{} // ClientSet for querying KubeAPIs ClientSet *kubernetes.Clientset + // Config 
error hash + ConfigErrorEvent map[string]KubeMonAgentEventTags + // Prometheus scraping error hash + PromScrapeErrorEvent map[string]KubeMonAgentEventTags + // EventHashUpdateMutex read and write mutex access to the event hash + EventHashUpdateMutex = &sync.Mutex{} ) var ( // ContainerImageNameRefreshTicker updates the container image and names periodically ContainerImageNameRefreshTicker *time.Ticker + // KubeMonAgentConfigEventsSendTicker to send config events every hour + KubeMonAgentConfigEventsSendTicker *time.Ticker ) var ( @@ -142,6 +171,41 @@ type ContainerLogBlob struct { DataItems []DataItem `json:"DataItems"` } +// Config Error message to be sent to Log Analytics +type laKubeMonAgentEvents struct { + Computer string `json:"Computer"` + CollectionTime string `json:"CollectionTime"` //mapped to TimeGenerated + Category string `json:"Category"` + Level string `json:"Level"` + ClusterId string `json:"ClusterId"` + ClusterName string `json:"ClusterName"` + Message string `json:"Message"` + Tags string `json:"Tags"` +} + +type KubeMonAgentEventTags struct { + PodName string + ContainerId string + FirstOccurance string + LastOccurance string + Count int +} + +type KubeMonAgentEventBlob struct { + DataType string `json:"DataType"` + IPName string `json:"IPName"` + DataItems []laKubeMonAgentEvents `json:"DataItems"` +} + +// KubeMonAgentEventType to be used as enum +type KubeMonAgentEventType int + +const ( + // KubeMonAgentEventType to be used as enum for ConfigError and ScrapingError + ConfigError KubeMonAgentEventType = iota + PromScrapingError +) + func createLogger() *log.Logger { var logfile *os.File path := "/var/opt/microsoft/docker-cimprov/log/fluent-bit-out-oms-runtime.log" @@ -262,6 +326,223 @@ func convert(in interface{}) (float64, bool) { } } +// PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA +func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType KubeMonAgentEventType) { + var logRecordString = 
ToString(record["log"]) + var eventTimeStamp = ToString(record["time"]) + containerID, _, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) + + Log("Locked EventHashUpdateMutex for updating hash \n ") + EventHashUpdateMutex.Lock() + switch errType { + case ConfigError: + // Doing this since the error logger library is adding quotes around the string and a newline to the end because + // we are converting string to json to log lines in different lines as one record + logRecordString = strings.TrimSuffix(logRecordString, "\n") + logRecordString = logRecordString[1 : len(logRecordString)-1] + + if val, ok := ConfigErrorEvent[logRecordString]; ok { + Log("In config error existing hash update\n") + eventCount := val.Count + eventFirstOccurance := val.FirstOccurance + + ConfigErrorEvent[logRecordString] = KubeMonAgentEventTags{ + PodName: podName, + ContainerId: containerID, + FirstOccurance: eventFirstOccurance, + LastOccurance: eventTimeStamp, + Count: eventCount + 1, + } + } else { + ConfigErrorEvent[logRecordString] = KubeMonAgentEventTags{ + PodName: podName, + ContainerId: containerID, + FirstOccurance: eventTimeStamp, + LastOccurance: eventTimeStamp, + Count: 1, + } + } + + case PromScrapingError: + // Splitting this based on the string 'E! [inputs.prometheus]: ' since the log entry has timestamp and we want to remove that before building the hash + var scrapingSplitString = strings.Split(logRecordString, "E! 
[inputs.prometheus]: ") + if scrapingSplitString != nil && len(scrapingSplitString) == 2 { + var splitString = scrapingSplitString[1] + // Trimming the newline character at the end since this is being added as the key + splitString = strings.TrimSuffix(splitString, "\n") + if splitString != "" { + if val, ok := PromScrapeErrorEvent[splitString]; ok { + Log("In config error existing hash update\n") + eventCount := val.Count + eventFirstOccurance := val.FirstOccurance + + PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ + PodName: podName, + ContainerId: containerID, + FirstOccurance: eventFirstOccurance, + LastOccurance: eventTimeStamp, + Count: eventCount + 1, + } + } else { + PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ + PodName: podName, + ContainerId: containerID, + FirstOccurance: eventTimeStamp, + LastOccurance: eventTimeStamp, + Count: 1, + } + } + } + } + } + EventHashUpdateMutex.Unlock() + Log("Unlocked EventHashUpdateMutex after updating hash \n ") +} + +// Function to get config error log records after iterating through the two hashes +func flushKubeMonAgentEventRecords() { + for ; true; <-KubeMonAgentConfigEventsSendTicker.C { + if skipKubeMonEventsFlush != true { + Log("In flushConfigErrorRecords\n") + start := time.Now() + var resp *http.Response + var postError error + var elapsed time.Duration + var laKubeMonAgentEventsRecords []laKubeMonAgentEvents + telemetryDimensions := make(map[string]string) + + telemetryDimensions["ConfigErrorEventCount"] = strconv.Itoa(len(ConfigErrorEvent)) + telemetryDimensions["PromScrapeErrorEventCount"] = strconv.Itoa(len(PromScrapeErrorEvent)) + + if (len(ConfigErrorEvent) > 0) || (len(PromScrapeErrorEvent) > 0) { + EventHashUpdateMutex.Lock() + Log("Locked EventHashUpdateMutex for reading hashes\n") + for k, v := range ConfigErrorEvent { + tagJson, err := json.Marshal(v) + + if err != nil { + message := fmt.Sprintf("Error while Marshalling config error event tags: %s", err.Error()) + 
Log(message) + SendException(message) + } else { + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: ConfigErrorEventCategory, + Level: KubeMonAgentEventError, + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + Tags: fmt.Sprintf("%s", tagJson), + } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + } + } + + for k, v := range PromScrapeErrorEvent { + tagJson, err := json.Marshal(v) + if err != nil { + message := fmt.Sprintf("Error while Marshalling prom scrape error event tags: %s", err.Error()) + Log(message) + SendException(message) + } else { + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: PromScrapingErrorEventCategory, + Level: KubeMonAgentEventWarning, + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + Tags: fmt.Sprintf("%s", tagJson), + } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + } + } + + //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour + for k := range PromScrapeErrorEvent { + delete(PromScrapeErrorEvent, k) + } + Log("PromScrapeErrorEvent cache cleared\n") + EventHashUpdateMutex.Unlock() + Log("Unlocked EventHashUpdateMutex for reading hashes\n") + } else { + //Sending a record in case there are no errors to be able to differentiate between no data vs no errors + tagsValue := KubeMonAgentEventTags{} + + tagJson, err := json.Marshal(tagsValue) + if err != nil { + message := fmt.Sprintf("Error while Marshalling no error tags: %s", err.Error()) + Log(message) + SendException(message) + } else { + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: NoErrorEventCategory, + Level: KubeMonAgentEventInfo, + ClusterId: ResourceID, 
+ ClusterName: ResourceName, + Message: "No errors", + Tags: fmt.Sprintf("%s", tagJson), + } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + } + } + + if len(laKubeMonAgentEventsRecords) > 0 { + kubeMonAgentEventEntry := KubeMonAgentEventBlob{ + DataType: KubeMonAgentEventDataType, + IPName: IPName, + DataItems: laKubeMonAgentEventsRecords} + + marshalled, err := json.Marshal(kubeMonAgentEventEntry) + + if err != nil { + message := fmt.Sprintf("Error while marshalling kubemonagentevent entry: %s", err.Error()) + Log(message) + SendException(message) + } else { + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) + req.Header.Set("Content-Type", "application/json") + //expensive to do string len for every request, so use a flag + if ResourceCentric == true { + req.Header.Set("x-ms-AzureResourceId", ResourceID) + } + + resp, postError = HTTPClient.Do(req) + elapsed = time.Since(start) + + if postError != nil { + message := fmt.Sprintf("Error when sending kubemonagentevent request %s \n", err.Error()) + Log(message) + Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) + } else if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("Status %s Status Code %d", resp.Status, resp.StatusCode) + } + Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) + } else { + numRecords := len(laKubeMonAgentEventsRecords) + Log("Successfully flushed %d records in %s", numRecords, elapsed) + + // Send telemetry to AppInsights resource + SendEvent(KubeMonAgentEventsFlushedEvent, telemetryDimensions) + + } + if resp != nil && resp.Body != nil { + defer resp.Body.Close() + } + } + } + } else { + // Setting this to false to allow for subsequent flushes after the first hour + skipKubeMonEventsFlush = false + } + } +} + //Translates telegraf time series to one or more Azure loganalytics metric(s) func translateTelegrafMetrics(m 
map[interface{}]interface{}) ([]*laTelegrafMetric, error) { @@ -431,7 +712,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { DataUpdateMutex.Unlock() for _, record := range tailPluginRecords { - containerID, k8sNamespace := GetContainerIDK8sNamespaceFromFileName(ToString(record["filepath"])) + containerID, k8sNamespace, _ := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) logEntrySource := ToString(record["stream"]) if strings.EqualFold(logEntrySource, "stdout") { @@ -502,6 +783,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { SendException(message) return output.FLB_OK } + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) req.Header.Set("Content-Type", "application/json") //expensive to do string len for every request, so use a flag @@ -552,11 +834,12 @@ func containsKey(currentMap map[string]bool, key string) bool { return c } -// GetContainerIDK8sNamespaceFromFileName Gets the container ID From the file Name +// GetContainerIDK8sNamespacePodNameFromFileName Gets the container ID, k8s namespace and pod name From the file Name // sample filename kube-proxy-dgcx7_kube-system_kube-proxy-8df7e49e9028b60b5b0d0547f409c455a9567946cf763267b7e6fa053ab8c182.log -func GetContainerIDK8sNamespaceFromFileName(filename string) (string, string) { +func GetContainerIDK8sNamespacePodNameFromFileName(filename string) (string, string, string) { id := "" ns := "" + podName := "" start := strings.LastIndex(filename, "-") end := strings.LastIndex(filename, ".") @@ -576,7 +859,16 @@ func GetContainerIDK8sNamespaceFromFileName(filename string) (string, string) { ns = filename[start+1 : end] } - return id, ns + start = strings.Index(filename, "/containers/") + end = strings.Index(filename, "_") + + if start >= end || start == -1 || end == -1 { + podName = "" + } else { + podName = filename[(start + len("/containers/")):end] + } + + return id, ns, podName } // InitializePlugin 
reads and populates plugin configuration @@ -586,6 +878,12 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { StderrIgnoreNsSet = make(map[string]bool) ImageIDMap = make(map[string]string) NameIDMap = make(map[string]string) + // Keeping the two error hashes separate since we need to keep the config error hash for the lifetime of the container + // whereas the prometheus scrape error hash needs to be refreshed every hour + ConfigErrorEvent = make(map[string]KubeMonAgentEventTags) + PromScrapeErrorEvent = make(map[string]KubeMonAgentEventTags) + // Initilizing this to true to skip the first kubemonagentevent flush since the errors are not populated at this time + skipKubeMonEventsFlush = true pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { @@ -640,6 +938,9 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { Log("containerInventoryRefreshInterval = %d \n", containerInventoryRefreshInterval) ContainerImageNameRefreshTicker = time.NewTicker(time.Second * time.Duration(containerInventoryRefreshInterval)) + Log("kubeMonAgentConfigEventFlushInterval = %d \n", kubeMonAgentConfigEventFlushInterval) + KubeMonAgentConfigEventsSendTicker = time.NewTicker(time.Minute * time.Duration(kubeMonAgentConfigEventFlushInterval)) + // Populate Computer field containerHostName, err := ioutil.ReadFile(pluginConfig["container_host_file_path"]) if err != nil { @@ -682,7 +983,11 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { populateExcludedStdoutNamespaces() populateExcludedStderrNamespaces() go updateContainerImageNameMaps() + + // Flush config error records every hour + go flushKubeMonAgentEventRecords() } else { Log("Running in replicaset. 
Disabling container enrichment caching & updates \n") } + } diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index e9e7124b7..1f1915798 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -1,14 +1,14 @@ package main import ( - "github.com/fluent/fluent-bit-go/output" "github.com/Microsoft/ApplicationInsights-Go/appinsights" + "github.com/fluent/fluent-bit-go/output" ) import ( "C" + "os" "strings" "unsafe" - "os" ) //export FLBPluginRegister @@ -61,6 +61,7 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { incomingTag := strings.ToLower(C.GoString(tag)) if strings.Contains(incomingTag, "oms.container.log.flbplugin") { + // This will also include populating cache to be sent as for config events return PushToAppInsightsTraces(records, appinsights.Information, incomingTag) } else if strings.Contains(incomingTag, "oms.container.perf.telegraf") { return PostTelegrafMetricsToLA(records) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 4f22b8c03..d5675187f 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -198,7 +198,15 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel contracts.SeverityLevel, tag string) int { var logLines []string for _, record := range records { - logLines = append(logLines, ToString(record["log"])) + // If record contains config error or prometheus scraping errors send it to KubeMonAgentEvents table + var logEntry = ToString(record["log"]) + if strings.Contains(logEntry, "config::error") { + populateKubeMonAgentEventHash(record, ConfigError) + } else if strings.Contains(logEntry, "E! 
[inputs.prometheus]") { + populateKubeMonAgentEventHash(record, PromScrapingError) + } else { + logLines = append(logLines, logEntry) + } } traceEntry := strings.Join(logLines, "\n") From c472b120c473f75e3895e3a5bd1adea96b95e250 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Thu, 26 Sep 2019 11:00:50 -0700 Subject: [PATCH 124/160] Fix the Dupe Perf Data Issue from the DaemonSet (#266) * Dupe Perf Record Fix --- installer/conf/container-health.conf | 4 ++-- installer/conf/kube.conf | 4 ++-- source/code/plugin/filter_health_model_builder.rb | 6 +++--- source/code/plugin/in_cadvisor_perf.rb | 4 ++-- source/code/plugin/in_kube_health.rb | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/installer/conf/container-health.conf b/installer/conf/container-health.conf index 4cb9e6913..e6edf41df 100644 --- a/installer/conf/container-health.conf +++ b/installer/conf/container-health.conf @@ -23,7 +23,7 @@ log_level debug - + type filter_cadvisor_health_node log_level debug @@ -68,7 +68,7 @@ - + @type forward send_timeout 60s recover_wait 10s diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 3cbc3ff17..8e1f6ae88 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -55,7 +55,7 @@ #Kubernetes health type kubehealth - tag oms.api.KubeHealth.ReplicaSet + tag kubehealth.ReplicaSet run_interval 60s log_level debug @@ -82,7 +82,7 @@ log_level info - + type filter_health_model_builder diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 0c1b378a0..39452cb7e 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -84,7 +84,7 @@ def filter_stream(tag, es) new_es = MultiEventStream.new time = Time.now - if tag.start_with?("oms.api.KubeHealth.DaemonSet") + if tag.start_with?("kubehealth.DaemonSet") records = [] if !es.nil? 
es.each{|time, record| @@ -93,7 +93,7 @@ def filter_stream(tag, es) @buffer.add_to_buffer(records) end return [] - elsif tag.start_with?("oms.api.KubeHealth.ReplicaSet") + elsif tag.start_with?("kubehealth.ReplicaSet") @log.info "TAG #{tag}" records = [] es.each{|time, record| @@ -220,7 +220,7 @@ def filter_stream(tag, es) # this filter also acts as a pass through as we are rewriting the tag and emitting to the fluent stream es else - raise 'Invalid tag #{tag} received' + raise "Invalid tag #{tag} received" end rescue => e diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index 1702877a2..ce205322d 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -19,8 +19,8 @@ def initialize config_param :run_interval, :time, :default => "1m" config_param :tag, :string, :default => "oms.api.cadvisorperf" config_param :mdmtag, :string, :default => "mdm.cadvisorperf" - config_param :nodehealthtag, :string, :default => "oms.api.KubeHealth.DaemonSet.Node" - #config_param :containerhealthtag, :string, :default => "oms.api.KubeHealth.DaemonSet.Container" + config_param :nodehealthtag, :string, :default => "kubehealth.DaemonSet.Node" + #config_param :containerhealthtag, :string, :default => "kubehealth.DaemonSet.Container" def configure(conf) super diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 199e03e56..5d29eb035 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -34,7 +34,7 @@ def initialize include HealthModel config_param :run_interval, :time, :default => "1m" - config_param :tag, :string, :default => "oms.api.KubeHealth.ReplicaSet" + config_param :tag, :string, :default => "kubehealth.ReplicaSet" def configure(conf) super From 98e4114bb499943fe60b06d5719f906f7b2b7b0d Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Thu, 3 Oct 2019 15:48:35 -0700 Subject: [PATCH 125/160] PR for 1. 
Container Memory CPU monitor 2. Configuration for Node Conditions 3. Fixed Type Changes 4. Use Env variable, and health_forward (that handles network errors at init) 5. Unit Tests (#268) --- installer/conf/container-health.conf | 103 - installer/conf/container.conf | 31 + installer/conf/health_model_definition.json | 91 +- installer/conf/healthmonitorconfig.json | 29 +- installer/conf/kube.conf | 15 +- installer/datafiles/base_container.data | 13 +- installer/scripts/tomlparser.rb | 50 +- .../filter_cadvisor_health_container.rb | 233 +- .../plugin/filter_cadvisor_health_node.rb | 131 +- .../plugin/filter_health_model_builder.rb | 78 +- .../plugin/health/agg_monitor_id_labels.rb | 11 +- .../health_container_cpu_memory_aggregator.rb | 258 + ...h_container_cpu_memory_record_formatter.rb | 34 + .../health/health_kube_api_down_handler.rb | 12 +- .../health/health_kubernetes_resources.rb | 291 +- .../health/health_missing_signal_generator.rb | 8 +- .../plugin/health/health_model_constants.rb | 88 +- .../plugin/health/health_monitor_helpers.rb | 42 +- .../plugin/health/health_monitor_provider.rb | 22 +- .../plugin/health/health_monitor_utils.rb | 41 +- .../plugin/health/health_signal_reducer.rb | 1 - .../plugin/health/parent_monitor_provider.rb | 5 +- source/code/plugin/health/unit_monitor.rb | 1 + source/code/plugin/in_cadvisor_perf.rb | 7 +- source/code/plugin/in_kube_events.rb | 2 +- source/code/plugin/in_kube_health.rb | 61 +- source/code/plugin/out_health_forward.rb | 677 ++ test/code/plugin/health/cadvisor_perf.json | 2540 +++++++ test/code/plugin/health/deployments.json | 1385 ++++ ...th_container_cpu_memory_aggregator_spec.rb | 190 + ...tainer_cpu_memory_record_formatter_spec.rb | 58 + .../health/health_kubernetes_resource_spec.rb | 26 +- .../health/health_model_builder_test.rb | 162 +- test/code/plugin/health/nodes.json | 1966 ++++++ .../health/parent_monitor_provider_spec.rb | 10 +- test/code/plugin/health/pods.json | 5987 +++++++++++++++++ 36 files changed, 
13988 insertions(+), 671 deletions(-) delete mode 100644 installer/conf/container-health.conf create mode 100644 source/code/plugin/health/health_container_cpu_memory_aggregator.rb create mode 100644 source/code/plugin/health/health_container_cpu_memory_record_formatter.rb create mode 100644 source/code/plugin/out_health_forward.rb create mode 100644 test/code/plugin/health/cadvisor_perf.json create mode 100644 test/code/plugin/health/deployments.json create mode 100644 test/code/plugin/health/health_container_cpu_memory_aggregator_spec.rb create mode 100644 test/code/plugin/health/health_container_cpu_memory_record_formatter_spec.rb create mode 100644 test/code/plugin/health/nodes.json create mode 100644 test/code/plugin/health/pods.json diff --git a/installer/conf/container-health.conf b/installer/conf/container-health.conf deleted file mode 100644 index e6edf41df..000000000 --- a/installer/conf/container-health.conf +++ /dev/null @@ -1,103 +0,0 @@ -# Fluentd config file for OMS Docker - container components (non kubeAPI) - -# Forward port 25225 for container logs - - type forward - port 25225 - bind 127.0.0.1 - - -# Container inventory - - type containerinventory - tag oms.containerinsights.containerinventory - run_interval 60s - log_level debug - - -#cadvisor perf - - type cadvisorperf - tag oms.api.cadvisorperf - run_interval 60s - log_level debug - - - - type filter_cadvisor_health_node - log_level debug - - - -#custom_metrics_mdm filter plugin - - type filter_cadvisor2mdm - custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral - metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes - log_level info - - - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_containerinventory*.buffer - buffer_queue_limit 
20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - - - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_cadvisorperf*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - - - - - @type forward - send_timeout 60s - recover_wait 10s - hard_timeout 60s - heartbeat_type tcp - - - host healthmodel-replicaset-service.kube-system - port 25227 - - - - @type file - path /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log - - - - - type out_mdm - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - retry_mdm_post_wait_minutes 60 - diff --git a/installer/conf/container.conf b/installer/conf/container.conf index e68e4ff64..5f08043c7 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -23,6 +23,16 @@ log_level debug + + type filter_cadvisor_health_node + log_level debug + + + + type filter_cadvisor_health_container + log_level debug + + #custom_metrics_mdm filter plugin type filter_cadvisor2mdm @@ -61,6 +71,27 @@ max_retry_wait 9m + + + @type health_forward + send_timeout 60s + recover_wait 10s + hard_timeout 60s + heartbeat_type tcp + skip_network_error_at_init true + expire_dns_cache 600s + + + host "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_HOST']}" + port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}" + + + + @type file + path /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log + + + type out_mdm log_level debug diff --git a/installer/conf/health_model_definition.json b/installer/conf/health_model_definition.json index 
1112fe158..e6c9e1808 100644 --- a/installer/conf/health_model_definition.json +++ b/installer/conf/health_model_definition.json @@ -23,6 +23,61 @@ "container.azm.ms/cluster-name" ] }, + { + "monitor_id": "container", + "labels": [ + "container.azm.ms/namespace", + "container.azm.ms/workload-name", + "container.azm.ms/workload-kind", + "container.azm.ms/container", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ], + "parent_monitor_id": [ + { + "label": "container.azm.ms/namespace", + "operator": "==", + "value": "kube-system", + "id": "system_workload" + }, + { + "label": "container.azm.ms/namespace", + "operator": "!=", + "value": "kube-system", + "id": "user_workload" + } + ] + }, + { + "monitor_id": "container_cpu_utilization", + "parent_monitor_id": "container", + "labels": [ + "container.azm.ms/namespace", + "container.azm.ms/workload-name", + "container.azm.ms/workload-kind", + "container.azm.ms/container", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, + { + "monitor_id": "container_memory_utilization", + "parent_monitor_id": "container", + "labels": [ + "container.azm.ms/namespace", + "container.azm.ms/workload-name", + "container.azm.ms/workload-kind", + "container.azm.ms/container", + "container.azm.ms/cluster-region", + "container.azm.ms/cluster-subscription-id", + "container.azm.ms/cluster-resource-group", + "container.azm.ms/cluster-name" + ] + }, { "monitor_id": "system_workload_pods_ready", "parent_monitor_id": "system_workload", @@ -104,6 +159,9 @@ "kubernetes.io/hostname", "agentpool", "kubernetes.io/role", + "node-role.kubernetes.io/master", + "node-role.kubernetes.io/compute", + "node-role.kubernetes.io/infra", "container.azm.ms/cluster-region", "container.azm.ms/cluster-subscription-id", 
"container.azm.ms/cluster-resource-group", @@ -117,6 +175,9 @@ "kubernetes.io/hostname", "agentpool", "kubernetes.io/role", + "node-role.kubernetes.io/master", + "node-role.kubernetes.io/compute", + "node-role.kubernetes.io/infra", "container.azm.ms/cluster-region", "container.azm.ms/cluster-subscription-id", "container.azm.ms/cluster-resource-group", @@ -130,6 +191,9 @@ "kubernetes.io/hostname", "agentpool", "kubernetes.io/role", + "node-role.kubernetes.io/master", + "node-role.kubernetes.io/compute", + "node-role.kubernetes.io/infra", "container.azm.ms/cluster-region", "container.azm.ms/cluster-subscription-id", "container.azm.ms/cluster-resource-group", @@ -143,12 +207,33 @@ "kubernetes.io/hostname", "agentpool", "kubernetes.io/role", + "node-role.kubernetes.io/master", + "node-role.kubernetes.io/compute", + "node-role.kubernetes.io/infra", "container.azm.ms/cluster-region", "container.azm.ms/cluster-subscription-id", "container.azm.ms/cluster-resource-group", "container.azm.ms/cluster-name" ], "parent_monitor_id": [ + { + "label": "node-role.kubernetes.io/master", + "operator": "==", + "value": "true", + "id": "master_node_pool" + }, + { + "label": "node-role.kubernetes.io/compute", + "operator": "==", + "value": "true", + "id": "agent_node_pool" + }, + { + "label": "node-role.kubernetes.io/infra", + "operator": "==", + "value": "true", + "id": "agent_node_pool" + }, { "label": "kubernetes.io/role", "operator": "==", @@ -161,14 +246,16 @@ "value": "agent", "id": "agent_node_pool" } - ] + ], + "default_parent_monitor_id": "agent_node_pool" }, { "monitor_id": "master_node_pool", "aggregation_algorithm": "percentage", "aggregation_algorithm_params": { "critical_threshold": 80.0, - "warning_threshold": 90.0 + "warning_threshold": 90.0, + "state_threshold": 80.0 }, "parent_monitor_id": "all_nodes", "labels": [ diff --git a/installer/conf/healthmonitorconfig.json b/installer/conf/healthmonitorconfig.json index 28d562652..ea6b23856 100644 --- 
a/installer/conf/healthmonitorconfig.json +++ b/installer/conf/healthmonitorconfig.json @@ -2,30 +2,41 @@ "node_cpu_utilization": { "WarnThresholdPercentage": 80.0, "FailThresholdPercentage": 90.0, - "ConsecutiveSamplesForStateTransition": 3 + "ConsecutiveSamplesForStateTransition": 3, + "Operator": ">" }, "node_memory_utilization": { "WarnThresholdPercentage": 80.0, "FailThresholdPercentage": 90.0, - "ConsecutiveSamplesForStateTransition": 3 + "ConsecutiveSamplesForStateTransition": 3, + "Operator": ">" }, "container_cpu_utilization": { "WarnThresholdPercentage": 80.0, "FailThresholdPercentage": 90.0, - "ConsecutiveSamplesForStateTransition": 3 + "StateThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3, + "Operator": ">" }, "container_memory_utilization": { "WarnThresholdPercentage": 80.0, "FailThresholdPercentage": 90.0, - "ConsecutiveSamplesForStateTransition": 3 + "StateThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3, + "Operator": ">" }, "user_workload_pods_ready": { - "WarnThresholdPercentage": 0.0, - "FailThresholdPercentage": 10.0, - "ConsecutiveSamplesForStateTransition": 2 + "WarnThresholdPercentage": 100.0, + "FailThresholdPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 2, + "Operator": "<" }, "system_workload_pods_ready": { - "FailThresholdPercentage": 0.0, - "ConsecutiveSamplesForStateTransition": 2 + "FailThresholdPercentage": 100.0, + "ConsecutiveSamplesForStateTransition": 2, + "Operator": "<" + }, + "node_condition": { + "NodeConditionTypesForFailedState": "outofdisk,networkunavailable" } } \ No newline at end of file diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 8e1f6ae88..40f4ac880 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -1,7 +1,7 @@ # Fluentd config file for OMS Docker - cluster components (kubeAPI) type forward - port 25227 + port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}" bind 0.0.0.0 @@ -234,14 +234,17 @@ 
max_retry_wait 9m - - type out_oms_api + + type out_oms log_level debug - buffer_chunk_limit 10m + num_threads 5 + buffer_chunk_limit 20m buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_KubeHealth*.buffer - buffer_queue_limit 10 + buffer_path %STATE_DIR_WS%/out_oms_kubehealth*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk flush_interval 20s retry_limit 10 retry_wait 30s + max_retry_wait 9m \ No newline at end of file diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 159550a90..981f51f4c 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -30,7 +30,6 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/KubernetesApiClient.rb; source/code/plugin/KubernetesApiClient.rb; 644; root; root /etc/opt/microsoft/docker-cimprov/container.conf; installer/conf/container.conf; 644; root; root -/etc/opt/microsoft/docker-cimprov/container-health.conf; installer/conf/container-health.conf; 644; root; root /opt/microsoft/omsagent/plugin/CAdvisorMetricsAPIClient.rb; source/code/plugin/CAdvisorMetricsAPIClient.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_perf.rb; source/code/plugin/in_kube_perf.rb; 644; root; root @@ -119,18 +118,20 @@ MAINTAINER: 'Microsoft Corporation' /opt/ConfigParseErrorLogger.rb; installer/scripts/ConfigParseErrorLogger.rb; 755; root; root - +/opt/microsoft/omsagent/plugin/filter_cadvisor_health_container.rb; source/code/plugin/filter_cadvisor_health_container.rb; 644; root; root /opt/microsoft/omsagent/plugin/filter_cadvisor_health_node.rb; source/code/plugin/filter_cadvisor_health_node.rb; 644; root; root /opt/microsoft/omsagent/plugin/filter_health_model_builder.rb; source/code/plugin/filter_health_model_builder.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_health.rb; source/code/plugin/in_kube_health.rb; 644; root; root +/opt/microsoft/omsagent/plugin/out_health_forward.rb; 
source/code/plugin/out_health_forward.rb; 644; root; root /etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json; installer/conf/healthmonitorconfig.json; 644; root; root /etc/opt/microsoft/docker-cimprov/health/health_model_definition.json; installer/conf/health_model_definition.json; 644; root; root - /opt/microsoft/omsagent/plugin/health/aggregate_monitor.rb; source/code/plugin/health/aggregate_monitor.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/agg_monitor_id_labels.rb; source/code/plugin/health/agg_monitor_id_labels.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/agg_monitor_id_labels.rb; source/code/plugin/health/agg_monitor_id_labels.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/aggregate_monitor_state_finalizer.rb; source/code/plugin/health/aggregate_monitor_state_finalizer.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/cluster_health_state.rb; source/code/plugin/health/cluster_health_state.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_container_cpu_memory_aggregator.rb; source/code/plugin/health/health_container_cpu_memory_aggregator.rb; 644; root; root +/opt/microsoft/omsagent/plugin/health/health_container_cpu_memory_record_formatter.rb; source/code/plugin/health/health_container_cpu_memory_record_formatter.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_hierarchy_builder.rb; source/code/plugin/health/health_hierarchy_builder.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_kubernetes_resources.rb; source/code/plugin/health/health_kubernetes_resources.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_kube_api_down_handler.rb; source/code/plugin/health/health_kube_api_down_handler.rb; 644; root; root @@ -259,9 +260,6 @@ chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/fluent_forward_fai mv /etc/opt/microsoft/docker-cimprov/container.conf /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf chown 
omsagent:omsagent /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf -mv /etc/opt/microsoft/docker-cimprov/container-health.conf /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container-health.conf -chown omsagent:omsagent /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container-health.conf - %Postuninstall_10 # If we're an upgrade, skip all of this cleanup if ${{PERFORMING_UPGRADE_NOT}}; then @@ -273,7 +271,6 @@ if ${{PERFORMING_UPGRADE_NOT}}; then rm -f /var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt rm -f /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt rm -f /etc/opt/microsoft/omsagent/conf/omsagent.d/container.conf - rm -f /etc/opt/microsoft/omsagent/conf/omsagent.d/container-health.conf rmdir /var/opt/microsoft/docker-cimprov/log 2> /dev/null rmdir /var/opt/microsoft/docker-cimprov/state/ContainerInventory 2> /dev/null rmdir /var/opt/microsoft/docker-cimprov/state/ImageInventory 2> /dev/null diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index 523f8c307..cd16cbf9b 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -2,10 +2,8 @@ require_relative "tomlrb" require_relative "ConfigParseErrorLogger" -require "json" -@log_settings_config_map_mount_path = "/etc/config/settings/log-data-collection-settings" -@agent_settings_config_map_mount_path = "/etc/config/settings/agent-settings" +@configMapMountPath = "/etc/config/settings/log-data-collection-settings" @configVersion = "" @configSchemaVersion = "" # Setting default values which will be used in case they are not set in the configmap or if configmap doesnt exist @@ -17,19 +15,18 @@ @logTailPath = "/var/log/containers/*.log" @logExclusionRegexPattern = "(^((?!stdout|stderr).)*$)" @excludePath = "*.csv2" #some invalid path -@enable_health_model = false # Use parser to parse the configmap toml file to a ruby structure -def parseConfigMap(path) +def parseConfigMap begin # Check to see if config map is created - 
if (File.file?(path)) - puts "config::configmap container-azm-ms-agentconfig for settings mounted, parsing values from #{path}" - parsedConfig = Tomlrb.load_file(path, symbolize_keys: true) - puts "config::Successfully parsed mounted config map from #{path}" + if (File.file?(@configMapMountPath)) + puts "config::configmap container-azm-ms-agentconfig for settings mounted, parsing values" + parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true) + puts "config::Successfully parsed mounted config map" return parsedConfig else - puts "config::configmap container-azm-ms-agentconfig for settings not mounted, using defaults for #{path}" + puts "config::configmap container-azm-ms-agentconfig for settings not mounted, using defaults" @excludePath = "*_kube-system_*.log" return nil end @@ -121,40 +118,19 @@ def populateSettingValuesFromConfigMap(parsedConfig) ConfigParseErrorLogger.logError("Exception while reading config map settings for cluster level environment variable collection - #{errorStr}, using defaults, please check config map for errors") end end - - begin - if !parsedConfig.nil? && !parsedConfig[:agent_settings].nil? && !parsedConfig[:agent_settings][:health_model].nil? && !parsedConfig[:agent_settings][:health_model][:enabled].nil? - @enable_health_model = parsedConfig[:agent_settings][:health_model][:enabled] - else - @enable_health_model = false - end - puts "enable_health_model = #{@enable_health_model}" - rescue => errorStr - ConfigParseErrorLogger.logError("Exception while reading config map settings for health_model enabled setting - #{errorStr}, using defaults, please check config map for errors") - @enable_health_model = false - end end @configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] puts "****************Start Config Processing********************" - if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? 
&& @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it - configMapSettings = {} - - #iterate over every *settings file and build a hash of settings - Dir["/etc/config/settings/*settings"].each { |file| - puts "Parsing File #{file}" - settings = parseConfigMap(file) - if !settings.nil? - configMapSettings = configMapSettings.merge(settings) - end - } - + configMapSettings = parseConfigMap if !configMapSettings.nil? populateSettingValuesFromConfigMap(configMapSettings) end else - ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version") + if (File.file?(@configMapMountPath)) + ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version") + end @excludePath = "*_kube-system_*.log" end @@ -180,8 +156,6 @@ def populateSettingValuesFromConfigMap(parsedConfig) file.write("export AZMON_STDERR_EXCLUDED_NAMESPACES=#{@stderrExcludeNamespaces}\n") file.write("export AZMON_CLUSTER_COLLECT_ENV_VAR=#{@collectClusterEnvVariables}\n") file.write("export AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH=#{@excludePath}\n") - #health_model settings - file.write("export AZMON_CLUSTER_ENABLE_HEALTH_MODEL=#{@enable_health_model}\n") # Close file after writing all environment variables file.close puts "Both stdout & stderr log collection are turned off for namespaces: '#{@excludePath}' " @@ -189,4 +163,4 @@ def populateSettingValuesFromConfigMap(parsedConfig) else puts "Exception while opening file for writing config environment variables" puts "****************End Config Processing********************" -end +end \ No newline at end of file diff --git a/source/code/plugin/filter_cadvisor_health_container.rb b/source/code/plugin/filter_cadvisor_health_container.rb index 4090092a9..2eccd125f 100644 --- 
a/source/code/plugin/filter_cadvisor_health_container.rb +++ b/source/code/plugin/filter_cadvisor_health_container.rb @@ -5,66 +5,57 @@ module Fluent require 'logger' require 'json' require_relative 'oms_common' - require_relative 'HealthMonitorUtils' - require_relative 'HealthMonitorState' require_relative "ApplicationInsightsUtility" + Dir[File.join(__dir__, './health', '*.rb')].each { |file| require file } class CAdvisor2ContainerHealthFilter < Filter + include HealthModel Fluent::Plugin.register_filter('filter_cadvisor_health_container', self) config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/health_monitors.log' config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryRssBytes' config_param :container_resource_refresh_interval_minutes, :integer, :default => 5 - @@object_name_k8s_node = 'K8SNode' @@object_name_k8s_container = 'K8SContainer' - @@counter_name_cpu = 'cpuusagenanocores' @@counter_name_memory_rss = 'memoryrssbytes' - @@health_monitor_config = {} - - @@hostName = (OMS::Common.get_hostname) - @@clusterName = KubernetesApiClient.getClusterName - @@clusterId = KubernetesApiClient.getClusterId - @@clusterRegion = KubernetesApiClient.getClusterRegion - @@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled - def initialize - super - @cpu_capacity = 0.0 - @memory_capacity = 0.0 - @last_resource_refresh = DateTime.now.to_time.to_i - @metrics_to_collect_hash = {} + begin + super + @metrics_to_collect_hash = {} + @formatter = HealthContainerCpuMemoryRecordFormatter.new + rescue => e + @log.info "Error in filter_cadvisor_health_container initialize #{e.backtrace}" + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + end end def configure(conf) - super - @log = HealthMonitorUtils.getLogHandle - @log.debug {'Starting filter_cadvisor2health plugin'} + begin + super + @log = HealthMonitorUtils.get_log_handle + @log.debug {'Starting 
filter_cadvisor2health plugin'} + rescue => e + @log.info "Error in filter_cadvisor_health_container configure #{e.backtrace}" + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + end end def start - super - @metrics_to_collect_hash = HealthMonitorUtils.build_metrics_hash(@metrics_to_collect) - @log.debug "Calling ensure_cpu_memory_capacity_set cpu_capacity #{@cpu_capacity} memory_capacity #{@memory_capacity}" - node_capacity = HealthMonitorUtils.ensure_cpu_memory_capacity_set(@@hm_log, @cpu_capacity, @memory_capacity, @@hostName) - @cpu_capacity = node_capacity[0] - @memory_capacity = node_capacity[1] - @log.info "CPU Capacity #{@cpu_capacity} Memory Capacity #{@memory_capacity}" - #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @@hostName) - @@health_monitor_config = HealthMonitorUtils.getHealthMonitorConfig - ApplicationInsightsUtility.sendCustomEvent("filter_cadvisor_health Plugin Start", {}) + begin + super + @metrics_to_collect_hash = HealthMonitorUtils.build_metrics_hash(@metrics_to_collect) + ApplicationInsightsUtility.sendCustomEvent("filter_cadvisor_health_container Plugin Start", {}) + rescue => e + @log.info "Error in filter_cadvisor_health_container start #{e.backtrace}" + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + end end def filter_stream(tag, es) - if !@@cluster_health_model_enabled - @log.info "Cluster Health Model disabled in filter_cadvisor_health_container" - return [] - end new_es = MultiEventStream.new - #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @hostName) records_count = 0 es.each { |time, record| begin @@ -74,10 +65,11 @@ def filter_stream(tag, es) records_count += 1 end rescue => e - router.emit_error_event(tag, time, record, e) + @log.info "Error in filter_cadvisor_health_container filter_stream #{e.backtrace}" + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) end } - @log.debug "Filter Records Count 
#{records_count}" + @log.debug "filter_cadvisor_health_container Records Count #{records_count}" new_es end @@ -88,176 +80,19 @@ def filter(tag, time, record) end object_name = record['DataItems'][0]['ObjectName'] counter_name = record['DataItems'][0]['Collections'][0]['CounterName'].downcase - if @metrics_to_collect_hash.key?(counter_name.downcase) - metric_value = record['DataItems'][0]['Collections'][0]['Value'] - case object_name - when @@object_name_k8s_container - case counter_name.downcase - when @@counter_name_cpu - # @log.debug "Object Name #{object_name}" - # @log.debug "Counter Name #{counter_name}" - # @log.debug "Metric Value #{metric_value}" - #return process_container_cpu_record(record, metric_value) - when @@counter_name_memory_rss - #return process_container_memory_record(record, metric_value) - end - when @@object_name_k8s_node - case counter_name.downcase - when @@counter_name_cpu - #process_node_cpu_record(record, metric_value) - when @@counter_name_memory_rss - #process_node_memory_record(record, metric_value) - end + if @metrics_to_collect_hash.key?(counter_name) + if object_name == @@object_name_k8s_container + return @formatter.get_record_from_cadvisor_record(record) end end + return nil rescue => e @log.debug "Error in filter #{e}" @log.debug "record #{record}" @log.debug "backtrace #{e.backtrace}" - ApplicationInsightsUtility.sendExceptionTelemetry(e) - return nil - end - end - - def process_container_cpu_record(record, metric_value) - monitor_id = HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID - @log.debug "processing container cpu record" - if record.nil? - return nil - else - instance_name = record['DataItems'][0]['InstanceName'] - key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) - container_metadata = HealthMonitorUtils.getContainerMetadata(key) - if !container_metadata.nil? - cpu_limit = container_metadata['cpuLimit'] - end - - if cpu_limit.to_s.empty? 
- #@log.info "CPU Limit is nil" - cpu_limit = @cpu_capacity - end - - #@log.info "cpu limit #{cpu_limit}" - - percent = (metric_value.to_f/cpu_limit*100).round(2) - #@log.debug "Container #{key} | Percentage of CPU limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID]) - #@log.debug "Computed State : #{state}" - timestamp = record['DataItems'][0]['Timestamp'] - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} - #@log.info health_monitor_record - - monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(@log, monitor_id, [@@clusterId, @@hostName, key]) - #@log.info "Monitor Instance Id: #{monitor_instance_id}" - temp = record.nil? ? "Nil" : record["MonitorInstanceId"] - @log.info "Processed Container CPU #{temp}" - return record - end - return nil - end - - def process_container_memory_record(record, metric_value) - monitor_id = HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID - #@log.debug "processing container memory record" - if record.nil? + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) return nil - else - instance_name = record['DataItems'][0]['InstanceName'] - key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) - container_metadata = HealthMonitorUtils.getContainerMetadata(key) - if !container_metadata.nil? - memory_limit = container_metadata['memoryLimit'] - end - - if memory_limit.to_s.empty? 
- #@log.info "Memory Limit is nil" - memory_limit = @memory_capacity - end - - #@log.info "memory limit #{memory_limit}" - - percent = (metric_value.to_f/memory_limit*100).round(2) - #@log.debug "Container #{key} | Percentage of Memory limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID]) - #@log.debug "Computed State : #{state}" - timestamp = record['DataItems'][0]['Timestamp'] - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} - #@log.info health_monitor_record - - monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(@log, monitor_id, [@@clusterId, @@hostName, key]) - #@log.info "Monitor Instance Id: #{monitor_instance_id}" - temp = record.nil? ? "Nil" : record["MonitorInstanceId"] - @log.info "Processed Container Memory #{temp}" - return record - end - return nil - end - - def process_node_cpu_record(record, metric_value) - monitor_id = HealthMonitorConstants::NODE_CPU_MONITOR_ID - #@log.debug "processing node cpu record" - if record.nil? 
- return nil - else - instance_name = record['DataItems'][0]['InstanceName'] - #@log.info "CPU capacity #{@cpu_capacity}" - - percent = (metric_value.to_f/@cpu_capacity*100).round(2) - #@log.debug "Percentage of CPU limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::NODE_CPU_MONITOR_ID]) - #@log.debug "Computed State : #{state}" - timestamp = record['DataItems'][0]['Timestamp'] - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} - - monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(@log, monitor_id, [@@clusterId, @@hostName]) - # record = HealthMonitorSignalReducer.reduceSignal(@log, monitor_id, monitor_instance_id, @@health_monitor_config[monitor_id], node_name: @@hostName) - # temp = record.nil? ? "Nil" : record["MonitorInstanceId"] - health_record = {} - time_now = Time.now.utc.iso8601 - health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id - health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id - health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now - health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now - health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName - @log.info "Processed Node CPU" - return health_record - end - return nil - end - - def process_node_memory_record(record, metric_value) - monitor_id = HealthMonitorConstants::NODE_MEMORY_MONITOR_ID - #@log.debug "processing node memory record" - if record.nil? 
- return nil - else - instance_name = record['DataItems'][0]['InstanceName'] - #@log.info "Memory capacity #{@memory_capacity}" - - percent = (metric_value.to_f/@memory_capacity*100).round(2) - #@log.debug "Percentage of Memory limit: #{percent}" - state = HealthMonitorState.computeHealthMonitorState(@log, monitor_id, percent, @@health_monitor_config[HealthMonitorConstants::NODE_MEMORY_MONITOR_ID]) - #@log.debug "Computed State : #{state}" - timestamp = record['DataItems'][0]['Timestamp'] - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} - #@log.info health_monitor_record - - monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName]) - #@log.info "Monitor Instance Id: #{monitor_instance_id}" - # temp = record.nil? ? "Nil" : record["MonitorInstanceId"] - health_record = {} - time_now = Time.now.utc.iso8601 - health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id - health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id - health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now - health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now - health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName - @log.info "Processed Node Memory" - return health_record end - return nil end end end diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb index faa574993..d2f735cd1 100644 --- a/source/code/plugin/filter_cadvisor_health_node.rb +++ b/source/code/plugin/filter_cadvisor_health_node.rb @@ -30,13 +30,10 @@ class CAdvisor2NodeHealthFilter < Filter @@clusterName = KubernetesApiClient.getClusterName @@clusterId = KubernetesApiClient.getClusterId @@clusterRegion = KubernetesApiClient.getClusterRegion - 
@@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled def initialize begin super - @cpu_capacity = 0.0 - @memory_capacity = 0.0 @last_resource_refresh = DateTime.now.to_time.to_i @metrics_to_collect_hash = {} @resources = HealthKubernetesResources.instance # this doesnt require node and pod inventory. So no need to populate them @@ -59,6 +56,8 @@ def configure(conf) def start begin super + @cpu_capacity = 1.0 #avoid divide by zero error in case of network issues accessing kube-api + @memory_capacity = 1.0 @metrics_to_collect_hash = HealthMonitorUtils.build_metrics_hash(@metrics_to_collect) @log.debug "Calling ensure_cpu_memory_capacity_set cpu_capacity #{@cpu_capacity} memory_capacity #{@memory_capacity}" node_capacity = HealthMonitorUtils.ensure_cpu_memory_capacity_set(@@hm_log, @cpu_capacity, @memory_capacity, @@hostName) @@ -73,27 +72,26 @@ def start end def filter_stream(tag, es) - if !@@cluster_health_model_enabled - @log.info "Cluster Health Model disabled in filter_cadvisor_health_node" - return MultiEventStream.new - end - new_es = MultiEventStream.new - #HealthMonitorUtils.refresh_kubernetes_api_data(@log, @hostName) - records_count = 0 - es.each { |time, record| - begin + begin + node_capacity = HealthMonitorUtils.ensure_cpu_memory_capacity_set(@@hm_log, @cpu_capacity, @memory_capacity, @@hostName) + @cpu_capacity = node_capacity[0] + @memory_capacity = node_capacity[1] + new_es = MultiEventStream.new + records_count = 0 + es.each { |time, record| filtered_record = filter(tag, time, record) if !filtered_record.nil? 
new_es.add(time, filtered_record) records_count += 1 end - rescue => e - @log.info "Error in filter_stream for filter_cadvisor_health_node #{e.message}" + } + @log.debug "Filter Records Count #{records_count}" + return new_es + rescue => e + @log.info "Error in filter_cadvisor_health_node filter_stream #{e.backtrace}" ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) - end - } - @log.debug "Filter Records Count #{records_count}" - new_es + return MultiEventStream.new + end end def filter(tag, time, record) @@ -101,21 +99,12 @@ def filter(tag, time, record) if record.key?("MonitorLabels") return record end + object_name = record['DataItems'][0]['ObjectName'] counter_name = record['DataItems'][0]['Collections'][0]['CounterName'].downcase if @metrics_to_collect_hash.key?(counter_name.downcase) metric_value = record['DataItems'][0]['Collections'][0]['Value'] case object_name - when @@object_name_k8s_container - case counter_name.downcase - when @@counter_name_cpu - # @log.debug "Object Name #{object_name}" - # @log.debug "Counter Name #{counter_name}" - # @log.debug "Metric Value #{metric_value}" - #return process_container_cpu_record(record, metric_value) - when @@counter_name_memory_rss - #return process_container_memory_record(record, metric_value) - end when @@object_name_k8s_node case counter_name.downcase when @@counter_name_cpu @@ -134,82 +123,8 @@ def filter(tag, time, record) end end - def process_container_cpu_record(record, metric_value) - monitor_id = HealthMonitorConstants::CONTAINER_CPU_MONITOR_ID - @log.debug "processing container cpu record" - if record.nil? - return nil - else - instance_name = record['DataItems'][0]['InstanceName'] - key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) - container_metadata = HealthMonitorUtils.getContainerMetadata(key) - if !container_metadata.nil? - cpu_limit = container_metadata['cpuLimit'] - end - - if cpu_limit.to_s.empty? 
- #@log.info "CPU Limit is nil" - cpu_limit = @cpu_capacity - end - - #@log.info "cpu limit #{cpu_limit}" - - percent = (metric_value.to_f/cpu_limit*100).round(2) - #@log.debug "Container #{key} | Percentage of CPU limit: #{percent}" - state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(monitor_id)) - #@log.debug "Computed State : #{state}" - timestamp = record['DataItems'][0]['Timestamp'] - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} - #@log.info health_monitor_record - - monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName, key]) - #@log.info "Monitor Instance Id: #{monitor_instance_id}" - temp = record.nil? ? "Nil" : record["MonitorInstanceId"] - @log.info "Processed Container CPU #{temp}" - return record - end - return nil - end - - def process_container_memory_record(record, metric_value) - monitor_id = HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID - #@log.debug "processing container memory record" - if record.nil? - return nil - else - instance_name = record['DataItems'][0]['InstanceName'] - key = HealthMonitorUtils.getContainerKeyFromInstanceName(instance_name) - container_metadata = HealthMonitorUtils.getContainerMetadata(key) - if !container_metadata.nil? - memory_limit = container_metadata['memoryLimit'] - end - - if memory_limit.to_s.empty? 
- #@log.info "Memory Limit is nil" - memory_limit = @memory_capacity - end - - #@log.info "memory limit #{memory_limit}" - - percent = (metric_value.to_f/memory_limit*100).round(2) - #@log.debug "Container #{key} | Percentage of Memory limit: #{percent}" - state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID)) - #@log.debug "Computed State : #{state}" - timestamp = record['DataItems'][0]['Timestamp'] - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} - #@log.info health_monitor_record - - monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName, key]) - #@log.info "Monitor Instance Id: #{monitor_instance_id}" - temp = record.nil? ? "Nil" : record["MonitorInstanceId"] - @log.info "Processed Container Memory #{temp}" - return record - end - return nil - end - def process_node_cpu_record(record, metric_value) - monitor_id = HealthMonitorConstants::NODE_CPU_MONITOR_ID + monitor_id = MonitorId::NODE_CPU_MONITOR_ID #@log.debug "processing node cpu record" if record.nil? 
return nil @@ -219,7 +134,7 @@ def process_node_cpu_record(record, metric_value) percent = (metric_value.to_f/@cpu_capacity*100).round(2) #@log.debug "Percentage of CPU limit: #{percent}" - state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(HealthMonitorConstants::NODE_CPU_MONITOR_ID)) + state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(MonitorId::NODE_CPU_MONITOR_ID)) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} @@ -231,7 +146,7 @@ def process_node_cpu_record(record, metric_value) health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName @log.info "Processed Node CPU" @@ -241,7 +156,7 @@ def process_node_cpu_record(record, metric_value) end def process_node_memory_record(record, metric_value) - monitor_id = HealthMonitorConstants::NODE_MEMORY_MONITOR_ID + monitor_id = MonitorId::NODE_MEMORY_MONITOR_ID #@log.debug "processing node memory record" if record.nil? 
return nil @@ -251,7 +166,7 @@ def process_node_memory_record(record, metric_value) percent = (metric_value.to_f/@memory_capacity*100).round(2) #@log.debug "Percentage of Memory limit: #{percent}" - state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(HealthMonitorConstants::NODE_MEMORY_MONITOR_ID)) + state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(MonitorId::NODE_MEMORY_MONITOR_ID)) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"memoryRssBytes" => metric_value.to_f, "memoryUtilizationPercentage" => percent}} @@ -263,7 +178,7 @@ def process_node_memory_record(record, metric_value) health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now health_record[HealthMonitorRecordFields::NODE_NAME] = @@hostName @log.info "Processed Node Memory" diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 39452cb7e..5aa7f610e 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -19,11 +19,10 @@ class FilterHealthModelBuilder < Filter attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_finalizers, :monitor_set, :model_builder, :hierarchy_builder, :resources, :kube_api_down_handler, :provider, :reducer, :state, :generator include HealthModel - @@rewrite_tag = 'oms.api.KubeHealth.AgentCollectionTime' + @@rewrite_tag = 'kubehealth.Signals' @@cluster_id = 
KubernetesApiClient.getClusterId @@token_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/token" @@cert_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - @@cluster_health_model_enabled = HealthMonitorUtils.is_cluster_health_model_enabled def initialize begin @@ -49,6 +48,7 @@ def initialize @state.initialize_state(deserialized_state_info) @cluster_old_state = 'none' @cluster_new_state = 'none' + @container_cpu_memory_records = [] rescue => e ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) end @@ -77,31 +77,45 @@ def shutdown def filter_stream(tag, es) begin - if !@@cluster_health_model_enabled - @log.info "Cluster Health Model disabled in filter_health_model_builder" - return [] - end new_es = MultiEventStream.new time = Time.now - if tag.start_with?("kubehealth.DaemonSet") - records = [] + if tag.start_with?("kubehealth.DaemonSet.Node") + node_records = [] + if !es.nil? + es.each{|time, record| + node_records.push(record) + } + @buffer.add_to_buffer(node_records) + end + return MultiEventStream.new + elsif tag.start_with?("kubehealth.DaemonSet.Container") + container_records = [] if !es.nil? 
es.each{|time, record| - records.push(record) + container_records.push(record) } - @buffer.add_to_buffer(records) end - return [] + container_records_aggregator = HealthContainerCpuMemoryAggregator.new(@resources, @provider) + deduped_records = container_records_aggregator.dedupe_records(container_records) + @container_cpu_memory_records.push(*deduped_records) # push the records for aggregation later + return MultiEventStream.new elsif tag.start_with?("kubehealth.ReplicaSet") - @log.info "TAG #{tag}" records = [] es.each{|time, record| records.push(record) } @buffer.add_to_buffer(records) + + container_records_aggregator = HealthContainerCpuMemoryAggregator.new(@resources, @provider) + container_records_aggregator.aggregate(@container_cpu_memory_records) + container_records_aggregator.compute_state + aggregated_container_records = container_records_aggregator.get_records + @buffer.add_to_buffer(aggregated_container_records) + records_to_process = @buffer.get_buffer @buffer.reset_buffer + @container_cpu_memory_records = [] health_monitor_records = [] records_to_process.each do |record| @@ -117,7 +131,6 @@ def filter_stream(tag, es) @provider.get_config(monitor_id), record[HealthMonitorRecordFields::DETAILS] ) - health_monitor_records.push(health_monitor_record) #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" end @@ -159,6 +172,8 @@ def filter_stream(tag, es) @log.info "after Adding missing signals all_records.size #{all_records.size}" + HealthMonitorHelpers.add_agentpool_node_label_if_not_present(all_records) + # build the health model @model_builder.process_records(all_records) all_monitors = @model_builder.finalize_model @@ -185,23 +200,36 @@ def filter_stream(tag, es) @log.info "after optimizing health signals all_monitors.size #{all_monitors.size}" + current_time = Time.now + emit_time = current_time.to_f # for each key in monitor.keys, # get the state from health_monitor_state # generate the 
record to send all_monitors.keys.each{|key| record = @provider.get_record(all_monitors[key], state) - if record[HealthMonitorRecordFields::MONITOR_ID] == MonitorId::CLUSTER && all_monitors.size > 1 - old_state = record[HealthMonitorRecordFields::OLD_STATE] - new_state = record[HealthMonitorRecordFields::NEW_STATE] - if old_state != new_state && @cluster_old_state != old_state && @cluster_new_state != new_state - ApplicationInsightsUtility.sendCustomEvent("HealthModel_ClusterStateChanged",{"old_state" => old_state , "new_state" => new_state, "monitor_count" => all_monitors.size}) - @log.info "sent telemetry for cluster state change from #{record['OldState']} to #{record['NewState']}" - @cluster_old_state = old_state - @cluster_new_state = new_state + if record[HealthMonitorRecordFields::MONITOR_ID] == MonitorId::CLUSTER + if !record[HealthMonitorRecordFields::DETAILS].nil? + details = JSON.parse(record[HealthMonitorRecordFields::DETAILS]) + details[HealthMonitorRecordFields::HEALTH_MODEL_DEFINITION_VERSION] = "#{ENV['HEALTH_MODEL_DEFINITION_VERSION']}" + record[HealthMonitorRecordFields::DETAILS] = details.to_json + end + if all_monitors.size > 1 + old_state = record[HealthMonitorRecordFields::OLD_STATE] + new_state = record[HealthMonitorRecordFields::NEW_STATE] + if old_state != new_state && @cluster_old_state != old_state && @cluster_new_state != new_state + ApplicationInsightsUtility.sendCustomEvent("HealthModel_ClusterStateChanged",{"old_state" => old_state , "new_state" => new_state, "monitor_count" => all_monitors.size}) + @log.info "sent telemetry for cluster state change from #{record['OldState']} to #{record['NewState']}" + @cluster_old_state = old_state + @cluster_new_state = new_state + end end end - #@log.info "#{record["Details"]} #{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" - new_es.add(time, record) + record_wrapper = { + "DataType" => "KUBE_HEALTH_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [record.each { 
|k, v| record[k] = v }], + } + new_es.add(emit_time, record_wrapper) } #emit the stream @@ -215,8 +243,8 @@ def filter_stream(tag, es) @cluster_health_state.update_state(@state.to_h) # return an empty event stream, else the match will throw a NoMethodError - return [] - elsif tag.start_with?("oms.api.KubeHealth.AgentCollectionTime") + return MultiEventStream.new + elsif tag.start_with?("kubehealth.Signals") # this filter also acts as a pass through as we are rewriting the tag and emitting to the fluent stream es else diff --git a/source/code/plugin/health/agg_monitor_id_labels.rb b/source/code/plugin/health/agg_monitor_id_labels.rb index 86a3381cd..bb016adb4 100644 --- a/source/code/plugin/health/agg_monitor_id_labels.rb +++ b/source/code/plugin/health/agg_monitor_id_labels.rb @@ -1,5 +1,3 @@ -require_relative 'health_model_constants' - module HealthModel class AggregateMonitorInstanceIdLabels @@id_labels_mapping = { @@ -8,12 +6,9 @@ class AggregateMonitorInstanceIdLabels MonitorId::NODE => [HealthMonitorLabels::AGENTPOOL, HealthMonitorLabels::ROLE, HealthMonitorLabels::HOSTNAME], MonitorId::NAMESPACE => [HealthMonitorLabels::NAMESPACE], MonitorId::AGENT_NODE_POOL => [HealthMonitorLabels::AGENTPOOL], - # MonitorId::ALL_AGENT_NODE_POOLS => [], - # MonitorId::ALL_NODE_POOLS => [], - # MonitorId::ALL_NODES => [], - # MonitorId::K8S_INFRASTRUCTURE => [], - # MonitorId::CLUSTER => [], - # MonitorId::WORKLOAD => [] + MonitorId::CONTAINER => [HealthMonitorLabels::NAMESPACE, HealthMonitorLabels::WORKLOAD_NAME, HealthMonitorLabels::CONTAINER], + MonitorId::CONTAINER_CPU_MONITOR_ID => [HealthMonitorLabels::NAMESPACE, HealthMonitorLabels::WORKLOAD_NAME], + MonitorId::CONTAINER_MEMORY_MONITOR_ID => [HealthMonitorLabels::NAMESPACE, HealthMonitorLabels::WORKLOAD_NAME], } def self.get_labels_for(monitor_id) diff --git a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb new file mode 100644 
index 000000000..e98c288b3 --- /dev/null +++ b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb @@ -0,0 +1,258 @@ +require_relative 'health_model_constants' +=begin + @cpu_records/@memory_records + [ + { + "namespace_workload_container_name" : { + "limit" : limit, #number + "limit_set" : limit_set, #bool + "record_count" : record_count, #number + "workload_name": workload_name, + "workload_kind": workload_kind, + "namespace" : namespace, + "container": container, + records:[ + { + "counter_value": counter_value, + "pod_name": pod_name, + "container": container, + "state" : state + }, + { + "counter_value": counter_value, + "pod_name": pod_name, + "container": container, + "state" : state + } + ] + } + } + ] +=end +module HealthModel + # this class aggregates the records at the container level + class HealthContainerCpuMemoryAggregator + + attr_reader :pod_uid_lookup, :workload_container_count, :cpu_records, :memory_records, :provider + + @@memory_counter_name = 'memoryRssBytes' + @@cpu_counter_name = 'cpuUsageNanoCores' + def initialize(resources, provider) + @pod_uid_lookup = resources.get_pod_uid_lookup + @workload_container_count = resources.get_workload_container_count + @cpu_records = {} + @memory_records = {} + @log = HealthMonitorHelpers.get_log_handle + @provider = provider + end + + def dedupe_records(container_records) + cpu_deduped_instances = {} + memory_deduped_instances = {} + container_records = container_records.select{|record| record['CounterName'] == @@memory_counter_name || record['CounterName'] == @@cpu_counter_name} + + container_records.each do |record| + begin + instance_name = record["InstanceName"] + counter_name = record["CounterName"] + case counter_name + when @@memory_counter_name + resource_instances = memory_deduped_instances + when @@cpu_counter_name + resource_instances = cpu_deduped_instances + else + @log.info "Unexpected Counter Name #{counter_name}" + next + end + if !resource_instances.key?(instance_name) + 
resource_instances[instance_name] = record + else + r = resource_instances[instance_name] + if record["Timestamp"] > r["Timestamp"] + @log.info "Dropping older record" + resource_instances[instance_name] = record + end + end + rescue => e + @log.info "Exception when deduping record #{record}" + end + end + return cpu_deduped_instances.values.concat(memory_deduped_instances.values) + end + + def aggregate(container_records) + #filter and select only cpuUsageNanoCores and memoryRssBytes + container_records = container_records.select{|record| record['CounterName'] == @@memory_counter_name || record['CounterName'] == @@cpu_counter_name} + # poduid lookup has poduid/cname --> workload_name, namespace, cpu_limit, memory limit mapping + # from the container records, extract the poduid/cname, get the values from poduid_lookup, and aggregate based on namespace_workload_cname + container_records.each do |record| + begin + instance_name = record["InstanceName"] + lookup_key = instance_name.split('/').last(2).join('/') + if !@pod_uid_lookup.key?(lookup_key) + next + end + namespace = @pod_uid_lookup[lookup_key]['namespace'] + workload_name = @pod_uid_lookup[lookup_key]['workload_name'] + cname = lookup_key.split('/')[1] + counter_name = record["CounterName"] + case counter_name + when @@memory_counter_name + resource_hash = @memory_records + resource_type = 'memory' + when @@cpu_counter_name + resource_hash = @cpu_records + resource_type = 'cpu' + else + @log.info "Unexpected Counter Name #{counter_name}" + next + end + + # this is used as a look up from the pod_uid_lookup in kubernetes_health_resources object + resource_hash_key = "#{namespace}_#{workload_name.split('~~')[1]}_#{cname}" + + # if the resource map doesnt contain the key, add limit, count and records + if !resource_hash.key?(resource_hash_key) + resource_hash[resource_hash_key] = {} + resource_hash[resource_hash_key]["limit"] = @pod_uid_lookup[lookup_key]["#{resource_type}_limit"] + 
resource_hash[resource_hash_key]["limit_set"] = @pod_uid_lookup[lookup_key]["#{resource_type}_limit_set"] + resource_hash[resource_hash_key]["record_count"] = @workload_container_count[resource_hash_key] + resource_hash[resource_hash_key]["workload_name"] = @pod_uid_lookup[lookup_key]["workload_name"] + resource_hash[resource_hash_key]["workload_kind"] = @pod_uid_lookup[lookup_key]["workload_kind"] + resource_hash[resource_hash_key]["namespace"] = @pod_uid_lookup[lookup_key]["namespace"] + resource_hash[resource_hash_key]["container"] = @pod_uid_lookup[lookup_key]["container"] + resource_hash[resource_hash_key]["records"] = [] + end + + container_instance_record = {} + + pod_name = @pod_uid_lookup[lookup_key]["pod_name"] + #append the record to the hash + # append only if the record is not a duplicate record + container_instance_record["pod_name"] = pod_name + container_instance_record["counter_value"] = record["CounterValue"] + container_instance_record["container"] = @pod_uid_lookup[lookup_key]["container"] + container_instance_record["state"] = calculate_container_instance_state( + container_instance_record["counter_value"], + resource_hash[resource_hash_key]["limit"], + @provider.get_config(MonitorId::CONTAINER_MEMORY_MONITOR_ID)) + resource_hash[resource_hash_key]["records"].push(container_instance_record) + rescue => e + @log.info "Error in HealthContainerCpuMemoryAggregator aggregate #{e.backtrace} #{e.message} #{record}" + end + end + end + + def compute_state() + # if missing records, set state to unknown + # if limits not set, set state to warning + # if all records present, sort in descending order of metric, compute index based on StateThresholdPercentage, get the state (pass/fail/warn) based on monitor state (Using [Fail/Warn]ThresholdPercentage, and set the state) + @memory_records.each{|k,v| + calculate_monitor_state(v, @provider.get_config(MonitorId::CONTAINER_MEMORY_MONITOR_ID)) + } + + @cpu_records.each{|k,v| + calculate_monitor_state(v, 
@provider.get_config(MonitorId::CONTAINER_CPU_MONITOR_ID)) + } + + @log.info "Finished computing state" + end + + def get_records + time_now = Time.now.utc.iso8601 + container_cpu_memory_records = [] + + @cpu_records.each{|resource_key, record| + health_monitor_record = { + "timestamp" => time_now, + "state" => record["state"], + "details" => { + "cpu_limit_millicores" => record["limit"]/1000000.to_f, + "cpu_usage_instances" => record["records"].map{|r| r.each {|k,v| + k == "counter_value" ? r[k] = r[k] / 1000000.to_f : r[k] + }}, + "workload_name" => record["workload_name"], + "workload_kind" => record["workload_kind"], + "namespace" => record["namespace"], + "container" => record["container"], + "limit_set" => record["limit_set"] + } + } + + monitor_instance_id = HealthMonitorHelpers.get_monitor_instance_id(MonitorId::CONTAINER_CPU_MONITOR_ID, resource_key.split('_')) #container_cpu_utilization-namespace-workload-container + + health_record = {} + health_record[HealthMonitorRecordFields::MONITOR_ID] = MonitorId::CONTAINER_CPU_MONITOR_ID + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + container_cpu_memory_records.push(health_record) + } + + @memory_records.each{|resource_key, record| + health_monitor_record = { + "timestamp" => time_now, + "state" => record["state"], + "details" => { + "memory_limit_bytes" => record["limit"], + "memory_usage_instances" => record["records"], + "workload_name" => record["workload_name"], + "workload_kind" => record["workload_kind"], + "namespace" => record["namespace"], + "container" => record["container"] + } + } + + monitor_instance_id = HealthMonitorHelpers.get_monitor_instance_id(MonitorId::CONTAINER_MEMORY_MONITOR_ID, resource_key.split('_')) 
#container_cpu_utilization-namespace-workload-container + + health_record = {} + health_record[HealthMonitorRecordFields::MONITOR_ID] = MonitorId::CONTAINER_MEMORY_MONITOR_ID + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + container_cpu_memory_records.push(health_record) + } + return container_cpu_memory_records + end + + private + def calculate_monitor_state(v, config) + if !v['limit_set'] && v['namespace'] != 'kube-system' + v["state"] = HealthMonitorStates::WARNING + else + # sort records by descending order of metric + v["records"] = v["records"].sort_by{|record| record["counter_value"]}.reverse + size = v["records"].size + if size < v["record_count"] + unknown_count = v["record_count"] - size + for i in unknown_count.downto(1) + # it requires a lot of computation to figure out which actual pod is not sending the signal + v["records"].insert(0, {"counter_value" => -1, "container" => v["container"], "pod_name" => "???", "state" => HealthMonitorStates::UNKNOWN }) #insert -1 for unknown records + end + end + + if size == 1 + state_index = 0 + else + state_threshold = config['StateThresholdPercentage'].to_f + count = ((state_threshold*size)/100).ceil + state_index = size - count + end + v["state"] = v["records"][state_index]["state"] + end + end + + def calculate_container_instance_state(counter_value, limit, config) + percent_value = counter_value * 100 / limit + if percent_value > config['FailThresholdPercentage'] + return HealthMonitorStates::FAIL + elsif percent_value > config['WarnThresholdPercentage'] + return HealthMonitorStates::WARN + else + return HealthMonitorStates::PASS + end + end + end +end \ No newline at end of file diff --git 
a/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb b/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb new file mode 100644 index 000000000..5c7db82d9 --- /dev/null +++ b/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb @@ -0,0 +1,34 @@ +module HealthModel + class HealthContainerCpuMemoryRecordFormatter + + @@health_container_cpu_memory_record_template = '{ + "InstanceName": "%{instance_name}", + "CounterName" : "%{counter_name}", + "CounterValue" : %{metric_value}, + "Timestamp" : "%{timestamp}" + }' + def initialize + @log = HealthMonitorHelpers.get_log_handle + end + + def get_record_from_cadvisor_record(cadvisor_record) + begin + instance_name = cadvisor_record['DataItems'][0]['InstanceName'] + counter_name = cadvisor_record['DataItems'][0]['Collections'][0]['CounterName'] + metric_value = cadvisor_record['DataItems'][0]['Collections'][0]['Value'] + timestamp = cadvisor_record['DataItems'][0]['Timestamp'] + + health_container_cpu_memory_record = @@health_container_cpu_memory_record_template % { + instance_name: instance_name, + counter_name: counter_name, + metric_value: metric_value, + timestamp: timestamp + } + return JSON.parse(health_container_cpu_memory_record) + rescue => e + @log.info "Error in get_record_from_cadvisor_record #{e.message} #{e.backtrace}" + return nil + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_kube_api_down_handler.rb b/source/code/plugin/health/health_kube_api_down_handler.rb index 7f72360f8..a87c43ef1 100644 --- a/source/code/plugin/health/health_kube_api_down_handler.rb +++ b/source/code/plugin/health/health_kube_api_down_handler.rb @@ -2,11 +2,11 @@ module HealthModel class HealthKubeApiDownHandler def initialize - @@monitors_to_change = [HealthMonitorConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID, - HealthMonitorConstants::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID, - 
HealthMonitorConstants::NODE_CONDITION_MONITOR_ID, - HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID, - HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID] + @@monitors_to_change = [MonitorId::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID, + MonitorId::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID, + MonitorId::NODE_CONDITION_MONITOR_ID, + MonitorId::USER_WORKLOAD_PODS_READY_MONITOR_ID, + MonitorId::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID] end # update kube-api dependent monitors to be 'unknown' if kube-api is down or monitor is unavailable @@ -14,7 +14,7 @@ def handle_kube_api_down(health_monitor_records) health_monitor_records_map = {} health_monitor_records.map{|record| health_monitor_records_map[record.monitor_instance_id] = record} - if !health_monitor_records_map.key?(HealthMonitorConstants::KUBE_API_STATUS) || (health_monitor_records_map.key?(HealthMonitorConstants::KUBE_API_STATUS) && health_monitor_records_map[HealthMonitorConstants::KUBE_API_STATUS].state != 'pass') + if !health_monitor_records_map.key?(MonitorId::KUBE_API_STATUS) || (health_monitor_records_map.key?(MonitorId::KUBE_API_STATUS) && health_monitor_records_map[MonitorId::KUBE_API_STATUS].state != 'pass') #iterate over the map and set the state to unknown for related monitors health_monitor_records.each{|health_monitor_record| if @@monitors_to_change.include?(health_monitor_record.monitor_id) diff --git a/source/code/plugin/health/health_kubernetes_resources.rb b/source/code/plugin/health/health_kubernetes_resources.rb index 2f591722b..30a9ac7ca 100644 --- a/source/code/plugin/health/health_kubernetes_resources.rb +++ b/source/code/plugin/health/health_kubernetes_resources.rb @@ -5,8 +5,8 @@ module HealthModel class HealthKubernetesResources include Singleton - attr_accessor :node_inventory, :pod_inventory, :deployment_inventory - attr_reader :nodes, :pods, :workloads + attr_accessor :node_inventory, :pod_inventory, :deployment_inventory, :pod_uid_lookup, :workload_container_count + 
attr_reader :nodes, :pods, :workloads, :deployment_lookup def initialize @node_inventory = [] @@ -16,6 +16,9 @@ def initialize @pods = [] @workloads = [] @log = HealthMonitorHelpers.get_log_handle + @pod_uid_lookup = {} + @deployment_lookup = {} + @workload_container_count = {} end def get_node_inventory @@ -33,71 +36,255 @@ def get_nodes return @nodes end - def get_pod_inventory - return @pod_inventory - end - - def get_pods - return @pods + def set_deployment_inventory(deployments) + @deployment_inventory = deployments + @deployment_lookup = {} end def get_workload_names - @pods = [] workload_names = {} - deployment_lookup = {} - @deployment_inventory['items'].each do |deployment| - match_labels = deployment['spec']['selector']['matchLabels'].to_h - namespace = deployment['metadata']['namespace'] - match_labels.each{|k,v| - deployment_lookup["#{namespace}-#{k}=#{v}"] = "#{deployment['metadata']['namespace']}~~#{deployment['metadata']['name']}" - } + @pod_inventory['items'].each do |pod| + workload_name = get_workload_name(pod) + workload_names[workload_name] = true if workload_name end + return workload_names.keys + end + + def build_pod_uid_lookup + @workload_container_count = {} @pod_inventory['items'].each do |pod| begin - has_owner = !pod['metadata']['ownerReferences'].nil? 
- owner_kind = '' - if has_owner - owner_kind = pod['metadata']['ownerReferences'][0]['kind'] - controller_name = pod['metadata']['ownerReferences'][0]['name'] - else - owner_kind = pod['kind'] - controller_name = pod['metadata']['name'] + namespace = pod['metadata']['namespace'] + poduid = pod['metadata']['uid'] + pod_name = pod['metadata']['name'] + workload_name = get_workload_name(pod) + workload_kind = get_workload_kind(pod) + # we don't show jobs in container health + if workload_kind.casecmp('job') == 0 + next + end + pod['spec']['containers'].each do |container| + cname = container['name'] + key = "#{poduid}/#{cname}" + cpu_limit_set = true + memory_limit_set = true + begin + cpu_limit = get_numeric_value('cpu', container['resources']['limits']['cpu']) + rescue => exception + #@log.info "Exception getting container cpu limit #{container['resources']}" + cpu_limit = get_node_capacity(pod['spec']['nodeName'], 'cpu') + cpu_limit_set = false + end + begin + memory_limit = get_numeric_value('memory', container['resources']['limits']['memory']) + rescue => exception + #@log.info "Exception getting container memory limit #{container['resources']}" + memory_limit = get_node_capacity(pod['spec']['nodeName'], 'memory') + memory_limit_set = false + end + @pod_uid_lookup[key] = {"workload_kind" => workload_kind, "workload_name" => workload_name, "namespace" => namespace, "cpu_limit" => cpu_limit, "memory_limit" => memory_limit, "cpu_limit_set" => cpu_limit_set, "memory_limit_set" => memory_limit_set, "container" => cname, "pod_name" => pod_name} + container_count_key = "#{namespace}_#{workload_name.split('~~')[1]}_#{cname}" + if !@workload_container_count.key?(container_count_key) + @workload_container_count[container_count_key] = 1 + else + count = @workload_container_count[container_count_key] + @workload_container_count[container_count_key] = count + 1 + end end + rescue => e + @log.info "Error in build_pod_uid_lookup #{pod} #{e.message}" + end + end + end - 
namespace = pod['metadata']['namespace'] + def get_pod_uid_lookup + return @pod_uid_lookup + end - workload_name = '' - if owner_kind.nil? - owner_kind = 'Pod' - end - case owner_kind.downcase - when 'job' - # we are excluding jobs - next - when 'replicaset' - # get the labels, and see if there is a match. If there is, it is the deployment. If not, use replica set name/controller name - labels = pod['metadata']['labels'].to_h - labels.each {|k,v| - lookup_key = "#{namespace}-#{k}=#{v}" - if deployment_lookup.key?(lookup_key) - workload_name = deployment_lookup[lookup_key] - break - end - } - if workload_name.empty? - workload_name = "#{namespace}~~#{controller_name}" + def get_workload_container_count + return @workload_container_count + end + + private + def get_workload_name(pod) + + if @deployment_lookup.empty? + @deployment_inventory['items'].each do |deployment| + match_labels = deployment['spec']['selector']['matchLabels'].to_h + namespace = deployment['metadata']['namespace'] + match_labels.each{|k,v| + @deployment_lookup["#{namespace}-#{k}=#{v}"] = "#{deployment['metadata']['namespace']}~~#{deployment['metadata']['name']}" + } + end + end + + begin + has_owner = !pod['metadata']['ownerReferences'].nil? + owner_kind = '' + if has_owner + owner_kind = pod['metadata']['ownerReferences'][0]['kind'] + controller_name = pod['metadata']['ownerReferences'][0]['name'] + else + owner_kind = pod['kind'] + controller_name = pod['metadata']['name'] + end + namespace = pod['metadata']['namespace'] + + workload_name = '' + if owner_kind.nil? + owner_kind = 'Pod' + end + case owner_kind.downcase + when 'job' + # we are excluding jobs + return nil + when 'replicaset' + # get the labels, and see if there is a match. If there is, it is the deployment. 
If not, use replica set name/controller name + labels = pod['metadata']['labels'].to_h + labels.each {|k,v| + lookup_key = "#{namespace}-#{k}=#{v}" + if @deployment_lookup.key?(lookup_key) + workload_name = @deployment_lookup[lookup_key] + break end - when 'daemonset' + } + if workload_name.empty? workload_name = "#{namespace}~~#{controller_name}" - else - workload_name = "#{namespace}~~#{pod['metadata']['name']}" end - rescue => e - @log.info "Error when processing pod #{pod['metadata']['name']} #{e.message}" + when 'daemonset' + workload_name = "#{namespace}~~#{controller_name}" + else + workload_name = "#{namespace}~~#{pod['metadata']['name']}" end - workload_names[workload_name] = true + return workload_name + rescue => e + @log.info "Error in get_workload_name(pod) #{e.message}" + return nil + end + end + + def get_workload_kind(pod) + if @deployment_lookup.empty? + @deployment_inventory['items'].each do |deployment| + match_labels = deployment['spec']['selector']['matchLabels'].to_h + namespace = deployment['metadata']['namespace'] + match_labels.each{|k,v| + @deployment_lookup["#{namespace}-#{k}=#{v}"] = "#{deployment['metadata']['namespace']}~~#{deployment['metadata']['name']}" + } + end + end + + begin + has_owner = !pod['metadata']['ownerReferences'].nil? + owner_kind = '' + if has_owner + owner_kind = pod['metadata']['ownerReferences'][0]['kind'] + else + owner_kind = pod['kind'] + end + + if owner_kind.nil? + owner_kind = 'Pod' + end + return owner_kind + rescue => e + @log.info "Error in get_workload_kind(pod) #{e.message}" + return nil end - return workload_names.keys end + + def get_node_capacity(node_name, type) + if node_name.nil? #unscheduled pods will not have a node name + return -1 + end + begin + @node_inventory["items"].each do |node| + if (!node["status"]["capacity"].nil?) 
&& node["metadata"]["name"].casecmp(node_name.downcase) == 0 + return get_numeric_value(type, node["status"]["capacity"][type]) + end + end + rescue => e + @log.info "Error in get_node_capacity(pod, #{type}) #{e.backtrace} #{e.message}" + return -1 + end + end + + #Cannot reuse the code from KubernetesApiClient, for unit testing reasons. KubernetesApiClient has a dependency on oms_common.rb etc. + def get_numeric_value(metricName, metricVal) + metricValue = metricVal.downcase + begin + case metricName + when "memory" #convert to bytes for memory + #https://kubernetes.io/docs/tasks/configure-pod-container/assign-memory-resource/ + if (metricValue.end_with?("ki")) + metricValue.chomp!("ki") + metricValue = Float(metricValue) * 1024.0 ** 1 + elsif (metricValue.end_with?("mi")) + metricValue.chomp!("mi") + metricValue = Float(metricValue) * 1024.0 ** 2 + elsif (metricValue.end_with?("gi")) + metricValue.chomp!("gi") + metricValue = Float(metricValue) * 1024.0 ** 3 + elsif (metricValue.end_with?("ti")) + metricValue.chomp!("ti") + metricValue = Float(metricValue) * 1024.0 ** 4 + elsif (metricValue.end_with?("pi")) + metricValue.chomp!("pi") + metricValue = Float(metricValue) * 1024.0 ** 5 + elsif (metricValue.end_with?("ei")) + metricValue.chomp!("ei") + metricValue = Float(metricValue) * 1024.0 ** 6 + elsif (metricValue.end_with?("zi")) + metricValue.chomp!("zi") + metricValue = Float(metricValue) * 1024.0 ** 7 + elsif (metricValue.end_with?("yi")) + metricValue.chomp!("yi") + metricValue = Float(metricValue) * 1024.0 ** 8 + elsif (metricValue.end_with?("k")) + metricValue.chomp!("k") + metricValue = Float(metricValue) * 1000.0 ** 1 + elsif (metricValue.end_with?("m")) + metricValue.chomp!("m") + metricValue = Float(metricValue) * 1000.0 ** 2 + elsif (metricValue.end_with?("g")) + metricValue.chomp!("g") + metricValue = Float(metricValue) * 1000.0 ** 3 + elsif (metricValue.end_with?("t")) + metricValue.chomp!("t") + metricValue = Float(metricValue) * 1000.0 ** 4 + 
elsif (metricValue.end_with?("p")) + metricValue.chomp!("p") + metricValue = Float(metricValue) * 1000.0 ** 5 + elsif (metricValue.end_with?("e")) + metricValue.chomp!("e") + metricValue = Float(metricValue) * 1000.0 ** 6 + elsif (metricValue.end_with?("z")) + metricValue.chomp!("z") + metricValue = Float(metricValue) * 1000.0 ** 7 + elsif (metricValue.end_with?("y")) + metricValue.chomp!("y") + metricValue = Float(metricValue) * 1000.0 ** 8 + else #assuming there are no units specified, it is bytes (the below conversion will fail for other unsupported 'units') + metricValue = Float(metricValue) + end + when "cpu" #convert to nanocores for cpu + #https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/ + if (metricValue.end_with?("m")) + metricValue.chomp!("m") + metricValue = Float(metricValue) * 1000.0 ** 2 + else #assuming no units specified, it is cores that we are converting to nanocores (the below conversion will fail for other unsupported 'units') + metricValue = Float(metricValue) * 1000.0 ** 3 + end + else + @Log.warn("getMetricNumericValue: Unsupported metric #{metricName}. Returning 0 for metric value") + metricValue = 0 + end #case statement + rescue => error + @Log.warn("getMetricNumericValue failed: #{error} for metric #{metricName} with value #{metricVal}. 
Returning 0 formetric value") + return 0 + end + return metricValue + end + end end \ No newline at end of file diff --git a/source/code/plugin/health/health_missing_signal_generator.rb b/source/code/plugin/health/health_missing_signal_generator.rb index 419680afa..1827a0190 100644 --- a/source/code/plugin/health/health_missing_signal_generator.rb +++ b/source/code/plugin/health/health_missing_signal_generator.rb @@ -24,14 +24,14 @@ def get_missing_signals(cluster_id, health_monitor_records, health_k8s_inventory node_signals_hash = {} nodes.each{|node| - node_signals_hash[node] = [HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID] + node_signals_hash[node] = [MonitorId::NODE_MEMORY_MONITOR_ID, MonitorId::NODE_CPU_MONITOR_ID, MonitorId::NODE_CONDITION_MONITOR_ID] } log = HealthMonitorHelpers.get_log_handle log.info "last_received_records #{@last_received_records.size} nodes #{nodes}" @last_received_records.each{|monitor_instance_id, monitor| if !health_monitor_records_map.key?(monitor_instance_id) if HealthMonitorHelpers.is_node_monitor(monitor.monitor_id) - node_name = monitor.labels['kubernetes.io/hostname'] + node_name = monitor.labels[HealthMonitorLabels::HOSTNAME] new_monitor = HealthMonitorRecord.new( monitor.monitor_id, monitor.monitor_instance_id, @@ -83,7 +83,7 @@ def get_missing_signals(cluster_id, health_monitor_records, health_k8s_inventory health_monitor_records.each{|health_monitor_record| # remove signals from the list of expected signals if we see them in the list of current signals if HealthMonitorHelpers.is_node_monitor(health_monitor_record.monitor_id) - node_name = health_monitor_record.labels['kubernetes.io/hostname'] + node_name = health_monitor_record.labels[HealthMonitorLabels::HOSTNAME] if node_signals_hash.key?(node_name) signals = node_signals_hash[node_name] signals.delete(health_monitor_record.monitor_id) @@ -111,7 +111,7 @@ def 
get_missing_signals(cluster_id, health_monitor_records, health_k8s_inventory {"timestamp" => Time.now.utc.iso8601, "state" => HealthMonitorStates::UNKNOWN, "details" => "no signal received from node #{node}"} ) missing_signals_map[monitor_instance_id] = new_monitor - log.info "Added missing signal when node_signals_hash was not empty #{new_monitor.monitor_instance_id} #{new_monitor.state}" + log.info "Added missing signal when node_signals_hash was not empty #{new_monitor.monitor_instance_id} #{new_monitor.state} #{new_monitor.labels.keys}" } } end diff --git a/source/code/plugin/health/health_model_constants.rb b/source/code/plugin/health/health_model_constants.rb index 82ae569f3..0922c7ff2 100644 --- a/source/code/plugin/health/health_model_constants.rb +++ b/source/code/plugin/health/health_model_constants.rb @@ -2,80 +2,80 @@ module HealthModel class MonitorState CRITICAL = "fail" ERROR = "err" - WARNING = "warn" - NONE = "none" HEALTHY = "pass" + NONE = "none" UNKNOWN = "unknown" + WARNING = "warn" end class AggregationAlgorithm - WORSTOF = "worstOf" PERCENTAGE = "percentage" + WORSTOF = "worstOf" end class MonitorId - CLUSTER = 'cluster'; - ALL_NODES = 'all_nodes'; - K8S_INFRASTRUCTURE = 'k8s_infrastructure' - - NODE = 'node'; AGENT_NODE_POOL = 'agent_node_pool' - MASTER_NODE_POOL = 'master_node_pool' ALL_AGENT_NODE_POOLS = 'all_agent_node_pools' - ALL_NODE_POOLS = 'all_node_pools'; - - WORKLOAD = 'all_workloads'; - CAPACITY = 'capacity'; - - USER_WORKLOAD = 'user_workload'; - SYSTEM_WORKLOAD = 'system_workload' + ALL_NODE_POOLS = 'all_node_pools' + ALL_NODES = 'all_nodes' + CAPACITY = 'capacity' + CLUSTER = 'cluster' + CONTAINER = 'container' + CONTAINER_CPU_MONITOR_ID = "container_cpu_utilization" + CONTAINER_MEMORY_MONITOR_ID = "container_memory_utilization" + K8S_INFRASTRUCTURE = 'k8s_infrastructure' + KUBE_API_STATUS = "kube_api_status" + MASTER_NODE_POOL = 'master_node_pool' NAMESPACE = 'namespace'; + NODE = 'node'; + NODE_CONDITION_MONITOR_ID = 
"node_condition" + NODE_CPU_MONITOR_ID = "node_cpu_utilization" + NODE_MEMORY_MONITOR_ID = "node_memory_utilization" + SYSTEM_WORKLOAD = 'system_workload' + SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID = "system_workload_pods_ready" + USER_WORKLOAD = 'user_workload'; + USER_WORKLOAD_PODS_READY_MONITOR_ID = "user_workload_pods_ready" + WORKLOAD = 'all_workloads'; + WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID = "container_cpu_utilization" + WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID = "container_memory_utilization" + WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID = "subscribed_capacity_cpu" + WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID = "subscribed_capacity_memory" end class HealthMonitorRecordFields CLUSTER_ID = "ClusterId" - MONITOR_ID = "MonitorId" - MONITOR_INSTANCE_ID = "MonitorInstanceId" - MONITOR_LABELS = "MonitorLabels" DETAILS = "Details" + HEALTH_MODEL_DEFINITION_VERSION = "HealthModelDefinitionVersion" MONITOR_CONFIG = "MonitorConfig" - OLD_STATE = "OldState" + MONITOR_ID = "MonitorTypeId" + MONITOR_INSTANCE_ID = "MonitorInstanceId" + MONITOR_LABELS = "MonitorLabels" NEW_STATE = "NewState" - AGENT_COLLECTION_TIME = "AgentCollectionTime" - TIME_FIRST_OBSERVED = "TimeFirstObserved" NODE_NAME = "NodeName" - NAMESPACE = "Namespace" - end - - class HealthMonitorConstants - NODE_CPU_MONITOR_ID = "node_cpu_utilization" - NODE_MEMORY_MONITOR_ID = "node_memory_utilization" - CONTAINER_CPU_MONITOR_ID = "container_cpu_utilization" - CONTAINER_MEMORY_MONITOR_ID = "container_memory_utilization" - NODE_CONDITION_MONITOR_ID = "node_condition" - WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID = "subscribed_capacity_cpu" - WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID = "subscribed_capacity_memory" - WORKLOAD_CONTAINER_CPU_PERCENTAGE_MONITOR_ID = "container_cpu_utilization" - WORKLOAD_CONTAINER_MEMORY_PERCENTAGE_MONITOR_ID = "container_memory_utilization" - KUBE_API_STATUS = "kube_api_status" - USER_WORKLOAD_PODS_READY_MONITOR_ID = "user_workload_pods_ready" - 
SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID = "system_workload_pods_ready" + OLD_STATE = "OldState" + PARENT_MONITOR_INSTANCE_ID = "ParentMonitorInstanceId" + TIME_FIRST_OBSERVED = "TimeFirstObserved" + TIME_GENERATED = "TimeGenerated" end class HealthMonitorStates - PASS = "pass" FAIL = "fail" - WARNING = "warn" NONE = "none" + PASS = "pass" UNKNOWN = "unknown" + WARNING = "warn" end class HealthMonitorLabels - WORKLOAD_NAME = "container.azm.ms/workload-name" - WORKLOAD_KIND = "container.azm.ms/workload-kind" - NAMESPACE = "container.azm.ms/namespace" AGENTPOOL = "agentpool" - ROLE = "kubernetes.io/role" + CONTAINER = "container.azm.ms/container" HOSTNAME = "kubernetes.io/hostname" + NAMESPACE = "container.azm.ms/namespace" + ROLE = "kubernetes.io/role" + WORKLOAD_KIND = "container.azm.ms/workload-kind" + WORKLOAD_NAME = "container.azm.ms/workload-name" + MASTERROLE = "node-role.kubernetes.io/master" + COMPUTEROLE = "node-role.kubernetes.io/compute" + INFRAROLE = "node-role.kubernetes.io/infra" end end \ No newline at end of file diff --git a/source/code/plugin/health/health_monitor_helpers.rb b/source/code/plugin/health/health_monitor_helpers.rb index 9f0315978..4efd4c608 100644 --- a/source/code/plugin/health/health_monitor_helpers.rb +++ b/source/code/plugin/health/health_monitor_helpers.rb @@ -16,11 +16,11 @@ class HealthMonitorHelpers class << self def is_node_monitor(monitor_id) - return (monitor_id == HealthMonitorConstants::NODE_CPU_MONITOR_ID || monitor_id == HealthMonitorConstants::NODE_MEMORY_MONITOR_ID || monitor_id == HealthMonitorConstants::NODE_CONDITION_MONITOR_ID) + return (monitor_id == MonitorId::NODE_CPU_MONITOR_ID || monitor_id == MonitorId::NODE_MEMORY_MONITOR_ID || monitor_id == MonitorId::NODE_CONDITION_MONITOR_ID) end def is_pods_ready_monitor(monitor_id) - return (monitor_id == HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID || monitor_id == HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) + return (monitor_id == 
MonitorId::USER_WORKLOAD_PODS_READY_MONITOR_ID || monitor_id == MonitorId::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) end def get_log_handle @@ -31,6 +31,44 @@ def get_monitor_instance_id(monitor_id, args = []) string_to_hash = args.join("/") return "#{monitor_id}-#{Digest::MD5.hexdigest(string_to_hash)}" end + + def add_agentpool_node_label_if_not_present(records) + records.each{|record| + # continue if it is not a node monitor + if !is_node_monitor(record.monitor_id) + #@log.info "#{record.monitor_id} is not a NODE MONITOR" + next + end + labels_keys = record.labels.keys + + if labels_keys.include?(HealthMonitorLabels::AGENTPOOL) + @log.info "#{record.monitor_id} includes agentpool label. Value = #{record.labels[HealthMonitorLabels::AGENTPOOL]}" + @log.info "Labels present = #{labels_keys}" + next + else + #@log.info "#{record} does not include agentpool label." + @log.info "Labels present = #{labels_keys}" + role_name = 'unknown' + if record.labels.include?(HealthMonitorLabels::ROLE) + role_name = record.labels[HealthMonitorLabels::ROLE] + elsif record.labels.include?(HealthMonitorLabels::MASTERROLE) + if !record.labels[HealthMonitorLabels::MASTERROLE].empty? + role_name = 'master' + end + elsif record.labels.include?(HealthMonitorLabels::COMPUTEROLE) + if !record.labels[HealthMonitorLabels::COMPUTEROLE].empty? + role_name = 'compute' + end + elsif record.labels.include?(HealthMonitorLabels::INFRAROLE) + if !record.labels[HealthMonitorLabels::INFRAROLE].empty? 
+ role_name = 'infra' + end + end + @log.info "Adding agentpool label #{role_name}_node_pool for #{record.monitor_id}" + record.labels[HealthMonitorLabels::AGENTPOOL] = "#{role_name}_node_pool" + end + } + end end end diff --git a/source/code/plugin/health/health_monitor_provider.rb b/source/code/plugin/health/health_monitor_provider.rb index 60ad69d76..e75824268 100644 --- a/source/code/plugin/health/health_monitor_provider.rb +++ b/source/code/plugin/health/health_monitor_provider.rb @@ -66,8 +66,9 @@ def get_record(health_monitor_record, health_monitor_state) monitor_record[HealthMonitorRecordFields::OLD_STATE] = old_state monitor_record[HealthMonitorRecordFields::DETAILS] = details.to_json monitor_record[HealthMonitorRecordFields::MONITOR_CONFIG] = config.to_json - monitor_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = Time.now.utc.iso8601 + monitor_record[HealthMonitorRecordFields::TIME_GENERATED] = Time.now.utc.iso8601 monitor_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_first_observed + monitor_record[HealthMonitorRecordFields::PARENT_MONITOR_INSTANCE_ID] = '' return monitor_record end @@ -87,17 +88,28 @@ def get_labels(health_monitor_record) } monitor_id = health_monitor_record[HealthMonitorRecordFields::MONITOR_ID] case monitor_id - when HealthMonitorConstants::CONTAINER_CPU_MONITOR_ID, HealthMonitorConstants::CONTAINER_MEMORY_MONITOR_ID, HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID + when MonitorId::CONTAINER_CPU_MONITOR_ID, MonitorId::CONTAINER_MEMORY_MONITOR_ID, MonitorId::USER_WORKLOAD_PODS_READY_MONITOR_ID, MonitorId::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID namespace = health_monitor_record[HealthMonitorRecordFields::DETAILS]['details']['namespace'] - workload_name = health_monitor_record[HealthMonitorRecordFields::DETAILS]['details']['workloadName'] - workload_kind = 
health_monitor_record[HealthMonitorRecordFields::DETAILS]['details']['workloadKind'] + workload_name = health_monitor_record[HealthMonitorRecordFields::DETAILS]['details']['workload_name'] + workload_kind = health_monitor_record[HealthMonitorRecordFields::DETAILS]['details']['workload_kind'] monitor_labels[HealthMonitorLabels::WORKLOAD_NAME] = workload_name.split('~~')[1] monitor_labels[HealthMonitorLabels::WORKLOAD_KIND] = workload_kind monitor_labels[HealthMonitorLabels::NAMESPACE] = namespace - when HealthMonitorConstants::NODE_CPU_MONITOR_ID, HealthMonitorConstants::NODE_MEMORY_MONITOR_ID, HealthMonitorConstants::NODE_CONDITION_MONITOR_ID + # add the container name for container memory/cpu + if monitor_id == MonitorId::CONTAINER_CPU_MONITOR_ID || monitor_id == MonitorId::CONTAINER_MEMORY_MONITOR_ID + container = health_monitor_record[HealthMonitorRecordFields::DETAILS]['details']['container'] + monitor_labels[HealthMonitorLabels::CONTAINER] = container + end + + #TODO: This doesn't belong here. Move this elsewhere + health_monitor_record[HealthMonitorRecordFields::DETAILS]['details'].delete('namespace') + health_monitor_record[HealthMonitorRecordFields::DETAILS]['details'].delete('workload_name') + health_monitor_record[HealthMonitorRecordFields::DETAILS]['details'].delete('workload_kind') + + when MonitorId::NODE_CPU_MONITOR_ID, MonitorId::NODE_MEMORY_MONITOR_ID, MonitorId::NODE_CONDITION_MONITOR_ID node_name = health_monitor_record[HealthMonitorRecordFields::NODE_NAME] @health_kubernetes_resources.get_node_inventory['items'].each do |node| if !node_name.nil? && !node['metadata']['name'].nil? 
&& node_name == node['metadata']['name'] diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index e707651dc..27e9b9a6e 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -36,21 +36,31 @@ def compute_percentage_state(value, config) end fail_percentage = config['FailThresholdPercentage'].to_f - if value > fail_percentage - return HealthMonitorStates::FAIL - elsif !warn_percentage.nil? && value > warn_percentage - return HealthMonitorStates::WARNING + if !config.nil? && !config['Operator'].nil? && config['Operator'] == '<' + if value < fail_percentage + return HealthMonitorStates::FAIL + elsif !warn_percentage.nil? && value < warn_percentage + return HealthMonitorStates::WARNING + else + return HealthMonitorStates::PASS + end else - return HealthMonitorStates::PASS + if value > fail_percentage + return HealthMonitorStates::FAIL + elsif !warn_percentage.nil? 
&& value > warn_percentage + return HealthMonitorStates::WARNING + else + return HealthMonitorStates::PASS + end end end def is_node_monitor(monitor_id) - return (monitor_id == HealthMonitorConstants::NODE_CPU_MONITOR_ID || monitor_id == HealthMonitorConstants::NODE_MEMORY_MONITOR_ID || monitor_id == HealthMonitorConstants::NODE_CONDITION_MONITOR_ID) + return (monitor_id == MonitorId::NODE_CPU_MONITOR_ID || monitor_id == MonitorId::NODE_MEMORY_MONITOR_ID || monitor_id == MonitorId::NODE_CONDITION_MONITOR_ID) end def is_pods_ready_monitor(monitor_id) - return (monitor_id == HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID || monitor_id == HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) + return (monitor_id == MonitorId::USER_WORKLOAD_PODS_READY_MONITOR_ID || monitor_id == MonitorId::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) end def is_cluster_health_model_enabled @@ -136,13 +146,23 @@ def get_pods_ready_hash(pod_inventory, deployment_inventory) return pods_ready_percentage_hash end - def get_node_state_from_node_conditions(node_conditions) + def get_node_state_from_node_conditions(monitor_config, node_conditions) pass = false + failtypes = ['outofdisk', 'networkunavailable'].to_set #default fail types + if !monitor_config.nil? && !monitor_config["NodeConditionTypesForFailedState"].nil? + failtypes = monitor_config["NodeConditionTypesForFailedState"] + if !failtypes.nil? + failtypes = failtypes.split(',').map{|x| x.downcase}.map{|x| x.gsub(" ","")}.to_set + end + end + log = get_log_handle + #log.info "Fail Types #{failtypes.inspect}" node_conditions.each do |condition| type = condition['type'] status = condition['status'] - if ((type == "NetworkUnavailable" || type == "OutOfDisk") && (status == 'True' || status == 'Unknown')) + #for each condition in the configuration, check if the type is not false. 
If yes, update state to fail + if (failtypes.include?(type.downcase) && (status == 'True' || status == 'Unknown')) return "fail" elsif ((type == "DiskPressure" || type == "MemoryPressure" || type == "PIDPressure") && (status == 'True' || status == 'Unknown')) return "warn" @@ -280,11 +300,12 @@ def get_monitor_instance_id(monitor_id, args = []) def ensure_cpu_memory_capacity_set(log, cpu_capacity, memory_capacity, hostname) log.info "ensure_cpu_memory_capacity_set cpu_capacity #{cpu_capacity} memory_capacity #{memory_capacity}" - if cpu_capacity != 0.0 && memory_capacity != 0.0 + if cpu_capacity != 1.0 && memory_capacity != 1.0 log.info "CPU And Memory Capacity are already set" return [cpu_capacity, memory_capacity] end + log.info "CPU and Memory Capacity Not set" begin @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) rescue Exception => e diff --git a/source/code/plugin/health/health_signal_reducer.rb b/source/code/plugin/health/health_signal_reducer.rb index 1d520da8d..f92f24ac3 100644 --- a/source/code/plugin/health/health_signal_reducer.rb +++ b/source/code/plugin/health/health_signal_reducer.rb @@ -20,7 +20,6 @@ def reduce_signals(health_monitor_records, health_k8s_inventory) if reduced_signals_map.key?(monitor_instance_id) record = reduced_signals_map[monitor_instance_id] if health_monitor_record.transition_date_time > record.transition_date_time # always take the latest record for a monitor instance id - puts 'Duplicate Daemon Set signal' reduced_signals_map[monitor_instance_id] = health_monitor_record end elsif HealthMonitorHelpers.is_node_monitor(monitor_id) diff --git a/source/code/plugin/health/parent_monitor_provider.rb b/source/code/plugin/health/parent_monitor_provider.rb index 4577abb99..4ab6e6297 100644 --- a/source/code/plugin/health/parent_monitor_provider.rb +++ b/source/code/plugin/health/parent_monitor_provider.rb @@ -8,6 +8,7 @@ def initialize(definition) @health_model_definition = definition 
@parent_monitor_mapping = {} #monitorId --> parent_monitor_id mapping @parent_monitor_instance_mapping = {} #child monitor id -- > parent monitor instance mapping. Used in instances when the node no longer exists and impossible to compute from kube api results + @log = HealthMonitorHelpers.get_log_handle end # gets the parent monitor id given the state transition. It requires the monitor id and labels to determine the parent id @@ -35,14 +36,13 @@ def get_parent_monitor_id(monitor) op = "#{condition['operator']}" right = "#{condition['value']}" cond = left.send(op.to_sym, right) - if cond @parent_monitor_mapping[monitor.monitor_instance_id] = condition['parent_id'] return condition['parent_id'] end } end - raise "Conditions were not met to determine the parent monitor id" if monitor_id != MonitorId::CLUSTER + return @health_model_definition[monitor_id]['default_parent_monitor_id'] end else raise "Invalid Monitor Id #{monitor_id} in get_parent_monitor_id" @@ -81,6 +81,7 @@ def get_parent_monitor_instance_id(monitor_instance_id, parent_monitor_id, paren end parent_monitor_instance_id = "#{parent_monitor_id}-#{values.join('-')}" @parent_monitor_instance_mapping[monitor_instance_id] = parent_monitor_instance_id + @log.info "parent_monitor_instance_id for #{monitor_instance_id} => #{parent_monitor_instance_id}" return parent_monitor_instance_id end end diff --git a/source/code/plugin/health/unit_monitor.rb b/source/code/plugin/health/unit_monitor.rb index 64262aa2e..9af599321 100644 --- a/source/code/plugin/health/unit_monitor.rb +++ b/source/code/plugin/health/unit_monitor.rb @@ -1,3 +1,4 @@ +require_relative 'health_model_constants' require 'json' module HealthModel diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index ce205322d..810fb512f 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -20,7 +20,7 @@ def initialize config_param :tag, :string, :default => 
"oms.api.cadvisorperf" config_param :mdmtag, :string, :default => "mdm.cadvisorperf" config_param :nodehealthtag, :string, :default => "kubehealth.DaemonSet.Node" - #config_param :containerhealthtag, :string, :default => "kubehealth.DaemonSet.Container" + config_param :containerhealthtag, :string, :default => "kubehealth.DaemonSet.Container" def configure(conf) super @@ -54,12 +54,11 @@ def enumerate() record["DataType"] = "LINUX_PERF_BLOB" record["IPName"] = "LogManagement" eventStream.add(time, record) if record - #router.emit(@tag, time, record) if record - end + end router.emit_stream(@tag, eventStream) if eventStream router.emit_stream(@mdmtag, eventStream) if eventStream - #router.emit_stream(@containerhealthtag, eventStream) if eventStream + router.emit_stream(@containerhealthtag, eventStream) if eventStream router.emit_stream(@nodehealthtag, eventStream) if eventStream @@istestvar = ENV["ISTEST"] diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index f177b62bf..5538ba4aa 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -84,7 +84,7 @@ def enumerate(eventList = nil) else record["Computer"] = (OMS::Common.get_hostname) end - record["ClusterName"] = KubernetesApiClient.getClusterName + record['ClusterName'] = KubernetesApiClient.getClusterName record["ClusterId"] = KubernetesApiClient.getClusterId wrapper = { "DataType" => "KUBE_EVENTS_BLOB", diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 5d29eb035..9a1b8f9a9 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -26,7 +26,6 @@ def initialize @@cluster_id = KubernetesApiClient.getClusterId @resources = HealthKubernetesResources.instance @provider = HealthMonitorProvider.new(@@cluster_id, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) - @@cluster_health_model_enabled = 
HealthMonitorUtils.is_cluster_health_model_enabled rescue => e ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) end @@ -55,9 +54,7 @@ def start @@clusterCpuCapacity = cluster_capacity[0] @@clusterMemoryCapacity = cluster_capacity[1] @@hmlog.info "Cluster CPU Capacity: #{@@clusterCpuCapacity} Memory Capacity: #{@@clusterMemoryCapacity}" - if @@cluster_health_model_enabled - ApplicationInsightsUtility.sendCustomEvent("in_kube_health Plugin Start", {}) - end + initialize_inventory end rescue => e ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) @@ -76,10 +73,6 @@ def shutdown def enumerate begin - if !@@cluster_health_model_enabled - @@hmlog.info "Cluster Health Model disabled in in_kube_health" - return - end currentTime = Time.now emitTime = currentTime.to_f @@ -97,7 +90,8 @@ def enumerate @resources.node_inventory = node_inventory @resources.pod_inventory = pod_inventory - @resources.deployment_inventory = deployment_inventory + @resources.set_deployment_inventory(deployment_inventory) + @resources.build_pod_uid_lookup if node_inventory_response.code.to_i != 200 record = process_kube_api_up_monitor("fail", node_inventory_response) @@ -117,12 +111,12 @@ def enumerate system_pods = pods_ready_hash.select{|k,v| v['namespace'] == 'kube-system'} workload_pods = pods_ready_hash.select{|k,v| v['namespace'] != 'kube-system'} - system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, HealthMonitorConstants::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) + system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, MonitorId::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) system_pods_ready_percentage_records.each do |record| health_monitor_records.push(record) if record end - workload_pods_ready_percentage_records = process_pods_ready_percentage(workload_pods, HealthMonitorConstants::USER_WORKLOAD_PODS_READY_MONITOR_ID) + workload_pods_ready_percentage_records = 
process_pods_ready_percentage(workload_pods, MonitorId::USER_WORKLOAD_PODS_READY_MONITOR_ID) workload_pods_ready_percentage_records.each do |record| health_monitor_records.push(record) if record end @@ -158,7 +152,7 @@ def process_cpu_oversubscribed_monitor(pod_inventory, node_inventory) state = subscription > @@clusterCpuCapacity ? "fail" : "pass" #CPU - monitor_id = HealthMonitorConstants::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID + monitor_id = MonitorId::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}} # @@hmlog.info health_monitor_record @@ -169,7 +163,7 @@ def process_cpu_oversubscribed_monitor(pod_inventory, node_inventory) health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id #@@hmlog.info "Successfully processed process_cpu_oversubscribed_monitor" @@ -185,7 +179,7 @@ def process_memory_oversubscribed_monitor(pod_inventory, node_inventory) #@@hmlog.debug "Memory Oversubscribed Monitor State : #{state}" #CPU - monitor_id = HealthMonitorConstants::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID + monitor_id = MonitorId::WORKLOAD_MEMORY_OVERSUBSCRIBED_MONITOR_ID health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterMemoryCapacity" => @@clusterMemoryCapacity.to_f, "clusterMemoryRequests" => subscription.to_f}} hmlog = HealthMonitorUtils.get_log_handle @@ -195,7 +189,7 @@ def 
process_memory_oversubscribed_monitor(pod_inventory, node_inventory) health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id #@@hmlog.info "Successfully processed process_memory_oversubscribed_monitor" @@ -205,21 +199,21 @@ def process_memory_oversubscribed_monitor(pod_inventory, node_inventory) def process_kube_api_up_monitor(state, response) timestamp = Time.now.utc.iso8601 - monitor_id = HealthMonitorConstants::KUBE_API_STATUS + monitor_id = MonitorId::KUBE_API_STATUS details = response.each_header.to_h details['ResponseCode'] = response.code health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} hmlog = HealthMonitorUtils.get_log_handle #hmlog.info health_monitor_record - monitor_instance_id = HealthMonitorConstants::KUBE_API_STATUS + monitor_instance_id = MonitorId::KUBE_API_STATUS #hmlog.info "Monitor Instance Id: #{monitor_instance_id}" health_record = {} time_now = Time.now.utc.iso8601 health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id #@@hmlog.info "Successfully processed process_kube_api_up_monitor" @@ -240,15 +234,15 @@ 
def process_pods_ready_percentage(pods_hash, config_monitor_id) percent = pods_ready / total_pods * 100 timestamp = Time.now.utc.iso8601 - state = HealthMonitorUtils.compute_percentage_state((100-percent), monitor_config) - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "workloadName" => workload_name, "namespace" => namespace, "workloadKind" => workload_kind}} + state = HealthMonitorUtils.compute_percentage_state(percent, monitor_config) + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "workload_name" => workload_name, "namespace" => namespace, "workload_kind" => workload_kind}} monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(config_monitor_id, [@@cluster_id, namespace, workload_name]) health_record = {} time_now = Time.now.utc.iso8601 health_record[HealthMonitorRecordFields::MONITOR_ID] = config_monitor_id health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id records.push(health_record) @@ -258,7 +252,7 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) end def process_node_condition_monitor(node_inventory) - monitor_id = HealthMonitorConstants::NODE_CONDITION_MONITOR_ID + monitor_id = MonitorId::NODE_CONDITION_MONITOR_ID timestamp = Time.now.utc.iso8601 monitor_config = @provider.get_config(monitor_id) node_condition_monitor_records = [] @@ -266,11 +260,12 @@ def process_node_condition_monitor(node_inventory) node_inventory['items'].each do |node| node_name = 
node['metadata']['name'] conditions = node['status']['conditions'] - state = HealthMonitorUtils.get_node_state_from_node_conditions(conditions) - #hmlog.debug "Node Name = #{node_name} State = #{state}" + state = HealthMonitorUtils.get_node_state_from_node_conditions(monitor_config, conditions) details = {} conditions.each do |condition| - details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message']} + state = !(condition['status'].downcase == 'true' && condition['type'].downcase != 'ready') ? HealthMonitorStates::PASS : HealthMonitorStates::FAIL + details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message'], "State" => state} + #@@hmlog.info "Node Condition details: #{JSON.pretty_generate(details)}" end health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@cluster_id, node_name]) @@ -279,7 +274,7 @@ def process_node_condition_monitor(node_inventory) health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::AGENT_COLLECTION_TIME] = time_now + health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id health_record[HealthMonitorRecordFields::NODE_NAME] = node_name @@ -290,6 +285,20 @@ def process_node_condition_monitor(node_inventory) return node_condition_monitor_records end + def initialize_inventory + #this is required because there are other components, like the container cpu memory aggregator, that depends on the mapping being initialized + node_inventory_response = KubernetesApiClient.getKubeResourceInfo("nodes") + 
node_inventory = JSON.parse(node_inventory_response.body) + pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods") + pod_inventory = JSON.parse(pod_inventory_response.body) + deployment_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("deployments", api_version: "extensions/v1beta1").body) + + @resources.node_inventory = node_inventory + @resources.pod_inventory = pod_inventory + @resources.set_deployment_inventory(deployment_inventory) + @resources.build_pod_uid_lookup + end + def run_periodic @mutex.lock done = @finished diff --git a/source/code/plugin/out_health_forward.rb b/source/code/plugin/out_health_forward.rb new file mode 100644 index 000000000..18664a22a --- /dev/null +++ b/source/code/plugin/out_health_forward.rb @@ -0,0 +1,677 @@ +# +# Fluentd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +require 'base64' +require 'socket' +require 'fileutils' + +require 'cool.io' + +require 'fluent/output' +require 'fluent/config/error' + +module Fluent + class ForwardOutputError < StandardError + end + + class ForwardOutputResponseError < ForwardOutputError + end + + class ForwardOutputConnectionClosedError < ForwardOutputError + end + + class ForwardOutputACKTimeoutError < ForwardOutputResponseError + end + + class HealthForwardOutput < ObjectBufferedOutput + Plugin.register_output('health_forward', self) + + def initialize + super + require 'fluent/plugin/socket_util' + @nodes = [] #=> [Node] + end + + desc 'The timeout time when sending event logs.' + config_param :send_timeout, :time, default: 60 + desc 'The transport protocol to use for heartbeats.(udp,tcp,none)' + config_param :heartbeat_type, default: :udp do |val| + case val.downcase + when 'tcp' + :tcp + when 'udp' + :udp + when 'none' + :none + else + raise ConfigError, "forward output heartbeat type should be 'tcp', 'udp', or 'none'" + end + end + desc 'The interval of the heartbeat packer.' + config_param :heartbeat_interval, :time, default: 1 + desc 'The wait time before accepting a server fault recovery.' + config_param :recover_wait, :time, default: 10 + desc 'The hard timeout used to detect server failure.' + config_param :hard_timeout, :time, default: 60 + desc 'Set TTL to expire DNS cache in seconds.' + config_param :expire_dns_cache, :time, default: nil # 0 means disable cache + desc 'The threshold parameter used to detect server faults.' + config_param :phi_threshold, :integer, default: 16 + desc 'Use the "Phi accrual failure detector" to detect server failure.' + config_param :phi_failure_detector, :bool, default: true + + # if any options added that requires extended forward api, fix @extend_internal_protocol + + desc 'Change the protocol to at-least-once.' 
+ config_param :require_ack_response, :bool, default: false # require in_forward to respond with ack + desc 'This option is used when require_ack_response is true.' + config_param :ack_response_timeout, :time, default: 190 # 0 means do not wait for ack responses + # Linux default tcp_syn_retries is 5 (in many environment) + # 3 + 6 + 12 + 24 + 48 + 96 -> 189 (sec) + desc 'Enable client-side DNS round robin.' + config_param :dns_round_robin, :bool, default: false # heartbeat_type 'udp' is not available for this + + attr_reader :nodes + + config_param :port, :integer, default: DEFAULT_LISTEN_PORT, deprecated: "User host xxx instead." + config_param :host, :string, default: nil, deprecated: "Use port xxx instead." + desc 'Skip network related error, e.g. DNS error, during plugin setup' + config_param :skip_network_error_at_init, :bool, :default => false + + + attr_accessor :extend_internal_protocol + + def configure(conf) + super + + # backward compatibility + if host = conf['host'] + port = conf['port'] + port = port ? port.to_i : DEFAULT_LISTEN_PORT + e = conf.add_element('server') + e['host'] = host + e['port'] = port.to_s + end + + recover_sample_size = @recover_wait / @heartbeat_interval + + # add options here if any options addes which uses extended protocol + @extend_internal_protocol = if @require_ack_response + true + else + false + end + + if @dns_round_robin + if @heartbeat_type == :udp + raise ConfigError, "forward output heartbeat type must be 'tcp' or 'none' to use dns_round_robin option" + end + end + + conf.elements.each {|e| + next if e.name != "server" + + host = e['host'] + port = e['port'] + port = port ? port.to_i : DEFAULT_LISTEN_PORT + + weight = e['weight'] + weight = weight ? 
weight.to_i : 60 + + standby = !!e['standby'] + + name = e['name'] + unless name + name = "#{host}:#{port}" + end + + failure = FailureDetector.new(@heartbeat_interval, @hard_timeout, Time.now.to_i.to_f) + + node_conf = NodeConfig2.new(name, host, port, weight, standby, failure, + @phi_threshold, recover_sample_size, @expire_dns_cache, @phi_failure_detector, @dns_round_robin, @skip_network_error_at_init) + + if @heartbeat_type == :none + @nodes << NoneHeartbeatNode.new(log, node_conf) + else + @nodes << Node.new(log, node_conf) + end + log.info "adding forwarding server '#{name}'", host: host, port: port, weight: weight, plugin_id: plugin_id + } + + if @nodes.empty? + raise ConfigError, "forward output plugin requires at least one is required" + end + end + + def start + super + + @rand_seed = Random.new.seed + rebuild_weight_array + @rr = 0 + + unless @heartbeat_type == :none + @loop = Coolio::Loop.new + + if @heartbeat_type == :udp + # assuming all hosts use udp + @usock = SocketUtil.create_udp_socket(@nodes.first.host) + @usock.fcntl(Fcntl::F_SETFL, Fcntl::O_NONBLOCK) + @hb = HeartbeatHandler.new(@usock, method(:on_heartbeat)) + @loop.attach(@hb) + end + + @timer = HeartbeatRequestTimer.new(@heartbeat_interval, method(:on_timer)) + @loop.attach(@timer) + + @thread = Thread.new(&method(:run)) + end + end + + def shutdown + @finished = true + if @loop + @loop.watchers.each {|w| w.detach } + @loop.stop + end + @thread.join if @thread + @usock.close if @usock + end + + def run + @loop.run if @loop + rescue + log.error "unexpected error", error: $!.to_s + log.error_backtrace + end + + def write_objects(tag, chunk) + return if chunk.empty? + + error = nil + + wlen = @weight_array.length + wlen.times do + @rr = (@rr + 1) % wlen + node = @weight_array[@rr] + + if node.available? + begin + send_data(node, tag, chunk) + return + rescue + # for load balancing during detecting crashed servers + error = $! 
# use the latest error + end + end + end + + if error + raise error + else + raise "no nodes are available" # TODO message + end + end + + private + + def rebuild_weight_array + standby_nodes, regular_nodes = @nodes.partition {|n| + n.standby? + } + + lost_weight = 0 + regular_nodes.each {|n| + unless n.available? + lost_weight += n.weight + end + } + log.debug "rebuilding weight array", lost_weight: lost_weight + + if lost_weight > 0 + standby_nodes.each {|n| + if n.available? + regular_nodes << n + log.warn "using standby node #{n.host}:#{n.port}", weight: n.weight + lost_weight -= n.weight + break if lost_weight <= 0 + end + } + end + + weight_array = [] + gcd = regular_nodes.map {|n| n.weight }.inject(0) {|r,w| r.gcd(w) } + regular_nodes.each {|n| + (n.weight / gcd).times { + weight_array << n + } + } + + # for load balancing during detecting crashed servers + coe = (regular_nodes.size * 6) / weight_array.size + weight_array *= coe if coe > 1 + + r = Random.new(@rand_seed) + weight_array.sort_by! 
{ r.rand } + + @weight_array = weight_array + end + + # MessagePack FixArray length = 3 (if @extend_internal_protocol) + # = 2 (else) + FORWARD_HEADER = [0x92].pack('C').freeze + FORWARD_HEADER_EXT = [0x93].pack('C').freeze + def forward_header + if @extend_internal_protocol + FORWARD_HEADER_EXT + else + FORWARD_HEADER + end + end + + #FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack + def send_heartbeat_tcp(node) + sock = connect(node) + begin + opt = [1, @send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; } + sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt) + opt = [@send_timeout.to_i, 0].pack('L!L!') # struct timeval + # don't send any data to not cause a compatibility problem + #sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt) + #sock.write FORWARD_TCP_HEARTBEAT_DATA + node.heartbeat(true) + ensure + sock.close + end + end + + def send_data(node, tag, chunk) + sock = connect(node) + begin + opt = [1, @send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; } + sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt) + + opt = [@send_timeout.to_i, 0].pack('L!L!') # struct timeval + sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt) + + # beginArray(2) + sock.write forward_header + + # writeRaw(tag) + sock.write tag.to_msgpack # tag + + # beginRaw(size) + sz = chunk.size + #if sz < 32 + # # FixRaw + # sock.write [0xa0 | sz].pack('C') + #elsif sz < 65536 + # # raw 16 + # sock.write [0xda, sz].pack('Cn') + #else + # raw 32 + sock.write [0xdb, sz].pack('CN') + #end + + # writeRawBody(packed_es) + chunk.write_to(sock) + + if @extend_internal_protocol + option = {} + option['chunk'] = Base64.encode64(chunk.unique_id) if @require_ack_response + sock.write option.to_msgpack + + if @require_ack_response && @ack_response_timeout > 0 + # Waiting for a response here results in a decrease of throughput because a chunk queue is locked. 
+ # To avoid a decrease of troughput, it is necessary to prepare a list of chunks that wait for responses + # and process them asynchronously. + if IO.select([sock], nil, nil, @ack_response_timeout) + raw_data = sock.recv(1024) + + # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF. + # If this happens we assume the data wasn't delivered and retry it. + if raw_data.empty? + @log.warn "node #{node.host}:#{node.port} closed the connection. regard it as unavailable." + node.disable! + raise ForwardOutputConnectionClosedError, "node #{node.host}:#{node.port} closed connection" + else + # Serialization type of the response is same as sent data. + res = MessagePack.unpack(raw_data) + + if res['ack'] != option['chunk'] + # Some errors may have occured when ack and chunk id is different, so send the chunk again. + raise ForwardOutputResponseError, "ack in response and chunk id in sent data are different" + end + end + + else + # IO.select returns nil on timeout. + # There are 2 types of cases when no response has been received: + # (1) the node does not support sending responses + # (2) the node does support sending response but responses have not arrived for some reasons. + @log.warn "no response from #{node.host}:#{node.port}. regard it as unavailable." + node.disable! + raise ForwardOutputACKTimeoutError, "node #{node.host}:#{node.port} does not return ACK" + end + end + end + + node.heartbeat(false) + return res # for test + ensure + sock.close + end + end + + def connect(node) + # TODO unix socket? + TCPSocket.new(node.resolved_host, node.port) + end + + class HeartbeatRequestTimer < Coolio::TimerWatcher + def initialize(interval, callback) + super(interval, true) + @callback = callback + end + + def on_timer + @callback.call + rescue + # TODO log? 
+ end + end + + def on_timer + return if @finished + @nodes.each {|n| + if n.tick + rebuild_weight_array + end + begin + #log.trace "sending heartbeat #{n.host}:#{n.port} on #{@heartbeat_type}" + if @heartbeat_type == :tcp + send_heartbeat_tcp(n) + else + @usock.send "\0", 0, Socket.pack_sockaddr_in(n.port, n.resolved_host) + end + rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR, Errno::ECONNREFUSED + # TODO log + log.debug "failed to send heartbeat packet to #{n.host}:#{n.port}", error: $!.to_s + end + } + end + + class HeartbeatHandler < Coolio::IO + def initialize(io, callback) + super(io) + @io = io + @callback = callback + end + + def on_readable + begin + msg, addr = @io.recvfrom(1024) + rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR + return + end + host = addr[3] + port = addr[1] + sockaddr = Socket.pack_sockaddr_in(port, host) + @callback.call(sockaddr, msg) + rescue + # TODO log? + end + end + + def on_heartbeat(sockaddr, msg) + port, host = Socket.unpack_sockaddr_in(sockaddr) + if node = @nodes.find {|n| n.sockaddr == sockaddr } + #log.trace "heartbeat from '#{node.name}'", :host=>node.host, :port=>node.port + if node.heartbeat + rebuild_weight_array + end + end + end + + NodeConfig2 = Struct.new("NodeConfig2", :name, :host, :port, :weight, :standby, :failure, + :phi_threshold, :recover_sample_size, :expire_dns_cache, :phi_failure_detector, :dns_round_robin, :skip_network_error) + + class Node + def initialize(log, conf) + @log = log + @conf = conf + @name = @conf.name + @host = @conf.host + @port = @conf.port + @weight = @conf.weight + @failure = @conf.failure + @available = true + + @resolved_host = nil + @resolved_time = 0 + begin + resolved_host # check dns + rescue => e + if @conf.skip_network_error + log.warn "#{@name} got network error during setup. 
Resolve host later", :error => e, :error_class => e.class + else + raise + end + end + end + + attr_reader :conf + attr_reader :name, :host, :port, :weight + attr_reader :sockaddr # used by on_heartbeat + attr_reader :failure, :available # for test + + def available? + @available + end + + def disable! + @available = false + end + + def standby? + @conf.standby + end + + def resolved_host + case @conf.expire_dns_cache + when 0 + # cache is disabled + return resolve_dns! + + when nil + # persistent cache + return @resolved_host ||= resolve_dns! + + else + now = Engine.now + rh = @resolved_host + if !rh || now - @resolved_time >= @conf.expire_dns_cache + rh = @resolved_host = resolve_dns! + @resolved_time = now + end + return rh + end + end + + def resolve_dns! + addrinfo_list = Socket.getaddrinfo(@host, @port, nil, Socket::SOCK_STREAM) + addrinfo = @conf.dns_round_robin ? addrinfo_list.sample : addrinfo_list.first + @sockaddr = Socket.pack_sockaddr_in(addrinfo[1], addrinfo[3]) # used by on_heartbeat + addrinfo[3] + end + private :resolve_dns! 
+ + def tick + now = Time.now.to_f + if !@available + if @failure.hard_timeout?(now) + @failure.clear + end + return nil + end + + if @failure.hard_timeout?(now) + @log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, hard_timeout: true + @available = false + @resolved_host = nil # expire cached host + @failure.clear + return true + end + + if @conf.phi_failure_detector + phi = @failure.phi(now) + #$log.trace "phi '#{@name}'", :host=>@host, :port=>@port, :phi=>phi + if phi > @conf.phi_threshold + @log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, phi: phi + @available = false + @resolved_host = nil # expire cached host + @failure.clear + return true + end + end + return false + end + + def heartbeat(detect=true) + now = Time.now.to_f + @failure.add(now) + #@log.trace "heartbeat from '#{@name}'", :host=>@host, :port=>@port, :available=>@available, :sample_size=>@failure.sample_size + if detect && !@available && @failure.sample_size > @conf.recover_sample_size + @available = true + @log.warn "recovered forwarding server '#{@name}'", host: @host, port: @port + return true + else + return nil + end + end + + def to_msgpack(out = '') + [@host, @port, @weight, @available].to_msgpack(out) + end + end + + # Override Node to disable heartbeat + class NoneHeartbeatNode < Node + def available? + true + end + + def tick + false + end + + def heartbeat(detect=true) + true + end + end + + class FailureDetector + PHI_FACTOR = 1.0 / Math.log(10.0) + SAMPLE_SIZE = 1000 + + def initialize(heartbeat_interval, hard_timeout, init_last) + @heartbeat_interval = heartbeat_interval + @last = init_last + @hard_timeout = hard_timeout + + # microsec + @init_gap = (heartbeat_interval * 1e6).to_i + @window = [@init_gap] + end + + def hard_timeout?(now) + now - @last > @hard_timeout + end + + def add(now) + if @window.empty? 
+ @window << @init_gap + @last = now + else + gap = now - @last + @window << (gap * 1e6).to_i + @window.shift if @window.length > SAMPLE_SIZE + @last = now + end + end + + def phi(now) + size = @window.size + return 0.0 if size == 0 + + # Calculate weighted moving average + mean_usec = 0 + fact = 0 + @window.each_with_index {|gap,i| + mean_usec += gap * (1+i) + fact += (1+i) + } + mean_usec = mean_usec / fact + + # Normalize arrive intervals into 1sec + mean = (mean_usec.to_f / 1e6) - @heartbeat_interval + 1 + + # Calculate phi of the phi accrual failure detector + t = now - @last - @heartbeat_interval + 1 + phi = PHI_FACTOR * t / mean + + return phi + end + + def sample_size + @window.size + end + + def clear + @window.clear + @last = 0 + end + end + + ## TODO + #class RPC + # def initialize(this) + # @this = this + # end + # + # def list_nodes + # @this.nodes + # end + # + # def list_fault_nodes + # list_nodes.select {|n| !n.available? } + # end + # + # def list_available_nodes + # list_nodes.select {|n| n.available? 
} + # end + # + # def add_node(name, host, port, weight) + # end + # + # def recover_node(host, port) + # end + # + # def remove_node(host, port) + # end + #end + end +end diff --git a/test/code/plugin/health/cadvisor_perf.json b/test/code/plugin/health/cadvisor_perf.json new file mode 100644 index 000000000..35eae32b6 --- /dev/null +++ b/test/code/plugin/health/cadvisor_perf.json @@ -0,0 +1,2540 @@ +[ + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:39Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/sidecar", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 14061568 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:44Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/dnsmasq", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 7249920 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:45Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/kubedns", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 14442496 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + 
"DataItems": [ + { + "Timestamp": "2019-08-23T22:13:49Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/healthz", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 5988352 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:43Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/f65e6a62-c5c8-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 40284160 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:41Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/69e68b21-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 101965824 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:37Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/e690309f-a742-11e9-a38a-22d1c75c4357/redirector", + "Collections": [ + { + "CounterName": 
"memoryWorkingSetBytes", + "Value": 3203072 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:42Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/e690309f-a742-11e9-a38a-22d1c75c4357/azureproxy", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 9658368 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:42Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/9543dbb7-a1f2-11e9-8b08-d602e29755d5/metrics-server", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 21491712 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:50Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/sidecar", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1562639906 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:50Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/dnsmasq", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1562639899 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:50Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/kubedns", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1562639895 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:50Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/healthz", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1562639903 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:50Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/f65e6a62-c5c8-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1566580259 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": 
"2019-08-23T22:13:50Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/69e68b21-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1566589936 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:50Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/e690309f-a742-11e9-a38a-22d1c75c4357/redirector", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1563224142 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:50Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/e690309f-a742-11e9-a38a-22d1c75c4357/azureproxy", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1563224144 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:50Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/9543dbb7-a1f2-11e9-8b08-d602e29755d5/metrics-server", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1562639893 + } + ] + } + ], 
+ "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:39Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/sidecar", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 349987 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:44Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/dnsmasq", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 773186 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:45Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/kubedns", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 2718196 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:49Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/healthz", + 
"Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 2007695 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:43Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/f65e6a62-c5c8-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 674463 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:41Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/69e68b21-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 2159553 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:37Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/e690309f-a742-11e9-a38a-22d1c75c4357/redirector", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 3575667 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:42Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/e690309f-a742-11e9-a38a-22d1c75c4357/azureproxy", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 0 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:42Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/9543dbb7-a1f2-11e9-8b08-d602e29755d5/metrics-server", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 633968 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:39Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/sidecar", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 11546624 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:39Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/sidecar", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 11546624 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:44Z", + 
"Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/dnsmasq", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 5652480 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:45Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/kubedns", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 10981376 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:49Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/healthz", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 2875392 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:43Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/f65e6a62-c5c8-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 20627456 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": 
"LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:41Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/69e68b21-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 69353472 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:37Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/e690309f-a742-11e9-a38a-22d1c75c4357/redirector", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 462848 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:42Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/e690309f-a742-11e9-a38a-22d1c75c4357/azureproxy", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 8212480 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:42Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/9543dbb7-a1f2-11e9-8b08-d602e29755d5/metrics-server", + "Collections": [ + { + "CounterName": 
"memoryRssBytes", + "Value": 16543744 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:45Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-1", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 814518272 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:45Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-1", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 82091339.40983607 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:45Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-1", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 2089115648 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:50Z", + "Host": "aks-nodepool1-19574989-1", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-1", + "Collections": [ + { + "CounterName": 
"restartTimeEpoch", + "Value": 1552408751.22 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:56Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/b1e04e1c-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 85528576 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:54Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/49e373c8-c5c9-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 25415680 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:53Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/65a6f978-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 111738880 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:55Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/24ab7e32-c5c9-11e9-8736-86290fd7dd1f/heapster-nanny", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 8417280 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:01Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/24ab7e32-c5c9-11e9-8736-86290fd7dd1f/heapster", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 19492864 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:57Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/9583b2ab-a1f2-11e9-8b08-d602e29755d5/main", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 12918784 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:46Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/bb3d3ef2-a742-11e9-a38a-22d1c75c4357/redirector", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 3379200 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": 
"2019-08-23T22:12:57Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/bb3d3ef2-a742-11e9-a38a-22d1c75c4357/azureproxy", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 9818112 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:03Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/b1e04e1c-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1566590024 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:03Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/49e373c8-c5c9-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1566580398 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:03Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/65a6f978-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1566589942 + } + ] + } + ], + 
"DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:03Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/24ab7e32-c5c9-11e9-8736-86290fd7dd1f/heapster-nanny", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1566580342 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:03Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/24ab7e32-c5c9-11e9-8736-86290fd7dd1f/heapster", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1566580337 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:03Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/9583b2ab-a1f2-11e9-8b08-d602e29755d5/main", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1562639936 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:03Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/bb3d3ef2-a742-11e9-a38a-22d1c75c4357/redirector", 
+ "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1563224072 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:03Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/bb3d3ef2-a742-11e9-a38a-22d1c75c4357/azureproxy", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1563224077 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:56Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/b1e04e1c-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 4447595 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:54Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/49e373c8-c5c9-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 2765529 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:53Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/65a6f978-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 5565414 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:55Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/24ab7e32-c5c9-11e9-8736-86290fd7dd1f/heapster-nanny", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 863810 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:01Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/24ab7e32-c5c9-11e9-8736-86290fd7dd1f/heapster", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 886196 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:57Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/9583b2ab-a1f2-11e9-8b08-d602e29755d5/main", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 855014 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:46Z", 
+ "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/bb3d3ef2-a742-11e9-a38a-22d1c75c4357/redirector", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 1794634 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:57Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/bb3d3ef2-a742-11e9-a38a-22d1c75c4357/azureproxy", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 0 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:56Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/b1e04e1c-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 76308480 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:54Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/49e373c8-c5c9-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 21319680 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": 
"LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:53Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/65a6f978-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 78180352 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:55Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/24ab7e32-c5c9-11e9-8736-86290fd7dd1f/heapster-nanny", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 7909376 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:01Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/24ab7e32-c5c9-11e9-8736-86290fd7dd1f/heapster", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 18968576 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:57Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/9583b2ab-a1f2-11e9-8b08-d602e29755d5/main", + "Collections": [ + { + "CounterName": "memoryRssBytes", 
+ "Value": 9871360 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:46Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/bb3d3ef2-a742-11e9-a38a-22d1c75c4357/redirector", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 462848 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:57Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/bb3d3ef2-a742-11e9-a38a-22d1c75c4357/azureproxy", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 8212480 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:57Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-0", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 865943552 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:57Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-0", + "Collections": [ + { + 
"CounterName": "cpuUsageNanoCores", + "Value": 95432166.25 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:12:57Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-0", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 2191216640 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:03Z", + "Host": "aks-nodepool1-19574989-0", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-0", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1552408749.66 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:07Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/b2a0e1b3-bd3f-11e9-b2a7-d61658c73830/tunnel-front", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 17743872 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:12Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/114f7246-c5c9-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 24162304 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:07Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/44a61692-b945-11e9-a1b6-127094e7fd94/azureproxy", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 11472896 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:06Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/44a61692-b945-11e9-a1b6-127094e7fd94/redirector", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 3821568 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:15Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/8dbd5e8b-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 92057600 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + 
"Timestamp": "2019-08-23T22:14:15Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/b2a0e1b3-bd3f-11e9-b2a7-d61658c73830/tunnel-front", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1565641691 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:15Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/114f7246-c5c9-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1566580300 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:15Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/44a61692-b945-11e9-a1b6-127094e7fd94/azureproxy", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1565204288 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:15Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/44a61692-b945-11e9-a1b6-127094e7fd94/redirector", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1565204284 + } 
+ ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:15Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/8dbd5e8b-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1566589995 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:07Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/b2a0e1b3-bd3f-11e9-b2a7-d61658c73830/tunnel-front", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 35140951 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:12Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/114f7246-c5c9-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 983407 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:07Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/44a61692-b945-11e9-a1b6-127094e7fd94/azureproxy", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 0 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:06Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/44a61692-b945-11e9-a1b6-127094e7fd94/redirector", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 4221562 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:15Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/8dbd5e8b-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 1881274 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:07Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/b2a0e1b3-bd3f-11e9-b2a7-d61658c73830/tunnel-front", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 4161536 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:12Z", 
+ "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/114f7246-c5c9-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 18952192 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:07Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/44a61692-b945-11e9-a1b6-127094e7fd94/azureproxy", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 8224768 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:06Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/44a61692-b945-11e9-a1b6-127094e7fd94/redirector", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 483328 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:15Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/8dbd5e8b-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 74915840 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": 
"LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:14Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-3", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 554704896 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:14Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-3", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 88981130.86666666 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:14Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-3", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 1633976320 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:14:15Z", + "Host": "aks-nodepool1-19574989-3", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-3", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1565204130.6 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + 
"IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:37Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/be78d7f6-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 92954624 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:33Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/dnsmasq", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 7446528 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:22Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/sidecar", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 14811136 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:31Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/kubedns", + "Collections": [ + { + 
"CounterName": "memoryWorkingSetBytes", + "Value": 15114240 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:35Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/healthz", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 5406720 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:32Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/94e52ab1-a1f2-11e9-8b08-d602e29755d5/autoscaler", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 10043392 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:37Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/2c3de48d-c5c9-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 58052608 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:31Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/06fef5f6-a743-11e9-a38a-22d1c75c4357/azureproxy", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 9904128 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:31Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/06fef5f6-a743-11e9-a38a-22d1c75c4357/redirector", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 3645440 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:40Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/be78d7f6-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1566590079 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:40Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/dnsmasq", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1562639920 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": 
"2019-08-23T22:13:40Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/sidecar", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1562639940 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:40Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/kubedns", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1562639904 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:40Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/healthz", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1562639932 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:40Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/94e52ab1-a1f2-11e9-8b08-d602e29755d5/autoscaler", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1562639909 + } + ] + } + ], + 
"DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:40Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/2c3de48d-c5c9-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1566580349 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:40Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/06fef5f6-a743-11e9-a38a-22d1c75c4357/azureproxy", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1563224204 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:40Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/06fef5f6-a743-11e9-a38a-22d1c75c4357/redirector", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1563224199 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:37Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/be78d7f6-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 3004849 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:33Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/dnsmasq", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 796842 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:22Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/sidecar", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 708906 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:31Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/kubedns", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 3451625 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:35Z", + 
"Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/healthz", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 2572419 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:32Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/94e52ab1-a1f2-11e9-8b08-d602e29755d5/autoscaler", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 548275 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:37Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/2c3de48d-c5c9-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 1740316 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:31Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/06fef5f6-a743-11e9-a38a-22d1c75c4357/azureproxy", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 0 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": 
"LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:31Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/06fef5f6-a743-11e9-a38a-22d1c75c4357/redirector", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 3156661 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:37Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/be78d7f6-c5df-11e9-8736-86290fd7dd1f/omsagent", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 66428928 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:33Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/dnsmasq", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 5611520 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:22Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/sidecar", + "Collections": [ + { + "CounterName": "memoryRssBytes", 
+ "Value": 11833344 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:31Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/kubedns", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 11063296 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:35Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/95046bc6-a1f2-11e9-8b08-d602e29755d5/healthz", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 2551808 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:32Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/94e52ab1-a1f2-11e9-8b08-d602e29755d5/autoscaler", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 9244672 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:37Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/2c3de48d-c5c9-11e9-8736-86290fd7dd1f/kube-proxy", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 20402176 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:31Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/06fef5f6-a743-11e9-a38a-22d1c75c4357/azureproxy", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 8216576 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:31Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/06fef5f6-a743-11e9-a38a-22d1c75c4357/redirector", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 462848 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:30Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-2", + "Collections": [ + { + "CounterName": "memoryRssBytes", + "Value": 853344256 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:30Z", + "Host": 
"aks-nodepool1-19574989-2", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-2", + "Collections": [ + { + "CounterName": "cpuUsageNanoCores", + "Value": 114265842.16 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:30Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-2", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 1892982784 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }, + { + "DataItems": [ + { + "Timestamp": "2019-08-23T22:13:40Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SNode", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/aks-nodepool1-19574989-2", + "Collections": [ + { + "CounterName": "restartTimeEpoch", + "Value": 1561082409.36 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + } +] \ No newline at end of file diff --git a/test/code/plugin/health/deployments.json b/test/code/plugin/health/deployments.json new file mode 100644 index 000000000..75586db04 --- /dev/null +++ b/test/code/plugin/health/deployments.json @@ -0,0 +1,1385 @@ +{ + "apiVersion": "v1", + "items": [ + { + "apiVersion": "extensions/v1beta1", + "kind": "Deployment", + "metadata": { + "annotations": { + "deployment.kubernetes.io/revision": "2" + }, + "creationTimestamp": "2019-08-23T17:12:00Z", + "generation": 2, + "labels": { + "addonmanager.kubernetes.io/mode": 
"EnsureExists", + "k8s-app": "heapster", + "kubernetes.io/cluster-service": "true" + }, + "name": "heapster", + "namespace": "kube-system", + "resourceVersion": "19048928", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/heapster", + "uid": "1e98c3d1-c5c9-11e9-8736-86290fd7dd1f" + }, + "spec": { + "progressDeadlineSeconds": 2147483647, + "replicas": 1, + "revisionHistoryLimit": 10, + "selector": { + "matchLabels": { + "k8s-app": "heapster" + } + }, + "strategy": { + "rollingUpdate": { + "maxSurge": 1, + "maxUnavailable": 1 + }, + "type": "RollingUpdate" + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "k8s-app": "heapster" + } + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "command": [ + "/heapster", + "--source=kubernetes.summary_api:\"\"" + ], + "image": "aksrepos.azurecr.io/mirror/heapster-amd64:v1.5.3", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "failureThreshold": 3, + "httpGet": { + "path": "/healthz", + "port": 8082, + "scheme": "HTTP" + }, + "initialDelaySeconds": 180, + "periodSeconds": 10, + "successThreshold": 1, + "timeoutSeconds": 5 + }, + "name": "heapster", + "resources": { + "limits": { + "cpu": "88m", + "memory": "204Mi" + }, + "requests": { + "cpu": "88m", + "memory": "204Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File" + }, + { + "command": [ + "/pod_nanny", + "--config-dir=/etc/config", + "--cpu=80m", + "--extra-cpu=0.5m", + "--memory=140Mi", + "--extra-memory=4Mi", + "--threshold=5", + "--deployment=heapster", + "--container=heapster", + "--poll-period=300000", + "--estimator=exponential" + ], + "env": [ + { + "name": "MY_POD_NAME", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", 
+ "fieldPath": "metadata.name" + } + } + }, + { + "name": "MY_POD_NAMESPACE", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "metadata.namespace" + } + } + } + ], + "image": "aksrepos.azurecr.io/mirror/addon-resizer:1.8.1", + "imagePullPolicy": "IfNotPresent", + "name": "heapster-nanny", + "resources": { + "limits": { + "cpu": "50m", + "memory": "90Mi" + }, + "requests": { + "cpu": "50m", + "memory": "90Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/etc/config", + "name": "heapster-config-volume" + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "heapster", + "serviceAccountName": "heapster", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + } + ], + "volumes": [ + { + "configMap": { + "defaultMode": 420, + "name": "heapster-config" + }, + "name": "heapster-config-volume" + } + ] + } + } + }, + "status": { + "availableReplicas": 1, + "conditions": [ + { + "lastTransitionTime": "2019-08-23T17:12:00Z", + "lastUpdateTime": "2019-08-23T17:12:00Z", + "message": "Deployment has minimum availability.", + "reason": "MinimumReplicasAvailable", + "status": "True", + "type": "Available" + } + ], + "observedGeneration": 2, + "readyReplicas": 1, + "replicas": 1, + "updatedReplicas": 1 + } + }, + { + "apiVersion": "extensions/v1beta1", + "kind": "Deployment", + "metadata": { + "annotations": { + "deployment.kubernetes.io/revision": "5", + "kubectl.kubernetes.io/last-applied-configuration": 
"{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"kube-dns-autoscaler\",\"kubernetes.io/cluster-service\":\"true\"},\"name\":\"kube-dns-autoscaler\",\"namespace\":\"kube-system\"},\"spec\":{\"selector\":{\"matchLabels\":{\"k8s-app\":\"kube-dns-autoscaler\"}},\"template\":{\"metadata\":{\"annotations\":{\"scheduler.alpha.kubernetes.io/critical-pod\":\"\",\"seccomp.security.alpha.kubernetes.io/pod\":\"docker/default\"},\"labels\":{\"k8s-app\":\"kube-dns-autoscaler\"}},\"spec\":{\"containers\":[{\"command\":[\"/cluster-proportional-autoscaler\",\"--namespace=kube-system\",\"--configmap=kube-dns-autoscaler\",\"--target=deployment/kube-dns-v20\",\"--default-params={\\\"ladder\\\":{\\\"coresToReplicas\\\":[[1,2],[512,3],[1024,4],[2048,5]],\\\"nodesToReplicas\\\":[[1,2],[8,3],[16,4],[32,5]]}}\",\"--logtostderr=true\",\"--v=2\"],\"image\":\"aksrepos.azurecr.io/mirror/cluster-proportional-autoscaler-amd64:1.1.2-r2\",\"name\":\"autoscaler\",\"resources\":{\"requests\":{\"cpu\":\"20m\",\"memory\":\"10Mi\"}}}],\"dnsPolicy\":\"Default\",\"imagePullSecrets\":[{\"name\":\"emptyacrsecret\"}],\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"kube-dns-autoscaler\",\"tolerations\":[{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Exists\"}]}}}}\n" + }, + "creationTimestamp": "2019-03-12T16:38:30Z", + "generation": 5, + "labels": { + "addonmanager.kubernetes.io/mode": "Reconcile", + "k8s-app": "kube-dns-autoscaler", + "kubernetes.io/cluster-service": "true" + }, + "name": "kube-dns-autoscaler", + "namespace": "kube-system", + "resourceVersion": "15144046", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/kube-dns-autoscaler", + "uid": "4509acaf-44e5-11e9-9920-423525a6b683" + }, + "spec": { + "progressDeadlineSeconds": 2147483647, + "replicas": 1, + "revisionHistoryLimit": 10, + "selector": { + "matchLabels": { + 
"k8s-app": "kube-dns-autoscaler" + } + }, + "strategy": { + "rollingUpdate": { + "maxSurge": 1, + "maxUnavailable": 1 + }, + "type": "RollingUpdate" + }, + "template": { + "metadata": { + "annotations": { + "scheduler.alpha.kubernetes.io/critical-pod": "", + "seccomp.security.alpha.kubernetes.io/pod": "docker/default" + }, + "creationTimestamp": null, + "labels": { + "k8s-app": "kube-dns-autoscaler" + } + }, + "spec": { + "containers": [ + { + "command": [ + "/cluster-proportional-autoscaler", + "--namespace=kube-system", + "--configmap=kube-dns-autoscaler", + "--target=deployment/kube-dns-v20", + "--default-params={\"ladder\":{\"coresToReplicas\":[[1,2],[512,3],[1024,4],[2048,5]],\"nodesToReplicas\":[[1,2],[8,3],[16,4],[32,5]]}}", + "--logtostderr=true", + "--v=2" + ], + "image": "aksrepos.azurecr.io/mirror/cluster-proportional-autoscaler-amd64:1.1.2-r2", + "imagePullPolicy": "IfNotPresent", + "name": "autoscaler", + "resources": { + "requests": { + "cpu": "20m", + "memory": "10Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File" + } + ], + "dnsPolicy": "Default", + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "kube-dns-autoscaler", + "serviceAccountName": "kube-dns-autoscaler", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + } + ] + } + } + }, + "status": { + "availableReplicas": 1, + "conditions": [ + { + "lastTransitionTime": "2019-03-12T16:38:30Z", + "lastUpdateTime": "2019-03-12T16:38:30Z", + "message": "Deployment has minimum availability.", + "reason": "MinimumReplicasAvailable", + "status": "True", + "type": "Available" + } + ], + "observedGeneration": 5, + "readyReplicas": 1, + "replicas": 1, + "updatedReplicas": 1 + } + }, + { + "apiVersion": 
"extensions/v1beta1", + "kind": "Deployment", + "metadata": { + "annotations": { + "deployment.kubernetes.io/revision": "6", + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"kube-dns\",\"kubernetes.io/cluster-service\":\"true\",\"version\":\"v20\"},\"name\":\"kube-dns-v20\",\"namespace\":\"kube-system\"},\"spec\":{\"selector\":{\"matchLabels\":{\"k8s-app\":\"kube-dns\",\"version\":\"v20\"}},\"template\":{\"metadata\":{\"annotations\":{\"prometheus.io/port\":\"10055\",\"prometheus.io/scrape\":\"true\"},\"labels\":{\"k8s-app\":\"kube-dns\",\"kubernetes.io/cluster-service\":\"true\",\"version\":\"v20\"}},\"spec\":{\"affinity\":{\"nodeAffinity\":{\"requiredDuringSchedulingIgnoredDuringExecution\":{\"nodeSelectorTerms\":[{\"labelSelector\":null,\"matchExpressions\":[{\"key\":\"kubernetes.azure.com/cluster\",\"operator\":\"Exists\"}]}]}},\"podAntiAffinity\":{\"preferredDuringSchedulingIgnoredDuringExecution\":[{\"podAffinityTerm\":{\"labelSelector\":{\"matchExpressions\":[{\"key\":\"k8s-app\",\"operator\":\"In\",\"values\":[\"kube-dns\"]}]},\"topologyKey\":\"kubernetes.io/hostname\"},\"weight\":100}]}},\"containers\":[{\"args\":[\"--kubecfg-file=/config/kubeconfig\",\"--config-dir=/kube-dns-config\",\"--domain=cluster.local.\",\"--dns-port=10053\",\"--v=2\"],\"env\":[{\"name\":\"PROMETHEUS_PORT\",\"value\":\"10055\"}],\"image\":\"aksrepos.azurecr.io/mirror/k8s-dns-kube-dns-amd64:1.14.13\",\"livenessProbe\":{\"failureThreshold\":5,\"httpGet\":{\"path\":\"/healthcheck/kubedns\",\"port\":10054,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":60,\"successThreshold\":1,\"timeoutSeconds\":5},\"name\":\"kubedns\",\"ports\":[{\"containerPort\":10053,\"name\":\"dns-local\",\"protocol\":\"UDP\"},{\"containerPort\":10053,\"name\":\"dns-tcp-local\",\"protocol\":\"TCP\"},{\"containerPort\":10055,\"name\":\"metrics
\",\"protocol\":\"TCP\"}],\"readinessProbe\":{\"httpGet\":{\"path\":\"/readiness\",\"port\":8081,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":30,\"timeoutSeconds\":5},\"resources\":{\"limits\":{\"memory\":\"170Mi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"70Mi\"}},\"volumeMounts\":[{\"mountPath\":\"/kube-dns-config\",\"name\":\"kube-dns-config\"},{\"mountPath\":\"/config\",\"name\":\"kubedns-kubecfg\",\"readOnly\":true}]},{\"args\":[\"-v=2\",\"-logtostderr\",\"-configDir=/kube-dns-config\",\"-restartDnsmasq=true\",\"--\",\"-k\",\"--cache-size=1000\",\"--no-negcache\",\"--no-resolv\",\"--server=127.0.0.1#10053\",\"--server=/cluster.local/127.0.0.1#10053\",\"--server=/in-addr.arpa/127.0.0.1#10053\",\"--server=/ip6.arpa/127.0.0.1#10053\",\"--log-facility=-\"],\"image\":\"aksrepos.azurecr.io/mirror/k8s-dns-dnsmasq-nanny-amd64:1.14.10\",\"name\":\"dnsmasq\",\"ports\":[{\"containerPort\":53,\"name\":\"dns\",\"protocol\":\"UDP\"},{\"containerPort\":53,\"name\":\"dns-tcp\",\"protocol\":\"TCP\"}],\"volumeMounts\":[{\"mountPath\":\"/kube-dns-config\",\"name\":\"kube-dns-config\"}]},{\"args\":[\"--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1 \\u003e/dev/null || exit 1; done\",\"--url=/healthz-dnsmasq\",\"--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1:10053 \\u003e/dev/null || exit 1; done\",\"--url=/healthz-kubedns\",\"--port=8080\",\"--quiet\"],\"env\":[{\"name\":\"PROBE_DOMAINS\",\"value\":\"bing.com 
kubernetes.default.svc.cluster.local\"}],\"image\":\"aksrepos.azurecr.io/mirror/exechealthz-amd64:1.2\",\"livenessProbe\":{\"failureThreshold\":5,\"httpGet\":{\"path\":\"/healthz-dnsmasq\",\"port\":8080,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":60,\"successThreshold\":1,\"timeoutSeconds\":5},\"name\":\"healthz\",\"ports\":[{\"containerPort\":8080,\"protocol\":\"TCP\"}],\"resources\":{\"limits\":{\"memory\":\"50Mi\"},\"requests\":{\"cpu\":\"10m\",\"memory\":\"50Mi\"}}},{\"args\":[\"--v=2\",\"--logtostderr\",\"--probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.cluster.local,5,SRV\",\"--probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.cluster.local,5,SRV\"],\"image\":\"aksrepos.azurecr.io/mirror/k8s-dns-sidecar-amd64:1.14.10\",\"livenessProbe\":{\"httpGet\":{\"path\":\"/metrics\",\"port\":10054,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":60,\"successThreshold\":1,\"timeoutSeconds\":5},\"name\":\"sidecar\",\"ports\":[{\"containerPort\":10054,\"name\":\"metrics\",\"protocol\":\"TCP\"}],\"resources\":{\"requests\":{\"cpu\":\"10m\",\"memory\":\"20Mi\"}}}],\"dnsPolicy\":\"Default\",\"imagePullSecrets\":[{\"name\":\"emptyacrsecret\"}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"kube-dns\",\"tolerations\":[{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Exists\"}],\"volumes\":[{\"configMap\":{\"name\":\"kube-dns\",\"optional\":true},\"name\":\"kube-dns-config\"},{\"configMap\":{\"name\":\"kubedns-kubecfg\"},\"name\":\"kubedns-kubecfg\"}]}}}}\n" + }, + "creationTimestamp": "2019-03-12T16:38:30Z", + "generation": 7, + "labels": { + "addonmanager.kubernetes.io/mode": "Reconcile", + "k8s-app": "kube-dns", + "kubernetes.io/cluster-service": "true", + "version": "v20" + }, + "name": "kube-dns-v20", + "namespace": "kube-system", + "resourceVersion": "15144054", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/kube-dns-v20", + "uid": 
"4523fcd7-44e5-11e9-9920-423525a6b683" + }, + "spec": { + "progressDeadlineSeconds": 2147483647, + "replicas": 2, + "revisionHistoryLimit": 10, + "selector": { + "matchLabels": { + "k8s-app": "kube-dns", + "version": "v20" + } + }, + "strategy": { + "rollingUpdate": { + "maxSurge": 1, + "maxUnavailable": 1 + }, + "type": "RollingUpdate" + }, + "template": { + "metadata": { + "annotations": { + "prometheus.io/port": "10055", + "prometheus.io/scrape": "true" + }, + "creationTimestamp": null, + "labels": { + "k8s-app": "kube-dns", + "kubernetes.io/cluster-service": "true", + "version": "v20" + } + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + }, + "podAntiAffinity": { + "preferredDuringSchedulingIgnoredDuringExecution": [ + { + "podAffinityTerm": { + "labelSelector": { + "matchExpressions": [ + { + "key": "k8s-app", + "operator": "In", + "values": [ + "kube-dns" + ] + } + ] + }, + "topologyKey": "kubernetes.io/hostname" + }, + "weight": 100 + } + ] + } + }, + "containers": [ + { + "args": [ + "--kubecfg-file=/config/kubeconfig", + "--config-dir=/kube-dns-config", + "--domain=cluster.local.", + "--dns-port=10053", + "--v=2" + ], + "env": [ + { + "name": "PROMETHEUS_PORT", + "value": "10055" + } + ], + "image": "aksrepos.azurecr.io/mirror/k8s-dns-kube-dns-amd64:1.14.13", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "failureThreshold": 5, + "httpGet": { + "path": "/healthcheck/kubedns", + "port": 10054, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "periodSeconds": 10, + "successThreshold": 1, + "timeoutSeconds": 5 + }, + "name": "kubedns", + "ports": [ + { + "containerPort": 10053, + "name": "dns-local", + "protocol": "UDP" + }, + { + "containerPort": 10053, + "name": "dns-tcp-local", + "protocol": "TCP" + }, + { + "containerPort": 10055, + "name": 
"metrics", + "protocol": "TCP" + } + ], + "readinessProbe": { + "failureThreshold": 3, + "httpGet": { + "path": "/readiness", + "port": 8081, + "scheme": "HTTP" + }, + "initialDelaySeconds": 30, + "periodSeconds": 10, + "successThreshold": 1, + "timeoutSeconds": 5 + }, + "resources": { + "limits": { + "memory": "170Mi" + }, + "requests": { + "cpu": "100m", + "memory": "70Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/kube-dns-config", + "name": "kube-dns-config" + }, + { + "mountPath": "/config", + "name": "kubedns-kubecfg", + "readOnly": true + } + ] + }, + { + "args": [ + "-v=2", + "-logtostderr", + "-configDir=/kube-dns-config", + "-restartDnsmasq=true", + "--", + "-k", + "--cache-size=1000", + "--no-negcache", + "--no-resolv", + "--server=127.0.0.1#10053", + "--server=/cluster.local/127.0.0.1#10053", + "--server=/in-addr.arpa/127.0.0.1#10053", + "--server=/ip6.arpa/127.0.0.1#10053", + "--log-facility=-" + ], + "image": "aksrepos.azurecr.io/mirror/k8s-dns-dnsmasq-nanny-amd64:1.14.10", + "imagePullPolicy": "IfNotPresent", + "name": "dnsmasq", + "ports": [ + { + "containerPort": 53, + "name": "dns", + "protocol": "UDP" + }, + { + "containerPort": 53, + "name": "dns-tcp", + "protocol": "TCP" + } + ], + "resources": {}, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/kube-dns-config", + "name": "kube-dns-config" + } + ] + }, + { + "args": [ + "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1 \u003e/dev/null || exit 1; done", + "--url=/healthz-dnsmasq", + "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1:10053 \u003e/dev/null || exit 1; done", + "--url=/healthz-kubedns", + "--port=8080", + "--quiet" + ], + "env": [ + { + "name": "PROBE_DOMAINS", + "value": "bing.com kubernetes.default.svc.cluster.local" + } + ], + "image": "aksrepos.azurecr.io/mirror/exechealthz-amd64:1.2", + 
"imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "failureThreshold": 5, + "httpGet": { + "path": "/healthz-dnsmasq", + "port": 8080, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "periodSeconds": 10, + "successThreshold": 1, + "timeoutSeconds": 5 + }, + "name": "healthz", + "ports": [ + { + "containerPort": 8080, + "protocol": "TCP" + } + ], + "resources": { + "limits": { + "memory": "50Mi" + }, + "requests": { + "cpu": "10m", + "memory": "50Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File" + }, + { + "args": [ + "--v=2", + "--logtostderr", + "--probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.cluster.local,5,SRV", + "--probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.cluster.local,5,SRV" + ], + "image": "aksrepos.azurecr.io/mirror/k8s-dns-sidecar-amd64:1.14.10", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "failureThreshold": 3, + "httpGet": { + "path": "/metrics", + "port": 10054, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "periodSeconds": 10, + "successThreshold": 1, + "timeoutSeconds": 5 + }, + "name": "sidecar", + "ports": [ + { + "containerPort": 10054, + "name": "metrics", + "protocol": "TCP" + } + ], + "resources": { + "requests": { + "cpu": "10m", + "memory": "20Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File" + } + ], + "dnsPolicy": "Default", + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "kube-dns", + "serviceAccountName": "kube-dns", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + } + ], + "volumes": [ + { + "configMap": { + "defaultMode": 420, + "name": "kube-dns", + "optional": true + }, + 
"name": "kube-dns-config" + }, + { + "configMap": { + "defaultMode": 420, + "name": "kubedns-kubecfg" + }, + "name": "kubedns-kubecfg" + } + ] + } + } + }, + "status": { + "availableReplicas": 2, + "conditions": [ + { + "lastTransitionTime": "2019-07-23T14:46:03Z", + "lastUpdateTime": "2019-07-23T14:46:03Z", + "message": "Deployment has minimum availability.", + "reason": "MinimumReplicasAvailable", + "status": "True", + "type": "Available" + } + ], + "observedGeneration": 7, + "readyReplicas": 2, + "replicas": 2, + "updatedReplicas": 2 + } + }, + { + "apiVersion": "extensions/v1beta1", + "kind": "Deployment", + "metadata": { + "annotations": { + "deployment.kubernetes.io/revision": "6", + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"kubernetes-dashboard\",\"kubernetes.io/cluster-service\":\"true\"},\"name\":\"kubernetes-dashboard\",\"namespace\":\"kube-system\"},\"spec\":{\"replicas\":1,\"strategy\":{\"rollingUpdate\":{\"maxSurge\":0,\"maxUnavailable\":1},\"type\":\"RollingUpdate\"},\"template\":{\"metadata\":{\"labels\":{\"k8s-app\":\"kubernetes-dashboard\",\"kubernetes.io/cluster-service\":\"true\"}},\"spec\":{\"affinity\":{\"nodeAffinity\":{\"requiredDuringSchedulingIgnoredDuringExecution\":{\"nodeSelectorTerms\":[{\"labelSelector\":null,\"matchExpressions\":[{\"key\":\"kubernetes.azure.com/cluster\",\"operator\":\"Exists\"}]}]}}},\"containers\":[{\"image\":\"aksrepos.azurecr.io/mirror/kubernetes-dashboard-amd64:v1.10.1\",\"livenessProbe\":{\"failureThreshold\":3,\"httpGet\":{\"path\":\"/\",\"port\":9090,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":30,\"periodSeconds\":10,\"successThreshold\":1,\"timeoutSeconds\":30},\"name\":\"main\",\"ports\":[{\"containerPort\":9090,\"name\":\"http\",\"protocol\":\"TCP\"}],\"resources\":{\"limits\":{\"cpu\":\"100m\",\"memory\":\"500Mi\"},\"requests\
":{\"cpu\":\"100m\",\"memory\":\"50Mi\"}}}],\"imagePullSecrets\":[{\"name\":\"emptyacrsecret\"}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"kubernetes-dashboard\",\"tolerations\":[{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Exists\"}]}}}}\n" + }, + "creationTimestamp": "2019-03-12T16:38:31Z", + "generation": 6, + "labels": { + "addonmanager.kubernetes.io/mode": "Reconcile", + "k8s-app": "kubernetes-dashboard", + "kubernetes.io/cluster-service": "true" + }, + "name": "kubernetes-dashboard", + "namespace": "kube-system", + "resourceVersion": "15831521", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/kubernetes-dashboard", + "uid": "45b9cc8d-44e5-11e9-9920-423525a6b683" + }, + "spec": { + "progressDeadlineSeconds": 2147483647, + "replicas": 1, + "revisionHistoryLimit": 10, + "selector": { + "matchLabels": { + "k8s-app": "kubernetes-dashboard", + "kubernetes.io/cluster-service": "true" + } + }, + "strategy": { + "rollingUpdate": { + "maxSurge": 0, + "maxUnavailable": 1 + }, + "type": "RollingUpdate" + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "k8s-app": "kubernetes-dashboard", + "kubernetes.io/cluster-service": "true" + } + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "image": "aksrepos.azurecr.io/mirror/kubernetes-dashboard-amd64:v1.10.1", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "failureThreshold": 3, + "httpGet": { + "path": "/", + "port": 9090, + "scheme": "HTTP" + }, + "initialDelaySeconds": 30, + "periodSeconds": 10, + "successThreshold": 1, + "timeoutSeconds": 30 + }, + "name": "main", + "ports": [ + { + "containerPort": 9090, + "name": "http", + "protocol": "TCP" 
+ } + ], + "resources": { + "limits": { + "cpu": "100m", + "memory": "500Mi" + }, + "requests": { + "cpu": "100m", + "memory": "50Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File" + } + ], + "dnsPolicy": "ClusterFirst", + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "kubernetes-dashboard", + "serviceAccountName": "kubernetes-dashboard", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + } + ] + } + } + }, + "status": { + "availableReplicas": 1, + "conditions": [ + { + "lastTransitionTime": "2019-03-12T16:38:32Z", + "lastUpdateTime": "2019-03-12T16:38:32Z", + "message": "Deployment has minimum availability.", + "reason": "MinimumReplicasAvailable", + "status": "True", + "type": "Available" + } + ], + "observedGeneration": 6, + "readyReplicas": 1, + "replicas": 1, + "updatedReplicas": 1 + } + }, + { + "apiVersion": "extensions/v1beta1", + "kind": "Deployment", + "metadata": { + "annotations": { + "deployment.kubernetes.io/revision": "5", + "kubectl.kubernetes.io/last-applied-configuration": 
"{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"k8s-app\":\"metrics-server\",\"kubernetes.io/cluster-service\":\"true\"},\"name\":\"metrics-server\",\"namespace\":\"kube-system\"},\"spec\":{\"selector\":{\"matchLabels\":{\"k8s-app\":\"metrics-server\"}},\"template\":{\"metadata\":{\"labels\":{\"k8s-app\":\"metrics-server\"},\"name\":\"metrics-server\"},\"spec\":{\"affinity\":{\"nodeAffinity\":{\"requiredDuringSchedulingIgnoredDuringExecution\":{\"nodeSelectorTerms\":[{\"labelSelector\":null,\"matchExpressions\":[{\"key\":\"kubernetes.azure.com/cluster\",\"operator\":\"Exists\"}]}]}}},\"containers\":[{\"command\":[\"/metrics-server\",\"--source=kubernetes.summary_api:''\"],\"image\":\"aksrepos.azurecr.io/mirror/metrics-server-amd64:v0.2.1\",\"imagePullPolicy\":\"IfNotPresent\",\"name\":\"metrics-server\"}],\"imagePullSecrets\":[{\"name\":\"emptyacrsecret\"}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"priorityClassName\":\"system-node-critical\",\"serviceAccountName\":\"metrics-server\",\"tolerations\":[{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Exists\"}]}}}}\n" + }, + "creationTimestamp": "2019-03-12T16:38:31Z", + "generation": 5, + "labels": { + "addonmanager.kubernetes.io/mode": "Reconcile", + "k8s-app": "metrics-server", + "kubernetes.io/cluster-service": "true" + }, + "name": "metrics-server", + "namespace": "kube-system", + "resourceVersion": "15144043", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/metrics-server", + "uid": "45556857-44e5-11e9-9920-423525a6b683" + }, + "spec": { + "progressDeadlineSeconds": 2147483647, + "replicas": 1, + "revisionHistoryLimit": 10, + "selector": { + "matchLabels": { + "k8s-app": "metrics-server" + } + }, + "strategy": { + "rollingUpdate": { + "maxSurge": 1, + "maxUnavailable": 1 + }, + "type": "RollingUpdate" + }, + "template": { + "metadata": { + "creationTimestamp": 
null, + "labels": { + "k8s-app": "metrics-server" + }, + "name": "metrics-server" + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "command": [ + "/metrics-server", + "--source=kubernetes.summary_api:''" + ], + "image": "aksrepos.azurecr.io/mirror/metrics-server-amd64:v0.2.1", + "imagePullPolicy": "IfNotPresent", + "name": "metrics-server", + "resources": {}, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File" + } + ], + "dnsPolicy": "ClusterFirst", + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "metrics-server", + "serviceAccountName": "metrics-server", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + } + ] + } + } + }, + "status": { + "availableReplicas": 1, + "conditions": [ + { + "lastTransitionTime": "2019-03-12T16:38:31Z", + "lastUpdateTime": "2019-03-12T16:38:31Z", + "message": "Deployment has minimum availability.", + "reason": "MinimumReplicasAvailable", + "status": "True", + "type": "Available" + } + ], + "observedGeneration": 5, + "readyReplicas": 1, + "replicas": 1, + "updatedReplicas": 1 + } + }, + { + "apiVersion": "extensions/v1beta1", + "kind": "Deployment", + "metadata": { + "annotations": { + "deployment.kubernetes.io/revision": "7", + "kubectl.kubernetes.io/last-applied-configuration": 
"{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"name\":\"omsagent-rs\",\"namespace\":\"kube-system\"},\"spec\":{\"replicas\":1,\"selector\":{\"matchLabels\":{\"rsName\":\"omsagent-rs\"}},\"strategy\":{\"type\":\"RollingUpdate\"},\"template\":{\"metadata\":{\"annotations\":{\"agentVersion\":\"1.10.0.1\",\"dockerProviderVersion\":\"6.0.0-0\",\"schema-versions\":\"v1\"},\"labels\":{\"rsName\":\"omsagent-rs\"}},\"spec\":{\"containers\":[{\"env\":[{\"name\":\"AKS_RESOURCE_ID\",\"value\":\"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test\"},{\"name\":\"AKS_REGION\",\"value\":\"eastus\"},{\"name\":\"CONTROLLER_TYPE\",\"value\":\"ReplicaSet\"},{\"name\":\"NODE_IP\",\"valueFrom\":{\"fieldRef\":{\"fieldPath\":\"status.hostIP\"}}}],\"image\":\"mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019\",\"imagePullPolicy\":\"IfNotPresent\",\"livenessProbe\":{\"exec\":{\"command\":[\"/bin/bash\",\"-c\",\"/opt/livenessprobe.sh\"]},\"initialDelaySeconds\":60,\"periodSeconds\":60},\"name\":\"omsagent\",\"ports\":[{\"containerPort\":25225,\"protocol\":\"TCP\"},{\"containerPort\":25224,\"protocol\":\"UDP\"},{\"containerPort\":25227,\"name\":\"in-rs-tcp\",\"protocol\":\"TCP\"}],\"resources\":{\"limits\":{\"cpu\":\"150m\",\"memory\":\"500Mi\"},\"requests\":{\"cpu\":\"110m\",\"memory\":\"250Mi\"}},\"securityContext\":{\"privileged\":true},\"volumeMounts\":[{\"mountPath\":\"/var/run/host\",\"name\":\"docker-sock\"},{\"mountPath\":\"/var/log\",\"name\":\"host-log\"},{\"mountPath\":\"/var/lib/docker/containers\",\"name\":\"containerlog-path\"},{\"mountPath\":\"/etc/kubernetes/host\",\"name\":\"azure-json-path\"},{\"mountPath\":\"/etc/omsagent-secret\",\"name\":\"omsagent-secret\",\"readOnly\":true},{\"mountPath\":\"/etc/config\",\"name\":\"omsagent-rs-config\"},{\"mountPath\":\"/etc/config/settings\",\"name\":\"settings
-vol-config\",\"readOnly\":true}]}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\",\"kubernetes.io/role\":\"agent\"},\"serviceAccountName\":\"omsagent\",\"volumes\":[{\"hostPath\":{\"path\":\"/var/run\"},\"name\":\"docker-sock\"},{\"hostPath\":{\"path\":\"/etc/hostname\"},\"name\":\"container-hostname\"},{\"hostPath\":{\"path\":\"/var/log\"},\"name\":\"host-log\"},{\"hostPath\":{\"path\":\"/var/lib/docker/containers\"},\"name\":\"containerlog-path\"},{\"hostPath\":{\"path\":\"/etc/kubernetes\"},\"name\":\"azure-json-path\"},{\"name\":\"omsagent-secret\",\"secret\":{\"secretName\":\"omsagent-secret\"}},{\"configMap\":{\"name\":\"omsagent-rs-config\"},\"name\":\"omsagent-rs-config\"},{\"configMap\":{\"name\":\"container-azm-ms-agentconfig\",\"optional\":true},\"name\":\"settings-vol-config\"}]}}}}\n" + }, + "creationTimestamp": "2019-08-19T22:44:22Z", + "generation": 7, + "labels": { + "rsName": "omsagent-rs" + }, + "name": "omsagent-rs", + "namespace": "kube-system", + "resourceVersion": "19063500", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/omsagent-rs", + "uid": "e32d7e82-c2d2-11e9-8736-86290fd7dd1f" + }, + "spec": { + "progressDeadlineSeconds": 2147483647, + "replicas": 1, + "revisionHistoryLimit": 10, + "selector": { + "matchLabels": { + "rsName": "omsagent-rs" + } + }, + "strategy": { + "rollingUpdate": { + "maxSurge": 1, + "maxUnavailable": 1 + }, + "type": "RollingUpdate" + }, + "template": { + "metadata": { + "annotations": { + "agentVersion": "1.10.0.1", + "dockerProviderVersion": "6.0.0-0", + "schema-versions": "v1" + }, + "creationTimestamp": null, + "labels": { + "rsName": "omsagent-rs" + } + }, + "spec": { + "containers": [ + { + "env": [ + { + "name": "AKS_RESOURCE_ID", + "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "name": "AKS_REGION", + "value": "eastus" + }, + { + "name": 
"CONTROLLER_TYPE", + "value": "ReplicaSet" + }, + { + "name": "NODE_IP", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "status.hostIP" + } + } + } + ], + "image": "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "exec": { + "command": [ + "/bin/bash", + "-c", + "/opt/livenessprobe.sh" + ] + }, + "failureThreshold": 3, + "initialDelaySeconds": 60, + "periodSeconds": 60, + "successThreshold": 1, + "timeoutSeconds": 1 + }, + "name": "omsagent", + "ports": [ + { + "containerPort": 25225, + "protocol": "TCP" + }, + { + "containerPort": 25224, + "protocol": "UDP" + }, + { + "containerPort": 25227, + "name": "in-rs-tcp", + "protocol": "TCP" + } + ], + "resources": { + "limits": { + "cpu": "150m", + "memory": "500Mi" + }, + "requests": { + "cpu": "110m", + "memory": "250Mi" + } + }, + "securityContext": { + "privileged": true + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/run/host", + "name": "docker-sock" + }, + { + "mountPath": "/var/log", + "name": "host-log" + }, + { + "mountPath": "/var/lib/docker/containers", + "name": "containerlog-path" + }, + { + "mountPath": "/etc/kubernetes/host", + "name": "azure-json-path" + }, + { + "mountPath": "/etc/omsagent-secret", + "name": "omsagent-secret", + "readOnly": true + }, + { + "mountPath": "/etc/config", + "name": "omsagent-rs-config" + }, + { + "mountPath": "/etc/config/settings", + "name": "settings-vol-config", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "nodeSelector": { + "beta.kubernetes.io/os": "linux", + "kubernetes.io/role": "agent" + }, + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "omsagent", + "serviceAccountName": "omsagent", + "terminationGracePeriodSeconds": 30, + "volumes": [ + { + "hostPath": { + "path": "/var/run", + 
"type": "" + }, + "name": "docker-sock" + }, + { + "hostPath": { + "path": "/etc/hostname", + "type": "" + }, + "name": "container-hostname" + }, + { + "hostPath": { + "path": "/var/log", + "type": "" + }, + "name": "host-log" + }, + { + "hostPath": { + "path": "/var/lib/docker/containers", + "type": "" + }, + "name": "containerlog-path" + }, + { + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + }, + "name": "azure-json-path" + }, + { + "name": "omsagent-secret", + "secret": { + "defaultMode": 420, + "secretName": "omsagent-secret" + } + }, + { + "configMap": { + "defaultMode": 420, + "name": "omsagent-rs-config" + }, + "name": "omsagent-rs-config" + }, + { + "configMap": { + "defaultMode": 420, + "name": "container-azm-ms-agentconfig", + "optional": true + }, + "name": "settings-vol-config" + } + ] + } + } + }, + "status": { + "availableReplicas": 1, + "conditions": [ + { + "lastTransitionTime": "2019-08-19T22:44:22Z", + "lastUpdateTime": "2019-08-19T22:44:22Z", + "message": "Deployment has minimum availability.", + "reason": "MinimumReplicasAvailable", + "status": "True", + "type": "Available" + } + ], + "observedGeneration": 7, + "readyReplicas": 1, + "replicas": 1, + "updatedReplicas": 1 + } + }, + { + "apiVersion": "extensions/v1beta1", + "kind": "Deployment", + "metadata": { + "annotations": { + "deployment.kubernetes.io/revision": "9", + "kubectl.kubernetes.io/last-applied-configuration": 
"{\"apiVersion\":\"extensions/v1beta1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"addonmanager.kubernetes.io/mode\":\"Reconcile\",\"component\":\"tunnel\",\"kubernetes.io/cluster-service\":\"true\",\"tier\":\"node\"},\"name\":\"tunnelfront\",\"namespace\":\"kube-system\"},\"spec\":{\"replicas\":1,\"selector\":{\"matchLabels\":{\"component\":\"tunnel\"}},\"template\":{\"metadata\":{\"labels\":{\"component\":\"tunnel\"}},\"spec\":{\"affinity\":{\"nodeAffinity\":{\"requiredDuringSchedulingIgnoredDuringExecution\":{\"nodeSelectorTerms\":[{\"labelSelector\":null,\"matchExpressions\":[{\"key\":\"kubernetes.azure.com/cluster\",\"operator\":\"Exists\"}]}]}}},\"containers\":[{\"env\":[{\"name\":\"OVERRIDE_TUNNEL_SERVER_NAME\",\"value\":\"t_dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io\"},{\"name\":\"TUNNEL_CLUSTERUSER_NAME\",\"value\":\"28957308\"},{\"name\":\"TUNNELGATEWAY_SERVER_NAME\",\"value\":\"dilipr-hea-dilipr-health-te-72c8e8-0b16acad.tun.eastus.azmk8s.io\"},{\"name\":\"TUNNELGATEWAY_SSH_PORT\",\"value\":\"22\"},{\"name\":\"TUNNELGATEWAY_TLS_PORT\",\"value\":\"443\"},{\"name\":\"KUBE_CONFIG\",\"value\":\"/etc/kubernetes/kubeconfig/kubeconfig\"}],\"image\":\"aksrepos.azurecr.io/prod/hcp-tunnel-front:v1.9.2-v4.0.7\",\"imagePullPolicy\":\"IfNotPresent\",\"livenessProbe\":{\"exec\":{\"command\":[\"/lib/tunnel-front/check-tunnel-connection.sh\"]},\"failureThreshold\":12,\"initialDelaySeconds\":10,\"periodSeconds\":60},\"name\":\"tunnel-front\",\"resources\":{\"requests\":{\"cpu\":\"10m\",\"memory\":\"64Mi\"}},\"securityContext\":{\"privileged\":true},\"volumeMounts\":[{\"mountPath\":\"/etc/kubernetes/kubeconfig\",\"name\":\"kubeconfig\",\"readOnly\":true},{\"mountPath\":\"/etc/kubernetes/certs\",\"name\":\"certificates\",\"readOnly\":true}]}],\"dnsPolicy\":\"Default\",\"imagePullSecrets\":[{\"name\":\"emptyacrsecret\"}],\"nodeSelector\":{\"beta.kubernetes.io/os\":\"linux\"},\"priorityClassName\":\"system-node-critical\",\"se
rviceAccountName\":\"tunnelfront\",\"tolerations\":[{\"key\":\"CriticalAddonsOnly\",\"operator\":\"Exists\"}],\"volumes\":[{\"configMap\":{\"name\":\"tunnelfront-kubecfg\",\"optional\":true},\"name\":\"kubeconfig\"},{\"hostPath\":{\"path\":\"/etc/kubernetes/certs\"},\"name\":\"certificates\"}]}}}}\n" + }, + "creationTimestamp": "2019-03-12T16:38:32Z", + "generation": 9, + "labels": { + "addonmanager.kubernetes.io/mode": "Reconcile", + "component": "tunnel", + "kubernetes.io/cluster-service": "true", + "tier": "node" + }, + "name": "tunnelfront", + "namespace": "kube-system", + "resourceVersion": "17628811", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/tunnelfront", + "uid": "45e524e6-44e5-11e9-9920-423525a6b683" + }, + "spec": { + "progressDeadlineSeconds": 2147483647, + "replicas": 1, + "revisionHistoryLimit": 10, + "selector": { + "matchLabels": { + "component": "tunnel" + } + }, + "strategy": { + "rollingUpdate": { + "maxSurge": 1, + "maxUnavailable": 1 + }, + "type": "RollingUpdate" + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "component": "tunnel" + } + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "env": [ + { + "name": "OVERRIDE_TUNNEL_SERVER_NAME", + "value": "t_dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "TUNNEL_CLUSTERUSER_NAME", + "value": "28957308" + }, + { + "name": "TUNNELGATEWAY_SERVER_NAME", + "value": "dilipr-hea-dilipr-health-te-72c8e8-0b16acad.tun.eastus.azmk8s.io" + }, + { + "name": "TUNNELGATEWAY_SSH_PORT", + "value": "22" + }, + { + "name": "TUNNELGATEWAY_TLS_PORT", + "value": "443" + }, + { + "name": "KUBE_CONFIG", + "value": "/etc/kubernetes/kubeconfig/kubeconfig" + } + ], + "image": 
"aksrepos.azurecr.io/prod/hcp-tunnel-front:v1.9.2-v4.0.7", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "exec": { + "command": [ + "/lib/tunnel-front/check-tunnel-connection.sh" + ] + }, + "failureThreshold": 12, + "initialDelaySeconds": 10, + "periodSeconds": 60, + "successThreshold": 1, + "timeoutSeconds": 1 + }, + "name": "tunnel-front", + "resources": { + "requests": { + "cpu": "10m", + "memory": "64Mi" + } + }, + "securityContext": { + "privileged": true + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/etc/kubernetes/kubeconfig", + "name": "kubeconfig", + "readOnly": true + }, + { + "mountPath": "/etc/kubernetes/certs", + "name": "certificates", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "Default", + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "tunnelfront", + "serviceAccountName": "tunnelfront", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + } + ], + "volumes": [ + { + "configMap": { + "defaultMode": 420, + "name": "tunnelfront-kubecfg", + "optional": true + }, + "name": "kubeconfig" + }, + { + "hostPath": { + "path": "/etc/kubernetes/certs", + "type": "" + }, + "name": "certificates" + } + ] + } + } + }, + "status": { + "availableReplicas": 1, + "conditions": [ + { + "lastTransitionTime": "2019-03-12T16:38:32Z", + "lastUpdateTime": "2019-03-12T16:38:32Z", + "message": "Deployment has minimum availability.", + "reason": "MinimumReplicasAvailable", + "status": "True", + "type": "Available" + } + ], + "observedGeneration": 9, + "readyReplicas": 1, + "replicas": 1, + "updatedReplicas": 1 + } + } + ], + "kind": "List", + "metadata": { + 
"resourceVersion": "", + "selfLink": "" + } +} diff --git a/test/code/plugin/health/health_container_cpu_memory_aggregator_spec.rb b/test/code/plugin/health/health_container_cpu_memory_aggregator_spec.rb new file mode 100644 index 000000000..074878fe2 --- /dev/null +++ b/test/code/plugin/health/health_container_cpu_memory_aggregator_spec.rb @@ -0,0 +1,190 @@ +require_relative '../test_helpers' +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } +include HealthModel + +describe 'HealthContainerCpuMemoryAggregator spec' do + + it 'dedupes and drops older records' do + formatted_records = JSON.parse'[{ + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/sidecar", + "CounterName": "memoryRssBytes", + "CounterValue": 14061568, + "Timestamp": "2019-08-23T23:13:39Z" + }, + { + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/952488f3-a1f2-11e9-8b08-d602e29755d5/sidecar", + "CounterName": "memoryRssBytes", + "CounterValue": 14061568, + "Timestamp": "2019-08-23T22:13:39Z" + }]' + + resources = HealthKubernetesResources.instance + nodes = JSON.parse(File.read(File.join(File.expand_path(File.dirname(__FILE__)),'nodes.json'))) + pods = JSON.parse(File.read(File.join(File.expand_path(File.dirname(__FILE__)),'pods.json'))) + deployments = JSON.parse(File.read(File.join(File.expand_path(File.dirname(__FILE__)),'deployments.json'))) + + resources.pod_inventory = pods + resources.node_inventory = nodes + resources.set_deployment_inventory(deployments) + resources.build_pod_uid_lookup #call this in in_kube_health every min + + cluster_labels = { + 
'container.azm.ms/cluster-region' => 'eastus', + 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', + 'container.azm.ms/cluster-name' => 'dilipr-health-test' + } + cluster_id = 'fake_cluster_id' + provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) + aggregator = HealthContainerCpuMemoryAggregator.new(resources, provider) + deduped_records = aggregator.dedupe_records(formatted_records) + deduped_records.size.must_equal 1 + deduped_records[0]["Timestamp"].must_equal "2019-08-23T23:13:39Z" + end + + it 'aggregates based on container name' do + file = File.read(File.join(File.expand_path(File.dirname(__FILE__)),'cadvisor_perf.json')) + records = JSON.parse(file) + records = records.select{|record| record['DataItems'][0]['ObjectName'] == 'K8SContainer'} + formatted_records = [] + formatter = HealthContainerCpuMemoryRecordFormatter.new + records.each{|record| + formatted_record = formatter.get_record_from_cadvisor_record(record) + formatted_records.push(formatted_record) + } + + resources = HealthKubernetesResources.instance + nodes = JSON.parse(File.read(File.join(File.expand_path(File.dirname(__FILE__)),'nodes.json'))) + pods = JSON.parse(File.read(File.join(File.expand_path(File.dirname(__FILE__)),'pods.json'))) + deployments = JSON.parse(File.read(File.join(File.expand_path(File.dirname(__FILE__)),'deployments.json'))) + + resources.pod_inventory = pods + resources.node_inventory = nodes + resources.set_deployment_inventory(deployments) + resources.build_pod_uid_lookup #call this in in_kube_health every min + + cluster_labels = { + 'container.azm.ms/cluster-region' => 'eastus', + 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', + 'container.azm.ms/cluster-name' 
=> 'dilipr-health-test' + } + + cluster_id = 'fake_cluster_id' + + provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) + + aggregator = HealthContainerCpuMemoryAggregator.new(resources, provider) + deduped_records = aggregator.dedupe_records(formatted_records) + aggregator.aggregate(deduped_records) + aggregator.compute_state + records = aggregator.get_records + records.size.must_equal 30 + #records have all the required details + records.each{|record| + record["Details"]["details"]["container"].wont_be_nil + record["Details"]["details"]["workload_name"].wont_be_nil + record["Details"]["details"]["workload_kind"].wont_be_nil + record["Details"]["details"]["namespace"].wont_be_nil + record["Details"]["timestamp"].wont_be_nil + record["Details"]["state"].wont_be_nil + record["MonitorTypeId"].wont_be_nil + record["MonitorInstanceId"].wont_be_nil + record["TimeFirstObserved"].wont_be_nil + record["TimeGenerated"].wont_be_nil + } + end + + it "calculates the state correctly" do + file = File.read(File.join(File.expand_path(File.dirname(__FILE__)),'cadvisor_perf.json')) + records = JSON.parse(file) + records = records.select{|record| record['DataItems'][0]['ObjectName'] == 'K8SContainer'} + formatted_records = [] + formatter = HealthContainerCpuMemoryRecordFormatter.new + records.each{|record| + formatted_record = formatter.get_record_from_cadvisor_record(record) + formatted_records.push(formatted_record) + } + + resources = HealthKubernetesResources.instance + nodes = JSON.parse(File.read(File.join(File.expand_path(File.dirname(__FILE__)),'nodes.json'))) + pods = JSON.parse(File.read(File.join(File.expand_path(File.dirname(__FILE__)),'pods.json'))) + deployments = JSON.parse(File.read(File.join(File.expand_path(File.dirname(__FILE__)),'deployments.json'))) + + resources.pod_inventory = pods + resources.node_inventory = nodes + 
resources.set_deployment_inventory(deployments) + resources.build_pod_uid_lookup #call this in in_kube_health every min + + cluster_labels = { + 'container.azm.ms/cluster-region' => 'eastus', + 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', + 'container.azm.ms/cluster-name' => 'dilipr-health-test' + } + + cluster_id = 'fake_cluster_id' + + provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) + + aggregator = HealthContainerCpuMemoryAggregator.new(resources, provider) + deduped_records = aggregator.dedupe_records(formatted_records) + aggregator.aggregate(deduped_records) + aggregator.compute_state + records = aggregator.get_records + + #omsagent has limit set. So its state should be set to pass. + #sidecar has no limit set. its state should be set to warning + omsagent_record = records.select{|r| r["MonitorTypeId"] == MonitorId::CONTAINER_CPU_MONITOR_ID && r["Details"]["details"]["container"] == "omsagent"}[0] + sidecar_record = records.select{|r| r["MonitorTypeId"] == MonitorId::CONTAINER_CPU_MONITOR_ID && r["Details"]["details"]["container"] == "sidecar"}[0] + omsagent_record['Details']['state'].must_equal HealthMonitorStates::PASS #limit is set + sidecar_record['Details']['state'].must_equal HealthMonitorStates::PASS + end + + + it "calculates the state as unknown when signals are missing" do + file = File.read(File.join(File.expand_path(File.dirname(__FILE__)),'cadvisor_perf.json')) + records = JSON.parse(file) + records = records.select{|record| record['DataItems'][0]['ObjectName'] == 'K8SContainer'} + formatted_records = [] + formatter = HealthContainerCpuMemoryRecordFormatter.new + records.each{|record| + formatted_record = formatter.get_record_from_cadvisor_record(record) + formatted_records.push(formatted_record) + } + + formatted_records = 
formatted_records.reject{|r| r["InstanceName"] == "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/69e68b21-c5df-11e9-8736-86290fd7dd1f/omsagent" && r["CounterName"] == "cpuUsageNanoCores"} + formatted_records = formatted_records.reject{|r| r["InstanceName"] == "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/b1e04e1c-c5df-11e9-8736-86290fd7dd1f/omsagent" && r["CounterName"] == "cpuUsageNanoCores"} + + resources = HealthKubernetesResources.instance + nodes = JSON.parse(File.read(File.join(File.expand_path(File.dirname(__FILE__)),'nodes.json'))) + pods = JSON.parse(File.read(File.join(File.expand_path(File.dirname(__FILE__)),'pods.json'))) + deployments = JSON.parse(File.read(File.join(File.expand_path(File.dirname(__FILE__)),'deployments.json'))) + + resources.pod_inventory = pods + resources.node_inventory = nodes + resources.set_deployment_inventory(deployments) + resources.build_pod_uid_lookup #call this in in_kube_health every min + + cluster_labels = { + 'container.azm.ms/cluster-region' => 'eastus', + 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', + 'container.azm.ms/cluster-name' => 'dilipr-health-test' + } + + cluster_id = 'fake_cluster_id' + + provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) + + aggregator = HealthContainerCpuMemoryAggregator.new(resources, provider) + deduped_records = aggregator.dedupe_records(formatted_records) + aggregator.aggregate(deduped_records) + aggregator.compute_state + records = aggregator.get_records + + #removed(missed) omsagent records should result in state being unknown + omsagent_record = 
records.select{|r| r["MonitorTypeId"] == MonitorId::CONTAINER_CPU_MONITOR_ID && r["Details"]["details"]["container"] == "omsagent" && !r["Details"]["details"]["workload_name"].include?("omsagent-rs") }[0] + omsagent_record['Details']['state'].must_equal HealthMonitorStates::UNKNOWN #limit is set + end +end \ No newline at end of file diff --git a/test/code/plugin/health/health_container_cpu_memory_record_formatter_spec.rb b/test/code/plugin/health/health_container_cpu_memory_record_formatter_spec.rb new file mode 100644 index 000000000..d01922bce --- /dev/null +++ b/test/code/plugin/health/health_container_cpu_memory_record_formatter_spec.rb @@ -0,0 +1,58 @@ +require_relative '../test_helpers' +Dir[File.join(File.expand_path(File.dirname(__FILE__)), "../../../../source/code/plugin/health/*.rb")].reject{|f| f.include?('health_monitor_utils')}.each { |file| require file } +include HealthModel +include Minitest + +describe "HealthContainerCpuMemoryRecordFormatter spec" do + it 'returns the record in expected format when cadvisor record is well formed' do + formatter = HealthContainerCpuMemoryRecordFormatter.new + cadvisor_record = JSON.parse('{ + "DataItems": [ + { + "Timestamp": "2019-08-01T23:19:19Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/6708e4ac-b49a-11e9-8a49-52a94e80d897/omsagent", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 85143552 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }') + record = formatter.get_record_from_cadvisor_record(cadvisor_record) + record.wont_equal nil + record["InstanceName"].must_equal 
"/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/6708e4ac-b49a-11e9-8a49-52a94e80d897/omsagent" + record["CounterName"].must_equal "memoryWorkingSetBytes" + record["CounterValue"].must_equal 85143552 + record["Timestamp"].must_equal "2019-08-01T23:19:19Z" + end + + it 'returns nil for invalid cadvisor record' do + formatter = HealthContainerCpuMemoryRecordFormatter.new + cadvisor_record = JSON.parse('{ + "DataItms": [ + { + "Timestamp": "2019-08-01T23:19:19Z", + "Host": "aks-nodepool1-19574989-2", + "ObjectName": "K8SContainer", + "InstanceName": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test/6708e4ac-b49a-11e9-8a49-52a94e80d897/omsagent", + "Collections": [ + { + "CounterName": "memoryWorkingSetBytes", + "Value": 85143552 + } + ] + } + ], + "DataType": "LINUX_PERF_BLOB", + "IPName": "LogManagement" + }') + record = formatter.get_record_from_cadvisor_record(cadvisor_record) + record.must_be_nil + end +end \ No newline at end of file diff --git a/test/code/plugin/health/health_kubernetes_resource_spec.rb b/test/code/plugin/health/health_kubernetes_resource_spec.rb index c27d969ec..dbeec4858 100644 --- a/test/code/plugin/health/health_kubernetes_resource_spec.rb +++ b/test/code/plugin/health/health_kubernetes_resource_spec.rb @@ -207,7 +207,7 @@ resources = HealthKubernetesResources.instance resources.node_inventory = nodes resources.pod_inventory = pods - resources.deployment_inventory = deployments + resources.set_deployment_inventory(deployments) #act parsed_nodes = resources.get_nodes parsed_workloads = resources.get_workload_names @@ -217,6 +217,28 @@ assert_equal parsed_workloads.size, 3 assert_equal parsed_nodes, ['aks-nodepool1-19574989-0', 'aks-nodepool1-19574989-1'] - assert_equal parsed_workloads, ['default~~diliprdeploymentnodeapps', 
'default~~rss-site', 'kube-system~~kube-proxy'] + parsed_workloads.sort.must_equal ['default~~diliprdeploymentnodeapps', 'default~~rss-site', 'kube-system~~kube-proxy'].sort end + + # it 'builds the pod_uid lookup correctly' do + # #arrange + # f = File.read('C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json') + # nodes = JSON.parse(f) + # f = File.read('C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json') + # pods = JSON.parse(f) + # f = File.read('C:/Users/dilipr/desktop/health/container_cpu_memory/deployments.json') + # deployments = JSON.parse(f) + + # resources = HealthKubernetesResources.instance + + # resources.node_inventory = nodes + # resources.pod_inventory = pods + # resources.set_deployment_inventory(deployments) #resets deployment_lookup -- this was causing Unit test failures + + # resources.build_pod_uid_lookup + + # resources.pod_uid_lookup + # resources.workload_container_count + + # end end \ No newline at end of file diff --git a/test/code/plugin/health/health_model_builder_test.rb b/test/code/plugin/health/health_model_builder_test.rb index df921049c..a7c5e0927 100644 --- a/test/code/plugin/health/health_model_builder_test.rb +++ b/test/code/plugin/health/health_model_builder_test.rb @@ -64,10 +64,10 @@ def test_event_stream resources = HealthKubernetesResources.instance resources.node_inventory = node_inventory resources.pod_inventory = pod_inventory - resources.deployment_inventory = deployment_inventory + resources.set_deployment_inventory(deployment_inventory) workload_names = resources.get_workload_names - provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../..//installer/conf/healthmonitorconfig.json")) + provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) health_monitor_records = [] records.each do |record| @@ -334,4 +334,162 @@ def test_event_stream_aks_engine 
after_state.initialize_state(deserialized_state) end end + + def test_container_memory_cpu_with_model + health_definition_path = File.join(__dir__, '../../../../installer/conf/health_model_definition.json') + health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) + monitor_factory = MonitorFactory.new + hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) + # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side + state_finalizers = [AggregateMonitorStateFinalizer.new] + monitor_set = MonitorSet.new + model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) + + nodes_file_map = { + "first" => "C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json", + "second" => "C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json", + "third" => "C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json", + } + + pods_file_map = { + "first" => "C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json", + "second" => "C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json", + "third" => "C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json", + } + + cluster_labels = { + 'container.azm.ms/cluster-region' => 'eastus', + 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', + 'container.azm.ms/cluster-name' => 'dilipr-health-test' + } + + cluster_id = 'fake_cluster_id' + + #test + state = HealthMonitorState.new() + generator = HealthMissingSignalGenerator.new + + mock_data_path = "C:/Users/dilipr/desktop/health/container_cpu_memory/daemonset.json" + file = File.read(mock_data_path) + records = JSON.parse(file) + + node_inventory = 
JSON.parse(File.read("C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json")) + pod_inventory = JSON.parse(File.read("C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json")) + deployment_inventory = JSON.parse(File.read("C:/Users/dilipr/desktop/health/container_cpu_memory/deployments.json")) + resources = HealthKubernetesResources.instance + resources.node_inventory = node_inventory + resources.pod_inventory = pod_inventory + resources.set_deployment_inventory(deployment_inventory) + + workload_names = resources.get_workload_names + provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) + + + #container memory cpu records + file = File.read('C:/Users/dilipr/desktop/health/container_cpu_memory/cadvisor_perf.json') + cadvisor_records = JSON.parse(file) + cadvisor_records = cadvisor_records.select{|record| record['DataItems'][0]['ObjectName'] == 'K8SContainer'} + formatted_records = [] + formatter = HealthContainerCpuMemoryRecordFormatter.new + cadvisor_records.each{|record| + formatted_record = formatter.get_record_from_cadvisor_record(record) + formatted_records.push(formatted_record) + } + + resources.build_pod_uid_lookup #call this in in_kube_health every min + + cluster_labels = { + 'container.azm.ms/cluster-region' => 'eastus', + 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', + 'container.azm.ms/cluster-name' => 'dilipr-health-test' + } + + cluster_id = 'fake_cluster_id' + + aggregator = HealthContainerCpuMemoryAggregator.new(resources, provider) + deduped_records = aggregator.dedupe_records(formatted_records) + aggregator.aggregate(deduped_records) + aggregator.compute_state + container_cpu_memory_records = aggregator.get_records + + records.concat(container_cpu_memory_records) + + health_monitor_records = [] + records.each do |record| + 
monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] + monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] + health_monitor_record = HealthMonitorRecord.new( + record[HealthMonitorRecordFields::MONITOR_ID], + record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], + record[HealthMonitorRecordFields::DETAILS]["state"], + provider.get_labels(record), + provider.get_config(monitor_id), + record[HealthMonitorRecordFields::DETAILS] + ) + + state.update_state(health_monitor_record, + provider.get_config(health_monitor_record.monitor_id) + ) + + # get the health state based on the monitor's operational state + # update state calls updates the state of the monitor based on configuration and history of the the monitor records + health_monitor_record.state = state.get_state(monitor_instance_id).new_state + health_monitor_records.push(health_monitor_record) + #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + end + + #handle kube api down + kube_api_down_handler = HealthKubeApiDownHandler.new + health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records) + + # Dedupe daemonset signals + # Remove unit monitor signals for “gone” objects + reducer = HealthSignalReducer.new() + reduced_records = reducer.reduce_signals(health_monitor_records, resources) + + cluster_id = 'fake_cluster_id' + + #get the list of 'none' and 'unknown' signals + missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider) + #update state for missing signals + missing_signals.each{|signal| + state.update_state(signal, + provider.get_config(signal.monitor_id) + ) + } + generator.update_last_received_records(reduced_records) + reduced_records.push(*missing_signals) + + # build the health model + all_records = reduced_records + model_builder.process_records(all_records) + all_monitors = 
model_builder.finalize_model + + # update the state for aggregate monitors (unit monitors are updated above) + all_monitors.each{|monitor_instance_id, monitor| + if monitor.is_aggregate_monitor + state.update_state(monitor, + provider.get_config(monitor.monitor_id) + ) + end + + instance_state = state.get_state(monitor_instance_id) + #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + should_send = instance_state.should_send + + # always send cluster monitor as a heartbeat + if !should_send && monitor_instance_id != MonitorId::CLUSTER + all_monitors.delete(monitor_instance_id) + end + } + + records_to_send = [] + all_monitors.keys.each{|key| + record = provider.get_record(all_monitors[key], state) + #puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" + } + end end \ No newline at end of file diff --git a/test/code/plugin/health/nodes.json b/test/code/plugin/health/nodes.json new file mode 100644 index 000000000..f1721e076 --- /dev/null +++ b/test/code/plugin/health/nodes.json @@ -0,0 +1,1966 @@ +{ + "apiVersion": "v1", + "items": [ + { + "apiVersion": "v1", + "kind": "Node", + "metadata": { + "annotations": { + "node.alpha.kubernetes.io/ttl": "0", + "volumes.kubernetes.io/controller-managed-attach-detach": "true" + }, + "creationTimestamp": "2019-03-12T16:40:36Z", + "labels": { + "agentpool": "nodepool1", + "beta.kubernetes.io/arch": "amd64", + "beta.kubernetes.io/instance-type": "Standard_DS1_v2", + "beta.kubernetes.io/os": "linux", + "failure-domain.beta.kubernetes.io/region": "eastus", + "failure-domain.beta.kubernetes.io/zone": "0", + "kubernetes.azure.com/cluster": "MC_dilipr-health-test_dilipr-health-test_eastus", + "kubernetes.io/hostname": "aks-nodepool1-19574989-0", + "kubernetes.io/role": "agent", + "node-role.kubernetes.io/agent": "", + "storageprofile": "managed", + "storagetier": "Premium_LRS" + }, + "name": "aks-nodepool1-19574989-0", + "resourceVersion": 
"19068106", + "selfLink": "/api/v1/nodes/aks-nodepool1-19574989-0", + "uid": "9012b16c-44e5-11e9-9920-423525a6b683" + }, + "spec": { + "podCIDR": "10.244.1.0/24", + "providerID": "azure:///subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/MC_dilipr-health-test_dilipr-health-test_eastus/providers/Microsoft.Compute/virtualMachines/aks-nodepool1-19574989-0" + }, + "status": { + "addresses": [ + { + "address": "aks-nodepool1-19574989-0", + "type": "Hostname" + }, + { + "address": "10.240.0.4", + "type": "InternalIP" + } + ], + "allocatable": { + "cpu": "940m", + "ephemeral-storage": "28043041951", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "2504708Ki", + "pods": "110" + }, + "capacity": { + "cpu": "1", + "ephemeral-storage": "30428648Ki", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "3524612Ki", + "pods": "110" + }, + "conditions": [ + { + "lastHeartbeatTime": "2019-03-12T16:42:18Z", + "lastTransitionTime": "2019-03-12T16:42:18Z", + "message": "RouteController created a route", + "reason": "RouteCreated", + "status": "False", + "type": "NetworkUnavailable" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:22Z", + "lastTransitionTime": "2019-07-29T08:16:01Z", + "message": "kubelet has sufficient disk space available", + "reason": "KubeletHasSufficientDisk", + "status": "False", + "type": "OutOfDisk" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:22Z", + "lastTransitionTime": "2019-07-29T08:16:01Z", + "message": "kubelet has sufficient memory available", + "reason": "KubeletHasSufficientMemory", + "status": "False", + "type": "MemoryPressure" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:22Z", + "lastTransitionTime": "2019-07-29T08:16:01Z", + "message": "kubelet has no disk pressure", + "reason": "KubeletHasNoDiskPressure", + "status": "False", + "type": "DiskPressure" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:22Z", + "lastTransitionTime": "2019-03-12T16:40:36Z", + "message": "kubelet has sufficient PID 
available", + "reason": "KubeletHasSufficientPID", + "status": "False", + "type": "PIDPressure" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:22Z", + "lastTransitionTime": "2019-07-29T08:16:01Z", + "message": "kubelet is posting ready status. AppArmor enabled", + "reason": "KubeletReady", + "status": "True", + "type": "Ready" + } + ], + "daemonEndpoints": { + "kubeletEndpoint": { + "Port": 10250 + } + }, + "images": [ + { + "names": [ + "nickchase/rss-php-nginx@sha256:48da56a77fe4ecff4917121365d8e0ce615ebbdfe31f48a996255f5592894e2b", + "nickchase/rss-php-nginx:v1" + ], + "sizeBytes": 677038498 + }, + { + "names": [ + "rdilip83/logeverysecond@sha256:6fe5624808609c507178a77f94384fb9794a4d6b7d102ed8016a4baf608164a1", + "rdilip83/logeverysecond:v2" + ], + "sizeBytes": 674931590 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "k8s.gcr.io/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "aksrepos.azurecr.io/mirror/hyperkube-amd64:v1.11.8", + "k8s.gcr.io/hyperkube-amd64:v1.11.8" + ], + "sizeBytes": 615263658 + }, + { + "names": [ + "microsoft/oms@sha256:de83d1df24cb86a3a3110bd03abbd5704d7a7345565b1996f49ff001a3665385", + "microsoft/oms:healthpreview04262019" + ], + "sizeBytes": 514907213 + }, + { + "names": [ + "rdilip83/fixrubyerror@sha256:6b7f36cf6258b311015493ab025f06577d758c45bc5010d022ac160b9f40ea5d", + "rdilip83/fixrubyerror:latest" + ], + "sizeBytes": 494068028 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:69b420bdb4081293c37e2d0f8ad2e4054bd516f5c08c7512d6b695660a36eccf", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019" + ], + "sizeBytes": 494067935 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:fb2b90ce9bf7186fd9dfae97f5f72f9b9c80c8a0493af3cff74179cd4ff847c0", + 
"mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08212019" + ], + "sizeBytes": 494067572 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:c646e180483d295ffac114fb9df513db02553af7879681814d5910764653dd2d", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08202019" + ], + "sizeBytes": 494067210 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:c21b596a22a1338ed293d01681f327acc871ee502ed779ec1109d6a93375bb3b", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08192019" + ], + "sizeBytes": 494055088 + }, + { + "names": [ + "rdilip83/cifeatureprod08192019@sha256:7815bba9a805e4e8df33356fd532671de45525ce9c6e936e14f9b126e2097ecd", + "rdilip83/cifeatureprod08192019:v1" + ], + "sizeBytes": 494055088 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:59e34aab9f6e16a87e880b1ee1c9dd5434ee40dd29502e74aceefabf51443717", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:internaltesthealth08192019" + ], + "sizeBytes": 494053562 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:6387d0dedf4de0bab430f681ef61361f63a20e1c4c287a9b60ea5460283ac6cf", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ci_feature_prod_health08192019" + ], + "sizeBytes": 494053562 + }, + { + "names": [ + "rdilip83/hc08192019@sha256:014d936771508d499ac4c15043e23b16bce8de0019fb2048b99540cbe9084895", + "rdilip83/hc08192019:1" + ], + "sizeBytes": 494053562 + }, + { + "names": [ + "rdilip83/health-rc@sha256:8ad12bce5ffd27b301bc6fe4355c8affa6fce080ae7e2291dec3a0ed11bb9483", + "rdilip83/health-rc:3" + ], + "sizeBytes": 494052863 + }, + { + "names": [ + "rdilip83/health_ci_feature_image@sha256:1a574d25884483083e8cbaacbf0cb7c4e442dc736d480615c65f5c71f8969b13", + "rdilip83/health_ci_feature_image:v1" + ], + "sizeBytes": 494052147 + }, + { + "names": [ + 
"rdilip83/healthrc@sha256:816c8cef09822daf050a0fca6f92e7ac19147ff4bf1a823d43fe70f73470cc0c", + "rdilip83/healthrc:v3" + ], + "sizeBytes": 494052138 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:d35aac044d1adc3d02269fde78f8dfd923db94b81288447cf6fdd482970a333b", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:healthmerge08142019" + ], + "sizeBytes": 494052135 + }, + { + "names": [ + "rdilip83/healthrc@sha256:a130780e56ac0edb3ca29477e12edd5e9b5d08b5732dbd59ede9beb58e21eca7", + "rdilip83/healthrc:v2" + ], + "sizeBytes": 494051682 + }, + { + "names": [ + "rdilip83/healthmerge@sha256:24d270b0f59fb484c283922474736c3cba50f8aad0270bc0a3acd14284694eea", + "rdilip83/healthmerge:v8" + ], + "sizeBytes": 494010139 + }, + { + "names": [ + "rdilip83/health-rc@sha256:b1d24728eb808d301da426b76b7f7b79606204c4c2b695a24ac670be8276d55d", + "rdilip83/health-rc:1" + ], + "sizeBytes": 494000891 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:a0666957cccbfdf5784accd1133408bf017c28a6e694d9a2ae74da94eef2d285", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:healthpreview08012019" + ], + "sizeBytes": 493994261 + }, + { + "names": [ + "rdilip83/mergehealth@sha256:32c9b35a6809c54d5296e2ca2b122b35a4ad8c852622174cc5a9f92cc27e56e4", + "rdilip83/mergehealth:v3" + ], + "sizeBytes": 493988815 + }, + { + "names": [ + "rdilip83/mergehealth@sha256:a3521e8f36e007b3cb949e0356a75394ac61fd2024ca1ec4827b8d54fb068534", + "rdilip83/mergehealth:v1" + ], + "sizeBytes": 493981585 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:0438e4690e042b195917e160b8949aeb339520ee19c898a8bb9452f36d1f84f1", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:healthpreview07182019" + ], + "sizeBytes": 493977357 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:9ebc410a36856176921dba81b5bd43132469209b315f52be346690435419b9bb" + ], + "sizeBytes": 
493946790 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:4e51195a9c77bd166fc90ee5f6143a4604b502ab7ef0f06431dec10c341b10f3", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:healthpreview06272019" + ], + "sizeBytes": 493893635 + }, + { + "names": [ + "rdilip83/healthpreview06272019@sha256:d888ba5ff5e5810113a32f9c9812a5e28088cc81b902e95a185fe465a514029c", + "rdilip83/healthpreview06272019:latest" + ], + "sizeBytes": 493893633 + }, + { + "names": [ + "rdilip83/healthpreview06252019-1@sha256:1561876cffe94433a569f29f5231548e039193ebaa7ec640d22439675179e43f", + "rdilip83/healthpreview06252019-1:latest" + ], + "sizeBytes": 493887387 + }, + { + "names": [ + "rdilip83/healthpreview06252019@sha256:6597ff599a78ac452a4138dedb9e08c0ccd3e8b01594b033fd78ba9dbb41fe9e", + "rdilip83/healthpreview06252019:latest" + ], + "sizeBytes": 493887384 + }, + { + "names": [ + "rdilip83/healthpreview06242019@sha256:c4f565d92086d1ee56e6016178fed5c668352dc0ca0047f02910bdcb87a482c4", + "rdilip83/healthpreview06242019:latest" + ], + "sizeBytes": 493850850 + }, + { + "names": [ + "rdilip83/healthpreview06212019-1@sha256:937ce5801a0097a1cbc4eff5399c1973b4c6223ece9279b35207368b99f82b96", + "rdilip83/healthpreview06212019-1:latest" + ], + "sizeBytes": 493850674 + }, + { + "names": [ + "rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", + "rdilip83/healthpreview06192019:latest" + ], + "sizeBytes": 493799437 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:0f798cb7d56931b231f71e38e7fa5bf898b69e611247a566701f70a5f29a9799", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod07092019" + ], + "sizeBytes": 467692116 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:3734a084fa9681c7e930eb90cad45a8f282c24af63065a720a2327b1683f3ba4", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06142019" + ], + 
"sizeBytes": 466882569 + }, + { + "names": [ + "rdilip83/mergehealth@sha256:16402c34e2d7de72c2ebc18ec8e9f7933fa25f6a7f83bceb84483ba95e3902f7", + "rdilip83/mergehealth:v2" + ], + "sizeBytes": 448931997 + }, + { + "names": [ + "rdilip83/healthpreview06212019@sha256:5860c9caaf544f2e7c46edad5cdfb69e22398e20dc87cb8a4cd630b5b7000074", + "rdilip83/healthpreview06212019:latest" + ], + "sizeBytes": 448366491 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/hcp-tunnel-front@sha256:68878ee3ea1781b322ea3952c3370e31dd89be8bb0864e2bf27bdba6dc904c41", + "aksrepos.azurecr.io/prod/hcp-tunnel-front@sha256:68878ee3ea1781b322ea3952c3370e31dd89be8bb0864e2bf27bdba6dc904c41", + "aksrepos.azurecr.io/mirror/hcp-tunnel-front:v1.9.2-v4.0.7", + "aksrepos.azurecr.io/prod/hcp-tunnel-front:v1.9.2-v4.0.7" + ], + "sizeBytes": 383483267 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", + "k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", + "aksrepos.azurecr.io/mirror/kubernetes-dashboard-amd64:v1.10.1", + "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1" + ], + "sizeBytes": 121711221 + }, + { + "names": [ + "nginx@sha256:23b4dcdf0d34d4a129755fc6f52e1c6e23bb34ea011b315d87e193033bcd1b68" + ], + "sizeBytes": 109331233 + }, + { + "names": [ + "nginx@sha256:bdbf36b7f1f77ffe7bd2a32e59235dff6ecf131e3b6b5b96061c652f30685f3a", + "nginx:latest" + ], + "sizeBytes": 109258867 + }, + { + "names": [ + "nginx@sha256:b73f527d86e3461fd652f62cf47e7b375196063bbbd503e853af5be16597cb2e", + "nginx:1.15.5" + ], + "sizeBytes": 109083698 + }, + { + "names": [ + "debian@sha256:118cf8f3557e1ea766c02f36f05f6ac3e63628427ea8965fb861be904ec35a6f", + "debian:latest" + ], + "sizeBytes": 100594230 + }, + { + "names": [ + "nginx@sha256:e3456c851a152494c3e4ff5fcc26f240206abac0c9d794affb40e0714846c451", + "nginx:1.7.9" + ], + "sizeBytes": 91664166 + }, + { + "names": [ + 
"aksrepos.azurecr.io/mirror/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "aksrepos.azurecr.io/prod/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "deis/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "aksrepos.azurecr.io/mirror/kube-svc-redirect:v1.0.2", + "aksrepos.azurecr.io/prod/kube-svc-redirect:v1.0.2" + ], + "sizeBytes": 82897218 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", + "k8s.gcr.io/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", + "aksrepos.azurecr.io/mirror/heapster-amd64:v1.5.3", + "k8s.gcr.io/heapster-amd64:v1.5.3" + ], + "sizeBytes": 75318342 + }, + { + "names": [ + "vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", + "vishiy/hello:err100eps" + ], + "sizeBytes": 54649865 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13" + ], + "sizeBytes": 51157394 + }, + { + "names": [ + "k8s.gcr.io/metrics-server-amd64@sha256:49a9f12f7067d11f42c803dbe61ed2c1299959ad85cb315b25ff7eef8e6b8892", + "k8s.gcr.io/metrics-server-amd64:v0.2.1" + ], + "sizeBytes": 42541759 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:4f1ab957f87b94a5ec1edc26fae50da2175461f00afecf68940c4aa079bd08a4", + "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10" + ], + "sizeBytes": 41635309 + }, + { + "names": [ + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:bbb2a290a568125b3b996028958eb773f33b5b87a6b37bf38a28f8b62dddb3c8", + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10" + ], + "sizeBytes": 40372149 + } + ], + "nodeInfo": { + "architecture": "amd64", + "bootID": "d8f6c00f-a085-450e-bf5c-12e651a0fcfc", + "containerRuntimeVersion": "docker://3.0.4", + 
"kernelVersion": "4.15.0-1037-azure", + "kubeProxyVersion": "v1.11.8", + "kubeletVersion": "v1.11.8", + "machineID": "cc9ed99e383540a4b0379995bb779221", + "operatingSystem": "linux", + "osImage": "Ubuntu 16.04.5 LTS", + "systemUUID": "301B3B88-C7BD-3D45-A3CB-3CD66A42EB6F" + } + } + }, + { + "apiVersion": "v1", + "kind": "Node", + "metadata": { + "annotations": { + "node.alpha.kubernetes.io/ttl": "0", + "volumes.kubernetes.io/controller-managed-attach-detach": "true" + }, + "creationTimestamp": "2019-03-12T16:40:33Z", + "labels": { + "agentpool": "nodepool1", + "beta.kubernetes.io/arch": "amd64", + "beta.kubernetes.io/instance-type": "Standard_DS1_v2", + "beta.kubernetes.io/os": "linux", + "failure-domain.beta.kubernetes.io/region": "eastus", + "failure-domain.beta.kubernetes.io/zone": "1", + "kubernetes.azure.com/cluster": "MC_dilipr-health-test_dilipr-health-test_eastus", + "kubernetes.io/hostname": "aks-nodepool1-19574989-1", + "kubernetes.io/role": "agent", + "node-role.kubernetes.io/agent": "", + "storageprofile": "managed", + "storagetier": "Premium_LRS" + }, + "name": "aks-nodepool1-19574989-1", + "resourceVersion": "19068104", + "selfLink": "/api/v1/nodes/aks-nodepool1-19574989-1", + "uid": "8e1b5c77-44e5-11e9-9920-423525a6b683" + }, + "spec": { + "podCIDR": "10.244.0.0/24", + "providerID": "azure:///subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/MC_dilipr-health-test_dilipr-health-test_eastus/providers/Microsoft.Compute/virtualMachines/aks-nodepool1-19574989-1" + }, + "status": { + "addresses": [ + { + "address": "aks-nodepool1-19574989-1", + "type": "Hostname" + }, + { + "address": "10.240.0.5", + "type": "InternalIP" + } + ], + "allocatable": { + "cpu": "940m", + "ephemeral-storage": "28043041951", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "2504708Ki", + "pods": "110" + }, + "capacity": { + "cpu": "1", + "ephemeral-storage": "30428648Ki", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "3524612Ki", + 
"pods": "110" + }, + "conditions": [ + { + "lastHeartbeatTime": "2019-03-12T16:42:30Z", + "lastTransitionTime": "2019-03-12T16:42:30Z", + "message": "RouteController created a route", + "reason": "RouteCreated", + "status": "False", + "type": "NetworkUnavailable" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:21Z", + "lastTransitionTime": "2019-07-23T14:46:10Z", + "message": "kubelet has sufficient disk space available", + "reason": "KubeletHasSufficientDisk", + "status": "False", + "type": "OutOfDisk" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:21Z", + "lastTransitionTime": "2019-07-23T14:46:10Z", + "message": "kubelet has sufficient memory available", + "reason": "KubeletHasSufficientMemory", + "status": "False", + "type": "MemoryPressure" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:21Z", + "lastTransitionTime": "2019-07-23T14:46:10Z", + "message": "kubelet has no disk pressure", + "reason": "KubeletHasNoDiskPressure", + "status": "False", + "type": "DiskPressure" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:21Z", + "lastTransitionTime": "2019-03-12T16:40:33Z", + "message": "kubelet has sufficient PID available", + "reason": "KubeletHasSufficientPID", + "status": "False", + "type": "PIDPressure" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:21Z", + "lastTransitionTime": "2019-07-23T14:46:10Z", + "message": "kubelet is posting ready status. 
AppArmor enabled", + "reason": "KubeletReady", + "status": "True", + "type": "Ready" + } + ], + "daemonEndpoints": { + "kubeletEndpoint": { + "Port": 10250 + } + }, + "images": [ + { + "names": [ + "perl@sha256:268e7af9853bcc6d2100e2ad76e928c2ca861518217c269b8a762849a8617c12", + "perl:latest" + ], + "sizeBytes": 890592834 + }, + { + "names": [ + "nickchase/rss-php-nginx@sha256:48da56a77fe4ecff4917121365d8e0ce615ebbdfe31f48a996255f5592894e2b", + "nickchase/rss-php-nginx:v1" + ], + "sizeBytes": 677038498 + }, + { + "names": [ + "rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", + "rdilip83/jsonlogger:v12" + ], + "sizeBytes": 676594134 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "k8s.gcr.io/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "aksrepos.azurecr.io/mirror/hyperkube-amd64:v1.11.8", + "k8s.gcr.io/hyperkube-amd64:v1.11.8" + ], + "sizeBytes": 615263658 + }, + { + "names": [ + "rdilip83/fixrubyerror@sha256:6b7f36cf6258b311015493ab025f06577d758c45bc5010d022ac160b9f40ea5d", + "rdilip83/fixrubyerror:latest" + ], + "sizeBytes": 494068028 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:69b420bdb4081293c37e2d0f8ad2e4054bd516f5c08c7512d6b695660a36eccf", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019" + ], + "sizeBytes": 494067935 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:fb2b90ce9bf7186fd9dfae97f5f72f9b9c80c8a0493af3cff74179cd4ff847c0", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08212019" + ], + "sizeBytes": 494067572 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:c646e180483d295ffac114fb9df513db02553af7879681814d5910764653dd2d", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08202019" + ], + 
"sizeBytes": 494067210 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:c21b596a22a1338ed293d01681f327acc871ee502ed779ec1109d6a93375bb3b", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08192019" + ], + "sizeBytes": 494055088 + }, + { + "names": [ + "rdilip83/cifeatureprod08192019@sha256:7815bba9a805e4e8df33356fd532671de45525ce9c6e936e14f9b126e2097ecd", + "rdilip83/cifeatureprod08192019:v1" + ], + "sizeBytes": 494055088 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:59e34aab9f6e16a87e880b1ee1c9dd5434ee40dd29502e74aceefabf51443717", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:internaltesthealth08192019" + ], + "sizeBytes": 494053562 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:6387d0dedf4de0bab430f681ef61361f63a20e1c4c287a9b60ea5460283ac6cf", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ci_feature_prod_health08192019" + ], + "sizeBytes": 494053562 + }, + { + "names": [ + "rdilip83/hc08192019@sha256:014d936771508d499ac4c15043e23b16bce8de0019fb2048b99540cbe9084895", + "rdilip83/hc08192019:1" + ], + "sizeBytes": 494053562 + }, + { + "names": [ + "rdilip83/health-rc@sha256:8ad12bce5ffd27b301bc6fe4355c8affa6fce080ae7e2291dec3a0ed11bb9483", + "rdilip83/health-rc:3" + ], + "sizeBytes": 494052863 + }, + { + "names": [ + "rdilip83/health_ci_feature_image@sha256:1a574d25884483083e8cbaacbf0cb7c4e442dc736d480615c65f5c71f8969b13", + "rdilip83/health_ci_feature_image:v1" + ], + "sizeBytes": 494052147 + }, + { + "names": [ + "rdilip83/healthrc@sha256:816c8cef09822daf050a0fca6f92e7ac19147ff4bf1a823d43fe70f73470cc0c", + "rdilip83/healthrc:v3" + ], + "sizeBytes": 494052138 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:d35aac044d1adc3d02269fde78f8dfd923db94b81288447cf6fdd482970a333b", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:healthmerge08142019" + ], + 
"sizeBytes": 494052135 + }, + { + "names": [ + "rdilip83/healthrc@sha256:a130780e56ac0edb3ca29477e12edd5e9b5d08b5732dbd59ede9beb58e21eca7", + "rdilip83/healthrc:v2" + ], + "sizeBytes": 494051682 + }, + { + "names": [ + "rdilip83/healthmerge@sha256:24d270b0f59fb484c283922474736c3cba50f8aad0270bc0a3acd14284694eea", + "rdilip83/healthmerge:v8" + ], + "sizeBytes": 494010139 + }, + { + "names": [ + "rdilip83/health-rc@sha256:b1d24728eb808d301da426b76b7f7b79606204c4c2b695a24ac670be8276d55d", + "rdilip83/health-rc:1" + ], + "sizeBytes": 494000891 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:a0666957cccbfdf5784accd1133408bf017c28a6e694d9a2ae74da94eef2d285", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:healthpreview08012019" + ], + "sizeBytes": 493994261 + }, + { + "names": [ + "rdilip83/mergehealth@sha256:32c9b35a6809c54d5296e2ca2b122b35a4ad8c852622174cc5a9f92cc27e56e4", + "rdilip83/mergehealth:v3" + ], + "sizeBytes": 493988815 + }, + { + "names": [ + "rdilip83/mergehealth@sha256:a3521e8f36e007b3cb949e0356a75394ac61fd2024ca1ec4827b8d54fb068534", + "rdilip83/mergehealth:v1" + ], + "sizeBytes": 493981585 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:0438e4690e042b195917e160b8949aeb339520ee19c898a8bb9452f36d1f84f1", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:healthpreview07182019" + ], + "sizeBytes": 493977357 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:9ebc410a36856176921dba81b5bd43132469209b315f52be346690435419b9bb" + ], + "sizeBytes": 493946790 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:4e51195a9c77bd166fc90ee5f6143a4604b502ab7ef0f06431dec10c341b10f3", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:healthpreview06272019" + ], + "sizeBytes": 493893635 + }, + { + "names": [ + 
"rdilip83/healthpreview06272019@sha256:d888ba5ff5e5810113a32f9c9812a5e28088cc81b902e95a185fe465a514029c", + "rdilip83/healthpreview06272019:latest" + ], + "sizeBytes": 493893633 + }, + { + "names": [ + "rdilip83/healthpreview06252019-1@sha256:1561876cffe94433a569f29f5231548e039193ebaa7ec640d22439675179e43f", + "rdilip83/healthpreview06252019-1:latest" + ], + "sizeBytes": 493887387 + }, + { + "names": [ + "rdilip83/healthpreview06252019@sha256:6597ff599a78ac452a4138dedb9e08c0ccd3e8b01594b033fd78ba9dbb41fe9e", + "rdilip83/healthpreview06252019:latest" + ], + "sizeBytes": 493887384 + }, + { + "names": [ + "rdilip83/healthpreview06242019@sha256:c4f565d92086d1ee56e6016178fed5c668352dc0ca0047f02910bdcb87a482c4", + "rdilip83/healthpreview06242019:latest" + ], + "sizeBytes": 493850850 + }, + { + "names": [ + "rdilip83/healthpreview06212019-1@sha256:937ce5801a0097a1cbc4eff5399c1973b4c6223ece9279b35207368b99f82b96", + "rdilip83/healthpreview06212019-1:latest" + ], + "sizeBytes": 493850674 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:0f798cb7d56931b231f71e38e7fa5bf898b69e611247a566701f70a5f29a9799", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod07092019" + ], + "sizeBytes": 467692116 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:3734a084fa9681c7e930eb90cad45a8f282c24af63065a720a2327b1683f3ba4", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06142019" + ], + "sizeBytes": 466882569 + }, + { + "names": [ + "rdilip83/mergehealth@sha256:16402c34e2d7de72c2ebc18ec8e9f7933fa25f6a7f83bceb84483ba95e3902f7", + "rdilip83/mergehealth:v2" + ], + "sizeBytes": 448931997 + }, + { + "names": [ + "deis/hcp-tunnel-front@sha256:68878ee3ea1781b322ea3952c3370e31dd89be8bb0864e2bf27bdba6dc904c41", + "deis/hcp-tunnel-front:v1.9.2-v4.0.7" + ], + "sizeBytes": 383483267 + }, + { + "names": [ + "nginx@sha256:23b4dcdf0d34d4a129755fc6f52e1c6e23bb34ea011b315d87e193033bcd1b68" + ], + 
"sizeBytes": 109331233 + }, + { + "names": [ + "nginx@sha256:bdbf36b7f1f77ffe7bd2a32e59235dff6ecf131e3b6b5b96061c652f30685f3a", + "nginx:latest" + ], + "sizeBytes": 109258867 + }, + { + "names": [ + "debian@sha256:118cf8f3557e1ea766c02f36f05f6ac3e63628427ea8965fb861be904ec35a6f", + "debian:latest" + ], + "sizeBytes": 100594230 + }, + { + "names": [ + "nginx@sha256:e3456c851a152494c3e4ff5fcc26f240206abac0c9d794affb40e0714846c451", + "nginx:1.7.9" + ], + "sizeBytes": 91664166 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "aksrepos.azurecr.io/prod/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "deis/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "aksrepos.azurecr.io/mirror/kube-svc-redirect:v1.0.2", + "aksrepos.azurecr.io/prod/kube-svc-redirect:v1.0.2" + ], + "sizeBytes": 82897218 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", + "k8s.gcr.io/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", + "aksrepos.azurecr.io/mirror/heapster-amd64:v1.5.3", + "k8s.gcr.io/heapster-amd64:v1.5.3" + ], + "sizeBytes": 75318342 + }, + { + "names": [ + "vishiy/hello@sha256:99d60766e39df52d28fe8db9c659633d96ba1d84fd672298dce047d8a86c478a", + "vishiy/hello:err100eps" + ], + "sizeBytes": 54649865 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", + "k8s.gcr.io/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", + "aksrepos.azurecr.io/mirror/k8s-dns-kube-dns-amd64:1.14.13", + "k8s.gcr.io/k8s-dns-kube-dns-amd64:1.14.13" + ], + "sizeBytes": 51157394 + }, + { + "names": [ + 
"k8s.gcr.io/cluster-proportional-autoscaler-amd64@sha256:003f98d9f411ddfa6ff6d539196355e03ddd69fa4ed38c7ffb8fec6f729afe2d", + "k8s.gcr.io/cluster-proportional-autoscaler-amd64:1.1.2-r2" + ], + "sizeBytes": 49648481 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/metrics-server-amd64@sha256:220c0ed3451cb95e4b2f72dd5dc8d9d39d9f529722e5b29d8286373ce27b117e", + "k8s.gcr.io/metrics-server-amd64@sha256:49a9f12f7067d11f42c803dbe61ed2c1299959ad85cb315b25ff7eef8e6b8892", + "aksrepos.azurecr.io/mirror/metrics-server-amd64:v0.2.1", + "k8s.gcr.io/metrics-server-amd64:v0.2.1" + ], + "sizeBytes": 42541759 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/k8s-dns-sidecar-amd64@sha256:4f1ab957f87b94a5ec1edc26fae50da2175461f00afecf68940c4aa079bd08a4", + "k8s.gcr.io/k8s-dns-sidecar-amd64@sha256:4f1ab957f87b94a5ec1edc26fae50da2175461f00afecf68940c4aa079bd08a4", + "aksrepos.azurecr.io/mirror/k8s-dns-sidecar-amd64:1.14.10", + "k8s.gcr.io/k8s-dns-sidecar-amd64:1.14.10" + ], + "sizeBytes": 41635309 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/k8s-dns-dnsmasq-nanny-amd64@sha256:bbb2a290a568125b3b996028958eb773f33b5b87a6b37bf38a28f8b62dddb3c8", + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64@sha256:bbb2a290a568125b3b996028958eb773f33b5b87a6b37bf38a28f8b62dddb3c8", + "aksrepos.azurecr.io/mirror/k8s-dns-dnsmasq-nanny-amd64:1.14.10", + "k8s.gcr.io/k8s-dns-dnsmasq-nanny-amd64:1.14.10" + ], + "sizeBytes": 40372149 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/addon-resizer@sha256:8ac3ffa4232046feb297cefc40734641fa2954c16308f9e0d70ec152f22231ca", + "k8s.gcr.io/addon-resizer@sha256:507aa9845ecce1fdde4d61f530c802f4dc2974c700ce0db7730866e442db958d", + "aksrepos.azurecr.io/mirror/addon-resizer:1.8.1", + "k8s.gcr.io/addon-resizer:1.8.1" + ], + "sizeBytes": 32968591 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/nginx@sha256:91d22184f3f9b1be658c2cc2c12d324de7ff12c8b9c9a597905457b4d93b069d", + "nginx@sha256:9d46fd628d54ebe1633ee3cf0fe2acfcc419cfae541c63056530e39cd5620366", + 
"aksrepos.azurecr.io/mirror/nginx:1.13.12-alpine", + "nginx:1.13.12-alpine" + ], + "sizeBytes": 18002931 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/exechealthz-amd64@sha256:34722333f0cd0b891b61c9e0efa31913f22157e341a3aabb79967305d4e78260", + "k8s.gcr.io/exechealthz-amd64@sha256:503e158c3f65ed7399f54010571c7c977ade7fe59010695f48d9650d83488c0a", + "aksrepos.azurecr.io/mirror/exechealthz-amd64:1.2", + "k8s.gcr.io/exechealthz-amd64:1.2" + ], + "sizeBytes": 8374840 + } + ], + "nodeInfo": { + "architecture": "amd64", + "bootID": "4c822e6d-c2e5-4697-9a01-467e04804fc1", + "containerRuntimeVersion": "docker://3.0.4", + "kernelVersion": "4.15.0-1037-azure", + "kubeProxyVersion": "v1.11.8", + "kubeletVersion": "v1.11.8", + "machineID": "1954026de5e6436788f214eb0dfd6a13", + "operatingSystem": "linux", + "osImage": "Ubuntu 16.04.5 LTS", + "systemUUID": "17A6A78E-D3E2-2A4F-852B-C91D933C8D5B" + } + } + }, + { + "apiVersion": "v1", + "kind": "Node", + "metadata": { + "annotations": { + "node.alpha.kubernetes.io/ttl": "0", + "volumes.kubernetes.io/controller-managed-attach-detach": "true" + }, + "creationTimestamp": "2019-06-21T02:01:53Z", + "labels": { + "agentpool": "nodepool1", + "beta.kubernetes.io/arch": "amd64", + "beta.kubernetes.io/instance-type": "Standard_DS1_v2", + "beta.kubernetes.io/os": "linux", + "failure-domain.beta.kubernetes.io/region": "eastus", + "failure-domain.beta.kubernetes.io/zone": "0", + "kubernetes.azure.com/cluster": "MC_dilipr-health-test_dilipr-health-test_eastus", + "kubernetes.io/hostname": "aks-nodepool1-19574989-2", + "kubernetes.io/role": "agent", + "node-role.kubernetes.io/agent": "", + "storageprofile": "managed", + "storagetier": "Premium_LRS" + }, + "name": "aks-nodepool1-19574989-2", + "resourceVersion": "19068101", + "selfLink": "/api/v1/nodes/aks-nodepool1-19574989-2", + "uid": "8a62e1bc-93c8-11e9-854d-ee76584a3c00" + }, + "spec": { + "podCIDR": "10.244.12.0/24", + "providerID": 
"azure:///subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/MC_dilipr-health-test_dilipr-health-test_eastus/providers/Microsoft.Compute/virtualMachines/aks-nodepool1-19574989-2" + }, + "status": { + "addresses": [ + { + "address": "aks-nodepool1-19574989-2", + "type": "Hostname" + }, + { + "address": "10.240.0.7", + "type": "InternalIP" + } + ], + "allocatable": { + "cpu": "940m", + "ephemeral-storage": "28043041951", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "2480548Ki", + "pods": "110" + }, + "capacity": { + "cpu": "1", + "ephemeral-storage": "30428648Ki", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "3500452Ki", + "pods": "110" + }, + "conditions": [ + { + "lastHeartbeatTime": "2019-06-21T02:02:24Z", + "lastTransitionTime": "2019-06-21T02:02:24Z", + "message": "RouteController created a route", + "reason": "RouteCreated", + "status": "False", + "type": "NetworkUnavailable" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:20Z", + "lastTransitionTime": "2019-07-23T14:46:10Z", + "message": "kubelet has sufficient disk space available", + "reason": "KubeletHasSufficientDisk", + "status": "False", + "type": "OutOfDisk" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:20Z", + "lastTransitionTime": "2019-07-23T14:46:10Z", + "message": "kubelet has sufficient memory available", + "reason": "KubeletHasSufficientMemory", + "status": "False", + "type": "MemoryPressure" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:20Z", + "lastTransitionTime": "2019-07-23T14:46:10Z", + "message": "kubelet has no disk pressure", + "reason": "KubeletHasNoDiskPressure", + "status": "False", + "type": "DiskPressure" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:20Z", + "lastTransitionTime": "2019-06-21T02:01:53Z", + "message": "kubelet has sufficient PID available", + "reason": "KubeletHasSufficientPID", + "status": "False", + "type": "PIDPressure" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:20Z", + "lastTransitionTime": 
"2019-07-23T14:46:10Z", + "message": "kubelet is posting ready status. AppArmor enabled", + "reason": "KubeletReady", + "status": "True", + "type": "Ready" + } + ], + "daemonEndpoints": { + "kubeletEndpoint": { + "Port": 10250 + } + }, + "images": [ + { + "names": [ + "nickchase/rss-php-nginx@sha256:48da56a77fe4ecff4917121365d8e0ce615ebbdfe31f48a996255f5592894e2b", + "nickchase/rss-php-nginx:v1" + ], + "sizeBytes": 677038498 + }, + { + "names": [ + "rdilip83/jsonlogger@sha256:82b67ca5e0650cd5e47f5b51659d61cee035e5d8dcd8a79c50358cd2beb3b5a8", + "rdilip83/jsonlogger:v12" + ], + "sizeBytes": 676594134 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "k8s.gcr.io/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "aksrepos.azurecr.io/mirror/hyperkube-amd64:v1.11.8", + "k8s.gcr.io/hyperkube-amd64:v1.11.8" + ], + "sizeBytes": 615263658 + }, + { + "names": [ + "rdilip83/fixrubyerror@sha256:6b7f36cf6258b311015493ab025f06577d758c45bc5010d022ac160b9f40ea5d", + "rdilip83/fixrubyerror:latest" + ], + "sizeBytes": 494068028 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:69b420bdb4081293c37e2d0f8ad2e4054bd516f5c08c7512d6b695660a36eccf", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019" + ], + "sizeBytes": 494067935 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:fb2b90ce9bf7186fd9dfae97f5f72f9b9c80c8a0493af3cff74179cd4ff847c0", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08212019" + ], + "sizeBytes": 494067572 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:c646e180483d295ffac114fb9df513db02553af7879681814d5910764653dd2d", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08202019" + ], + "sizeBytes": 494067210 + }, + { + "names": [ + 
"mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:c21b596a22a1338ed293d01681f327acc871ee502ed779ec1109d6a93375bb3b", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08192019" + ], + "sizeBytes": 494055088 + }, + { + "names": [ + "rdilip83/cifeatureprod08192019@sha256:7815bba9a805e4e8df33356fd532671de45525ce9c6e936e14f9b126e2097ecd", + "rdilip83/cifeatureprod08192019:v1" + ], + "sizeBytes": 494055088 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:6387d0dedf4de0bab430f681ef61361f63a20e1c4c287a9b60ea5460283ac6cf", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ci_feature_prod_health08192019" + ], + "sizeBytes": 494053562 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:59e34aab9f6e16a87e880b1ee1c9dd5434ee40dd29502e74aceefabf51443717", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:internaltesthealth08192019" + ], + "sizeBytes": 494053562 + }, + { + "names": [ + "rdilip83/hc08192019@sha256:014d936771508d499ac4c15043e23b16bce8de0019fb2048b99540cbe9084895", + "rdilip83/hc08192019:1" + ], + "sizeBytes": 494053562 + }, + { + "names": [ + "rdilip83/health-rc@sha256:8ad12bce5ffd27b301bc6fe4355c8affa6fce080ae7e2291dec3a0ed11bb9483", + "rdilip83/health-rc:3" + ], + "sizeBytes": 494052863 + }, + { + "names": [ + "rdilip83/health_ci_feature_image@sha256:1a574d25884483083e8cbaacbf0cb7c4e442dc736d480615c65f5c71f8969b13", + "rdilip83/health_ci_feature_image:v1" + ], + "sizeBytes": 494052147 + }, + { + "names": [ + "rdilip83/healthrc@sha256:816c8cef09822daf050a0fca6f92e7ac19147ff4bf1a823d43fe70f73470cc0c", + "rdilip83/healthrc:v3" + ], + "sizeBytes": 494052138 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:d35aac044d1adc3d02269fde78f8dfd923db94b81288447cf6fdd482970a333b", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:healthmerge08142019" + ], + "sizeBytes": 494052135 + }, + { + "names": [ + 
"rdilip83/healthrc@sha256:a130780e56ac0edb3ca29477e12edd5e9b5d08b5732dbd59ede9beb58e21eca7", + "rdilip83/healthrc:v2" + ], + "sizeBytes": 494051682 + }, + { + "names": [ + "rdilip83/healthmerge@sha256:24d270b0f59fb484c283922474736c3cba50f8aad0270bc0a3acd14284694eea", + "rdilip83/healthmerge:v8" + ], + "sizeBytes": 494010139 + }, + { + "names": [ + "rdilip83/health-rc@sha256:b1d24728eb808d301da426b76b7f7b79606204c4c2b695a24ac670be8276d55d", + "rdilip83/health-rc:1" + ], + "sizeBytes": 494000891 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:a0666957cccbfdf5784accd1133408bf017c28a6e694d9a2ae74da94eef2d285", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:healthpreview08012019" + ], + "sizeBytes": 493994261 + }, + { + "names": [ + "rdilip83/mergehealth@sha256:32c9b35a6809c54d5296e2ca2b122b35a4ad8c852622174cc5a9f92cc27e56e4", + "rdilip83/mergehealth:v3" + ], + "sizeBytes": 493988815 + }, + { + "names": [ + "rdilip83/mergehealth@sha256:a3521e8f36e007b3cb949e0356a75394ac61fd2024ca1ec4827b8d54fb068534", + "rdilip83/mergehealth:v1" + ], + "sizeBytes": 493981585 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:0438e4690e042b195917e160b8949aeb339520ee19c898a8bb9452f36d1f84f1", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:healthpreview07182019" + ], + "sizeBytes": 493977357 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:9ebc410a36856176921dba81b5bd43132469209b315f52be346690435419b9bb" + ], + "sizeBytes": 493946790 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:4e51195a9c77bd166fc90ee5f6143a4604b502ab7ef0f06431dec10c341b10f3", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:healthpreview06272019" + ], + "sizeBytes": 493893635 + }, + { + "names": [ + "rdilip83/healthpreview06272019@sha256:d888ba5ff5e5810113a32f9c9812a5e28088cc81b902e95a185fe465a514029c", + 
"rdilip83/healthpreview06272019:latest" + ], + "sizeBytes": 493893633 + }, + { + "names": [ + "rdilip83/healthpreview06252019-1@sha256:1561876cffe94433a569f29f5231548e039193ebaa7ec640d22439675179e43f", + "rdilip83/healthpreview06252019-1:latest" + ], + "sizeBytes": 493887387 + }, + { + "names": [ + "rdilip83/healthpreview06252019@sha256:6597ff599a78ac452a4138dedb9e08c0ccd3e8b01594b033fd78ba9dbb41fe9e", + "rdilip83/healthpreview06252019:latest" + ], + "sizeBytes": 493887384 + }, + { + "names": [ + "rdilip83/healthpreview06242019@sha256:c4f565d92086d1ee56e6016178fed5c668352dc0ca0047f02910bdcb87a482c4", + "rdilip83/healthpreview06242019:latest" + ], + "sizeBytes": 493850850 + }, + { + "names": [ + "rdilip83/healthpreview06212019-1@sha256:937ce5801a0097a1cbc4eff5399c1973b4c6223ece9279b35207368b99f82b96", + "rdilip83/healthpreview06212019-1:latest" + ], + "sizeBytes": 493850674 + }, + { + "names": [ + "rdilip83/healthpreview06192019@sha256:f92cb5283814d446f0acde6a489648ea197496d5f85b27ca959ec97bce742d8a", + "rdilip83/healthpreview06192019:latest" + ], + "sizeBytes": 493799437 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:0f798cb7d56931b231f71e38e7fa5bf898b69e611247a566701f70a5f29a9799", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod07092019" + ], + "sizeBytes": 467692116 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:3734a084fa9681c7e930eb90cad45a8f282c24af63065a720a2327b1683f3ba4", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06142019" + ], + "sizeBytes": 466882569 + }, + { + "names": [ + "rdilip83/mergehealth@sha256:16402c34e2d7de72c2ebc18ec8e9f7933fa25f6a7f83bceb84483ba95e3902f7", + "rdilip83/mergehealth:v2" + ], + "sizeBytes": 448931997 + }, + { + "names": [ + "rdilip83/healthpreview06212019@sha256:5860c9caaf544f2e7c46edad5cdfb69e22398e20dc87cb8a4cd630b5b7000074", + "rdilip83/healthpreview06212019:latest" + ], + "sizeBytes": 448366491 + }, + { + 
"names": [ + "deis/hcp-tunnel-front@sha256:68878ee3ea1781b322ea3952c3370e31dd89be8bb0864e2bf27bdba6dc904c41", + "deis/hcp-tunnel-front:v1.9.2-v4.0.7" + ], + "sizeBytes": 383483267 + }, + { + "names": [ + "progrium/stress@sha256:e34d56d60f5caae79333cee395aae93b74791d50e3841986420d23c2ee4697bf", + "progrium/stress:latest" + ], + "sizeBytes": 281783943 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:b6834bb69e8fad88110b1dc57097a45bc79e6f2c5f2c2773c871d07389794771", + "k8s.gcr.io/cluster-autoscaler:v1.12.3" + ], + "sizeBytes": 232229241 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:dc5744fd8c22aebfe40d6b62ab97d18d7bfbfc7ab1782509d69a5a9ec514df2c", + "k8s.gcr.io/cluster-autoscaler:v1.12.2" + ], + "sizeBytes": 232167833 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:e71851267764a068fbb091a4ef3bb874b5ce34db48cb757fcf77779f30ef0207", + "k8s.gcr.io/cluster-autoscaler:v1.3.7" + ], + "sizeBytes": 217353965 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:36a369ca4643542d501bce0addf8b903f2141ae9e2608662b77a3d24f01d7780", + "k8s.gcr.io/cluster-autoscaler:v1.2.2" + ], + "sizeBytes": 208688449 + }, + { + "names": [ + "containernetworking/azure-npm@sha256:4735da6dc0d5393d68be72498f5ce563cb930fa21b26faec8fdc844001057a56", + "containernetworking/azure-npm:v1.0.18" + ], + "sizeBytes": 170727162 + }, + { + "names": [ + "containernetworking/networkmonitor@sha256:d875511410502c3e37804e1f313cc2b0a03d7a03d3d5e6adaf8994b753a76f8e", + "containernetworking/networkmonitor:v0.0.6" + ], + "sizeBytes": 123663837 + }, + { + "names": [ + "containernetworking/networkmonitor@sha256:944408a497c451b0e79d2596dc2e9fe5036cdbba7fa831bff024e1c9ed44190d", + "containernetworking/networkmonitor:v0.0.5" + ], + "sizeBytes": 122043325 + }, + { + "names": [ + "nginx@sha256:bdbf36b7f1f77ffe7bd2a32e59235dff6ecf131e3b6b5b96061c652f30685f3a", + "nginx:latest" + ], + "sizeBytes": 109258867 + }, + { + "names": [ + 
"debian@sha256:118cf8f3557e1ea766c02f36f05f6ac3e63628427ea8965fb861be904ec35a6f", + "debian:latest" + ], + "sizeBytes": 100594230 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:3da3f17cd4f02fe5696f29a5e6cd4aef7111f20dab9bec54ea35942346cfeb60", + "k8s.gcr.io/kube-addon-manager-amd64:v8.8" + ], + "sizeBytes": 99631084 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:672794ee3582521eb8bc4f257d0f70c92893f1989f39a200f9c84bcfe1aea7c9", + "k8s.gcr.io/kube-addon-manager-amd64:v9.0" + ], + "sizeBytes": 83077558 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "aksrepos.azurecr.io/prod/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "deis/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "aksrepos.azurecr.io/mirror/kube-svc-redirect:v1.0.2", + "aksrepos.azurecr.io/prod/kube-svc-redirect:v1.0.2" + ], + "sizeBytes": 82897218 + }, + { + "names": [ + "k8s.gcr.io/heapster-amd64@sha256:dccaabb0c20cf05c29baefa1e9bf0358b083ccc0fab492b9b3b47fb7e4db5472", + "k8s.gcr.io/heapster-amd64:v1.5.4" + ], + "sizeBytes": 75318342 + } + ], + "nodeInfo": { + "architecture": "amd64", + "bootID": "ee529550-afa8-43bb-90a6-f157e7e22e18", + "containerRuntimeVersion": "docker://3.0.4", + "kernelVersion": "4.15.0-1045-azure", + "kubeProxyVersion": "v1.11.8", + "kubeletVersion": "v1.11.8", + "machineID": "0e5d932888da4e17a3c58210f6c8c9db", + "operatingSystem": "linux", + "osImage": "Ubuntu 16.04.6 LTS", + "systemUUID": "5DBFC273-947F-0140-AD1F-BF6758D30B37" + } + } + }, + { + "apiVersion": "v1", + "kind": "Node", + "metadata": { + "annotations": { + "node.alpha.kubernetes.io/ttl": "0", + "volumes.kubernetes.io/controller-managed-attach-detach": "true" + }, + "creationTimestamp": "2019-08-07T18:57:56Z", + "labels": { + "agentpool": "nodepool1", + "beta.kubernetes.io/arch": "amd64", 
+ "beta.kubernetes.io/instance-type": "Standard_DS1_v2", + "beta.kubernetes.io/os": "linux", + "failure-domain.beta.kubernetes.io/region": "eastus", + "failure-domain.beta.kubernetes.io/zone": "1", + "kubernetes.azure.com/cluster": "MC_dilipr-health-test_dilipr-health-test_eastus", + "kubernetes.io/hostname": "aks-nodepool1-19574989-3", + "kubernetes.io/role": "agent", + "node-role.kubernetes.io/agent": "", + "storageprofile": "managed", + "storagetier": "Premium_LRS" + }, + "name": "aks-nodepool1-19574989-3", + "resourceVersion": "19068105", + "selfLink": "/api/v1/nodes/aks-nodepool1-19574989-3", + "uid": "448ea0a7-b945-11e9-a1b6-127094e7fd94" + }, + "spec": { + "podCIDR": "10.244.2.0/24", + "providerID": "azure:///subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourceGroups/MC_dilipr-health-test_dilipr-health-test_eastus/providers/Microsoft.Compute/virtualMachines/aks-nodepool1-19574989-3" + }, + "status": { + "addresses": [ + { + "address": "aks-nodepool1-19574989-3", + "type": "Hostname" + }, + { + "address": "10.240.0.6", + "type": "InternalIP" + } + ], + "allocatable": { + "cpu": "940m", + "ephemeral-storage": "28043041951", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "2480544Ki", + "pods": "110" + }, + "capacity": { + "cpu": "1", + "ephemeral-storage": "30428648Ki", + "hugepages-1Gi": "0", + "hugepages-2Mi": "0", + "memory": "3500448Ki", + "pods": "110" + }, + "conditions": [ + { + "lastHeartbeatTime": "2019-08-07T18:59:32Z", + "lastTransitionTime": "2019-08-07T18:59:32Z", + "message": "RouteController created a route", + "reason": "RouteCreated", + "status": "False", + "type": "NetworkUnavailable" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:22Z", + "lastTransitionTime": "2019-08-07T18:57:56Z", + "message": "kubelet has sufficient disk space available", + "reason": "KubeletHasSufficientDisk", + "status": "False", + "type": "OutOfDisk" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:22Z", + "lastTransitionTime": 
"2019-08-07T18:57:56Z", + "message": "kubelet has sufficient memory available", + "reason": "KubeletHasSufficientMemory", + "status": "False", + "type": "MemoryPressure" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:22Z", + "lastTransitionTime": "2019-08-07T18:57:56Z", + "message": "kubelet has no disk pressure", + "reason": "KubeletHasNoDiskPressure", + "status": "False", + "type": "DiskPressure" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:22Z", + "lastTransitionTime": "2019-08-07T18:57:56Z", + "message": "kubelet has sufficient PID available", + "reason": "KubeletHasSufficientPID", + "status": "False", + "type": "PIDPressure" + }, + { + "lastHeartbeatTime": "2019-08-23T20:43:22Z", + "lastTransitionTime": "2019-08-07T18:58:06Z", + "message": "kubelet is posting ready status. AppArmor enabled", + "reason": "KubeletReady", + "status": "True", + "type": "Ready" + } + ], + "daemonEndpoints": { + "kubeletEndpoint": { + "Port": 10250 + } + }, + "images": [ + { + "names": [ + "deis/hcp-tunnel-front@sha256:a067679f0ab376197a344cd410821cf07d69fc322dcd9af4a9229250da725ce2", + "deis/hcp-tunnel-front:v1.9.2-v4.0.4" + ], + "sizeBytes": 640504769 + }, + { + "names": [ + "aksrepos.azurecr.io/mirror/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "k8s.gcr.io/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "aksrepos.azurecr.io/mirror/hyperkube-amd64:v1.11.8", + "k8s.gcr.io/hyperkube-amd64:v1.11.8" + ], + "sizeBytes": 615263658 + }, + { + "names": [ + "rdilip83/fixrubyerror@sha256:6b7f36cf6258b311015493ab025f06577d758c45bc5010d022ac160b9f40ea5d", + "rdilip83/fixrubyerror:latest" + ], + "sizeBytes": 494068028 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:69b420bdb4081293c37e2d0f8ad2e4054bd516f5c08c7512d6b695660a36eccf", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019" + ], + "sizeBytes": 494067935 + }, + { + "names": [ 
+ "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:fb2b90ce9bf7186fd9dfae97f5f72f9b9c80c8a0493af3cff74179cd4ff847c0", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08212019" + ], + "sizeBytes": 494067572 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:c646e180483d295ffac114fb9df513db02553af7879681814d5910764653dd2d", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08202019" + ], + "sizeBytes": 494067210 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:c21b596a22a1338ed293d01681f327acc871ee502ed779ec1109d6a93375bb3b", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08192019" + ], + "sizeBytes": 494055088 + }, + { + "names": [ + "rdilip83/cifeatureprod08192019@sha256:7815bba9a805e4e8df33356fd532671de45525ce9c6e936e14f9b126e2097ecd", + "rdilip83/cifeatureprod08192019:v1" + ], + "sizeBytes": 494055088 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:6387d0dedf4de0bab430f681ef61361f63a20e1c4c287a9b60ea5460283ac6cf", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ci_feature_prod_health08192019" + ], + "sizeBytes": 494053562 + }, + { + "names": [ + "rdilip83/hc08192019@sha256:014d936771508d499ac4c15043e23b16bce8de0019fb2048b99540cbe9084895", + "rdilip83/hc08192019:1" + ], + "sizeBytes": 494053562 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:59e34aab9f6e16a87e880b1ee1c9dd5434ee40dd29502e74aceefabf51443717", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:internaltesthealth08192019" + ], + "sizeBytes": 494053562 + }, + { + "names": [ + "rdilip83/health-rc@sha256:8ad12bce5ffd27b301bc6fe4355c8affa6fce080ae7e2291dec3a0ed11bb9483", + "rdilip83/health-rc:3" + ], + "sizeBytes": 494052863 + }, + { + "names": [ + "rdilip83/health_ci_feature_image@sha256:1a574d25884483083e8cbaacbf0cb7c4e442dc736d480615c65f5c71f8969b13", + 
"rdilip83/health_ci_feature_image:v1" + ], + "sizeBytes": 494052147 + }, + { + "names": [ + "rdilip83/healthrc@sha256:816c8cef09822daf050a0fca6f92e7ac19147ff4bf1a823d43fe70f73470cc0c", + "rdilip83/healthrc:v3" + ], + "sizeBytes": 494052138 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:d35aac044d1adc3d02269fde78f8dfd923db94b81288447cf6fdd482970a333b", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:healthmerge08142019" + ], + "sizeBytes": 494052135 + }, + { + "names": [ + "rdilip83/healthrc@sha256:a130780e56ac0edb3ca29477e12edd5e9b5d08b5732dbd59ede9beb58e21eca7", + "rdilip83/healthrc:v2" + ], + "sizeBytes": 494051682 + }, + { + "names": [ + "rdilip83/healthmerge@sha256:24d270b0f59fb484c283922474736c3cba50f8aad0270bc0a3acd14284694eea", + "rdilip83/healthmerge:v8" + ], + "sizeBytes": 494010139 + }, + { + "names": [ + "rdilip83/health-rc@sha256:b1d24728eb808d301da426b76b7f7b79606204c4c2b695a24ac670be8276d55d", + "rdilip83/health-rc:1" + ], + "sizeBytes": 494000891 + }, + { + "names": [ + "rdilip83/mergehealth@sha256:32c9b35a6809c54d5296e2ca2b122b35a4ad8c852622174cc5a9f92cc27e56e4", + "rdilip83/mergehealth:v3" + ], + "sizeBytes": 493988815 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:0438e4690e042b195917e160b8949aeb339520ee19c898a8bb9452f36d1f84f1", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:healthpreview07182019" + ], + "sizeBytes": 493977357 + }, + { + "names": [ + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:4e51195a9c77bd166fc90ee5f6143a4604b502ab7ef0f06431dec10c341b10f3", + "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:healthpreview06272019" + ], + "sizeBytes": 493893635 + }, + { + "names": [ + "rdilip83/healthpreview06272019@sha256:d888ba5ff5e5810113a32f9c9812a5e28088cc81b902e95a185fe465a514029c", + "rdilip83/healthpreview06272019:latest" + ], + "sizeBytes": 493893633 + }, + { + "names": [ + 
"aksrepos.azurecr.io/prod/hcp-tunnel-front@sha256:68878ee3ea1781b322ea3952c3370e31dd89be8bb0864e2bf27bdba6dc904c41", + "aksrepos.azurecr.io/prod/hcp-tunnel-front:v1.9.2-v4.0.7" + ], + "sizeBytes": 383483267 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:b6834bb69e8fad88110b1dc57097a45bc79e6f2c5f2c2773c871d07389794771", + "k8s.gcr.io/cluster-autoscaler:v1.12.3" + ], + "sizeBytes": 232229241 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:dc5744fd8c22aebfe40d6b62ab97d18d7bfbfc7ab1782509d69a5a9ec514df2c", + "k8s.gcr.io/cluster-autoscaler:v1.12.2" + ], + "sizeBytes": 232167833 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:193eaf37788dd5f971dd400b7e3d28e650bfd81c90fa46b234f03eb3d43880e3", + "k8s.gcr.io/cluster-autoscaler:v1.12.5" + ], + "sizeBytes": 231543459 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:e71851267764a068fbb091a4ef3bb874b5ce34db48cb757fcf77779f30ef0207", + "k8s.gcr.io/cluster-autoscaler:v1.3.7" + ], + "sizeBytes": 217353965 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:156b7b9bcba24ed474f67d0feaf27f2506013f15b030341bbd41c630283161b8", + "k8s.gcr.io/cluster-autoscaler:v1.3.4" + ], + "sizeBytes": 217264129 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:97896235bf66bde573d6f2ee150e212ea7010d314eb5d2cfb2ff1af93335db30", + "k8s.gcr.io/cluster-autoscaler:v1.3.3" + ], + "sizeBytes": 217259793 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:b416bf3b6687788b4da4c7ede2bcf067b34ad781862ee3d3dac1d720c5fa38b3", + "k8s.gcr.io/cluster-autoscaler:v1.3.9" + ], + "sizeBytes": 216696035 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:f37a2c84614bdd02475ccb020182caec562cde97fdfd9dae58de66ff89614bc5", + "k8s.gcr.io/cluster-autoscaler:v1.3.8" + ], + "sizeBytes": 216693526 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:b0777becbfc7a56e66b079d2767fdc173121a29165523bbbe309bcb2c0a226aa", + "k8s.gcr.io/cluster-autoscaler:v1.2.5" + ], + "sizeBytes": 
212991966 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:36a369ca4643542d501bce0addf8b903f2141ae9e2608662b77a3d24f01d7780", + "k8s.gcr.io/cluster-autoscaler:v1.2.2" + ], + "sizeBytes": 208688449 + }, + { + "names": [ + "mcr.microsoft.com/containernetworking/azure-npm@sha256:7b9e7dec6b06a21595f9aa06b319c99b579950619fa869dd85dc637b2235d79f", + "mcr.microsoft.com/containernetworking/azure-npm:v1.0.18" + ], + "sizeBytes": 170727162 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:760232bed2097b5ca742f05b15c94d56ff96ed6b5c93251edc613be045c8d78b", + "k8s.gcr.io/cluster-autoscaler:v1.15.0" + ], + "sizeBytes": 152214996 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:a4e5a8e6d4dc011e6e7a104d6abdfda56274b90357ee9f6e42cc22b70482420b", + "k8s.gcr.io/cluster-autoscaler:v1.14.0" + ], + "sizeBytes": 142102721 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:cbc61e0f6c3ef1c591a0f22ec483826110e2c10acddd5415c0cc2305fd085e69", + "k8s.gcr.io/cluster-autoscaler:v1.14.2" + ], + "sizeBytes": 142099784 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:9dcbd91e79f33c44529de58a0024deb3da23a3a0bc7fd4d028c1255c68f62fb7", + "k8s.gcr.io/cluster-autoscaler:v1.13.2" + ], + "sizeBytes": 136684274 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:e4140dc3ab54e115ab4464331b25022fc5ffb947b568aaf81089efb72506c895", + "k8s.gcr.io/cluster-autoscaler:v1.13.4" + ], + "sizeBytes": 136681463 + }, + { + "names": [ + "k8s.gcr.io/cluster-autoscaler@sha256:7ff5a60304b344f2f29c804c7253632bbc818794f6932236a56db107a6a8f5af", + "k8s.gcr.io/cluster-autoscaler:v1.13.1" + ], + "sizeBytes": 136618018 + }, + { + "names": [ + "mcr.microsoft.com/containernetworking/networkmonitor@sha256:d875511410502c3e37804e1f313cc2b0a03d7a03d3d5e6adaf8994b753a76f8e", + "mcr.microsoft.com/containernetworking/networkmonitor:v0.0.6" + ], + "sizeBytes": 123663837 + }, + { + "names": [ + 
"mcr.microsoft.com/containernetworking/networkmonitor@sha256:944408a497c451b0e79d2596dc2e9fe5036cdbba7fa831bff024e1c9ed44190d", + "mcr.microsoft.com/containernetworking/networkmonitor:v0.0.5" + ], + "sizeBytes": 122043325 + }, + { + "names": [ + "k8s.gcr.io/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", + "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1" + ], + "sizeBytes": 121711221 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:3da3f17cd4f02fe5696f29a5e6cd4aef7111f20dab9bec54ea35942346cfeb60", + "k8s.gcr.io/kube-addon-manager-amd64:v8.8" + ], + "sizeBytes": 99631084 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:2fd1daf3d3cf0e94a753f2263b60dbb0d42b107b5cde0c75ee3fc5c830e016e4", + "k8s.gcr.io/kube-addon-manager-amd64:v8.9" + ], + "sizeBytes": 99240637 + }, + { + "names": [ + "microsoft/virtual-kubelet@sha256:efc397d741d7e590c892c0ea5dccc9a800656c3adb95da4dae25c1cdd5eb6d9f", + "microsoft/virtual-kubelet:latest" + ], + "sizeBytes": 87436458 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:672794ee3582521eb8bc4f257d0f70c92893f1989f39a200f9c84bcfe1aea7c9", + "k8s.gcr.io/kube-addon-manager-amd64:v9.0" + ], + "sizeBytes": 83077558 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:382c220b3531d9f95bf316a16b7282cc2ef929cd8a89a9dd3f5933edafc41a8e", + "k8s.gcr.io/kube-addon-manager-amd64:v9.0.1" + ], + "sizeBytes": 83076194 + }, + { + "names": [ + "aksrepos.azurecr.io/prod/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "deis/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "aksrepos.azurecr.io/prod/kube-svc-redirect:v1.0.2", + "deis/kube-svc-redirect:v1.0.2" + ], + "sizeBytes": 82897218 + }, + { + "names": [ + "k8s.gcr.io/kube-addon-manager-amd64@sha256:3519273916ba45cfc9b318448d4629819cb5fbccbb0822cce054dd8c1f68cb60", + 
"k8s.gcr.io/kube-addon-manager-amd64:v8.6" + ], + "sizeBytes": 78384272 + } + ], + "nodeInfo": { + "architecture": "amd64", + "bootID": "47e7c02b-3741-42be-a2a1-76c76aa8ccde", + "containerRuntimeVersion": "docker://3.0.6", + "kernelVersion": "4.15.0-1050-azure", + "kubeProxyVersion": "v1.11.8", + "kubeletVersion": "v1.11.8", + "machineID": "a4a4bc2f5a944cd38aba89365df05227", + "operatingSystem": "linux", + "osImage": "Ubuntu 16.04.6 LTS", + "systemUUID": "BB102B43-9922-264C-8C23-22A7DE0F950F" + } + } + } + ], + "kind": "List", + "metadata": { + "resourceVersion": "", + "selfLink": "" + } +} diff --git a/test/code/plugin/health/parent_monitor_provider_spec.rb b/test/code/plugin/health/parent_monitor_provider_spec.rb index a83db50fc..b531629c4 100644 --- a/test/code/plugin/health/parent_monitor_provider_spec.rb +++ b/test/code/plugin/health/parent_monitor_provider_spec.rb @@ -97,7 +97,7 @@ def monitor.labels; {HealthMonitorLabels::ROLE => "master"}; end assert_equal parent_id, "master_node_pool" end - it 'raises if conditions are not met' do + it 'returns defaultParentMonitorTypeId if conditions are not met' do #arrange definition = JSON.parse('{"conditional_monitor_id": { "conditions": [ @@ -123,6 +123,7 @@ def monitor.labels; {HealthMonitorLabels::ROLE => "master"}; end "container.azm.ms/cluster-resource-group", "container.azm.ms/cluster-name" ], + "default_parent_monitor_id": "default_parent_monitor_id", "aggregation_algorithm": "worstOf", "aggregation_algorithm_params": null } @@ -137,8 +138,9 @@ def monitor.monitor_instance_id; "conditional_monitor_instance_id"; end def monitor.labels; {HealthMonitorLabels::ROLE => "master1"}; end #act and assert - assert_raises do - parent_id = health_model_definition.get_parent_monitor_id(monitor) - end + + parent_id = health_model_definition.get_parent_monitor_id(monitor) + parent_id.must_equal('default_parent_monitor_id') + end end diff --git a/test/code/plugin/health/pods.json b/test/code/plugin/health/pods.json new file 
mode 100644 index 000000000..b7c202a19 --- /dev/null +++ b/test/code/plugin/health/pods.json @@ -0,0 +1,5987 @@ +{ + "apiVersion": "v1", + "items": [ + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "creationTimestamp": "2019-08-23T17:12:10Z", + "generateName": "heapster-9bcbfdcf5-", + "labels": { + "k8s-app": "heapster", + "pod-template-hash": "567698791" + }, + "name": "heapster-9bcbfdcf5-zp9tl", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "ReplicaSet", + "name": "heapster-9bcbfdcf5", + "uid": "24a0036e-c5c9-11e9-8736-86290fd7dd1f" + } + ], + "resourceVersion": "19048925", + "selfLink": "/api/v1/namespaces/kube-system/pods/heapster-9bcbfdcf5-zp9tl", + "uid": "24ab7e32-c5c9-11e9-8736-86290fd7dd1f" + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "command": [ + "/heapster", + "--source=kubernetes.summary_api:\"\"" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/heapster-amd64:v1.5.3", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "failureThreshold": 3, + "httpGet": { + "path": "/healthz", + "port": 8082, + "scheme": "HTTP" + }, + "initialDelaySeconds": 180, + "periodSeconds": 10, + 
"successThreshold": 1, + "timeoutSeconds": 5 + }, + "name": "heapster", + "resources": { + "limits": { + "cpu": "88m", + "memory": "204Mi" + }, + "requests": { + "cpu": "88m", + "memory": "204Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "heapster-token-7z7c5", + "readOnly": true + } + ] + }, + { + "command": [ + "/pod_nanny", + "--config-dir=/etc/config", + "--cpu=80m", + "--extra-cpu=0.5m", + "--memory=140Mi", + "--extra-memory=4Mi", + "--threshold=5", + "--deployment=heapster", + "--container=heapster", + "--poll-period=300000", + "--estimator=exponential" + ], + "env": [ + { + "name": "MY_POD_NAME", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "metadata.name" + } + } + }, + { + "name": "MY_POD_NAMESPACE", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "metadata.namespace" + } + } + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/addon-resizer:1.8.1", + "imagePullPolicy": "IfNotPresent", + "name": "heapster-nanny", + "resources": { + "limits": { + "cpu": "50m", + "memory": "90Mi" + }, + "requests": { + "cpu": "50m", + "memory": "90Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/etc/config", + "name": "heapster-config-volume" + }, + { + "mountPath": 
"/var/run/secrets/kubernetes.io/serviceaccount", + "name": "heapster-token-7z7c5", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeName": "aks-nodepool1-19574989-0", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 2000001000, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "heapster", + "serviceAccountName": "heapster", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "tolerationSeconds": 300 + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "tolerationSeconds": 300 + } + ], + "volumes": [ + { + "configMap": { + "defaultMode": 420, + "name": "heapster-config" + }, + "name": "heapster-config-volume" + }, + { + "name": "heapster-token-7z7c5", + "secret": { + "defaultMode": 420, + "secretName": "heapster-token-7z7c5" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T17:12:10Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T17:12:26Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T17:12:10Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://8ab1ee82d29d0351cb21dbce4db9eb2a270407d2ebe10377be02edd46cb34027", + "image": "aksrepos.azurecr.io/mirror/heapster-amd64:v1.5.3", + "imageID": 
"docker-pullable://aksrepos.azurecr.io/mirror/heapster-amd64@sha256:fc33c690a3a446de5abc24b048b88050810a58b9e4477fa763a43d7df029301a", + "lastState": {}, + "name": "heapster", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-08-23T17:12:21Z" + } + } + }, + { + "containerID": "docker://42154ff41fed196c3f4b8a485436537330d16bcef23c743a34cf63202d023453", + "image": "aksrepos.azurecr.io/mirror/addon-resizer:1.8.1", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/addon-resizer@sha256:8ac3ffa4232046feb297cefc40734641fa2954c16308f9e0d70ec152f22231ca", + "lastState": {}, + "name": "heapster-nanny", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-08-23T17:12:25Z" + } + } + } + ], + "hostIP": "10.240.0.4", + "phase": "Running", + "podIP": "10.244.1.33", + "qosClass": "Guaranteed", + "startTime": "2019-08-23T17:12:10Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "annotations": { + "scheduler.alpha.kubernetes.io/critical-pod": "", + "seccomp.security.alpha.kubernetes.io/pod": "docker/default" + }, + "creationTimestamp": "2019-07-09T02:38:06Z", + "generateName": "kube-dns-autoscaler-7d64798d95-", + "labels": { + "k8s-app": "kube-dns-autoscaler", + "pod-template-hash": "3820354851" + }, + "name": "kube-dns-autoscaler-7d64798d95-f9wcv", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "ReplicaSet", + "name": "kube-dns-autoscaler-7d64798d95", + "uid": "71655f71-a1f2-11e9-9bc6-127bb0ec03b8" + } + ], + "resourceVersion": "15144041", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-dns-autoscaler-7d64798d95-f9wcv", + "uid": "94e52ab1-a1f2-11e9-8b08-d602e29755d5" + }, + "spec": { + "containers": [ + { + "command": [ + "/cluster-proportional-autoscaler", + "--namespace=kube-system", + "--configmap=kube-dns-autoscaler", + "--target=deployment/kube-dns-v20", + 
"--default-params={\"ladder\":{\"coresToReplicas\":[[1,2],[512,3],[1024,4],[2048,5]],\"nodesToReplicas\":[[1,2],[8,3],[16,4],[32,5]]}}", + "--logtostderr=true", + "--v=2" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/cluster-proportional-autoscaler-amd64:1.1.2-r2", + "imagePullPolicy": "IfNotPresent", + "name": "autoscaler", + "resources": { + "requests": { + "cpu": "20m", + "memory": "10Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-dns-autoscaler-token-zkxt8", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "Default", + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeName": "aks-nodepool1-19574989-2", + "priority": 2000001000, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "kube-dns-autoscaler", + "serviceAccountName": "kube-dns-autoscaler", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "tolerationSeconds": 300 + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "tolerationSeconds": 300 + } + ], + "volumes": [ + { + "name": 
"kube-dns-autoscaler-token-zkxt8", + "secret": { + "defaultMode": 420, + "secretName": "kube-dns-autoscaler-token-zkxt8" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-09T02:38:07Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-09T02:38:44Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-09T02:38:06Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://43f5fde3ce0f375a40c08de56087fc3b53f6269b239a3e6383d2082779504b96", + "image": "aksrepos.azurecr.io/mirror/cluster-proportional-autoscaler-amd64:1.1.2-r2", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/cluster-proportional-autoscaler-amd64@sha256:ccd2b031b116750091443930a8e6d0f785cfde38f137969e472b2ac850aeddfb", + "lastState": {}, + "name": "autoscaler", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-09T02:38:40Z" + } + } + } + ], + "hostIP": "10.240.0.7", + "phase": "Running", + "podIP": "10.244.12.118", + "qosClass": "Burstable", + "startTime": "2019-07-09T02:38:07Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "annotations": { + "prometheus.io/port": "10055", + "prometheus.io/scrape": "true" + }, + "creationTimestamp": "2019-07-09T02:38:06Z", + "generateName": "kube-dns-v20-55cb5d96f7-", + "labels": { + "k8s-app": "kube-dns", + "kubernetes.io/cluster-service": "true", + "pod-template-hash": "1176185293", + "version": "v20" + }, + "name": "kube-dns-v20-55cb5d96f7-lmrpl", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "ReplicaSet", + "name": "kube-dns-v20-55cb5d96f7", + "uid": 
"71892fd6-a1f2-11e9-9bc6-127bb0ec03b8" + } + ], + "resourceVersion": "15144030", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-dns-v20-55cb5d96f7-lmrpl", + "uid": "952488f3-a1f2-11e9-8b08-d602e29755d5" + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + }, + "podAntiAffinity": { + "preferredDuringSchedulingIgnoredDuringExecution": [ + { + "podAffinityTerm": { + "labelSelector": { + "matchExpressions": [ + { + "key": "k8s-app", + "operator": "In", + "values": [ + "kube-dns" + ] + } + ] + }, + "topologyKey": "kubernetes.io/hostname" + }, + "weight": 100 + } + ] + } + }, + "containers": [ + { + "args": [ + "--kubecfg-file=/config/kubeconfig", + "--config-dir=/kube-dns-config", + "--domain=cluster.local.", + "--dns-port=10053", + "--v=2" + ], + "env": [ + { + "name": "PROMETHEUS_PORT", + "value": "10055" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/k8s-dns-kube-dns-amd64:1.14.13", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "failureThreshold": 5, + "httpGet": { + "path": "/healthcheck/kubedns", + "port": 10054, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "periodSeconds": 10, + "successThreshold": 1, + "timeoutSeconds": 5 + }, + "name": "kubedns", + "ports": [ + { + "containerPort": 10053, + "name": "dns-local", + "protocol": "UDP" + 
}, + { + "containerPort": 10053, + "name": "dns-tcp-local", + "protocol": "TCP" + }, + { + "containerPort": 10055, + "name": "metrics", + "protocol": "TCP" + } + ], + "readinessProbe": { + "failureThreshold": 3, + "httpGet": { + "path": "/readiness", + "port": 8081, + "scheme": "HTTP" + }, + "initialDelaySeconds": 30, + "periodSeconds": 10, + "successThreshold": 1, + "timeoutSeconds": 5 + }, + "resources": { + "limits": { + "memory": "170Mi" + }, + "requests": { + "cpu": "100m", + "memory": "70Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/kube-dns-config", + "name": "kube-dns-config" + }, + { + "mountPath": "/config", + "name": "kubedns-kubecfg", + "readOnly": true + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-dns-token-ghgtl", + "readOnly": true + } + ] + }, + { + "args": [ + "-v=2", + "-logtostderr", + "-configDir=/kube-dns-config", + "-restartDnsmasq=true", + "--", + "-k", + "--cache-size=1000", + "--no-negcache", + "--no-resolv", + "--server=127.0.0.1#10053", + "--server=/cluster.local/127.0.0.1#10053", + "--server=/in-addr.arpa/127.0.0.1#10053", + "--server=/ip6.arpa/127.0.0.1#10053", + "--log-facility=-" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/k8s-dns-dnsmasq-nanny-amd64:1.14.10", + "imagePullPolicy": "IfNotPresent", + "name": "dnsmasq", + "ports": [ + { + "containerPort": 53, + "name": "dns", + 
"protocol": "UDP" + }, + { + "containerPort": 53, + "name": "dns-tcp", + "protocol": "TCP" + } + ], + "resources": {}, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/kube-dns-config", + "name": "kube-dns-config" + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-dns-token-ghgtl", + "readOnly": true + } + ] + }, + { + "args": [ + "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1 \u003e/dev/null || exit 1; done", + "--url=/healthz-dnsmasq", + "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1:10053 \u003e/dev/null || exit 1; done", + "--url=/healthz-kubedns", + "--port=8080", + "--quiet" + ], + "env": [ + { + "name": "PROBE_DOMAINS", + "value": "bing.com kubernetes.default.svc.cluster.local" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/exechealthz-amd64:1.2", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "failureThreshold": 5, + "httpGet": { + "path": "/healthz-dnsmasq", + "port": 8080, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "periodSeconds": 10, + "successThreshold": 1, + "timeoutSeconds": 5 + }, + "name": "healthz", + "ports": [ + { + "containerPort": 8080, + "protocol": "TCP" + } + ], + "resources": { + "limits": { + "memory": "50Mi" + }, + "requests": { + "cpu": "10m", + "memory": "50Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + 
"volumeMounts": [ + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-dns-token-ghgtl", + "readOnly": true + } + ] + }, + { + "args": [ + "--v=2", + "--logtostderr", + "--probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.cluster.local,5,SRV", + "--probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.cluster.local,5,SRV" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/k8s-dns-sidecar-amd64:1.14.10", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "failureThreshold": 3, + "httpGet": { + "path": "/metrics", + "port": 10054, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "periodSeconds": 10, + "successThreshold": 1, + "timeoutSeconds": 5 + }, + "name": "sidecar", + "ports": [ + { + "containerPort": 10054, + "name": "metrics", + "protocol": "TCP" + } + ], + "resources": { + "requests": { + "cpu": "10m", + "memory": "20Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-dns-token-ghgtl", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "Default", + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeName": "aks-nodepool1-19574989-1", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 2000001000, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + 
"securityContext": {}, + "serviceAccount": "kube-dns", + "serviceAccountName": "kube-dns", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "tolerationSeconds": 300 + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "tolerationSeconds": 300 + } + ], + "volumes": [ + { + "configMap": { + "defaultMode": 420, + "name": "kube-dns", + "optional": true + }, + "name": "kube-dns-config" + }, + { + "configMap": { + "defaultMode": 420, + "name": "kubedns-kubecfg" + }, + "name": "kubedns-kubecfg" + }, + { + "name": "kube-dns-token-ghgtl", + "secret": { + "defaultMode": 420, + "secretName": "kube-dns-token-ghgtl" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-09T02:38:09Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-09T02:38:50Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-09T02:38:06Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://8aa7d794d423f29469d8a35cc295bfaf2434a26756d7063fb19e06ce838aa5d9", + "image": "aksrepos.azurecr.io/mirror/k8s-dns-dnsmasq-nanny-amd64:1.14.10", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/k8s-dns-dnsmasq-nanny-amd64@sha256:bbb2a290a568125b3b996028958eb773f33b5b87a6b37bf38a28f8b62dddb3c8", + "lastState": {}, + "name": "dnsmasq", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-09T02:38:23Z" + } + } + }, + { + "containerID": "docker://7ee72258ca97555017c3096c3c125935b22e1735dafd494bec7f5480a408314a", + "image": 
"aksrepos.azurecr.io/mirror/exechealthz-amd64:1.2", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/exechealthz-amd64@sha256:34722333f0cd0b891b61c9e0efa31913f22157e341a3aabb79967305d4e78260", + "lastState": {}, + "name": "healthz", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-09T02:38:25Z" + } + } + }, + { + "containerID": "docker://bf6c7e823d08306e6ba13353ae89319080990a5d302b1d7370e76acd34c34a52", + "image": "aksrepos.azurecr.io/mirror/k8s-dns-kube-dns-amd64:1.14.13", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", + "lastState": {}, + "name": "kubedns", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-09T02:38:19Z" + } + } + }, + { + "containerID": "docker://2e4faf4da65a23316dc7065e3de27bf1ebd9ac2a8f07b9053de5ab63ab4c2d7e", + "image": "aksrepos.azurecr.io/mirror/k8s-dns-sidecar-amd64:1.14.10", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/k8s-dns-sidecar-amd64@sha256:4f1ab957f87b94a5ec1edc26fae50da2175461f00afecf68940c4aa079bd08a4", + "lastState": {}, + "name": "sidecar", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-09T02:38:28Z" + } + } + } + ], + "hostIP": "10.240.0.5", + "phase": "Running", + "podIP": "10.244.0.192", + "qosClass": "Burstable", + "startTime": "2019-07-09T02:38:09Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "annotations": { + "prometheus.io/port": "10055", + "prometheus.io/scrape": "true" + }, + "creationTimestamp": "2019-07-09T02:38:06Z", + "generateName": "kube-dns-v20-55cb5d96f7-", + "labels": { + "k8s-app": "kube-dns", + "kubernetes.io/cluster-service": "true", + "pod-template-hash": "1176185293", + "version": "v20" + }, + "name": "kube-dns-v20-55cb5d96f7-pl7sh", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + 
"blockOwnerDeletion": true, + "controller": true, + "kind": "ReplicaSet", + "name": "kube-dns-v20-55cb5d96f7", + "uid": "71892fd6-a1f2-11e9-9bc6-127bb0ec03b8" + } + ], + "resourceVersion": "15144050", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-dns-v20-55cb5d96f7-pl7sh", + "uid": "95046bc6-a1f2-11e9-8b08-d602e29755d5" + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + }, + "podAntiAffinity": { + "preferredDuringSchedulingIgnoredDuringExecution": [ + { + "podAffinityTerm": { + "labelSelector": { + "matchExpressions": [ + { + "key": "k8s-app", + "operator": "In", + "values": [ + "kube-dns" + ] + } + ] + }, + "topologyKey": "kubernetes.io/hostname" + }, + "weight": 100 + } + ] + } + }, + "containers": [ + { + "args": [ + "--kubecfg-file=/config/kubeconfig", + "--config-dir=/kube-dns-config", + "--domain=cluster.local.", + "--dns-port=10053", + "--v=2" + ], + "env": [ + { + "name": "PROMETHEUS_PORT", + "value": "10055" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/k8s-dns-kube-dns-amd64:1.14.13", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "failureThreshold": 5, + "httpGet": { + "path": "/healthcheck/kubedns", + "port": 10054, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "periodSeconds": 10, + "successThreshold": 1, + 
"timeoutSeconds": 5 + }, + "name": "kubedns", + "ports": [ + { + "containerPort": 10053, + "name": "dns-local", + "protocol": "UDP" + }, + { + "containerPort": 10053, + "name": "dns-tcp-local", + "protocol": "TCP" + }, + { + "containerPort": 10055, + "name": "metrics", + "protocol": "TCP" + } + ], + "readinessProbe": { + "failureThreshold": 3, + "httpGet": { + "path": "/readiness", + "port": 8081, + "scheme": "HTTP" + }, + "initialDelaySeconds": 30, + "periodSeconds": 10, + "successThreshold": 1, + "timeoutSeconds": 5 + }, + "resources": { + "limits": { + "memory": "170Mi" + }, + "requests": { + "cpu": "100m", + "memory": "70Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/kube-dns-config", + "name": "kube-dns-config" + }, + { + "mountPath": "/config", + "name": "kubedns-kubecfg", + "readOnly": true + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-dns-token-ghgtl", + "readOnly": true + } + ] + }, + { + "args": [ + "-v=2", + "-logtostderr", + "-configDir=/kube-dns-config", + "-restartDnsmasq=true", + "--", + "-k", + "--cache-size=1000", + "--no-negcache", + "--no-resolv", + "--server=127.0.0.1#10053", + "--server=/cluster.local/127.0.0.1#10053", + "--server=/in-addr.arpa/127.0.0.1#10053", + "--server=/ip6.arpa/127.0.0.1#10053", + "--log-facility=-" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": 
"aksrepos.azurecr.io/mirror/k8s-dns-dnsmasq-nanny-amd64:1.14.10", + "imagePullPolicy": "IfNotPresent", + "name": "dnsmasq", + "ports": [ + { + "containerPort": 53, + "name": "dns", + "protocol": "UDP" + }, + { + "containerPort": 53, + "name": "dns-tcp", + "protocol": "TCP" + } + ], + "resources": {}, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/kube-dns-config", + "name": "kube-dns-config" + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-dns-token-ghgtl", + "readOnly": true + } + ] + }, + { + "args": [ + "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1 \u003e/dev/null || exit 1; done", + "--url=/healthz-dnsmasq", + "--cmd=for d in $PROBE_DOMAINS; do nslookup $d 127.0.0.1:10053 \u003e/dev/null || exit 1; done", + "--url=/healthz-kubedns", + "--port=8080", + "--quiet" + ], + "env": [ + { + "name": "PROBE_DOMAINS", + "value": "bing.com kubernetes.default.svc.cluster.local" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/exechealthz-amd64:1.2", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "failureThreshold": 5, + "httpGet": { + "path": "/healthz-dnsmasq", + "port": 8080, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "periodSeconds": 10, + "successThreshold": 1, + "timeoutSeconds": 5 + }, + "name": "healthz", + "ports": [ + { + "containerPort": 8080, + "protocol": "TCP" + } + ], + "resources": { + "limits": { + 
"memory": "50Mi" + }, + "requests": { + "cpu": "10m", + "memory": "50Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-dns-token-ghgtl", + "readOnly": true + } + ] + }, + { + "args": [ + "--v=2", + "--logtostderr", + "--probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.cluster.local,5,SRV", + "--probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.cluster.local,5,SRV" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/k8s-dns-sidecar-amd64:1.14.10", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "failureThreshold": 3, + "httpGet": { + "path": "/metrics", + "port": 10054, + "scheme": "HTTP" + }, + "initialDelaySeconds": 60, + "periodSeconds": 10, + "successThreshold": 1, + "timeoutSeconds": 5 + }, + "name": "sidecar", + "ports": [ + { + "containerPort": 10054, + "name": "metrics", + "protocol": "TCP" + } + ], + "resources": { + "requests": { + "cpu": "10m", + "memory": "20Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-dns-token-ghgtl", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "Default", + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeName": "aks-nodepool1-19574989-2", + "nodeSelector": { + 
"beta.kubernetes.io/os": "linux" + }, + "priority": 2000001000, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "kube-dns", + "serviceAccountName": "kube-dns", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "tolerationSeconds": 300 + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "tolerationSeconds": 300 + } + ], + "volumes": [ + { + "configMap": { + "defaultMode": 420, + "name": "kube-dns", + "optional": true + }, + "name": "kube-dns-config" + }, + { + "configMap": { + "defaultMode": 420, + "name": "kubedns-kubecfg" + }, + "name": "kubedns-kubecfg" + }, + { + "name": "kube-dns-token-ghgtl", + "secret": { + "defaultMode": 420, + "secretName": "kube-dns-token-ghgtl" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-09T02:38:10Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-09T02:39:14Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-09T02:38:06Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://c16dce3b5c1f06c6fbfdf52edb98f9916740c0f652dc72b2fe0f9f0cc5c4c4de", + "image": "aksrepos.azurecr.io/mirror/k8s-dns-dnsmasq-nanny-amd64:1.14.10", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/k8s-dns-dnsmasq-nanny-amd64@sha256:bbb2a290a568125b3b996028958eb773f33b5b87a6b37bf38a28f8b62dddb3c8", + "lastState": {}, + "name": "dnsmasq", + "ready": true, + "restartCount": 0, + "state": 
{ + "running": { + "startedAt": "2019-07-09T02:38:51Z" + } + } + }, + { + "containerID": "docker://410ceb88fcbc2c3cdf19ffc5ce88adb0ba933bbc3cf446a90e669a978a7d933c", + "image": "aksrepos.azurecr.io/mirror/exechealthz-amd64:1.2", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/exechealthz-amd64@sha256:34722333f0cd0b891b61c9e0efa31913f22157e341a3aabb79967305d4e78260", + "lastState": {}, + "name": "healthz", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-09T02:38:58Z" + } + } + }, + { + "containerID": "docker://694f575606b51234a98b3e22d2afd04f3fa11c30b6090a901e64922eeb9fba95", + "image": "aksrepos.azurecr.io/mirror/k8s-dns-kube-dns-amd64:1.14.13", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/k8s-dns-kube-dns-amd64@sha256:618a82fa66cf0c75e4753369a6999032372be7308866fc9afb381789b1e5ad52", + "lastState": {}, + "name": "kubedns", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-09T02:38:39Z" + } + } + }, + { + "containerID": "docker://d7865fb7465b2f9cd218cdf6694018aee55260966f2bf51e6b628a86c6b9041f", + "image": "aksrepos.azurecr.io/mirror/k8s-dns-sidecar-amd64:1.14.10", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/k8s-dns-sidecar-amd64@sha256:4f1ab957f87b94a5ec1edc26fae50da2175461f00afecf68940c4aa079bd08a4", + "lastState": {}, + "name": "sidecar", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-09T02:39:04Z" + } + } + } + ], + "hostIP": "10.240.0.7", + "phase": "Running", + "podIP": "10.244.12.117", + "qosClass": "Burstable", + "startTime": "2019-07-09T02:38:10Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "annotations": { + "aks.microsoft.com/release-time": "seconds:1566580134 nanos:758740921 ", + "remediator.aks.microsoft.com/kube-proxy-restart": "24" + }, + "creationTimestamp": "2019-08-23T17:13:13Z", + "generateName": "kube-proxy-", + "labels": { + "component": "kube-proxy", + 
"controller-revision-hash": "3559350992", + "pod-template-generation": "141", + "tier": "node" + }, + "name": "kube-proxy-ct2tl", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "DaemonSet", + "name": "kube-proxy", + "uid": "45640bf6-44e5-11e9-9920-423525a6b683" + } + ], + "resourceVersion": "19049034", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-ct2tl", + "uid": "49e373c8-c5c9-11e9-8736-86290fd7dd1f" + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "command": [ + "/hyperkube", + "proxy", + "--kubeconfig=/var/lib/kubelet/kubeconfig", + "--cluster-cidr=10.244.0.0/16", + "--feature-gates=ExperimentalCriticalPodAnnotation=true", + "--v=3" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/hyperkube-amd64:v1.11.8", + "imagePullPolicy": "IfNotPresent", + "name": "kube-proxy", + "resources": { + "requests": { + "cpu": "100m" + } + }, + "securityContext": { + "privileged": true + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/lib/kubelet", + "name": "kubeconfig", + "readOnly": true + }, + { + "mountPath": "/etc/kubernetes/certs", 
+ "name": "certificates", + "readOnly": true + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-proxy-token-f5vbg", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "hostNetwork": true, + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeName": "aks-nodepool1-19574989-0", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 2000001000, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "kube-proxy", + "serviceAccountName": "kube-proxy", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists" + } + ], + "volumes": [ + { + "hostPath": { + "path": "/var/lib/kubelet", + "type": "" + }, + "name": "kubeconfig" + }, + { + "hostPath": { + "path": "/etc/kubernetes/certs", + "type": "" + }, + "name": "certificates" + }, + { + "name": "kube-proxy-token-f5vbg", + "secret": { + "defaultMode": 420, + "secretName": "kube-proxy-token-f5vbg" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T17:13:13Z", + "status": "True", + "type": 
"Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T17:13:23Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T17:13:13Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://ef115b31792ece39d1526075f9f3763f8cbf526814624795a05786d83367427e", + "image": "aksrepos.azurecr.io/mirror/hyperkube-amd64:v1.11.8", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "lastState": {}, + "name": "kube-proxy", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-08-23T17:13:22Z" + } + } + } + ], + "hostIP": "10.240.0.4", + "phase": "Running", + "podIP": "10.240.0.4", + "qosClass": "Burstable", + "startTime": "2019-08-23T17:13:13Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "annotations": { + "aks.microsoft.com/release-time": "seconds:1566580134 nanos:758740921 ", + "remediator.aks.microsoft.com/kube-proxy-restart": "24" + }, + "creationTimestamp": "2019-08-23T17:10:52Z", + "generateName": "kube-proxy-", + "labels": { + "component": "kube-proxy", + "controller-revision-hash": "3559350992", + "pod-template-generation": "141", + "tier": "node" + }, + "name": "kube-proxy-d59xd", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "DaemonSet", + "name": "kube-proxy", + "uid": "45640bf6-44e5-11e9-9920-423525a6b683" + } + ], + "resourceVersion": "19048698", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-d59xd", + "uid": "f65e6a62-c5c8-11e9-8736-86290fd7dd1f" + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { 
+ "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "command": [ + "/hyperkube", + "proxy", + "--kubeconfig=/var/lib/kubelet/kubeconfig", + "--cluster-cidr=10.244.0.0/16", + "--feature-gates=ExperimentalCriticalPodAnnotation=true", + "--v=3" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/hyperkube-amd64:v1.11.8", + "imagePullPolicy": "IfNotPresent", + "name": "kube-proxy", + "resources": { + "requests": { + "cpu": "100m" + } + }, + "securityContext": { + "privileged": true + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/lib/kubelet", + "name": "kubeconfig", + "readOnly": true + }, + { + "mountPath": "/etc/kubernetes/certs", + "name": "certificates", + "readOnly": true + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-proxy-token-f5vbg", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "hostNetwork": true, + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeName": "aks-nodepool1-19574989-1", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 2000001000, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "kube-proxy", + 
"serviceAccountName": "kube-proxy", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists" + } + ], + "volumes": [ + { + "hostPath": { + "path": "/var/lib/kubelet", + "type": "" + }, + "name": "kubeconfig" + }, + { + "hostPath": { + "path": "/etc/kubernetes/certs", + "type": "" + }, + "name": "certificates" + }, + { + "name": "kube-proxy-token-f5vbg", + "secret": { + "defaultMode": 420, + "secretName": "kube-proxy-token-f5vbg" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T17:10:52Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T17:11:05Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T17:10:52Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://c4e9d0e372116b9cab048f7bb381e93b423dac2285da75f66664a473fcc043b3", + "image": "aksrepos.azurecr.io/mirror/hyperkube-amd64:v1.11.8", + "imageID": 
"docker-pullable://aksrepos.azurecr.io/mirror/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "lastState": {}, + "name": "kube-proxy", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-08-23T17:11:04Z" + } + } + } + ], + "hostIP": "10.240.0.5", + "phase": "Running", + "podIP": "10.240.0.5", + "qosClass": "Burstable", + "startTime": "2019-08-23T17:10:52Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "annotations": { + "aks.microsoft.com/release-time": "seconds:1566580134 nanos:758740921 ", + "remediator.aks.microsoft.com/kube-proxy-restart": "24" + }, + "creationTimestamp": "2019-08-23T17:12:23Z", + "generateName": "kube-proxy-", + "labels": { + "component": "kube-proxy", + "controller-revision-hash": "3559350992", + "pod-template-generation": "141", + "tier": "node" + }, + "name": "kube-proxy-kpm8j", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "DaemonSet", + "name": "kube-proxy", + "uid": "45640bf6-44e5-11e9-9920-423525a6b683" + } + ], + "resourceVersion": "19048942", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-kpm8j", + "uid": "2c3de48d-c5c9-11e9-8736-86290fd7dd1f" + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "command": [ + "/hyperkube", + "proxy", + "--kubeconfig=/var/lib/kubelet/kubeconfig", + "--cluster-cidr=10.244.0.0/16", + "--feature-gates=ExperimentalCriticalPodAnnotation=true", + "--v=3" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/hyperkube-amd64:v1.11.8", + "imagePullPolicy": "IfNotPresent", + "name": "kube-proxy", + "resources": { + "requests": { + "cpu": "100m" + } + }, + "securityContext": { + "privileged": true + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/lib/kubelet", + "name": "kubeconfig", + "readOnly": true + }, + { + "mountPath": "/etc/kubernetes/certs", + "name": "certificates", + "readOnly": true + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-proxy-token-f5vbg", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "hostNetwork": true, + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeName": "aks-nodepool1-19574989-2", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 2000001000, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "kube-proxy", + "serviceAccountName": "kube-proxy", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists" 
+ }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists" + } + ], + "volumes": [ + { + "hostPath": { + "path": "/var/lib/kubelet", + "type": "" + }, + "name": "kubeconfig" + }, + { + "hostPath": { + "path": "/etc/kubernetes/certs", + "type": "" + }, + "name": "certificates" + }, + { + "name": "kube-proxy-token-f5vbg", + "secret": { + "defaultMode": 420, + "secretName": "kube-proxy-token-f5vbg" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T17:12:24Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T17:12:34Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T17:12:24Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://51067a965113e6d285a676e0d1e212ffbb60046aab6c4702f5554617415b2031", + "image": "aksrepos.azurecr.io/mirror/hyperkube-amd64:v1.11.8", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "lastState": {}, + "name": "kube-proxy", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-08-23T17:12:33Z" + } + } + } + ], + "hostIP": "10.240.0.7", + "phase": "Running", + "podIP": "10.240.0.7", + "qosClass": "Burstable", + "startTime": "2019-08-23T17:12:24Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "annotations": { + "aks.microsoft.com/release-time": "seconds:1566580134 nanos:758740921 ", + 
"remediator.aks.microsoft.com/kube-proxy-restart": "24" + }, + "creationTimestamp": "2019-08-23T17:11:38Z", + "generateName": "kube-proxy-", + "labels": { + "component": "kube-proxy", + "controller-revision-hash": "3559350992", + "pod-template-generation": "141", + "tier": "node" + }, + "name": "kube-proxy-skzg4", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "DaemonSet", + "name": "kube-proxy", + "uid": "45640bf6-44e5-11e9-9920-423525a6b683" + } + ], + "resourceVersion": "19048774", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-proxy-skzg4", + "uid": "114f7246-c5c9-11e9-8736-86290fd7dd1f" + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "command": [ + "/hyperkube", + "proxy", + "--kubeconfig=/var/lib/kubelet/kubeconfig", + "--cluster-cidr=10.244.0.0/16", + "--feature-gates=ExperimentalCriticalPodAnnotation=true", + "--v=3" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/hyperkube-amd64:v1.11.8", + "imagePullPolicy": "IfNotPresent", + "name": "kube-proxy", + "resources": { + "requests": { + "cpu": "100m" + } + }, + "securityContext": { + "privileged": true + }, + "terminationMessagePath": "/dev/termination-log", + 
"terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/lib/kubelet", + "name": "kubeconfig", + "readOnly": true + }, + { + "mountPath": "/etc/kubernetes/certs", + "name": "certificates", + "readOnly": true + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-proxy-token-f5vbg", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "hostNetwork": true, + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeName": "aks-nodepool1-19574989-3", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 2000001000, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "kube-proxy", + "serviceAccountName": "kube-proxy", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists" + } + ], + "volumes": [ + { + "hostPath": { + "path": "/var/lib/kubelet", + "type": "" + }, + "name": "kubeconfig" + }, + { + "hostPath": { + "path": "/etc/kubernetes/certs", + "type": "" + }, + "name": "certificates" + }, + { + "name": "kube-proxy-token-f5vbg", + "secret": { + "defaultMode": 420, + "secretName": 
"kube-proxy-token-f5vbg" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T17:11:38Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T17:11:42Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T17:11:38Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://a3172e9191547b0ea3eb7db629cd4bba2240f5c9d0186ea37be49d9877034541", + "image": "aksrepos.azurecr.io/mirror/hyperkube-amd64:v1.11.8", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/hyperkube-amd64@sha256:1447d5b491fcee503c9f8fb712e1593dc3772c7e661251f54c297477cc716913", + "lastState": {}, + "name": "kube-proxy", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-08-23T17:11:41Z" + } + } + } + ], + "hostIP": "10.240.0.6", + "phase": "Running", + "podIP": "10.240.0.6", + "qosClass": "Burstable", + "startTime": "2019-08-23T17:11:38Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "creationTimestamp": "2019-07-15T20:54:26Z", + "generateName": "kube-svc-redirect-", + "labels": { + "component": "kube-svc-redirect", + "controller-revision-hash": "1216437240", + "pod-template-generation": "9", + "tier": "node" + }, + "name": "kube-svc-redirect-czm8d", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "DaemonSet", + "name": "kube-svc-redirect", + "uid": "45a5fc62-44e5-11e9-9920-423525a6b683" + } + ], + "resourceVersion": "15831523", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-svc-redirect-czm8d", + "uid": "bb3d3ef2-a742-11e9-a38a-22d1c75c4357" + }, + "spec": { + "affinity": { + "nodeAffinity": 
{ + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "env": [ + { + "name": "KUBERNETES_SVC_IP", + "value": "10.0.0.1" + }, + { + "name": "KUBE_SVC_REDIRECTOR_PROXY_IP", + "value": "127.0.0.1:14612" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/prod/kube-svc-redirect:v1.0.2", + "imagePullPolicy": "IfNotPresent", + "name": "redirector", + "resources": { + "requests": { + "cpu": "5m", + "memory": "2Mi" + } + }, + "securityContext": { + "capabilities": { + "add": [ + "NET_ADMIN" + ] + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-svc-redirector-token-ngjg2", + "readOnly": true + } + ] + }, + { + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": 
"aksrepos.azurecr.io/mirror/nginx:1.13.12-alpine", + "imagePullPolicy": "IfNotPresent", + "name": "azureproxy", + "ports": [ + { + "containerPort": 14612, + "hostPort": 14612, + "protocol": "TCP" + } + ], + "resources": { + "requests": { + "cpu": "5m", + "memory": "32Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/etc/nginx/nginx.conf", + "name": "azureproxy-nginx", + "readOnly": true, + "subPath": "nginx.conf" + }, + { + "mountPath": "/etc/nginx/conf.d", + "name": "azureproxy-configs", + "readOnly": true + }, + { + "mountPath": "/etc/nginx/certs", + "name": "azureproxy-certs", + "readOnly": true + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-svc-redirector-token-ngjg2", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "hostNetwork": true, + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeName": "aks-nodepool1-19574989-0", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 2000001000, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "kube-svc-redirector", + "serviceAccountName": "kube-svc-redirector", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists" + }, + { + "effect": 
"NoSchedule", + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists" + } + ], + "volumes": [ + { + "configMap": { + "defaultMode": 420, + "name": "azureproxy-nginx" + }, + "name": "azureproxy-nginx" + }, + { + "configMap": { + "defaultMode": 420, + "name": "azureproxy-config" + }, + "name": "azureproxy-configs" + }, + { + "name": "azureproxy-certs", + "secret": { + "defaultMode": 420, + "secretName": "azureproxy-certs" + } + }, + { + "name": "kube-svc-redirector-token-ngjg2", + "secret": { + "defaultMode": 420, + "secretName": "kube-svc-redirector-token-ngjg2" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-15T20:54:26Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-15T20:55:03Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-15T20:54:26Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://942d4ddc66e488245fa77cf331a38de7df760d5d5d96b344f5bfbc84adbab861", + "image": "aksrepos.azurecr.io/mirror/nginx:1.13.12-alpine", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/nginx@sha256:91d22184f3f9b1be658c2cc2c12d324de7ff12c8b9c9a597905457b4d93b069d", + "lastState": {}, + "name": "azureproxy", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-15T20:55:02Z" + } + } + }, + { + "containerID": "docker://71d6f73215c0994fa2f7b340732d5e4453a86ece31dcf5278fb2abc32e3e4de2", + "image": "aksrepos.azurecr.io/mirror/kube-svc-redirect:v1.0.2", + "imageID": 
"docker-pullable://aksrepos.azurecr.io/mirror/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "lastState": {}, + "name": "redirector", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-15T20:54:36Z" + } + } + } + ], + "hostIP": "10.240.0.4", + "phase": "Running", + "podIP": "10.240.0.4", + "qosClass": "Burstable", + "startTime": "2019-07-15T20:54:26Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "creationTimestamp": "2019-08-07T18:57:56Z", + "generateName": "kube-svc-redirect-", + "labels": { + "component": "kube-svc-redirect", + "controller-revision-hash": "1216437240", + "pod-template-generation": "9", + "tier": "node" + }, + "name": "kube-svc-redirect-mqk98", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "DaemonSet", + "name": "kube-svc-redirect", + "uid": "45a5fc62-44e5-11e9-9920-423525a6b683" + } + ], + "resourceVersion": "16965477", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-svc-redirect-mqk98", + "uid": "44a61692-b945-11e9-a1b6-127094e7fd94" + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "env": [ + { + "name": "KUBERNETES_SVC_IP", + "value": "10.0.0.1" + }, + { + "name": "KUBE_SVC_REDIRECTOR_PROXY_IP", + "value": "127.0.0.1:14612" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": 
"tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/prod/kube-svc-redirect:v1.0.2", + "imagePullPolicy": "IfNotPresent", + "name": "redirector", + "resources": { + "requests": { + "cpu": "5m", + "memory": "2Mi" + } + }, + "securityContext": { + "capabilities": { + "add": [ + "NET_ADMIN" + ] + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-svc-redirector-token-ngjg2", + "readOnly": true + } + ] + }, + { + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/nginx:1.13.12-alpine", + "imagePullPolicy": "IfNotPresent", + "name": "azureproxy", + "ports": [ + { + "containerPort": 14612, + "hostPort": 14612, + "protocol": "TCP" + } + ], + "resources": { + "requests": { + "cpu": "5m", + "memory": "32Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/etc/nginx/nginx.conf", + "name": "azureproxy-nginx", + "readOnly": true, + "subPath": "nginx.conf" + }, + { + "mountPath": "/etc/nginx/conf.d", + "name": "azureproxy-configs", + "readOnly": true + }, + { + "mountPath": "/etc/nginx/certs", + "name": "azureproxy-certs", + "readOnly": true + }, + { + 
"mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-svc-redirector-token-ngjg2", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "hostNetwork": true, + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeName": "aks-nodepool1-19574989-3", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 2000001000, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "kube-svc-redirector", + "serviceAccountName": "kube-svc-redirector", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists" + } + ], + "volumes": [ + { + "configMap": { + "defaultMode": 420, + "name": "azureproxy-nginx" + }, + "name": "azureproxy-nginx" + }, + { + "configMap": { + "defaultMode": 420, + "name": "azureproxy-config" + }, + "name": "azureproxy-configs" + }, + { + "name": "azureproxy-certs", + "secret": { + "defaultMode": 420, + "secretName": "azureproxy-certs" + } + }, + { + "name": "kube-svc-redirector-token-ngjg2", + "secret": { + "defaultMode": 420, + "secretName": "kube-svc-redirector-token-ngjg2" + } + } + ] + }, + "status": { + 
"conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-07T18:57:58Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-07T18:58:09Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-07T18:57:58Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://5f47547dc8e4fceb8e2a6e01cee5612b49e2dc2d5682b6a58f648d8223b3a6b0", + "image": "aksrepos.azurecr.io/mirror/nginx:1.13.12-alpine", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/nginx@sha256:91d22184f3f9b1be658c2cc2c12d324de7ff12c8b9c9a597905457b4d93b069d", + "lastState": {}, + "name": "azureproxy", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-08-07T18:58:09Z" + } + } + }, + { + "containerID": "docker://5da4e17288399f8e2d4998e5c06159d0d2d39690e89195c5381ab7e3c91aaf99", + "image": "aksrepos.azurecr.io/prod/kube-svc-redirect:v1.0.2", + "imageID": "docker-pullable://aksrepos.azurecr.io/prod/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "lastState": {}, + "name": "redirector", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-08-07T18:58:08Z" + } + } + } + ], + "hostIP": "10.240.0.6", + "phase": "Running", + "podIP": "10.240.0.6", + "qosClass": "Burstable", + "startTime": "2019-08-07T18:57:58Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "creationTimestamp": "2019-07-15T20:55:38Z", + "generateName": "kube-svc-redirect-", + "labels": { + "component": "kube-svc-redirect", + "controller-revision-hash": "1216437240", + "pod-template-generation": "9", + "tier": "node" + }, + "name": "kube-svc-redirect-qf4tl", + "namespace": "kube-system", + "ownerReferences": [ + 
{ + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "DaemonSet", + "name": "kube-svc-redirect", + "uid": "45a5fc62-44e5-11e9-9920-423525a6b683" + } + ], + "resourceVersion": "15144014", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-svc-redirect-qf4tl", + "uid": "e690309f-a742-11e9-a38a-22d1c75c4357" + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "env": [ + { + "name": "KUBERNETES_SVC_IP", + "value": "10.0.0.1" + }, + { + "name": "KUBE_SVC_REDIRECTOR_PROXY_IP", + "value": "127.0.0.1:14612" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/prod/kube-svc-redirect:v1.0.2", + "imagePullPolicy": "IfNotPresent", + "name": "redirector", + "resources": { + "requests": { + "cpu": "5m", + "memory": "2Mi" + } + }, + "securityContext": { + "capabilities": { + "add": [ + "NET_ADMIN" + ] + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-svc-redirector-token-ngjg2", + "readOnly": true + } + ] + }, + { + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": 
"KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/nginx:1.13.12-alpine", + "imagePullPolicy": "IfNotPresent", + "name": "azureproxy", + "ports": [ + { + "containerPort": 14612, + "hostPort": 14612, + "protocol": "TCP" + } + ], + "resources": { + "requests": { + "cpu": "5m", + "memory": "32Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/etc/nginx/nginx.conf", + "name": "azureproxy-nginx", + "readOnly": true, + "subPath": "nginx.conf" + }, + { + "mountPath": "/etc/nginx/conf.d", + "name": "azureproxy-configs", + "readOnly": true + }, + { + "mountPath": "/etc/nginx/certs", + "name": "azureproxy-certs", + "readOnly": true + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-svc-redirector-token-ngjg2", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "hostNetwork": true, + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeName": "aks-nodepool1-19574989-1", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 2000001000, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "kube-svc-redirector", + "serviceAccountName": "kube-svc-redirector", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": 
"node.kubernetes.io/not-ready", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists" + } + ], + "volumes": [ + { + "configMap": { + "defaultMode": 420, + "name": "azureproxy-nginx" + }, + "name": "azureproxy-nginx" + }, + { + "configMap": { + "defaultMode": 420, + "name": "azureproxy-config" + }, + "name": "azureproxy-configs" + }, + { + "name": "azureproxy-certs", + "secret": { + "defaultMode": 420, + "secretName": "azureproxy-certs" + } + }, + { + "name": "kube-svc-redirector-token-ngjg2", + "secret": { + "defaultMode": 420, + "secretName": "kube-svc-redirector-token-ngjg2" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-15T20:55:38Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-15T20:55:47Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-15T20:55:38Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://a0fa774ceba9ae78cf75ffb96a0d8f3ca4d48e5d9d17218957b07e8b1e7e2862", + "image": "aksrepos.azurecr.io/mirror/nginx:1.13.12-alpine", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/nginx@sha256:91d22184f3f9b1be658c2cc2c12d324de7ff12c8b9c9a597905457b4d93b069d", + "lastState": {}, + "name": "azureproxy", + "ready": true, + 
"restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-15T20:55:46Z" + } + } + }, + { + "containerID": "docker://7f281954c57ff6529aaeea2e79dc45a8abeabd4b360c2bbea5c0830ddac4f093", + "image": "aksrepos.azurecr.io/mirror/kube-svc-redirect:v1.0.2", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "lastState": {}, + "name": "redirector", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-15T20:55:44Z" + } + } + } + ], + "hostIP": "10.240.0.5", + "phase": "Running", + "podIP": "10.240.0.5", + "qosClass": "Burstable", + "startTime": "2019-07-15T20:55:38Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "creationTimestamp": "2019-07-15T20:56:33Z", + "generateName": "kube-svc-redirect-", + "labels": { + "component": "kube-svc-redirect", + "controller-revision-hash": "1216437240", + "pod-template-generation": "9", + "tier": "node" + }, + "name": "kube-svc-redirect-rtw2t", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "DaemonSet", + "name": "kube-svc-redirect", + "uid": "45a5fc62-44e5-11e9-9920-423525a6b683" + } + ], + "resourceVersion": "15144039", + "selfLink": "/api/v1/namespaces/kube-system/pods/kube-svc-redirect-rtw2t", + "uid": "06fef5f6-a743-11e9-a38a-22d1c75c4357" + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "env": [ + { + "name": "KUBERNETES_SVC_IP", + "value": "10.0.0.1" + }, + { + "name": "KUBE_SVC_REDIRECTOR_PROXY_IP", + "value": "127.0.0.1:14612" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": 
"dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/prod/kube-svc-redirect:v1.0.2", + "imagePullPolicy": "IfNotPresent", + "name": "redirector", + "resources": { + "requests": { + "cpu": "5m", + "memory": "2Mi" + } + }, + "securityContext": { + "capabilities": { + "add": [ + "NET_ADMIN" + ] + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-svc-redirector-token-ngjg2", + "readOnly": true + } + ] + }, + { + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/nginx:1.13.12-alpine", + "imagePullPolicy": "IfNotPresent", + "name": "azureproxy", + "ports": [ + { + "containerPort": 14612, + "hostPort": 14612, + "protocol": "TCP" + } + ], + "resources": { + "requests": { + "cpu": "5m", + "memory": "32Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/etc/nginx/nginx.conf", + "name": 
"azureproxy-nginx", + "readOnly": true, + "subPath": "nginx.conf" + }, + { + "mountPath": "/etc/nginx/conf.d", + "name": "azureproxy-configs", + "readOnly": true + }, + { + "mountPath": "/etc/nginx/certs", + "name": "azureproxy-certs", + "readOnly": true + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kube-svc-redirector-token-ngjg2", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "hostNetwork": true, + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeName": "aks-nodepool1-19574989-2", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 2000001000, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "kube-svc-redirector", + "serviceAccountName": "kube-svc-redirector", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/network-unavailable", + "operator": "Exists" + } + ], + "volumes": [ + { + "configMap": { + "defaultMode": 420, + "name": "azureproxy-nginx" + }, + "name": "azureproxy-nginx" + }, + { + "configMap": { + "defaultMode": 420, + "name": "azureproxy-config" + }, + "name": "azureproxy-configs" + }, + { + "name": 
"azureproxy-certs", + "secret": { + "defaultMode": 420, + "secretName": "azureproxy-certs" + } + }, + { + "name": "kube-svc-redirector-token-ngjg2", + "secret": { + "defaultMode": 420, + "secretName": "kube-svc-redirector-token-ngjg2" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-15T20:56:33Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-15T20:56:49Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-15T20:56:33Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://aaea93b1e6a0c55e9ac0c002ffa6fdfb99e98b2f1a38c474cc2b9b65e947b6d9", + "image": "aksrepos.azurecr.io/mirror/nginx:1.13.12-alpine", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/nginx@sha256:91d22184f3f9b1be658c2cc2c12d324de7ff12c8b9c9a597905457b4d93b069d", + "lastState": {}, + "name": "azureproxy", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-15T20:56:48Z" + } + } + }, + { + "containerID": "docker://c03c8b9e99095205945e15bef5f60c0501c8a0a77186afc1fcc8eb0804274e78", + "image": "aksrepos.azurecr.io/mirror/kube-svc-redirect:v1.0.2", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/kube-svc-redirect@sha256:a448687b78d24dae388bd3d54591c179c891fa078404752bc9c9dfdaecdc02ef", + "lastState": {}, + "name": "redirector", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-15T20:56:43Z" + } + } + } + ], + "hostIP": "10.240.0.7", + "phase": "Running", + "podIP": "10.240.0.7", + "qosClass": "Burstable", + "startTime": "2019-07-15T20:56:33Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "creationTimestamp": "2019-07-09T02:38:07Z", + 
"generateName": "kubernetes-dashboard-6dcdfcd68b-", + "labels": { + "k8s-app": "kubernetes-dashboard", + "kubernetes.io/cluster-service": "true", + "pod-template-hash": "2878978246" + }, + "name": "kubernetes-dashboard-6dcdfcd68b-nfqbf", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "ReplicaSet", + "name": "kubernetes-dashboard-6dcdfcd68b", + "uid": "71ff2821-a1f2-11e9-9bc6-127bb0ec03b8" + } + ], + "resourceVersion": "15831517", + "selfLink": "/api/v1/namespaces/kube-system/pods/kubernetes-dashboard-6dcdfcd68b-nfqbf", + "uid": "9583b2ab-a1f2-11e9-8b08-d602e29755d5" + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/kubernetes-dashboard-amd64:v1.10.1", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "failureThreshold": 3, + "httpGet": { + "path": "/", + "port": 9090, + "scheme": "HTTP" + }, + "initialDelaySeconds": 30, + "periodSeconds": 10, + "successThreshold": 1, + "timeoutSeconds": 30 + }, + "name": "main", + "ports": [ + { + "containerPort": 9090, + "name": "http", + "protocol": "TCP" + } + ], + "resources": { + "limits": { + "cpu": "100m", + "memory": "500Mi" + }, + 
"requests": { + "cpu": "100m", + "memory": "50Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "kubernetes-dashboard-token-w4t8s", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeName": "aks-nodepool1-19574989-0", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 2000001000, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "kubernetes-dashboard", + "serviceAccountName": "kubernetes-dashboard", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "tolerationSeconds": 300 + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "tolerationSeconds": 300 + } + ], + "volumes": [ + { + "name": "kubernetes-dashboard-token-w4t8s", + "secret": { + "defaultMode": 420, + "secretName": "kubernetes-dashboard-token-w4t8s" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-09T02:38:14Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-09T02:39:08Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-09T02:38:07Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://2b042ce7bdf3d03cb606317b19ee797cbf7b99c65076a67001064bccb313b3cb", + "image": 
"aksrepos.azurecr.io/mirror/kubernetes-dashboard-amd64:v1.10.1", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/kubernetes-dashboard-amd64@sha256:0ae6b69432e78069c5ce2bcde0fe409c5c4d6f0f4d9cd50a17974fea38898747", + "lastState": {}, + "name": "main", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-09T02:39:07Z" + } + } + } + ], + "hostIP": "10.240.0.4", + "phase": "Running", + "podIP": "10.244.1.197", + "qosClass": "Burstable", + "startTime": "2019-07-09T02:38:14Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "creationTimestamp": "2019-07-09T02:38:06Z", + "generateName": "metrics-server-76cd9fb66-", + "labels": { + "k8s-app": "metrics-server", + "pod-template-hash": "327859622" + }, + "name": "metrics-server-76cd9fb66-h2q55", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "ReplicaSet", + "name": "metrics-server-76cd9fb66", + "uid": "71c837df-a1f2-11e9-9bc6-127bb0ec03b8" + } + ], + "resourceVersion": "15144037", + "selfLink": "/api/v1/namespaces/kube-system/pods/metrics-server-76cd9fb66-h2q55", + "uid": "9543dbb7-a1f2-11e9-8b08-d602e29755d5" + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "command": [ + "/metrics-server", + "--source=kubernetes.summary_api:''" + ], + "env": [ + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + 
"name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/mirror/metrics-server-amd64:v0.2.1", + "imagePullPolicy": "IfNotPresent", + "name": "metrics-server", + "resources": {}, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "metrics-server-token-qtdgm", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "imagePullSecrets": [ + { + "name": "emptyacrsecret" + } + ], + "nodeName": "aks-nodepool1-19574989-1", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 2000001000, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "metrics-server", + "serviceAccountName": "metrics-server", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "tolerationSeconds": 300 + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "tolerationSeconds": 300 + } + ], + "volumes": [ + { + "name": "metrics-server-token-qtdgm", + "secret": { + "defaultMode": 420, + "secretName": "metrics-server-token-qtdgm" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-09T02:38:09Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-09T02:38:20Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-07-09T02:38:07Z", + "status": "True", + 
"type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://f60ef82657e5ccdfb611a4f3381848dff77a01bddf95c431e4b7a2bf6f4b8087", + "image": "aksrepos.azurecr.io/mirror/metrics-server-amd64:v0.2.1", + "imageID": "docker-pullable://aksrepos.azurecr.io/mirror/metrics-server-amd64@sha256:220c0ed3451cb95e4b2f72dd5dc8d9d39d9f529722e5b29d8286373ce27b117e", + "lastState": {}, + "name": "metrics-server", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-07-09T02:38:18Z" + } + } + } + ], + "hostIP": "10.240.0.5", + "phase": "Running", + "podIP": "10.244.0.193", + "qosClass": "BestEffort", + "startTime": "2019-07-09T02:38:09Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "annotations": { + "agentVersion": "1.10.0.1", + "dockerProviderVersion": "6.0.0-0", + "schema-versions": "v1" + }, + "creationTimestamp": "2019-08-23T19:53:57Z", + "generateName": "omsagent-", + "labels": { + "controller-revision-hash": "868116844", + "dsName": "omsagent-ds", + "pod-template-generation": "9" + }, + "name": "omsagent-25pks", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "DaemonSet", + "name": "omsagent", + "uid": "e2f8c552-c2d2-11e9-8736-86290fd7dd1f" + } + ], + "resourceVersion": "19063729", + "selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-25pks", + "uid": "be78d7f6-c5df-11e9-8736-86290fd7dd1f" + }, + "spec": { + "containers": [ + { + "env": [ + { + "name": "AKS_RESOURCE_ID", + "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "name": "AKS_REGION", + "value": "eastus" + }, + { + "name": "CONTROLLER_TYPE", + "value": "DaemonSet" + }, + { + "name": "NODE_IP", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "status.hostIP" + } + } + }, + { + "name": 
"KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "exec": { + "command": [ + "/bin/bash", + "-c", + "/opt/livenessprobe.sh" + ] + }, + "failureThreshold": 3, + "initialDelaySeconds": 60, + "periodSeconds": 60, + "successThreshold": 1, + "timeoutSeconds": 1 + }, + "name": "omsagent", + "ports": [ + { + "containerPort": 25225, + "protocol": "TCP" + }, + { + "containerPort": 25224, + "protocol": "UDP" + } + ], + "resources": { + "limits": { + "cpu": "150m", + "memory": "600Mi" + }, + "requests": { + "cpu": "75m", + "memory": "225Mi" + } + }, + "securityContext": { + "privileged": true + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/hostfs", + "name": "host-root", + "readOnly": true + }, + { + "mountPath": "/var/run/host", + "name": "docker-sock" + }, + { + "mountPath": "/var/log", + "name": "host-log" + }, + { + "mountPath": "/var/lib/docker/containers", + "name": "containerlog-path" + }, + { + "mountPath": "/etc/kubernetes/host", + "name": "azure-json-path" + }, + { + "mountPath": "/etc/omsagent-secret", + "name": "omsagent-secret" + }, + { + "mountPath": "/etc/config/settings", + "name": "settings-vol-config", + "readOnly": true + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "omsagent-token-fjmqb", + "readOnly": true + } + ] + } + ], + "dnsPolicy": 
"ClusterFirst", + "nodeName": "aks-nodepool1-19574989-2", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 0, + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "omsagent", + "serviceAccountName": "omsagent", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "effect": "NoSchedule", + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists" + } + ], + "volumes": [ + { + "hostPath": { + "path": "/", + "type": "" + }, + "name": "host-root" + }, + { + "hostPath": { + "path": "/var/run", + "type": "" + }, + "name": "docker-sock" + }, + { + "hostPath": { + "path": "/etc/hostname", + "type": "" + }, + "name": "container-hostname" + }, + { + "hostPath": { + "path": "/var/log", + "type": "" + }, + "name": "host-log" + }, + { + "hostPath": { + "path": "/var/lib/docker/containers", + "type": "" + }, + "name": "containerlog-path" + }, + { + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + }, + "name": "azure-json-path" + }, + { + "name": "omsagent-secret", + "secret": { + "defaultMode": 420, + "secretName": "omsagent-secret" + } + }, + { + "configMap": { + "defaultMode": 420, + "name": "container-azm-ms-agentconfig", + "optional": true + }, + "name": "settings-vol-config" + }, + { + "name": "omsagent-token-fjmqb", + "secret": { + "defaultMode": 420, + "secretName": "omsagent-token-fjmqb" + } + } + ] + }, + "status": { + "conditions": [ + { + 
"lastProbeTime": null, + "lastTransitionTime": "2019-08-23T19:53:57Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T19:54:44Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T19:53:57Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://acd5cedc2c5874122047c47bb1398f35a7c0297292fc4a0e01345123c233d19a", + "image": "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019", + "imageID": "docker-pullable://mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:69b420bdb4081293c37e2d0f8ad2e4054bd516f5c08c7512d6b695660a36eccf", + "lastState": {}, + "name": "omsagent", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-08-23T19:54:43Z" + } + } + } + ], + "hostIP": "10.240.0.7", + "phase": "Running", + "podIP": "10.244.12.169", + "qosClass": "Burstable", + "startTime": "2019-08-23T19:53:57Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "annotations": { + "agentVersion": "1.10.0.1", + "dockerProviderVersion": "6.0.0-0", + "schema-versions": "v1" + }, + "creationTimestamp": "2019-08-23T19:51:35Z", + "generateName": "omsagent-", + "labels": { + "controller-revision-hash": "868116844", + "dsName": "omsagent-ds", + "pod-template-generation": "9" + }, + "name": "omsagent-4tncr", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "DaemonSet", + "name": "omsagent", + "uid": "e2f8c552-c2d2-11e9-8736-86290fd7dd1f" + } + ], + "resourceVersion": "19063468", + "selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-4tncr", + "uid": "69e68b21-c5df-11e9-8736-86290fd7dd1f" + }, + "spec": { + "containers": [ + { + 
"env": [ + { + "name": "AKS_RESOURCE_ID", + "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "name": "AKS_REGION", + "value": "eastus" + }, + { + "name": "CONTROLLER_TYPE", + "value": "DaemonSet" + }, + { + "name": "NODE_IP", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "status.hostIP" + } + } + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "exec": { + "command": [ + "/bin/bash", + "-c", + "/opt/livenessprobe.sh" + ] + }, + "failureThreshold": 3, + "initialDelaySeconds": 60, + "periodSeconds": 60, + "successThreshold": 1, + "timeoutSeconds": 1 + }, + "name": "omsagent", + "ports": [ + { + "containerPort": 25225, + "protocol": "TCP" + }, + { + "containerPort": 25224, + "protocol": "UDP" + } + ], + "resources": { + "limits": { + "cpu": "150m", + "memory": "600Mi" + }, + "requests": { + "cpu": "75m", + "memory": "225Mi" + } + }, + "securityContext": { + "privileged": true + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/hostfs", + "name": "host-root", + "readOnly": true + }, + { + "mountPath": "/var/run/host", + "name": "docker-sock" + }, + { + "mountPath": "/var/log", + "name": "host-log" + }, + { + "mountPath": 
"/var/lib/docker/containers", + "name": "containerlog-path" + }, + { + "mountPath": "/etc/kubernetes/host", + "name": "azure-json-path" + }, + { + "mountPath": "/etc/omsagent-secret", + "name": "omsagent-secret" + }, + { + "mountPath": "/etc/config/settings", + "name": "settings-vol-config", + "readOnly": true + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "omsagent-token-fjmqb", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "nodeName": "aks-nodepool1-19574989-1", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 0, + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "omsagent", + "serviceAccountName": "omsagent", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "effect": "NoSchedule", + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists" + } + ], + "volumes": [ + { + "hostPath": { + "path": "/", + "type": "" + }, + "name": "host-root" + }, + { + "hostPath": { + "path": "/var/run", + "type": "" + }, + "name": "docker-sock" + }, + { + "hostPath": { + "path": "/etc/hostname", + "type": "" + }, + "name": "container-hostname" + }, + { + "hostPath": { + "path": "/var/log", + "type": "" + }, + "name": "host-log" + }, + { + "hostPath": { + "path": "/var/lib/docker/containers", + "type": "" + }, + "name": "containerlog-path" + }, + { + "hostPath": { + "path": "/etc/kubernetes", + "type": 
"" + }, + "name": "azure-json-path" + }, + { + "name": "omsagent-secret", + "secret": { + "defaultMode": 420, + "secretName": "omsagent-secret" + } + }, + { + "configMap": { + "defaultMode": 420, + "name": "container-azm-ms-agentconfig", + "optional": true + }, + "name": "settings-vol-config" + }, + { + "name": "omsagent-token-fjmqb", + "secret": { + "defaultMode": 420, + "secretName": "omsagent-token-fjmqb" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T19:51:35Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T19:52:28Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T19:51:35Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://7803b80452aa34460c848d9c1ca65d6bd925665cf78faaa8dbc122482f93c744", + "image": "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019", + "imageID": "docker-pullable://mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:69b420bdb4081293c37e2d0f8ad2e4054bd516f5c08c7512d6b695660a36eccf", + "lastState": {}, + "name": "omsagent", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-08-23T19:52:27Z" + } + } + } + ], + "hostIP": "10.240.0.5", + "phase": "Running", + "podIP": "10.244.0.251", + "qosClass": "Burstable", + "startTime": "2019-08-23T19:51:35Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "annotations": { + "agentVersion": "1.10.0.1", + "dockerProviderVersion": "6.0.0-0", + "schema-versions": "v1" + }, + "creationTimestamp": "2019-08-23T19:53:36Z", + "generateName": "omsagent-", + "labels": { + "controller-revision-hash": "868116844", + "dsName": "omsagent-ds", + "pod-template-generation": 
"9" + }, + "name": "omsagent-h44fk", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "DaemonSet", + "name": "omsagent", + "uid": "e2f8c552-c2d2-11e9-8736-86290fd7dd1f" + } + ], + "resourceVersion": "19063631", + "selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-h44fk", + "uid": "b1e04e1c-c5df-11e9-8736-86290fd7dd1f" + }, + "spec": { + "containers": [ + { + "env": [ + { + "name": "AKS_RESOURCE_ID", + "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "name": "AKS_REGION", + "value": "eastus" + }, + { + "name": "CONTROLLER_TYPE", + "value": "DaemonSet" + }, + { + "name": "NODE_IP", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "status.hostIP" + } + } + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "exec": { + "command": [ + "/bin/bash", + "-c", + "/opt/livenessprobe.sh" + ] + }, + "failureThreshold": 3, + "initialDelaySeconds": 60, + "periodSeconds": 60, + "successThreshold": 1, + "timeoutSeconds": 1 + }, + "name": "omsagent", + "ports": [ + { + "containerPort": 25225, + "protocol": "TCP" + }, + { + "containerPort": 25224, + "protocol": "UDP" + } + ], + "resources": { + "limits": { + "cpu": "150m", + 
"memory": "600Mi" + }, + "requests": { + "cpu": "75m", + "memory": "225Mi" + } + }, + "securityContext": { + "privileged": true + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/hostfs", + "name": "host-root", + "readOnly": true + }, + { + "mountPath": "/var/run/host", + "name": "docker-sock" + }, + { + "mountPath": "/var/log", + "name": "host-log" + }, + { + "mountPath": "/var/lib/docker/containers", + "name": "containerlog-path" + }, + { + "mountPath": "/etc/kubernetes/host", + "name": "azure-json-path" + }, + { + "mountPath": "/etc/omsagent-secret", + "name": "omsagent-secret" + }, + { + "mountPath": "/etc/config/settings", + "name": "settings-vol-config", + "readOnly": true + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "omsagent-token-fjmqb", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "nodeName": "aks-nodepool1-19574989-0", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 0, + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "omsagent", + "serviceAccountName": "omsagent", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "effect": "NoSchedule", + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists" + } + ], + "volumes": [ + { + "hostPath": { + "path": "/", + "type": "" + }, + "name": 
"host-root" + }, + { + "hostPath": { + "path": "/var/run", + "type": "" + }, + "name": "docker-sock" + }, + { + "hostPath": { + "path": "/etc/hostname", + "type": "" + }, + "name": "container-hostname" + }, + { + "hostPath": { + "path": "/var/log", + "type": "" + }, + "name": "host-log" + }, + { + "hostPath": { + "path": "/var/lib/docker/containers", + "type": "" + }, + "name": "containerlog-path" + }, + { + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + }, + "name": "azure-json-path" + }, + { + "name": "omsagent-secret", + "secret": { + "defaultMode": 420, + "secretName": "omsagent-secret" + } + }, + { + "configMap": { + "defaultMode": 420, + "name": "container-azm-ms-agentconfig", + "optional": true + }, + "name": "settings-vol-config" + }, + { + "name": "omsagent-token-fjmqb", + "secret": { + "defaultMode": 420, + "secretName": "omsagent-token-fjmqb" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T19:53:36Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T19:53:51Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T19:53:36Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://4b71a82e472a8e5d0bc4ef9b9b5d2ccf25741b31269480a77e29424ebe87757c", + "image": "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019", + "imageID": "docker-pullable://mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:69b420bdb4081293c37e2d0f8ad2e4054bd516f5c08c7512d6b695660a36eccf", + "lastState": {}, + "name": "omsagent", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-08-23T19:53:49Z" + } + } + } + ], + "hostIP": "10.240.0.4", + "phase": "Running", + 
"podIP": "10.244.1.35", + "qosClass": "Burstable", + "startTime": "2019-08-23T19:53:36Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "annotations": { + "agentVersion": "1.10.0.1", + "dockerProviderVersion": "6.0.0-0", + "schema-versions": "v1" + }, + "creationTimestamp": "2019-08-23T19:51:28Z", + "generateName": "omsagent-rs-5bb85d7468-", + "labels": { + "pod-template-hash": "1664183024", + "rsName": "omsagent-rs" + }, + "name": "omsagent-rs-5bb85d7468-dnxpw", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "ReplicaSet", + "name": "omsagent-rs-5bb85d7468", + "uid": "659ec974-c5df-11e9-8736-86290fd7dd1f" + } + ], + "resourceVersion": "19063495", + "selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-rs-5bb85d7468-dnxpw", + "uid": "65a6f978-c5df-11e9-8736-86290fd7dd1f" + }, + "spec": { + "containers": [ + { + "env": [ + { + "name": "AKS_RESOURCE_ID", + "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "name": "AKS_REGION", + "value": "eastus" + }, + { + "name": "CONTROLLER_TYPE", + "value": "ReplicaSet" + }, + { + "name": "NODE_IP", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "status.hostIP" + } + } + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": 
"mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "exec": { + "command": [ + "/bin/bash", + "-c", + "/opt/livenessprobe.sh" + ] + }, + "failureThreshold": 3, + "initialDelaySeconds": 60, + "periodSeconds": 60, + "successThreshold": 1, + "timeoutSeconds": 1 + }, + "name": "omsagent", + "ports": [ + { + "containerPort": 25225, + "protocol": "TCP" + }, + { + "containerPort": 25224, + "protocol": "UDP" + }, + { + "containerPort": 25227, + "name": "in-rs-tcp", + "protocol": "TCP" + } + ], + "resources": { + "limits": { + "cpu": "150m", + "memory": "500Mi" + }, + "requests": { + "cpu": "110m", + "memory": "250Mi" + } + }, + "securityContext": { + "privileged": true + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/var/run/host", + "name": "docker-sock" + }, + { + "mountPath": "/var/log", + "name": "host-log" + }, + { + "mountPath": "/var/lib/docker/containers", + "name": "containerlog-path" + }, + { + "mountPath": "/etc/kubernetes/host", + "name": "azure-json-path" + }, + { + "mountPath": "/etc/omsagent-secret", + "name": "omsagent-secret", + "readOnly": true + }, + { + "mountPath": "/etc/config", + "name": "omsagent-rs-config" + }, + { + "mountPath": "/etc/config/settings", + "name": "settings-vol-config", + "readOnly": true + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "omsagent-token-fjmqb", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "nodeName": "aks-nodepool1-19574989-0", + "nodeSelector": { + "beta.kubernetes.io/os": "linux", + "kubernetes.io/role": "agent" + }, + "priority": 0, + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "omsagent", + "serviceAccountName": "omsagent", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "effect": "NoExecute", + "key": 
"node.kubernetes.io/not-ready", + "operator": "Exists", + "tolerationSeconds": 300 + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "tolerationSeconds": 300 + } + ], + "volumes": [ + { + "hostPath": { + "path": "/var/run", + "type": "" + }, + "name": "docker-sock" + }, + { + "hostPath": { + "path": "/etc/hostname", + "type": "" + }, + "name": "container-hostname" + }, + { + "hostPath": { + "path": "/var/log", + "type": "" + }, + "name": "host-log" + }, + { + "hostPath": { + "path": "/var/lib/docker/containers", + "type": "" + }, + "name": "containerlog-path" + }, + { + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + }, + "name": "azure-json-path" + }, + { + "name": "omsagent-secret", + "secret": { + "defaultMode": 420, + "secretName": "omsagent-secret" + } + }, + { + "configMap": { + "defaultMode": 420, + "name": "omsagent-rs-config" + }, + "name": "omsagent-rs-config" + }, + { + "configMap": { + "defaultMode": 420, + "name": "container-azm-ms-agentconfig", + "optional": true + }, + "name": "settings-vol-config" + }, + { + "name": "omsagent-token-fjmqb", + "secret": { + "defaultMode": 420, + "secretName": "omsagent-token-fjmqb" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T19:51:28Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T19:52:37Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T19:51:28Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://7e080036bc213a7dadd95b1d8439e06a1b62822219642a83cab059dc4292b0e5", + "image": "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019", + "imageID": 
"docker-pullable://mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:69b420bdb4081293c37e2d0f8ad2e4054bd516f5c08c7512d6b695660a36eccf", + "lastState": {}, + "name": "omsagent", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-08-23T19:52:37Z" + } + } + } + ], + "hostIP": "10.240.0.4", + "phase": "Running", + "podIP": "10.244.1.34", + "qosClass": "Burstable", + "startTime": "2019-08-23T19:51:28Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "annotations": { + "agentVersion": "1.10.0.1", + "dockerProviderVersion": "6.0.0-0", + "schema-versions": "v1" + }, + "creationTimestamp": "2019-08-23T19:52:35Z", + "generateName": "omsagent-", + "labels": { + "controller-revision-hash": "868116844", + "dsName": "omsagent-ds", + "pod-template-generation": "9" + }, + "name": "omsagent-sb6xx", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "DaemonSet", + "name": "omsagent", + "uid": "e2f8c552-c2d2-11e9-8736-86290fd7dd1f" + } + ], + "resourceVersion": "19063577", + "selfLink": "/api/v1/namespaces/kube-system/pods/omsagent-sb6xx", + "uid": "8dbd5e8b-c5df-11e9-8736-86290fd7dd1f" + }, + "spec": { + "containers": [ + { + "env": [ + { + "name": "AKS_RESOURCE_ID", + "value": "/subscriptions/72c8e8ca-dc16-47dc-b65c-6b5875eb600a/resourcegroups/dilipr-health-test/providers/Microsoft.ContainerService/managedClusters/dilipr-health-test" + }, + { + "name": "AKS_REGION", + "value": "eastus" + }, + { + "name": "CONTROLLER_TYPE", + "value": "DaemonSet" + }, + { + "name": "NODE_IP", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "status.hostIP" + } + } + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + 
}, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "exec": { + "command": [ + "/bin/bash", + "-c", + "/opt/livenessprobe.sh" + ] + }, + "failureThreshold": 3, + "initialDelaySeconds": 60, + "periodSeconds": 60, + "successThreshold": 1, + "timeoutSeconds": 1 + }, + "name": "omsagent", + "ports": [ + { + "containerPort": 25225, + "protocol": "TCP" + }, + { + "containerPort": 25224, + "protocol": "UDP" + } + ], + "resources": { + "limits": { + "cpu": "150m", + "memory": "600Mi" + }, + "requests": { + "cpu": "75m", + "memory": "225Mi" + } + }, + "securityContext": { + "privileged": true + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/hostfs", + "name": "host-root", + "readOnly": true + }, + { + "mountPath": "/var/run/host", + "name": "docker-sock" + }, + { + "mountPath": "/var/log", + "name": "host-log" + }, + { + "mountPath": "/var/lib/docker/containers", + "name": "containerlog-path" + }, + { + "mountPath": "/etc/kubernetes/host", + "name": "azure-json-path" + }, + { + "mountPath": "/etc/omsagent-secret", + "name": "omsagent-secret" + }, + { + "mountPath": "/etc/config/settings", + "name": "settings-vol-config", + "readOnly": true + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "omsagent-token-fjmqb", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "nodeName": "aks-nodepool1-19574989-3", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 0, + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + 
"serviceAccount": "omsagent", + "serviceAccountName": "omsagent", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "effect": "NoSchedule", + "key": "node-role.kubernetes.io/master", + "operator": "Equal", + "value": "true" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/disk-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/memory-pressure", + "operator": "Exists" + }, + { + "effect": "NoSchedule", + "key": "node.kubernetes.io/unschedulable", + "operator": "Exists" + } + ], + "volumes": [ + { + "hostPath": { + "path": "/", + "type": "" + }, + "name": "host-root" + }, + { + "hostPath": { + "path": "/var/run", + "type": "" + }, + "name": "docker-sock" + }, + { + "hostPath": { + "path": "/etc/hostname", + "type": "" + }, + "name": "container-hostname" + }, + { + "hostPath": { + "path": "/var/log", + "type": "" + }, + "name": "host-log" + }, + { + "hostPath": { + "path": "/var/lib/docker/containers", + "type": "" + }, + "name": "containerlog-path" + }, + { + "hostPath": { + "path": "/etc/kubernetes", + "type": "" + }, + "name": "azure-json-path" + }, + { + "name": "omsagent-secret", + "secret": { + "defaultMode": 420, + "secretName": "omsagent-secret" + } + }, + { + "configMap": { + "defaultMode": 420, + "name": "container-azm-ms-agentconfig", + "optional": true + }, + "name": "settings-vol-config" + }, + { + "name": "omsagent-token-fjmqb", + "secret": { + "defaultMode": 420, + "secretName": "omsagent-token-fjmqb" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T19:52:35Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T19:53:25Z", + "status": "True", + "type": 
"Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-23T19:52:35Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://f4f0cb19e5da394a4332847953c18d9321319f2ef422533b890ab844cb997879", + "image": "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08222019", + "imageID": "docker-pullable://mcr.microsoft.com/azuremonitor/containerinsights/ciprod@sha256:69b420bdb4081293c37e2d0f8ad2e4054bd516f5c08c7512d6b695660a36eccf", + "lastState": {}, + "name": "omsagent", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-08-23T19:53:24Z" + } + } + } + ], + "hostIP": "10.240.0.6", + "phase": "Running", + "podIP": "10.244.2.62", + "qosClass": "Burstable", + "startTime": "2019-08-23T19:52:35Z" + } + }, + { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "creationTimestamp": "2019-08-12T20:28:08Z", + "generateName": "tunnelfront-65c8cfb7cc-", + "labels": { + "component": "tunnel", + "pod-template-hash": "2174796377" + }, + "name": "tunnelfront-65c8cfb7cc-z8srb", + "namespace": "kube-system", + "ownerReferences": [ + { + "apiVersion": "apps/v1", + "blockOwnerDeletion": true, + "controller": true, + "kind": "ReplicaSet", + "name": "tunnelfront-65c8cfb7cc", + "uid": "7013afa3-a742-11e9-a08d-96dd47774ee5" + } + ], + "resourceVersion": "17628809", + "selfLink": "/api/v1/namespaces/kube-system/pods/tunnelfront-65c8cfb7cc-z8srb", + "uid": "b2a0e1b3-bd3f-11e9-b2a7-d61658c73830" + }, + "spec": { + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "kubernetes.azure.com/cluster", + "operator": "Exists" + } + ] + } + ] + } + } + }, + "containers": [ + { + "env": [ + { + "name": "OVERRIDE_TUNNEL_SERVER_NAME", + "value": 
"t_dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "TUNNEL_CLUSTERUSER_NAME", + "value": "28957308" + }, + { + "name": "TUNNELGATEWAY_SERVER_NAME", + "value": "dilipr-hea-dilipr-health-te-72c8e8-0b16acad.tun.eastus.azmk8s.io" + }, + { + "name": "TUNNELGATEWAY_SSH_PORT", + "value": "22" + }, + { + "name": "TUNNELGATEWAY_TLS_PORT", + "value": "443" + }, + { + "name": "KUBE_CONFIG", + "value": "/etc/kubernetes/kubeconfig/kubeconfig" + }, + { + "name": "KUBERNETES_PORT_443_TCP_ADDR", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + }, + { + "name": "KUBERNETES_PORT", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_PORT_443_TCP", + "value": "tcp://dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io:443" + }, + { + "name": "KUBERNETES_SERVICE_HOST", + "value": "dilipr-hea-dilipr-health-te-72c8e8-d3ccfd8f.hcp.eastus.azmk8s.io" + } + ], + "image": "aksrepos.azurecr.io/prod/hcp-tunnel-front:v1.9.2-v4.0.7", + "imagePullPolicy": "IfNotPresent", + "livenessProbe": { + "exec": { + "command": [ + "/lib/tunnel-front/check-tunnel-connection.sh" + ] + }, + "failureThreshold": 12, + "initialDelaySeconds": 10, + "periodSeconds": 60, + "successThreshold": 1, + "timeoutSeconds": 1 + }, + "name": "tunnel-front", + "resources": { + "requests": { + "cpu": "10m", + "memory": "64Mi" + } + }, + "securityContext": { + "privileged": true + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/etc/kubernetes/kubeconfig", + "name": "kubeconfig", + "readOnly": true + }, + { + "mountPath": "/etc/kubernetes/certs", + "name": "certificates", + "readOnly": true + }, + { + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", + "name": "tunnelfront-token-njgvg", + "readOnly": true + } + ] + } + ], + "dnsPolicy": "Default", + "imagePullSecrets": [ + { + "name": 
"emptyacrsecret" + } + ], + "nodeName": "aks-nodepool1-19574989-3", + "nodeSelector": { + "beta.kubernetes.io/os": "linux" + }, + "priority": 2000001000, + "priorityClassName": "system-node-critical", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "serviceAccount": "tunnelfront", + "serviceAccountName": "tunnelfront", + "terminationGracePeriodSeconds": 30, + "tolerations": [ + { + "key": "CriticalAddonsOnly", + "operator": "Exists" + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "tolerationSeconds": 300 + }, + { + "effect": "NoExecute", + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "tolerationSeconds": 300 + } + ], + "volumes": [ + { + "configMap": { + "defaultMode": 420, + "name": "tunnelfront-kubecfg", + "optional": true + }, + "name": "kubeconfig" + }, + { + "hostPath": { + "path": "/etc/kubernetes/certs", + "type": "" + }, + "name": "certificates" + }, + { + "name": "tunnelfront-token-njgvg", + "secret": { + "defaultMode": 420, + "secretName": "tunnelfront-token-njgvg" + } + } + ] + }, + "status": { + "conditions": [ + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-12T20:28:08Z", + "status": "True", + "type": "Initialized" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-12T20:28:13Z", + "status": "True", + "type": "Ready" + }, + { + "lastProbeTime": null, + "lastTransitionTime": null, + "status": "True", + "type": "ContainersReady" + }, + { + "lastProbeTime": null, + "lastTransitionTime": "2019-08-12T20:28:08Z", + "status": "True", + "type": "PodScheduled" + } + ], + "containerStatuses": [ + { + "containerID": "docker://ac3b7482b15ba1f825e7a9ceef11defaccdc2682b9a20bb7c98bc307a8a34cf6", + "image": "aksrepos.azurecr.io/prod/hcp-tunnel-front:v1.9.2-v4.0.7", + "imageID": "docker-pullable://aksrepos.azurecr.io/prod/hcp-tunnel-front@sha256:68878ee3ea1781b322ea3952c3370e31dd89be8bb0864e2bf27bdba6dc904c41", + 
"lastState": {}, + "name": "tunnel-front", + "ready": true, + "restartCount": 0, + "state": { + "running": { + "startedAt": "2019-08-12T20:28:13Z" + } + } + } + ], + "hostIP": "10.240.0.6", + "phase": "Running", + "podIP": "10.244.2.10", + "qosClass": "Burstable", + "startTime": "2019-08-12T20:28:08Z" + } + } + ], + "kind": "List", + "metadata": { + "resourceVersion": "", + "selfLink": "" + } +} From 382ed0294e57d9ec1dd3e85b0982f5eb3e286084 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 4 Oct 2019 15:54:42 -0700 Subject: [PATCH 126/160] init containers fix and other bug fixes (#269) * init container - KPI and kubeperf changes * changes * changes * changes * changes for empty array fix * changes * changes * pod inventory exception fix * nil check changes * changes * fixing typo * changes * changes * PR - feedback * remove comment * tag pass changes * changes * tagdrop changes * changes * changes --- installer/conf/telegraf.conf | 3 +- .../scripts/tomlparser-prom-customconfig.rb | 2 +- source/code/go/src/plugins/oms.go | 85 ++++++++++--------- source/code/plugin/KubernetesApiClient.rb | 14 ++- source/code/plugin/in_kube_events.rb | 17 ++-- source/code/plugin/in_kube_nodes.rb | 13 ++- source/code/plugin/in_kube_podinventory.rb | 41 ++++++--- source/code/plugin/in_kube_services.rb | 12 ++- 8 files changed, 123 insertions(+), 64 deletions(-) diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 4883de81b..cd22a56b4 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -566,7 +566,8 @@ ## Use TLS but skip chain & host verification insecure_skip_verify = true #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"] - + [inputs.prometheus.tagpass] + operation_type = ["create_container", "remove_container", "pull_image"] ## prometheus custom metrics [[inputs.prometheus]] diff --git 
a/installer/scripts/tomlparser-prom-customconfig.rb b/installer/scripts/tomlparser-prom-customconfig.rb index ab868f1a9..7aad580ee 100644 --- a/installer/scripts/tomlparser-prom-customconfig.rb +++ b/installer/scripts/tomlparser-prom-customconfig.rb @@ -47,7 +47,7 @@ def parseConfigMap end def checkForTypeArray(arrayValue, arrayType) - if (arrayValue.nil? || (arrayValue.kind_of?(Array) && arrayValue.length > 0 && arrayValue[0].kind_of?(arrayType))) + if (arrayValue.nil? || (arrayValue.kind_of?(Array) && ((arrayValue.length == 0) || (arrayValue.length > 0 && arrayValue[0].kind_of?(arrayType))))) return true else return false diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 6d78455bd..01aab85b4 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -184,11 +184,11 @@ type laKubeMonAgentEvents struct { } type KubeMonAgentEventTags struct { - PodName string - ContainerId string - FirstOccurance string - LastOccurance string - Count int + PodName string + ContainerId string + FirstOccurrence string + LastOccurrence string + Count int } type KubeMonAgentEventBlob struct { @@ -259,7 +259,14 @@ func updateContainerImageNameMaps() { } for _, pod := range pods.Items { - for _, status := range pod.Status.ContainerStatuses { + podContainerStatuses := pod.Status.ContainerStatuses + + // Doing this to include init container logs as well + podInitContainerStatuses := pod.Status.InitContainerStatuses + if (podInitContainerStatuses != nil) && (len(podInitContainerStatuses) > 0) { + podContainerStatuses = append(podContainerStatuses, podInitContainerStatuses...) 
+ } + for _, status := range podContainerStatuses { lastSlashIndex := strings.LastIndex(status.ContainerID, "/") containerID := status.ContainerID[lastSlashIndex+1 : len(status.ContainerID)] image := status.Image @@ -344,22 +351,22 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K if val, ok := ConfigErrorEvent[logRecordString]; ok { Log("In config error existing hash update\n") eventCount := val.Count - eventFirstOccurance := val.FirstOccurance + eventFirstOccurrence := val.FirstOccurrence ConfigErrorEvent[logRecordString] = KubeMonAgentEventTags{ - PodName: podName, - ContainerId: containerID, - FirstOccurance: eventFirstOccurance, - LastOccurance: eventTimeStamp, - Count: eventCount + 1, + PodName: podName, + ContainerId: containerID, + FirstOccurrence: eventFirstOccurrence, + LastOccurrence: eventTimeStamp, + Count: eventCount + 1, } } else { ConfigErrorEvent[logRecordString] = KubeMonAgentEventTags{ - PodName: podName, - ContainerId: containerID, - FirstOccurance: eventTimeStamp, - LastOccurance: eventTimeStamp, - Count: 1, + PodName: podName, + ContainerId: containerID, + FirstOccurrence: eventTimeStamp, + LastOccurrence: eventTimeStamp, + Count: 1, } } @@ -374,22 +381,22 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K if val, ok := PromScrapeErrorEvent[splitString]; ok { Log("In config error existing hash update\n") eventCount := val.Count - eventFirstOccurance := val.FirstOccurance + eventFirstOccurrence := val.FirstOccurrence PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ - PodName: podName, - ContainerId: containerID, - FirstOccurance: eventFirstOccurance, - LastOccurance: eventTimeStamp, - Count: eventCount + 1, + PodName: podName, + ContainerId: containerID, + FirstOccurrence: eventFirstOccurrence, + LastOccurrence: eventTimeStamp, + Count: eventCount + 1, } } else { PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ - PodName: podName, - ContainerId: containerID, 
- FirstOccurance: eventTimeStamp, - LastOccurance: eventTimeStamp, - Count: 1, + PodName: podName, + ContainerId: containerID, + FirstOccurrence: eventTimeStamp, + LastOccurrence: eventTimeStamp, + Count: 1, } } } @@ -756,16 +763,18 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { FlushedRecordsSize += float64(len(stringMap["LogEntry"])) dataItems = append(dataItems, dataItem) - loggedTime, e := time.Parse(time.RFC3339, dataItem.LogEntryTimeStamp) - if e != nil { - message := fmt.Sprintf("Error while converting LogEntryTimeStamp for telemetry purposes: %s", e.Error()) - Log(message) - SendException(message) - } else { - ltncy := float64(start.Sub(loggedTime) / time.Millisecond) - if ltncy >= maxLatency { - maxLatency = ltncy - maxLatencyContainer = dataItem.Name + "=" + dataItem.ID + if dataItem.LogEntryTimeStamp != "" { + loggedTime, e := time.Parse(time.RFC3339, dataItem.LogEntryTimeStamp) + if e != nil { + message := fmt.Sprintf("Error while converting LogEntryTimeStamp for telemetry purposes: %s", e.Error()) + Log(message) + SendException(message) + } else { + ltncy := float64(start.Sub(loggedTime) / time.Millisecond) + if ltncy >= maxLatency { + maxLatency = ltncy + maxLatencyContainer = dataItem.Name + "=" + dataItem.ID + } } } } diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index 48b25bf14..be1a51791 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -356,9 +356,19 @@ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName else podUid = pod["metadata"]["uid"] end - if (!pod["spec"]["containers"].nil? && !pod["spec"]["nodeName"].nil?) + + podContainers = [] + if !pod["spec"]["containers"].nil? && !pod["spec"]["containers"].empty? + podContainers = podContainers + pod["spec"]["containers"] + end + # Adding init containers to the record list as well. + if !pod["spec"]["initContainers"].nil? 
&& !pod["spec"]["initContainers"].empty? + podContainers = podContainers + pod["spec"]["initContainers"] + end + + if (!podContainers.nil? && !podContainers.empty? && !pod["spec"]["nodeName"].nil?) nodeName = pod["spec"]["nodeName"] - pod["spec"]["containers"].each do |container| + podContainers.each do |container| containerName = container["name"] metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?) diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index 5538ba4aa..e1fdc5df6 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -47,17 +47,20 @@ def enumerate(eventList = nil) currentTime = Time.now emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 - if eventList.nil? - $log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}") - events = JSON.parse(KubernetesApiClient.getKubeResourceInfo("events").body) - $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}") - else - events = eventList + + events = eventList + $log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}") + eventInfo = KubernetesApiClient.getKubeResourceInfo("events") + $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}") + + if !eventInfo.nil? + events = JSON.parse(eventInfo.body) end + eventQueryState = getEventQueryState newEventQueryState = [] begin - if (!events.empty? && !events["items"].nil?) + if (!events.nil? && !events.empty? && !events["items"].nil?) 
eventStream = MultiEventStream.new events["items"].each do |items| record = {} diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 42bc13b68..0a0fd9d2e 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -61,11 +61,19 @@ def enumerate emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 telemetrySent = false + + nodeInventory = nil + $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + nodeInfo = KubernetesApiClient.getKubeResourceInfo("nodes") $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + + if !nodeInfo.nil? + nodeInventory = JSON.parse(nodeInfo.body) + end + begin - if (!nodeInventory.empty?) + if (!nodeInventory.nil? && !nodeInventory.empty?) eventStream = MultiEventStream.new containerNodeInventoryEventStream = MultiEventStream.new if !nodeInventory["items"].nil? @@ -95,7 +103,6 @@ def enumerate record["KubernetesProviderID"] = "onprem" end - # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions. # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we # populate the KubeNodeInventory Status field. A possible value for this field could be "Ready OutofDisk" diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index f41ce9095..766831a66 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -48,13 +48,15 @@ def shutdown end def enumerate(podList = nil) - if podList.nil? 
- $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods").body) - $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") - else - podInventory = podList + podInventory = podList + $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}") + podInfo = KubernetesApiClient.getKubeResourceInfo("pods") + $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") + + if !podInfo.nil? + podInventory = JSON.parse(podInfo.body) end + begin if (!podInventory.empty? && podInventory.key?("items") && !podInventory["items"].empty?) #get pod inventory & services @@ -137,8 +139,16 @@ def getContainerEnvironmentVariables(pod, clusterCollectEnvironmentVar) begin podSpec = pod["spec"] containerEnvHash = {} - if !podSpec.nil? && !podSpec["containers"].nil? - podSpec["containers"].each do |container| + podContainersEnv = [] + if !podSpec["containers"].nil? && !podSpec["containers"].empty? + podContainersEnv = podContainersEnv + podSpec["containers"] + end + # Adding init containers to the record list as well. + if !podSpec["initContainers"].nil? && !podSpec["initContainers"].empty? + podContainersEnv = podContainersEnv + podSpec["initContainers"] + end + if !podContainersEnv.nil? && !podContainersEnv.empty? + podContainersEnv.each do |container| if !clusterCollectEnvironmentVar.nil? && !clusterCollectEnvironmentVar.empty? && clusterCollectEnvironmentVar.casecmp("false") == 0 containerEnvHash[container["name"]] = ["AZMON_CLUSTER_COLLECT_ENV_VAR=FALSE"] else @@ -289,8 +299,19 @@ def parse_and_emit_records(podInventory, serviceList) end podRestartCount = 0 record["PodRestartCount"] = 0 - if items["status"].key?("containerStatuses") && !items["status"]["containerStatuses"].empty? 
#container status block start - items["status"]["containerStatuses"].each do |container| + + podContainers = [] + if items["status"].key?("containerStatuses") && !items["status"]["containerStatuses"].empty? + podContainers = podContainers + items["status"]["containerStatuses"] + end + # Adding init containers to the record list as well. + if items["status"].key?("initContainerStatuses") && !items["status"]["initContainerStatuses"].empty? + podContainers = podContainers + items["status"]["initContainerStatuses"] + end + + # if items["status"].key?("containerStatuses") && !items["status"]["containerStatuses"].empty? #container status block start + if !podContainers.empty? #container status block start + podContainers.each do |container| containerRestartCount = 0 #container Id is of the form #docker://dfd9da983f1fd27432fb2c1fe3049c0a1d25b1c697b2dc1a530c986e58b16527 diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb index 8b0a013e4..7cd703620 100644 --- a/source/code/plugin/in_kube_services.rb +++ b/source/code/plugin/in_kube_services.rb @@ -46,11 +46,19 @@ def enumerate currentTime = Time.now emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 + + serviceList = nil + $log.info("in_kube_services::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") - serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo("services").body) + serviceInfo = KubernetesApiClient.getKubeResourceInfo("services") $log.info("in_kube_services::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}") + + if !serviceInfo.nil? + serviceList = JSON.parse(serviceInfo.body) + end + begin - if (!serviceList.empty?) + if (!serviceList.nil? && !serviceList.empty?) 
eventStream = MultiEventStream.new serviceList["items"].each do |items| record = {} From 3079471a69f9d704e6de55200857ac5489866fc2 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Mon, 7 Oct 2019 11:33:57 -0700 Subject: [PATCH 127/160] Send agg monitor signal on details change (#270) send when an agg monitor details change, but state did not change --- .../plugin/filter_health_model_builder.rb | 3 ++- .../plugin/health/health_monitor_state.rb | 25 ++++++++++++++++--- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 5aa7f610e..fa92038e6 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -184,7 +184,8 @@ def filter_stream(tag, es) all_monitors.each{|monitor_instance_id, monitor| if monitor.is_aggregate_monitor @state.update_state(monitor, - @provider.get_config(monitor.monitor_id) + @provider.get_config(monitor.monitor_id), + true ) end diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index 498c75ec7..7eb674f1e 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -57,10 +57,11 @@ def initialize_state(deserialized_state) 2. if there is a "consistent" state change for monitors 3. if the signal is stale (> 4hrs) 4. If the latest state is none +5. 
If an aggregate monitor has a change in its details, but no change in state =end def update_state(monitor, #UnitMonitor/AggregateMonitor - monitor_config #Hash - ) + monitor_config, #Hash + is_aggregate_monitor = false) samples_to_keep = 1 monitor_instance_id = monitor.monitor_instance_id log = HealthMonitorHelpers.get_log_handle @@ -76,12 +77,13 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor samples_to_keep = monitor_config['ConsecutiveSamplesForStateTransition'].to_i end + deleted_record = {} if @@monitor_states.key?(monitor_instance_id) health_monitor_instance_state = @@monitor_states[monitor_instance_id] health_monitor_records = health_monitor_instance_state.prev_records #This should be an array if health_monitor_records.size == samples_to_keep - health_monitor_records.delete_at(0) + deleted_record = health_monitor_records.delete_at(0) end health_monitor_records.push(monitor.details) health_monitor_instance_state.prev_records = health_monitor_records @@ -106,7 +108,6 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor @@monitor_states[monitor_instance_id] = health_monitor_instance_state end - # update old and new state based on the history and latest record. # TODO: this is a little hairy. 
Simplify @@ -142,6 +143,10 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor @@first_record_sent[monitor_instance_id] = true health_monitor_instance_state.should_send = true set_state(monitor_instance_id, health_monitor_instance_state) + elsif agg_monitor_details_changed?(is_aggregate_monitor, deleted_record, health_monitor_instance_state.prev_records[0]) + health_monitor_instance_state.should_send = true + set_state(monitor_instance_id, health_monitor_instance_state) + log.debug "#{monitor_instance_id} condition: agg monitor details changed should_send #{health_monitor_instance_state.should_send}" end # latest state is different that last sent state else @@ -212,5 +217,17 @@ def is_state_change_consistent(health_monitor_records, samples_to_check) end return true end + + def agg_monitor_details_changed?(is_aggregate_monitor, last_sent_details, latest_details) + log = HealthMonitorHelpers.get_log_handle + if !is_aggregate_monitor + return false + end + if latest_details['details'] != last_sent_details['details'] + log.info "Last Sent Details #{JSON.pretty_generate(last_sent_details)} \n Latest Details: #{JSON.pretty_generate(latest_details)}" + return true + end + return false + end end end \ No newline at end of file From de2e1da41dcd08c3407569bf41c53f5281b46331 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Wed, 9 Oct 2019 17:04:16 -0700 Subject: [PATCH 128/160] bug fixes for error (#274) --- source/code/go/src/plugins/oms.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 01aab85b4..b68c471a1 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -413,7 +413,6 @@ func flushKubeMonAgentEventRecords() { Log("In flushConfigErrorRecords\n") start := time.Now() var resp *http.Response - var postError error var elapsed time.Duration var laKubeMonAgentEventsRecords []laKubeMonAgentEvents telemetryDimensions := 
make(map[string]string) @@ -518,10 +517,10 @@ func flushKubeMonAgentEventRecords() { req.Header.Set("x-ms-AzureResourceId", ResourceID) } - resp, postError = HTTPClient.Do(req) + resp, err = HTTPClient.Do(req) elapsed = time.Since(start) - if postError != nil { + if err != nil { message := fmt.Sprintf("Error when sending kubemonagentevent request %s \n", err.Error()) Log(message) Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) @@ -532,7 +531,7 @@ func flushKubeMonAgentEventRecords() { Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) } else { numRecords := len(laKubeMonAgentEventsRecords) - Log("Successfully flushed %d records in %s", numRecords, elapsed) + Log("FlushKubeMonAgentEventRecords::Info::Successfully flushed %d records in %s", numRecords, elapsed) // Send telemetry to AppInsights resource SendEvent(KubeMonAgentEventsFlushedEvent, telemetryDimensions) @@ -822,7 +821,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { defer resp.Body.Close() numRecords := len(dataItems) - Log("Successfully flushed %d records in %s", numRecords, elapsed) + Log("PostDataHelper::Info::Successfully flushed %d records in %s", numRecords, elapsed) ContainerLogTelemetryMutex.Lock() FlushedRecordsCount += float64(numRecords) FlushedRecordsTimeTaken += float64(elapsed / time.Millisecond) From e4b91c51dff06558a9b048669f07462d0df47d88 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Wed, 9 Oct 2019 17:39:54 -0700 Subject: [PATCH 129/160] Fix to use declaration and assignment instead of assignment (#275) * bug fixes for error * adding declaration to assignment --- source/code/go/src/plugins/oms.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index b68c471a1..123aea197 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -517,7 +517,7 @@ func 
flushKubeMonAgentEventRecords() { req.Header.Set("x-ms-AzureResourceId", ResourceID) } - resp, err = HTTPClient.Do(req) + resp, err := HTTPClient.Do(req) elapsed = time.Since(start) if err != nil { From cf5e85ccf7e841bf79117beb15608d0a5f8e533b Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Wed, 9 Oct 2019 18:01:40 -0700 Subject: [PATCH 130/160] 1. Added telemetry (#277) 2. Configuration property changes 3. Bug fixes for a. unscheduled pods returning green 3b. Sometimes, the details hash of agg monitors are different because the order of elements inside the array is different, causing the records to be sent --- installer/conf/healthmonitorconfig.json | 40 ++++++------- installer/datafiles/base_container.data | 1 + .../plugin/filter_health_model_builder.rb | 14 +++-- .../health_container_cpu_memory_aggregator.rb | 4 +- .../plugin/health/health_monitor_helpers.rb | 2 - .../plugin/health/health_monitor_state.rb | 42 ++++++++++++-- .../plugin/health/health_monitor_telemetry.rb | 57 +++++++++++++++++++ .../plugin/health/health_monitor_utils.rb | 11 ++-- .../health/health_monitor_state_spec.rb | 12 ++-- 9 files changed, 134 insertions(+), 49 deletions(-) create mode 100644 source/code/plugin/health/health_monitor_telemetry.rb diff --git a/installer/conf/healthmonitorconfig.json b/installer/conf/healthmonitorconfig.json index ea6b23856..e4019fe73 100644 --- a/installer/conf/healthmonitorconfig.json +++ b/installer/conf/healthmonitorconfig.json @@ -1,40 +1,34 @@ { "node_cpu_utilization": { - "WarnThresholdPercentage": 80.0, - "FailThresholdPercentage": 90.0, - "ConsecutiveSamplesForStateTransition": 3, - "Operator": ">" + "WarnIfGreaterThanPercentage": 80.0, + "FailIfGreaterThanPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 }, "node_memory_utilization": { - "WarnThresholdPercentage": 80.0, - "FailThresholdPercentage": 90.0, - "ConsecutiveSamplesForStateTransition": 3, - "Operator": ">" + "WarnIfGreaterThanPercentage": 80.0, + 
"FailIfGreaterThanPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 3 }, "container_cpu_utilization": { - "WarnThresholdPercentage": 80.0, - "FailThresholdPercentage": 90.0, + "WarnIfGreaterThanPercentage": 80.0, + "FailIfGreaterThanPercentage": 90.0, "StateThresholdPercentage": 90.0, - "ConsecutiveSamplesForStateTransition": 3, - "Operator": ">" + "ConsecutiveSamplesForStateTransition": 3 }, "container_memory_utilization": { - "WarnThresholdPercentage": 80.0, - "FailThresholdPercentage": 90.0, + "WarnIfGreaterThanPercentage": 80.0, + "FailIfGreaterThanPercentage": 90.0, "StateThresholdPercentage": 90.0, - "ConsecutiveSamplesForStateTransition": 3, - "Operator": ">" + "ConsecutiveSamplesForStateTransition": 3 }, "user_workload_pods_ready": { - "WarnThresholdPercentage": 100.0, - "FailThresholdPercentage": 90.0, - "ConsecutiveSamplesForStateTransition": 2, - "Operator": "<" + "WarnIfLesserThanPercentage": 100.0, + "FailIfLesserThanPercentage": 90.0, + "ConsecutiveSamplesForStateTransition": 2 }, "system_workload_pods_ready": { - "FailThresholdPercentage": 100.0, - "ConsecutiveSamplesForStateTransition": 2, - "Operator": "<" + "FailIfLesserThanPercentage": 100.0, + "ConsecutiveSamplesForStateTransition": 2 }, "node_condition": { "NodeConditionTypesForFailedState": "outofdisk,networkunavailable" diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 981f51f4c..4ebc4f338 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -147,6 +147,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/health/health_monitor_provider.rb; source/code/plugin/health/health_monitor_provider.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_record.rb; source/code/plugin/health/health_monitor_record.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_state.rb; source/code/plugin/health/health_monitor_state.rb; 644; root; root 
+/opt/microsoft/omsagent/plugin/health/health_monitor_telemetry.rb; source/code/plugin/health/health_monitor_telemetry.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb; source/code/plugin/health/health_monitor_helpers.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_utils.rb; source/code/plugin/health/health_monitor_utils.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_signal_reducer.rb; source/code/plugin/health/health_signal_reducer.rb; 644; root; root diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index fa92038e6..afb514a73 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -16,7 +16,7 @@ class FilterHealthModelBuilder < Filter config_param :model_definition_path, :default => '/etc/opt/microsoft/docker-cimprov/health/health_model_definition.json' config_param :health_monitor_config_path, :default => '/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json' config_param :health_state_serialized_path, :default => '/mnt/azure/health_model_state.json' - attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_finalizers, :monitor_set, :model_builder, :hierarchy_builder, :resources, :kube_api_down_handler, :provider, :reducer, :state, :generator + attr_reader :buffer, :model_builder, :health_model_definition, :monitor_factory, :state_finalizers, :monitor_set, :model_builder, :hierarchy_builder, :resources, :kube_api_down_handler, :provider, :reducer, :state, :generator, :telemetry include HealthModel @@rewrite_tag = 'kubehealth.Signals' @@ -49,6 +49,7 @@ def initialize @cluster_old_state = 'none' @cluster_new_state = 'none' @container_cpu_memory_records = [] + @telemetry = HealthMonitorTelemetry.new rescue => e ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) end @@ -142,7 +143,9 @@ 
def filter_stream(tag, es) reduced_records = @reducer.reduce_signals(health_monitor_records, @resources) reduced_records.each{|record| @state.update_state(record, - @provider.get_config(record.monitor_id) + @provider.get_config(record.monitor_id), + false, + @telemetry ) # get the health state based on the monitor's operational state # update state calls updates the state of the monitor based on configuration and history of the the monitor records @@ -160,7 +163,7 @@ def filter_stream(tag, es) #update state for missing signals missing_signals.each{|signal| - @state.update_state(signal, @provider.get_config(signal.monitor_id)) + @state.update_state(signal, @provider.get_config(signal.monitor_id), false, @telemetry) @log.info "After Updating #{@state.get_state(signal.monitor_instance_id)} #{@state.get_state(signal.monitor_instance_id).new_state}" # for unknown/none records, update the "monitor state" to be the latest state (new_state) of the monitor instance from the state signal.state = @state.get_state(signal.monitor_instance_id).new_state @@ -185,7 +188,8 @@ def filter_stream(tag, es) if monitor.is_aggregate_monitor @state.update_state(monitor, @provider.get_config(monitor.monitor_id), - true + true, + @telemetry ) end @@ -242,7 +246,7 @@ def filter_stream(tag, es) #update cluster state custom resource @cluster_health_state.update_state(@state.to_h) - + @telemetry.send # return an empty event stream, else the match will throw a NoMethodError return MultiEventStream.new elsif tag.start_with?("kubehealth.Signals") diff --git a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb index e98c288b3..6de146e3d 100644 --- a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb +++ b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb @@ -246,9 +246,9 @@ def calculate_monitor_state(v, config) def calculate_container_instance_state(counter_value, limit, config) 
percent_value = counter_value * 100 / limit - if percent_value > config['FailThresholdPercentage'] + if percent_value > config['FailIfGreaterThanPercentage'] return HealthMonitorStates::FAIL - elsif percent_value > config['WarnThresholdPercentage'] + elsif percent_value > config['WarnIfGreaterThanPercentage'] return HealthMonitorStates::WARN else return HealthMonitorStates::PASS diff --git a/source/code/plugin/health/health_monitor_helpers.rb b/source/code/plugin/health/health_monitor_helpers.rb index 4efd4c608..f784ae76e 100644 --- a/source/code/plugin/health/health_monitor_helpers.rb +++ b/source/code/plugin/health/health_monitor_helpers.rb @@ -43,11 +43,9 @@ def add_agentpool_node_label_if_not_present(records) if labels_keys.include?(HealthMonitorLabels::AGENTPOOL) @log.info "#{record.monitor_id} includes agentpool label. Value = #{record.labels[HealthMonitorLabels::AGENTPOOL]}" - @log.info "Labels present = #{labels_keys}" next else #@log.info "#{record} does not include agentpool label." 
- @log.info "Labels present = #{labels_keys}" role_name = 'unknown' if record.labels.include?(HealthMonitorLabels::ROLE) role_name = record.labels[HealthMonitorLabels::ROLE] diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index 7eb674f1e..cac66f26b 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -16,6 +16,7 @@ def initialize @@monitor_states = {} @@first_record_sent = {} @@health_signal_timeout = 240 + end def get_state(monitor_instance_id) @@ -46,7 +47,6 @@ def initialize_state(deserialized_state) state.should_send = health_monitor_instance_state_hash["should_send"] @@monitor_states[k] = state @@first_record_sent[k] = true - } end @@ -61,8 +61,11 @@ def initialize_state(deserialized_state) =end def update_state(monitor, #UnitMonitor/AggregateMonitor monitor_config, #Hash - is_aggregate_monitor = false) + is_aggregate_monitor = false, + telemetry = nil + ) samples_to_keep = 1 + monitor_id = monitor.monitor_id monitor_instance_id = monitor.monitor_instance_id log = HealthMonitorHelpers.get_log_handle current_time = Time.now.utc.iso8601 @@ -157,6 +160,11 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor health_monitor_instance_state.state_change_time = current_time health_monitor_instance_state.prev_sent_record_time = current_time health_monitor_instance_state.should_send = true + if !is_aggregate_monitor + if !telemetry.nil? 
+ telemetry.add_monitor_to_telemetry(monitor_id, health_monitor_instance_state.old_state, health_monitor_instance_state.new_state) + end + end if !@@first_record_sent.key?(monitor_instance_id) @@first_record_sent[monitor_instance_id] = true end @@ -170,6 +178,11 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor health_monitor_instance_state.state_change_time = current_time health_monitor_instance_state.prev_sent_record_time = current_time health_monitor_instance_state.should_send = true + if !is_aggregate_monitor + if !telemetry.nil? + telemetry.add_monitor_to_telemetry(monitor_id, health_monitor_instance_state.old_state, health_monitor_instance_state.new_state) + end + end if !@@first_record_sent.key?(monitor_instance_id) @@first_record_sent[monitor_instance_id] = true end @@ -190,6 +203,11 @@ def update_state(monitor, #UnitMonitor/AggregateMonitor health_monitor_instance_state.new_state = latest_record_state health_monitor_instance_state.prev_sent_record_time = current_time health_monitor_instance_state.state_change_time = current_time + if !is_aggregate_monitor + if !telemetry.nil? + telemetry.add_monitor_to_telemetry(monitor_id, health_monitor_instance_state.old_state, health_monitor_instance_state.new_state) + end + end set_state(monitor_instance_id, health_monitor_instance_state) @@ -223,10 +241,22 @@ def agg_monitor_details_changed?(is_aggregate_monitor, last_sent_details, latest if !is_aggregate_monitor return false end - if latest_details['details'] != last_sent_details['details'] - log.info "Last Sent Details #{JSON.pretty_generate(last_sent_details)} \n Latest Details: #{JSON.pretty_generate(latest_details)}" - return true - end + # Do a deep comparison of the keys under details, since a shallow comparison is hit or miss. 
+ # Actual bug was the array inside the keys were in random order and the previous equality comparison was failing + latest_details['details'].keys.each{|k| + if !last_sent_details['details'].key?(k) + return true + end + if latest_details['details'][k].size != last_sent_details['details'][k].size + return true + end + } + # Explanation: a = [1,2] b = [2,1] a & b = [1,2] , c = [2,3] d = [2] c & d = [2] c.size != (c&d).size + latest_details['details'].keys.each{|k| + if !(latest_details['details'][k].size == (last_sent_details['details'][k] & latest_details['details'][k]).size) + return true + end + } return false end end diff --git a/source/code/plugin/health/health_monitor_telemetry.rb b/source/code/plugin/health/health_monitor_telemetry.rb new file mode 100644 index 000000000..df4b98ac8 --- /dev/null +++ b/source/code/plugin/health/health_monitor_telemetry.rb @@ -0,0 +1,57 @@ +require_relative 'health_model_constants' +require 'socket' +if Socket.gethostname.start_with?('omsagent-rs') + require_relative '../ApplicationInsightsUtility' +end + + +module HealthModel + class HealthMonitorTelemetry + + attr_reader :monitor_records, :last_sent_time + @@TELEMETRY_SEND_INTERVAL = 60 + + def initialize + @last_sent_time = Time.now + end + + def send + if Time.now > @last_sent_time + @@TELEMETRY_SEND_INTERVAL * 60 + log = HealthMonitorHelpers.get_log_handle + log.info "Sending #{@monitor_records.size} state change events" + if @monitor_records.size > 0 + hash_to_send = {} + @monitor_records.each{|k,v| + v.each{|k1,v1| + hash_to_send["#{k}-#{k1}"] = v1 + } + } + ApplicationInsightsUtility.sendCustomEvent("HealthMonitorStateChangeEvent", hash_to_send) + end + @monitor_records = {} + @last_sent_time = Time.now + end + end + + def add_monitor_to_telemetry(monitor_id, old_state, new_state) + if @monitor_records.nil? || @monitor_records.empty? 
+ @monitor_records = {} + end + if @monitor_records.key?(monitor_id) + monitor_hash = @monitor_records[monitor_id] + if monitor_hash.key?("#{old_state}-#{new_state}") + count = monitor_hash["#{old_state}-#{new_state}"] + count = count + 1 + monitor_hash["#{old_state}-#{new_state}"] = count + else + monitor_hash["#{old_state}-#{new_state}"] = 1 + end + @monitor_records[monitor_id] = monitor_hash + else + monitor_hash = {} + monitor_hash["#{old_state}-#{new_state}"] = 1 + @monitor_records[monitor_id] = monitor_hash + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index 27e9b9a6e..e21fdc83d 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -27,16 +27,17 @@ class HealthMonitorUtils class << self # compute the percentage state given a value and a monitor configuration + #TODO : Add Unit Tests for this method def compute_percentage_state(value, config) - - if config.nil? || config['WarnThresholdPercentage'].nil? + if config.nil? || ( config['WarnIfGreaterThanPercentage'].nil? && config['WarnIfLesserThanPercentage'].nil? ) warn_percentage = nil else - warn_percentage = config['WarnThresholdPercentage'].to_f + warn_percentage = !config['WarnIfGreaterThanPercentage'].nil? ? config['WarnIfGreaterThanPercentage'].to_f : config['WarnIfLesserThanPercentage'].to_f end - fail_percentage = config['FailThresholdPercentage'].to_f + fail_percentage = !config['FailIfGreaterThanPercentage'].nil? ? config['FailIfGreaterThanPercentage'].to_f : config['FailIfLesserThanPercentage'].to_f + is_less_than_comparer = config['FailIfGreaterThanPercentage'].nil? ? true : false # Fail percentage config always present for percentage computation monitors - if !config.nil? && !config['Operator'].nil? && config['Operator'] == '<' + if !config.nil? 
&& is_less_than_comparer if value < fail_percentage return HealthMonitorStates::FAIL elsif !warn_percentage.nil? && value < warn_percentage diff --git a/test/code/plugin/health/health_monitor_state_spec.rb b/test/code/plugin/health/health_monitor_state_spec.rb index 5fa8a6c6e..3d13d4150 100644 --- a/test/code/plugin/health/health_monitor_state_spec.rb +++ b/test/code/plugin/health/health_monitor_state_spec.rb @@ -65,8 +65,8 @@ def mock_monitor.transition_date_time; Time.now.utc.iso8601; end def mock_monitor.details; {"state" => "pass", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end config = JSON.parse('{ - "WarnThresholdPercentage": 80.0, - "FailThresholdPercentage": 90.0, + "WarnIfGreaterThanPercentage": 80.0, + "FailIfGreaterThanPercentage": 90.0, "ConsecutiveSamplesForStateTransition": 3 }') #act @@ -96,8 +96,8 @@ def mock_monitor.transition_date_time; Time.now.utc.iso8601; end def mock_monitor.details; {"state" => "pass", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end config = JSON.parse('{ - "WarnThresholdPercentage": 80.0, - "FailThresholdPercentage": 90.0, + "WarnIfGreaterThanPercentage": 80.0, + "FailIfGreaterThanPercentage": 90.0, "ConsecutiveSamplesForStateTransition": 3 }') #act @@ -136,8 +136,8 @@ def mock_monitor.transition_date_time; Time.now.utc.iso8601; end def mock_monitor.details; {"state" => "pass", "timestamp" => Time.now.utc.iso8601, "details" => {}}; end config = JSON.parse('{ - "WarnThresholdPercentage": 80.0, - "FailThresholdPercentage": 90.0, + "WarnIfGreaterThanPercentage": 80.0, + "FailIfGreaterThanPercentage": 90.0, "ConsecutiveSamplesForStateTransition": 3 }') #act From e8529b2fc3ab96603c00eda162715bf8518b74b9 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Thu, 10 Oct 2019 10:51:55 -0700 Subject: [PATCH 131/160] Bug fix to remove unused variable (#281) * bug fixes for error * adding declaration to assignment * removing unused variable --- source/code/go/src/plugins/oms.go | 1 - 1 file changed, 1 
deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 123aea197..5a323d7e0 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -412,7 +412,6 @@ func flushKubeMonAgentEventRecords() { if skipKubeMonEventsFlush != true { Log("In flushConfigErrorRecords\n") start := time.Now() - var resp *http.Response var elapsed time.Duration var laKubeMonAgentEventsRecords []laKubeMonAgentEvents telemetryDimensions := make(map[string]string) From 8a4147d7a9eaddc2023152d84ccfe06b27034a6b Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Fri, 11 Oct 2019 15:33:32 -0700 Subject: [PATCH 132/160] Fix the WARN<->WARNING typo (#282) --- .../plugin/health/health_container_cpu_memory_aggregator.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb index 6de146e3d..ef1016158 100644 --- a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb +++ b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb @@ -249,7 +249,7 @@ def calculate_container_instance_state(counter_value, limit, config) if percent_value > config['FailIfGreaterThanPercentage'] return HealthMonitorStates::FAIL elsif percent_value > config['WarnIfGreaterThanPercentage'] - return HealthMonitorStates::WARN + return HealthMonitorStates::WARNING else return HealthMonitorStates::PASS end From 4780c3e17fc98ccba9381d60c8945ed7c90c6301 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Sun, 13 Oct 2019 19:16:02 -0700 Subject: [PATCH 133/160] Bug Fixes 1. telemetry send throwing exception if records not initialized 2. permissions error in on-prem clusters (#284) * Bug fixes 1. 
not writeable, telemetry error * Change to state_WS_dir --- installer/conf/container.conf | 2 +- source/code/plugin/health/health_monitor_telemetry.rb | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 5f08043c7..f9540bde8 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -88,7 +88,7 @@ @type file - path /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log + path %STATE_DIR_WS%/fluent_forward_failed.buffer diff --git a/source/code/plugin/health/health_monitor_telemetry.rb b/source/code/plugin/health/health_monitor_telemetry.rb index df4b98ac8..4e80a5145 100644 --- a/source/code/plugin/health/health_monitor_telemetry.rb +++ b/source/code/plugin/health/health_monitor_telemetry.rb @@ -13,6 +13,7 @@ class HealthMonitorTelemetry def initialize @last_sent_time = Time.now + @monitor_records = {} end def send From 981018cafd6bc336cb463472e8c15b740c57b7cc Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Thu, 17 Oct 2019 21:49:22 -0700 Subject: [PATCH 134/160] Fix Require relative revert (#287) --- source/code/plugin/health/agg_monitor_id_labels.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/code/plugin/health/agg_monitor_id_labels.rb b/source/code/plugin/health/agg_monitor_id_labels.rb index bb016adb4..d5c724a86 100644 --- a/source/code/plugin/health/agg_monitor_id_labels.rb +++ b/source/code/plugin/health/agg_monitor_id_labels.rb @@ -1,3 +1,5 @@ +require_relative 'health_model_constants' + module HealthModel class AggregateMonitorInstanceIdLabels @@id_labels_mapping = { From edaa963477a1bdea67a508457ff3ac5340d3727f Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Fri, 1 Nov 2019 16:16:50 -0700 Subject: [PATCH 135/160] Bug Fixes for exceptions in telemetry, remove limit set check (#289) * Bug Fixes 10222019 * Initialize container_cpu_memory_records in fhmb * Added telemetry to investigate health exceptions * Set 
frozen_string_literal to true * Send event once per container when lookup is empty, or limit is an array * Unit Tests, Use RS and POD to determine workload * Fixed Node Condition Bug, added exception handling to return get_rs_owner_ref --- .../plugin/filter_health_model_builder.rb | 31 +- .../plugin/health/agg_monitor_id_labels.rb | 1 + .../aggregate_monitor_state_finalizer.rb | 2 + .../plugin/health/cluster_health_state.rb | 2 + .../health_container_cpu_memory_aggregator.rb | 71 +- ...h_container_cpu_memory_record_formatter.rb | 2 + .../plugin/health/health_hierarchy_builder.rb | 2 + .../health/health_kube_api_down_handler.rb | 2 + .../health/health_kubernetes_resources.rb | 90 +- .../health/health_missing_signal_generator.rb | 2 + .../code/plugin/health/health_model_buffer.rb | 2 + .../plugin/health/health_model_builder.rb | 1 + .../plugin/health/health_model_constants.rb | 1 + .../health/health_model_definition_parser.rb | 1 + .../plugin/health/health_monitor_helpers.rb | 1 + .../plugin/health/health_monitor_optimizer.rb | 1 + .../plugin/health/health_monitor_provider.rb | 1 + .../plugin/health/health_monitor_record.rb | 1 + .../plugin/health/health_monitor_state.rb | 1 + .../plugin/health/health_monitor_telemetry.rb | 1 + .../plugin/health/health_monitor_utils.rb | 73 +- .../plugin/health/health_signal_reducer.rb | 1 + source/code/plugin/health/monitor_factory.rb | 1 + .../plugin/health/parent_monitor_provider.rb | 1 + source/code/plugin/health/unit_monitor.rb | 1 + source/code/plugin/in_kube_health.rb | 22 +- source/code/plugin/out_health_forward.rb | 1 + ...th_container_cpu_memory_aggregator_spec.rb | 8 +- .../health/health_kubernetes_resource_spec.rb | 26 +- .../health/health_model_builder_test.rb | 977 +++++++++--------- 30 files changed, 680 insertions(+), 647 deletions(-) diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index afb514a73..47ce7a631 100644 --- 
a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -39,17 +39,16 @@ def initialize @kube_api_down_handler = HealthKubeApiDownHandler.new @resources = HealthKubernetesResources.instance @reducer = HealthSignalReducer.new - @state = HealthMonitorState.new @generator = HealthMissingSignalGenerator.new - #TODO: cluster_labels needs to be initialized @provider = HealthMonitorProvider.new(@@cluster_id, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) - deserialized_state_info = @cluster_health_state.get_state - @state = HealthMonitorState.new - @state.initialize_state(deserialized_state_info) @cluster_old_state = 'none' @cluster_new_state = 'none' @container_cpu_memory_records = [] @telemetry = HealthMonitorTelemetry.new + @state = HealthMonitorState.new + # move network calls to the end. This will ensure all the instance variables get initialized + deserialized_state_info = @cluster_health_state.get_state + @state.initialize_state(deserialized_state_info) rescue => e ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) end @@ -99,6 +98,10 @@ def filter_stream(tag, es) end container_records_aggregator = HealthContainerCpuMemoryAggregator.new(@resources, @provider) deduped_records = container_records_aggregator.dedupe_records(container_records) + if @container_cpu_memory_records.nil? + @log.info "@container_cpu_memory_records was not initialized" + @container_cpu_memory_records = [] #in some clusters, this is null, so initialize it again. 
+ end @container_cpu_memory_records.push(*deduped_records) # push the records for aggregation later return MultiEventStream.new elsif tag.start_with?("kubehealth.ReplicaSet") @@ -106,14 +109,16 @@ def filter_stream(tag, es) es.each{|time, record| records.push(record) } - @buffer.add_to_buffer(records) - - container_records_aggregator = HealthContainerCpuMemoryAggregator.new(@resources, @provider) - container_records_aggregator.aggregate(@container_cpu_memory_records) - container_records_aggregator.compute_state - aggregated_container_records = container_records_aggregator.get_records - @buffer.add_to_buffer(aggregated_container_records) - + @buffer.add_to_buffer(records) # in_kube_health records + + aggregated_container_records = [] + if !@container_cpu_memory_records.nil? && !@container_cpu_memory_records.empty? + container_records_aggregator = HealthContainerCpuMemoryAggregator.new(@resources, @provider) + container_records_aggregator.aggregate(@container_cpu_memory_records) + container_records_aggregator.compute_state + aggregated_container_records = container_records_aggregator.get_records + end + @buffer.add_to_buffer(aggregated_container_records) #container cpu/memory records records_to_process = @buffer.get_buffer @buffer.reset_buffer @container_cpu_memory_records = [] diff --git a/source/code/plugin/health/agg_monitor_id_labels.rb b/source/code/plugin/health/agg_monitor_id_labels.rb index d5c724a86..03680d054 100644 --- a/source/code/plugin/health/agg_monitor_id_labels.rb +++ b/source/code/plugin/health/agg_monitor_id_labels.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require_relative 'health_model_constants' module HealthModel diff --git a/source/code/plugin/health/aggregate_monitor_state_finalizer.rb b/source/code/plugin/health/aggregate_monitor_state_finalizer.rb index 74e780924..dd69c9c4d 100644 --- a/source/code/plugin/health/aggregate_monitor_state_finalizer.rb +++ b/source/code/plugin/health/aggregate_monitor_state_finalizer.rb @@ -1,3 +1,5 @@ 
+# frozen_string_literal: true + module HealthModel class AggregateMonitorStateFinalizer diff --git a/source/code/plugin/health/cluster_health_state.rb b/source/code/plugin/health/cluster_health_state.rb index 3b56dd243..fa9cb42b2 100644 --- a/source/code/plugin/health/cluster_health_state.rb +++ b/source/code/plugin/health/cluster_health_state.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require "net/http" require "net/https" require "uri" diff --git a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb index ef1016158..f6b57e0ae 100644 --- a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb +++ b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb @@ -1,4 +1,12 @@ +# frozen_string_literal: true + require_relative 'health_model_constants' + +# Require only when running inside container. +# otherwise unit tests will fail due to ApplicationInsightsUtility dependency on base omsagent ruby files. If you have your dev machine starting with omsagent-rs, then GOOD LUCK! 
+if Socket.gethostname.start_with?('omsagent-rs') + require_relative '../ApplicationInsightsUtility' +end =begin @cpu_records/@memory_records [ @@ -37,6 +45,10 @@ class HealthContainerCpuMemoryAggregator @@memory_counter_name = 'memoryRssBytes' @@cpu_counter_name = 'cpuUsageNanoCores' + @@workload_container_count_empty_event_sent = {} + @@limit_is_array_event_sent = {} + @@WORKLOAD_CONTAINER_COUNT_EMPTY_EVENT = "WorkloadContainerCountEmptyEvent" + @@LIMIT_IS_ARRAY_EVENT = "ResourceLimitIsAnArrayEvent" def initialize(resources, provider) @pod_uid_lookup = resources.get_pod_uid_lookup @workload_container_count = resources.get_workload_container_count @@ -163,11 +175,30 @@ def get_records container_cpu_memory_records = [] @cpu_records.each{|resource_key, record| + + cpu_limit_mc = 1.0 + if record["limit"].is_a?(Numeric) + cpu_limit_mc = record["limit"]/1000000.to_f + else + @log.info "CPU Limit is not a number #{record['limit']}" + if !@@limit_is_array_event_sent.key?(resource_key) + custom_properties = {} + custom_properties['limit'] = record['limit'] + if record['limit'].is_a?(Array) + record['limit'].each_index{|i| + custom_properties[i] = record['limit'][i] + } + end + @@limit_is_array_event_sent[resource_key] = true + #send once per resource key + ApplicationInsightsUtility.sendCustomEvent(@@LIMIT_IS_ARRAY_EVENT, custom_properties) + end + end health_monitor_record = { "timestamp" => time_now, "state" => record["state"], "details" => { - "cpu_limit_millicores" => record["limit"]/1000000.to_f, + "cpu_limit_millicores" => cpu_limit_mc, "cpu_usage_instances" => record["records"].map{|r| r.each {|k,v| k == "counter_value" ? 
r[k] = r[k] / 1000000.to_f : r[k] }}, @@ -219,12 +250,10 @@ def get_records private def calculate_monitor_state(v, config) - if !v['limit_set'] && v['namespace'] != 'kube-system' - v["state"] = HealthMonitorStates::WARNING - else - # sort records by descending order of metric - v["records"] = v["records"].sort_by{|record| record["counter_value"]}.reverse - size = v["records"].size + # sort records by descending order of metric + v["records"] = v["records"].sort_by{|record| record["counter_value"]}.reverse + size = v["records"].size + if !v["record_count"].nil? if size < v["record_count"] unknown_count = v["record_count"] - size for i in unknown_count.downto(1) @@ -232,16 +261,30 @@ def calculate_monitor_state(v, config) v["records"].insert(0, {"counter_value" => -1, "container" => v["container"], "pod_name" => "???", "state" => HealthMonitorStates::UNKNOWN }) #insert -1 for unknown records end end + else + v["state"] = HealthMonitorStates::UNKNOWN + container_key = "#{v['workload_name']}~~#{v['container']}" + @log.info "ContainerKey: #{container_key} Records Size: #{size} Records: #{v['records']} Record Count: #{v['record_count']} #{@workload_container_count}" - if size == 1 - state_index = 0 - else - state_threshold = config['StateThresholdPercentage'].to_f - count = ((state_threshold*size)/100).ceil - state_index = size - count + if !@@workload_container_count_empty_event_sent.key?(container_key) + custom_properties = {} + custom_properties = custom_properties.merge(v) + custom_properties = custom_properties.merge(@workload_container_count) + @log.info "Custom Properties : #{custom_properties}" + @@workload_container_count_empty_event_sent[container_key] = true + ApplicationInsightsUtility.sendCustomEvent(@@WORKLOAD_CONTAINER_COUNT_EMPTY_EVENT, custom_properties) end - v["state"] = v["records"][state_index]["state"] + return #simply return the state as unknown here + end + + if size == 1 + state_index = 0 + else + state_threshold = 
config['StateThresholdPercentage'].to_f + count = ((state_threshold*size)/100).ceil + state_index = size - count end + v["state"] = v["records"][state_index]["state"] end def calculate_container_instance_state(counter_value, limit, config) diff --git a/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb b/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb index 5c7db82d9..0c3f061f1 100644 --- a/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb +++ b/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + module HealthModel class HealthContainerCpuMemoryRecordFormatter diff --git a/source/code/plugin/health/health_hierarchy_builder.rb b/source/code/plugin/health/health_hierarchy_builder.rb index 2da0050db..bb48e083b 100644 --- a/source/code/plugin/health/health_hierarchy_builder.rb +++ b/source/code/plugin/health/health_hierarchy_builder.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'json' module HealthModel class HealthHierarchyBuilder diff --git a/source/code/plugin/health/health_kube_api_down_handler.rb b/source/code/plugin/health/health_kube_api_down_handler.rb index a87c43ef1..bb91f2e3b 100644 --- a/source/code/plugin/health/health_kube_api_down_handler.rb +++ b/source/code/plugin/health/health_kube_api_down_handler.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require_relative 'health_model_constants' module HealthModel class HealthKubeApiDownHandler diff --git a/source/code/plugin/health/health_kubernetes_resources.rb b/source/code/plugin/health/health_kubernetes_resources.rb index 30a9ac7ca..743dd8b94 100644 --- a/source/code/plugin/health/health_kubernetes_resources.rb +++ b/source/code/plugin/health/health_kubernetes_resources.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'singleton' require_relative 'health_model_constants' @@ -5,20 +7,20 @@ module HealthModel class 
HealthKubernetesResources include Singleton - attr_accessor :node_inventory, :pod_inventory, :deployment_inventory, :pod_uid_lookup, :workload_container_count + attr_accessor :node_inventory, :pod_inventory, :replicaset_inventory, :pod_uid_lookup, :workload_container_count attr_reader :nodes, :pods, :workloads, :deployment_lookup def initialize - @node_inventory = [] - @pod_inventory = [] - @deployment_inventory = [] + @node_inventory = {} + @pod_inventory = {} + @replicaset_inventory = {} @nodes = [] @pods = [] @workloads = [] @log = HealthMonitorHelpers.get_log_handle @pod_uid_lookup = {} - @deployment_lookup = {} @workload_container_count = {} + @workload_name_cache = {} end def get_node_inventory @@ -36,9 +38,8 @@ def get_nodes return @nodes end - def set_deployment_inventory(deployments) - @deployment_inventory = deployments - @deployment_lookup = {} + def set_replicaset_inventory(replicasets) + @replicaset_inventory = replicasets end def get_workload_names @@ -51,7 +52,12 @@ def get_workload_names end def build_pod_uid_lookup + if @pod_inventory.nil? || @pod_inventory['items'].nil? || @pod_inventory['items'].empty? || @pod_inventory['items'].size == 0 + @log.info "Not Clearing pod_uid_lookup and workload_container_count since pod inventory is nil" + return + end @workload_container_count = {} + @pod_uid_lookup = {} @pod_inventory['items'].each do |pod| begin namespace = pod['metadata']['namespace'] @@ -92,7 +98,7 @@ def build_pod_uid_lookup end end rescue => e - @log.info "Error in build_pod_uid_lookup #{pod} #{e.message}" + @log.info "Error in build_pod_uid_lookup for POD: #{pod_name} #{e.message} #{e.backtrace}" end end end @@ -105,19 +111,7 @@ def get_workload_container_count return @workload_container_count end - private def get_workload_name(pod) - - if @deployment_lookup.empty? 
- @deployment_inventory['items'].each do |deployment| - match_labels = deployment['spec']['selector']['matchLabels'].to_h - namespace = deployment['metadata']['namespace'] - match_labels.each{|k,v| - @deployment_lookup["#{namespace}-#{k}=#{v}"] = "#{deployment['metadata']['namespace']}~~#{deployment['metadata']['name']}" - } - end - end - begin has_owner = !pod['metadata']['ownerReferences'].nil? owner_kind = '' @@ -129,7 +123,6 @@ def get_workload_name(pod) controller_name = pod['metadata']['name'] end namespace = pod['metadata']['namespace'] - workload_name = '' if owner_kind.nil? owner_kind = 'Pod' @@ -139,41 +132,22 @@ def get_workload_name(pod) # we are excluding jobs return nil when 'replicaset' - # get the labels, and see if there is a match. If there is, it is the deployment. If not, use replica set name/controller name - labels = pod['metadata']['labels'].to_h - labels.each {|k,v| - lookup_key = "#{namespace}-#{k}=#{v}" - if @deployment_lookup.key?(lookup_key) - workload_name = @deployment_lookup[lookup_key] - break - end - } - if workload_name.empty? - workload_name = "#{namespace}~~#{controller_name}" - end + #TODO: + workload_name = get_replica_set_owner_ref(controller_name) + workload_name = "#{namespace}~~#{workload_name}" when 'daemonset' workload_name = "#{namespace}~~#{controller_name}" else - workload_name = "#{namespace}~~#{pod['metadata']['name']}" + workload_name = "#{namespace}~~#{controller_name}" end return workload_name rescue => e - @log.info "Error in get_workload_name(pod) #{e.message}" + @log.info "Error in get_workload_name(pod) #{e.message} #{e.backtrace}" return nil end end def get_workload_kind(pod) - if @deployment_lookup.empty? 
- @deployment_inventory['items'].each do |deployment| - match_labels = deployment['spec']['selector']['matchLabels'].to_h - namespace = deployment['metadata']['namespace'] - match_labels.each{|k,v| - @deployment_lookup["#{namespace}-#{k}=#{v}"] = "#{deployment['metadata']['namespace']}~~#{deployment['metadata']['name']}" - } - end - end - begin has_owner = !pod['metadata']['ownerReferences'].nil? owner_kind = '' @@ -193,6 +167,30 @@ def get_workload_kind(pod) end end + private + def get_replica_set_owner_ref(controller_name) + if @workload_name_cache.key?(controller_name) + return @workload_name_cache[controller_name] + end + begin + owner_ref = controller_name + @replicaset_inventory['items'].each{|rs| + rs_name = rs['metadata']['name'] + if controller_name.casecmp(rs_name) == 0 + if !rs['metadata']['ownerReferences'].nil? + owner_ref = rs['metadata']['ownerReferences'][0]['name'] if rs['metadata']['ownerReferences'][0]['name'] + end + break + end + } + @workload_name_cache[controller_name] = owner_ref + return owner_ref + rescue => e + @log.info "Error in get_replica_set_owner_ref(controller_name) #{e.message}" + return controller_name + end + end + def get_node_capacity(node_name, type) if node_name.nil? 
#unscheduled pods will not have a node name return -1 diff --git a/source/code/plugin/health/health_missing_signal_generator.rb b/source/code/plugin/health/health_missing_signal_generator.rb index 1827a0190..84af81ea7 100644 --- a/source/code/plugin/health/health_missing_signal_generator.rb +++ b/source/code/plugin/health/health_missing_signal_generator.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require_relative 'health_model_constants' require_relative 'health_monitor_record' diff --git a/source/code/plugin/health/health_model_buffer.rb b/source/code/plugin/health/health_model_buffer.rb index 1ccfe7349..1c3ec3332 100644 --- a/source/code/plugin/health/health_model_buffer.rb +++ b/source/code/plugin/health/health_model_buffer.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + module HealthModel =begin diff --git a/source/code/plugin/health/health_model_builder.rb b/source/code/plugin/health/health_model_builder.rb index 13813c8d9..43ed30d05 100644 --- a/source/code/plugin/health/health_model_builder.rb +++ b/source/code/plugin/health/health_model_builder.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require 'time' module HealthModel diff --git a/source/code/plugin/health/health_model_constants.rb b/source/code/plugin/health/health_model_constants.rb index 0922c7ff2..c74f86f4d 100644 --- a/source/code/plugin/health/health_model_constants.rb +++ b/source/code/plugin/health/health_model_constants.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true module HealthModel class MonitorState CRITICAL = "fail" diff --git a/source/code/plugin/health/health_model_definition_parser.rb b/source/code/plugin/health/health_model_definition_parser.rb index f6c7a781d..907bc1fd1 100644 --- a/source/code/plugin/health/health_model_definition_parser.rb +++ b/source/code/plugin/health/health_model_definition_parser.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true =begin Class to parse the health model definition. 
The definition expresses the relationship between monitors, how to roll up to an aggregate monitor, and what labels to "pass on" to the parent monitor diff --git a/source/code/plugin/health/health_monitor_helpers.rb b/source/code/plugin/health/health_monitor_helpers.rb index f784ae76e..74aa35af0 100644 --- a/source/code/plugin/health/health_monitor_helpers.rb +++ b/source/code/plugin/health/health_monitor_helpers.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require 'logger' require 'digest' require_relative 'health_model_constants' diff --git a/source/code/plugin/health/health_monitor_optimizer.rb b/source/code/plugin/health/health_monitor_optimizer.rb index b33c8a986..a63d59abf 100644 --- a/source/code/plugin/health/health_monitor_optimizer.rb +++ b/source/code/plugin/health/health_monitor_optimizer.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true module HealthModel class HealthMonitorOptimizer #ctor diff --git a/source/code/plugin/health/health_monitor_provider.rb b/source/code/plugin/health/health_monitor_provider.rb index e75824268..b36c46370 100644 --- a/source/code/plugin/health/health_monitor_provider.rb +++ b/source/code/plugin/health/health_monitor_provider.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require_relative 'health_model_constants' module HealthModel diff --git a/source/code/plugin/health/health_monitor_record.rb b/source/code/plugin/health/health_monitor_record.rb index 873736c3a..7df84ff53 100644 --- a/source/code/plugin/health/health_monitor_record.rb +++ b/source/code/plugin/health/health_monitor_record.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true HealthMonitorRecord = Struct.new( :monitor_id, :monitor_instance_id, diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index 8e2294cc9..16f8bedc4 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true 
require_relative 'health_model_constants' module HealthModel diff --git a/source/code/plugin/health/health_monitor_telemetry.rb b/source/code/plugin/health/health_monitor_telemetry.rb index 4e80a5145..1227e1f83 100644 --- a/source/code/plugin/health/health_monitor_telemetry.rb +++ b/source/code/plugin/health/health_monitor_telemetry.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require_relative 'health_model_constants' require 'socket' if Socket.gethostname.start_with?('omsagent-rs') diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index e21fdc83d..0d297d215 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require 'logger' require 'digest' require_relative 'health_model_constants' @@ -73,59 +74,17 @@ def is_cluster_health_model_enabled end end - def get_pods_ready_hash(pod_inventory, deployment_inventory) + def get_pods_ready_hash(resources) pods_ready_percentage_hash = {} - deployment_lookup = {} - deployment_inventory['items'].each do |deployment| - match_labels = deployment['spec']['selector']['matchLabels'].to_h - namespace = deployment['metadata']['namespace'] - match_labels.each{|k,v| - deployment_lookup["#{namespace}-#{k}=#{v}"] = "#{deployment['metadata']['namespace']}~~#{deployment['metadata']['name']}" - } - end - pod_inventory['items'].each do |pod| + resources.pod_inventory['items'].each do |pod| begin - has_owner = !pod['metadata']['ownerReferences'].nil? 
- owner_kind = '' - if has_owner - owner_kind = pod['metadata']['ownerReferences'][0]['kind'] - controller_name = pod['metadata']['ownerReferences'][0]['name'] - else - owner_kind = pod['kind'] - controller_name = pod['metadata']['name'] - #log.info "#{JSON.pretty_generate(pod)}" - end - + workload_name = resources.get_workload_name(pod) namespace = pod['metadata']['namespace'] status = pod['status']['phase'] - - workload_name = '' - if owner_kind.nil? - owner_kind = 'Pod' - end - case owner_kind.downcase - when 'job' - # we are excluding jobs + owner_kind = resources.get_workload_kind(pod) + if owner_kind.casecmp('job') == 0 next - when 'replicaset' - # get the labels, and see if there is a match. If there is, it is the deployment. If not, use replica set name/controller name - labels = pod['metadata']['labels'].to_h - labels.each {|k,v| - lookup_key = "#{namespace}-#{k}=#{v}" - if deployment_lookup.key?(lookup_key) - workload_name = deployment_lookup[lookup_key] - break - end - } - if workload_name.empty? 
- workload_name = "#{namespace}~~#{controller_name}" - end - when 'daemonset' - workload_name = "#{namespace}~~#{controller_name}" - else - workload_name = "#{namespace}~~#{pod['metadata']['name']}" end - if pods_ready_percentage_hash.key?(workload_name) total_pods = pods_ready_percentage_hash[workload_name]['totalPods'] pods_ready = pods_ready_percentage_hash[workload_name]['podsReady'] @@ -141,7 +100,7 @@ def get_pods_ready_hash(pod_inventory, deployment_inventory) pods_ready_percentage_hash[workload_name] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace, 'workload_name' => workload_name, 'kind' => owner_kind} rescue => e - log.info "Error when processing pod #{pod['metadata']['name']} #{e.message}" + @log.info "Error when processing pod #{pod['metadata']['name']} #{e.message}" end end return pods_ready_percentage_hash @@ -152,30 +111,30 @@ def get_node_state_from_node_conditions(monitor_config, node_conditions) failtypes = ['outofdisk', 'networkunavailable'].to_set #default fail types if !monitor_config.nil? && !monitor_config["NodeConditionTypesForFailedState"].nil? failtypes = monitor_config["NodeConditionTypesForFailedState"] - if !failtypes.nil? - failtypes = failtypes.split(',').map{|x| x.downcase}.map{|x| x.gsub(" ","")}.to_set - end + if !failtypes.nil? + failtypes = failtypes.split(',').map{|x| x.downcase}.map{|x| x.gsub(" ","")}.to_set + end end - log = get_log_handle - #log.info "Fail Types #{failtypes.inspect}" + log = get_log_handle + #log.info "Fail Types #{failtypes.inspect}" node_conditions.each do |condition| type = condition['type'] status = condition['status'] #for each condition in the configuration, check if the type is not false. 
If yes, update state to fail if (failtypes.include?(type.downcase) && (status == 'True' || status == 'Unknown')) - return "fail" + return HealthMonitorStates::FAIL elsif ((type == "DiskPressure" || type == "MemoryPressure" || type == "PIDPressure") && (status == 'True' || status == 'Unknown')) - return "warn" + return HealthMonitorStates::WARNING elsif type == "Ready" && status == 'True' pass = true end end if pass - return "pass" + return HealthMonitorStates::PASS else - return "fail" + return HealthMonitorStates::FAIL end end diff --git a/source/code/plugin/health/health_signal_reducer.rb b/source/code/plugin/health/health_signal_reducer.rb index f92f24ac3..4708c4ee5 100644 --- a/source/code/plugin/health/health_signal_reducer.rb +++ b/source/code/plugin/health/health_signal_reducer.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require_relative 'health_model_constants' module HealthModel diff --git a/source/code/plugin/health/monitor_factory.rb b/source/code/plugin/health/monitor_factory.rb index 5f2c3945c..1e4f6f5b8 100644 --- a/source/code/plugin/health/monitor_factory.rb +++ b/source/code/plugin/health/monitor_factory.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require_relative 'aggregate_monitor' require_relative 'unit_monitor' diff --git a/source/code/plugin/health/parent_monitor_provider.rb b/source/code/plugin/health/parent_monitor_provider.rb index 4ab6e6297..e5766ea1b 100644 --- a/source/code/plugin/health/parent_monitor_provider.rb +++ b/source/code/plugin/health/parent_monitor_provider.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require_relative 'health_model_constants' module HealthModel class ParentMonitorProvider diff --git a/source/code/plugin/health/unit_monitor.rb b/source/code/plugin/health/unit_monitor.rb index 9af599321..6454007b6 100644 --- a/source/code/plugin/health/unit_monitor.rb +++ b/source/code/plugin/health/unit_monitor.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require_relative 'health_model_constants' require 
'json' diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 9a1b8f9a9..affbdd275 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -86,11 +86,11 @@ def enumerate node_inventory = JSON.parse(node_inventory_response.body) pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods") pod_inventory = JSON.parse(pod_inventory_response.body) - deployment_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("deployments", api_version: "extensions/v1beta1").body) + replicaset_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("replicasets", api_version: "extensions/v1beta1").body) @resources.node_inventory = node_inventory @resources.pod_inventory = pod_inventory - @resources.set_deployment_inventory(deployment_inventory) + @resources.set_replicaset_inventory(replicaset_inventory) @resources.build_pod_uid_lookup if node_inventory_response.code.to_i != 200 @@ -106,7 +106,7 @@ def enumerate health_monitor_records.push(record) if record record = process_memory_oversubscribed_monitor(pod_inventory, node_inventory) health_monitor_records.push(record) if record - pods_ready_hash = HealthMonitorUtils.get_pods_ready_hash(pod_inventory, deployment_inventory) + pods_ready_hash = HealthMonitorUtils.get_pods_ready_hash(@resources) system_pods = pods_ready_hash.select{|k,v| v['namespace'] == 'kube-system'} workload_pods = pods_ready_hash.select{|k,v| v['namespace'] != 'kube-system'} @@ -121,7 +121,7 @@ def enumerate health_monitor_records.push(record) if record end else - hmlog.info "POD INVENTORY IS NIL" + @@hmlog.info "POD INVENTORY IS NIL" end if !node_inventory.nil? 
@@ -130,7 +130,7 @@ def enumerate health_monitor_records.push(record) if record end else - hmlog.info "NODE INVENTORY IS NIL" + @@hmlog.info "NODE INVENTORY IS NIL" end health_monitor_records.each do |record| @@ -260,14 +260,14 @@ def process_node_condition_monitor(node_inventory) node_inventory['items'].each do |node| node_name = node['metadata']['name'] conditions = node['status']['conditions'] - state = HealthMonitorUtils.get_node_state_from_node_conditions(monitor_config, conditions) + node_state = HealthMonitorUtils.get_node_state_from_node_conditions(monitor_config, conditions) details = {} conditions.each do |condition| - state = !(condition['status'].downcase == 'true' && condition['type'].downcase != 'ready') ? HealthMonitorStates::PASS : HealthMonitorStates::FAIL - details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message'], "State" => state} + condition_state = !(condition['status'].downcase == 'true' && condition['type'].downcase != 'ready') ? 
HealthMonitorStates::PASS : HealthMonitorStates::FAIL + details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message'], "State" => condition_state} #@@hmlog.info "Node Condition details: #{JSON.pretty_generate(details)}" end - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} + health_monitor_record = {"timestamp" => timestamp, "state" => node_state, "details" => details} monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@cluster_id, node_name]) health_record = {} time_now = Time.now.utc.iso8601 @@ -291,11 +291,11 @@ def initialize_inventory node_inventory = JSON.parse(node_inventory_response.body) pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods") pod_inventory = JSON.parse(pod_inventory_response.body) - deployment_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("deployments", api_version: "extensions/v1beta1").body) + replicaset_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("replicasets", api_version: "extensions/v1beta1").body) @resources.node_inventory = node_inventory @resources.pod_inventory = pod_inventory - @resources.set_deployment_inventory(deployment_inventory) + @resources.set_replicaset_inventory(replicaset_inventory) @resources.build_pod_uid_lookup end diff --git a/source/code/plugin/out_health_forward.rb b/source/code/plugin/out_health_forward.rb index 18664a22a..6fcfe368b 100644 --- a/source/code/plugin/out_health_forward.rb +++ b/source/code/plugin/out_health_forward.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true # # Fluentd # diff --git a/test/code/plugin/health/health_container_cpu_memory_aggregator_spec.rb b/test/code/plugin/health/health_container_cpu_memory_aggregator_spec.rb index 074878fe2..6972916bf 100644 --- a/test/code/plugin/health/health_container_cpu_memory_aggregator_spec.rb +++ b/test/code/plugin/health/health_container_cpu_memory_aggregator_spec.rb @@ -25,7 +25,7 @@ 
resources.pod_inventory = pods resources.node_inventory = nodes - resources.set_deployment_inventory(deployments) + resources.set_replicaset_inventory(deployments) resources.build_pod_uid_lookup #call this in in_kube_health every min cluster_labels = { @@ -60,7 +60,7 @@ resources.pod_inventory = pods resources.node_inventory = nodes - resources.set_deployment_inventory(deployments) + resources.set_replicaset_inventory(deployments) resources.build_pod_uid_lookup #call this in in_kube_health every min cluster_labels = { @@ -113,7 +113,7 @@ resources.pod_inventory = pods resources.node_inventory = nodes - resources.set_deployment_inventory(deployments) + resources.set_replicaset_inventory(deployments) resources.build_pod_uid_lookup #call this in in_kube_health every min cluster_labels = { @@ -163,7 +163,7 @@ resources.pod_inventory = pods resources.node_inventory = nodes - resources.set_deployment_inventory(deployments) + resources.set_replicaset_inventory(deployments) resources.build_pod_uid_lookup #call this in in_kube_health every min cluster_labels = { diff --git a/test/code/plugin/health/health_kubernetes_resource_spec.rb b/test/code/plugin/health/health_kubernetes_resource_spec.rb index dbeec4858..f4daedace 100644 --- a/test/code/plugin/health/health_kubernetes_resource_spec.rb +++ b/test/code/plugin/health/health_kubernetes_resource_spec.rb @@ -207,7 +207,7 @@ resources = HealthKubernetesResources.instance resources.node_inventory = nodes resources.pod_inventory = pods - resources.set_deployment_inventory(deployments) + resources.set_replicaset_inventory(deployments) #act parsed_nodes = resources.get_nodes parsed_workloads = resources.get_workload_names @@ -217,28 +217,6 @@ assert_equal parsed_workloads.size, 3 assert_equal parsed_nodes, ['aks-nodepool1-19574989-0', 'aks-nodepool1-19574989-1'] - parsed_workloads.sort.must_equal ['default~~diliprdeploymentnodeapps', 'default~~rss-site', 'kube-system~~kube-proxy'].sort + parsed_workloads.sort.must_equal 
['default~~diliprdeploymentnodeapps-c4fdfb446', 'default~~rss-site', 'kube-system~~kube-proxy'].sort end - - # it 'builds the pod_uid lookup correctly' do - # #arrange - # f = File.read('C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json') - # nodes = JSON.parse(f) - # f = File.read('C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json') - # pods = JSON.parse(f) - # f = File.read('C:/Users/dilipr/desktop/health/container_cpu_memory/deployments.json') - # deployments = JSON.parse(f) - - # resources = HealthKubernetesResources.instance - - # resources.node_inventory = nodes - # resources.pod_inventory = pods - # resources.set_deployment_inventory(deployments) #resets deployment_lookup -- this was causing Unit test failures - - # resources.build_pod_uid_lookup - - # resources.pod_uid_lookup - # resources.workload_container_count - - # end end \ No newline at end of file diff --git a/test/code/plugin/health/health_model_builder_test.rb b/test/code/plugin/health/health_model_builder_test.rb index a7c5e0927..3015ae55f 100644 --- a/test/code/plugin/health/health_model_builder_test.rb +++ b/test/code/plugin/health/health_model_builder_test.rb @@ -7,489 +7,510 @@ class FilterHealthModelBuilderTest < Test::Unit::TestCase include HealthModel - def test_event_stream - #setup - health_definition_path = File.join(__dir__, '../../../../installer/conf/health_model_definition.json') - health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) - monitor_factory = MonitorFactory.new - hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) - # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. 
For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side - state_finalizers = [AggregateMonitorStateFinalizer.new] - monitor_set = MonitorSet.new - model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) - - nodes_file_map = { - #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json", - "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - } - - pods_file_map = { - #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json", - "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - } - - cluster_labels = { - 'container.azm.ms/cluster-region' => 'eastus', - 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', - 'container.azm.ms/cluster-resource-group' => 
'dilipr-health-test', - 'container.azm.ms/cluster-name' => 'dilipr-health-test' - } - - cluster_id = 'fake_cluster_id' - - #test - state = HealthMonitorState.new() - generator = HealthMissingSignalGenerator.new - - for scenario in ["first", "second", "third"] - mock_data_path = File.join(__dir__, "../../../../health_records/#{scenario}_daemon_set_signals.json") - file = File.read(mock_data_path) - records = JSON.parse(file) - - node_inventory = JSON.parse(File.read(nodes_file_map[scenario])) - pod_inventory = JSON.parse(File.read(pods_file_map[scenario])) - deployment_inventory = JSON.parse(File.read(File.join(__dir__, "../../../../inventory/deployments.json"))) - resources = HealthKubernetesResources.instance - resources.node_inventory = node_inventory - resources.pod_inventory = pod_inventory - resources.set_deployment_inventory(deployment_inventory) - - workload_names = resources.get_workload_names - provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) - - health_monitor_records = [] - records.each do |record| - monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] - monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] - health_monitor_record = HealthMonitorRecord.new( - record[HealthMonitorRecordFields::MONITOR_ID], - record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], - record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], - record[HealthMonitorRecordFields::DETAILS]["state"], - provider.get_labels(record), - provider.get_config(monitor_id), - record[HealthMonitorRecordFields::DETAILS] - ) - - state.update_state(health_monitor_record, - provider.get_config(health_monitor_record.monitor_id) - ) - - # get the health state based on the monitor's operational state - # update state calls updates the state of the monitor based on configuration and history of the the monitor records - health_monitor_record.state = 
state.get_state(monitor_instance_id).new_state - health_monitor_records.push(health_monitor_record) - instance_state = state.get_state(monitor_instance_id) - #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" - end - - - #handle kube api down - kube_api_down_handler = HealthKubeApiDownHandler.new - health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records) - - # Dedupe daemonset signals - # Remove unit monitor signals for “gone” objects - reducer = HealthSignalReducer.new() - reduced_records = reducer.reduce_signals(health_monitor_records, resources) - - cluster_id = 'fake_cluster_id' - - #get the list of 'none' and 'unknown' signals - missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider) - #update state for missing signals - missing_signals.each{|signal| - state.update_state(signal, - provider.get_config(signal.monitor_id) - ) - } - generator.update_last_received_records(reduced_records) - reduced_records.push(*missing_signals) - - # build the health model - all_records = reduced_records - model_builder.process_records(all_records) - all_monitors = model_builder.finalize_model - - # update the state for aggregate monitors (unit monitors are updated above) - all_monitors.each{|monitor_instance_id, monitor| - if monitor.is_aggregate_monitor - state.update_state(monitor, - provider.get_config(monitor.monitor_id) - ) - end - - instance_state = state.get_state(monitor_instance_id) - #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" - should_send = instance_state.should_send - - # always send cluster monitor as a heartbeat - if !should_send && monitor_instance_id != MonitorId::CLUSTER - all_monitors.delete(monitor_instance_id) - end - } - - records_to_send = [] - all_monitors.keys.each{|key| - record = provider.get_record(all_monitors[key], state) - #puts 
"#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" - } - - if scenario == "first" - assert_equal 50, all_monitors.size - elsif scenario == "second" - assert_equal 34, all_monitors.size - elsif scenario == "third" - assert_equal 5, all_monitors.size - end - # for each key in monitor.keys, - # get the state from health_monitor_state - # generate the record to send - serializer = HealthStateSerializer.new(File.join(__dir__, '../../../../health_records\health_model_state.json')) - serializer.serialize(state) - - deserializer = HealthStateDeserializer.new(File.join(__dir__, '../../../../health_records\health_model_state.json')) - deserialized_state = deserializer.deserialize - - after_state = HealthMonitorState.new - after_state.initialize_state(deserialized_state) - end - end - - def test_event_stream_aks_engine - - #setup - health_definition_path = File.join(__dir__, '../../../../installer\conf\health_model_definition.json') - health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) - monitor_factory = MonitorFactory.new - hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) - state_finalizers = [AggregateMonitorStateFinalizer.new] - monitor_set = MonitorSet.new - model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) - - nodes_file_map = { - #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json", - #"first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"missing" => 
"C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - "aks-engine-1" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", - "aks-engine-2" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", - "aks-engine-3" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", - } - - pods_file_map = { - #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json", - #"first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - "aks-engine-1" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", - "aks-engine-2" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", - "aks-engine-3" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", - } - - cluster_labels = { - 'container.azm.ms/cluster-region' => 'eastus', - 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', - 'container.azm.ms/cluster-resource-group' => 'aks-engine-health', - 'container.azm.ms/cluster-name' => 'aks-engine-health' - } - - cluster_id = 'fake_cluster_id' - - #test - state = HealthMonitorState.new() - generator = HealthMissingSignalGenerator.new - - for scenario in 1..3 - 
mock_data_path = File.join(__dir__, "../../../../health_records/aks-engine/aks-engine-#{scenario}.json") - file = File.read(mock_data_path) - records = JSON.parse(file) - - node_inventory = JSON.parse(File.read(nodes_file_map["aks-engine-#{scenario}"])) - pod_inventory = JSON.parse(File.read(pods_file_map["aks-engine-#{scenario}"])) - deployment_inventory = JSON.parse(File.read(File.join(__dir__, "../../../../inventory/aks-engine/deployments.json"))) - resources = HealthKubernetesResources.instance - resources.node_inventory = node_inventory - resources.pod_inventory = pod_inventory - resources.deployment_inventory = deployment_inventory - - workload_names = resources.get_workload_names - provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) - - health_monitor_records = [] - records.each do |record| - monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] - monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] - health_monitor_record = HealthMonitorRecord.new( - record[HealthMonitorRecordFields::MONITOR_ID], - record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], - record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], - record[HealthMonitorRecordFields::DETAILS]["state"], - provider.get_labels(record), - provider.get_config(monitor_id), - record[HealthMonitorRecordFields::DETAILS] - ) - - state.update_state(health_monitor_record, - provider.get_config(health_monitor_record.monitor_id) - ) - - # get the health state based on the monitor's operational state - # update state calls updates the state of the monitor based on configuration and history of the the monitor records - health_monitor_record.state = state.get_state(monitor_instance_id).new_state - health_monitor_records.push(health_monitor_record) - instance_state = state.get_state(monitor_instance_id) - #puts "#{monitor_instance_id} #{instance_state.new_state} 
#{instance_state.old_state} #{instance_state.should_send}" - end - - - #handle kube api down - kube_api_down_handler = HealthKubeApiDownHandler.new - health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records) - - # Dedupe daemonset signals - # Remove unit monitor signals for “gone” objects - reducer = HealthSignalReducer.new() - reduced_records = reducer.reduce_signals(health_monitor_records, resources) - - cluster_id = 'fake_cluster_id' - - #get the list of 'none' and 'unknown' signals - missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider) - #update state for missing signals - missing_signals.each{|signal| - state.update_state(signal, - provider.get_config(signal.monitor_id) - ) - } - generator.update_last_received_records(reduced_records) - reduced_records.push(*missing_signals) - - # build the health model - all_records = reduced_records - model_builder.process_records(all_records) - all_monitors = model_builder.finalize_model - - # update the state for aggregate monitors (unit monitors are updated above) - all_monitors.each{|monitor_instance_id, monitor| - if monitor.is_aggregate_monitor - state.update_state(monitor, - provider.get_config(monitor.monitor_id) - ) - end - - instance_state = state.get_state(monitor_instance_id) - #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" - should_send = instance_state.should_send - - # always send cluster monitor as a heartbeat - if !should_send && monitor_instance_id != MonitorId::CLUSTER - all_monitors.delete(monitor_instance_id) - end - } - - records_to_send = [] - all_monitors.keys.each{|key| - record = provider.get_record(all_monitors[key], state) - #puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" - } - - if scenario == 1 - assert_equal 58, all_monitors.size - elsif scenario == 2 - assert_equal 37, all_monitors.size - elsif scenario == 3 - 
assert_equal 6, all_monitors.size - end - # for each key in monitor.keys, - # get the state from health_monitor_state - # generate the record to send - serializer = HealthStateSerializer.new(File.join(__dir__, '../../../../health_records\health_model_state_aks-engine.json')) - serializer.serialize(state) - - deserializer = HealthStateDeserializer.new(File.join(__dir__, '../../../../health_records\health_model_state_aks-engine.json')) - deserialized_state = deserializer.deserialize - - after_state = HealthMonitorState.new - after_state.initialize_state(deserialized_state) - end - end - - def test_container_memory_cpu_with_model - health_definition_path = File.join(__dir__, '../../../../installer/conf/health_model_definition.json') - health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) - monitor_factory = MonitorFactory.new - hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) - # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. 
For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side - state_finalizers = [AggregateMonitorStateFinalizer.new] - monitor_set = MonitorSet.new - model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) - - nodes_file_map = { - "first" => "C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json", - "second" => "C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json", - "third" => "C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json", - } - - pods_file_map = { - "first" => "C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json", - "second" => "C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json", - "third" => "C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json", - } - - cluster_labels = { - 'container.azm.ms/cluster-region' => 'eastus', - 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', - 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', - 'container.azm.ms/cluster-name' => 'dilipr-health-test' - } - - cluster_id = 'fake_cluster_id' - - #test - state = HealthMonitorState.new() - generator = HealthMissingSignalGenerator.new - - mock_data_path = "C:/Users/dilipr/desktop/health/container_cpu_memory/daemonset.json" - file = File.read(mock_data_path) - records = JSON.parse(file) - - node_inventory = JSON.parse(File.read("C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json")) - pod_inventory = JSON.parse(File.read("C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json")) - deployment_inventory = JSON.parse(File.read("C:/Users/dilipr/desktop/health/container_cpu_memory/deployments.json")) + # def test_event_stream + # #setup + # health_definition_path = File.join(__dir__, '../../../../installer/conf/health_model_definition.json') + # health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) + # monitor_factory = 
MonitorFactory.new + # hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) + # # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side + # state_finalizers = [AggregateMonitorStateFinalizer.new] + # monitor_set = MonitorSet.new + # model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) + + # nodes_file_map = { + # #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json", + # "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # } + + # pods_file_map = { + # #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json", + # "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # #"kube_api_down" => 
"C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # } + + # cluster_labels = { + # 'container.azm.ms/cluster-region' => 'eastus', + # 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + # 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', + # 'container.azm.ms/cluster-name' => 'dilipr-health-test' + # } + + # cluster_id = 'fake_cluster_id' + + # #test + # state = HealthMonitorState.new() + # generator = HealthMissingSignalGenerator.new + + # for scenario in ["first", "second", "third"] + # mock_data_path = File.join(__dir__, "../../../../health_records/#{scenario}_daemon_set_signals.json") + # file = File.read(mock_data_path) + # records = JSON.parse(file) + + # node_inventory = JSON.parse(File.read(nodes_file_map[scenario])) + # pod_inventory = JSON.parse(File.read(pods_file_map[scenario])) + # deployment_inventory = JSON.parse(File.read(File.join(__dir__, "../../../../inventory/deployments.json"))) + # resources = HealthKubernetesResources.instance + # resources.node_inventory = node_inventory + # resources.pod_inventory = pod_inventory + # resources.set_replicaset_inventory(deployment_inventory) + + # workload_names = resources.get_workload_names + # provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) + + # health_monitor_records = [] + # records.each do |record| + # monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] + # monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] + # health_monitor_record = HealthMonitorRecord.new( + # record[HealthMonitorRecordFields::MONITOR_ID], + # record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + # record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], + # record[HealthMonitorRecordFields::DETAILS]["state"], + # provider.get_labels(record), + # provider.get_config(monitor_id), + # 
record[HealthMonitorRecordFields::DETAILS] + # ) + + # state.update_state(health_monitor_record, + # provider.get_config(health_monitor_record.monitor_id) + # ) + + # # get the health state based on the monitor's operational state + # # update state calls updates the state of the monitor based on configuration and history of the the monitor records + # health_monitor_record.state = state.get_state(monitor_instance_id).new_state + # health_monitor_records.push(health_monitor_record) + # instance_state = state.get_state(monitor_instance_id) + # #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + # end + + + # #handle kube api down + # kube_api_down_handler = HealthKubeApiDownHandler.new + # health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records) + + # # Dedupe daemonset signals + # # Remove unit monitor signals for “gone” objects + # reducer = HealthSignalReducer.new() + # reduced_records = reducer.reduce_signals(health_monitor_records, resources) + + # cluster_id = 'fake_cluster_id' + + # #get the list of 'none' and 'unknown' signals + # missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider) + # #update state for missing signals + # missing_signals.each{|signal| + # state.update_state(signal, + # provider.get_config(signal.monitor_id) + # ) + # } + # generator.update_last_received_records(reduced_records) + # reduced_records.push(*missing_signals) + + # # build the health model + # all_records = reduced_records + # model_builder.process_records(all_records) + # all_monitors = model_builder.finalize_model + + # # update the state for aggregate monitors (unit monitors are updated above) + # all_monitors.each{|monitor_instance_id, monitor| + # if monitor.is_aggregate_monitor + # state.update_state(monitor, + # provider.get_config(monitor.monitor_id) + # ) + # end + + # instance_state = state.get_state(monitor_instance_id) + # 
#puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + # should_send = instance_state.should_send + + # # always send cluster monitor as a heartbeat + # if !should_send && monitor_instance_id != MonitorId::CLUSTER + # all_monitors.delete(monitor_instance_id) + # end + # } + + # records_to_send = [] + # all_monitors.keys.each{|key| + # record = provider.get_record(all_monitors[key], state) + # #puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" + # } + + # if scenario == "first" + # assert_equal 50, all_monitors.size + # elsif scenario == "second" + # assert_equal 34, all_monitors.size + # elsif scenario == "third" + # assert_equal 5, all_monitors.size + # end + # # for each key in monitor.keys, + # # get the state from health_monitor_state + # # generate the record to send + # serializer = HealthStateSerializer.new(File.join(__dir__, '../../../../health_records\health_model_state.json')) + # serializer.serialize(state) + + # deserializer = HealthStateDeserializer.new(File.join(__dir__, '../../../../health_records\health_model_state.json')) + # deserialized_state = deserializer.deserialize + + # after_state = HealthMonitorState.new + # after_state.initialize_state(deserialized_state) + # end + # end + + # def test_event_stream_aks_engine + + # #setup + # health_definition_path = File.join(__dir__, '../../../../installer\conf\health_model_definition.json') + # health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) + # monitor_factory = MonitorFactory.new + # hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) + # state_finalizers = [AggregateMonitorStateFinalizer.new] + # monitor_set = MonitorSet.new + # model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) + + # nodes_file_map = { + # #"extra" => 
"C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json", + # #"first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # #"second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # #"third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # "aks-engine-1" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", + # "aks-engine-2" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", + # "aks-engine-3" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", + # } + + # pods_file_map = { + # #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json", + # #"first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # #"second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # #"third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # "aks-engine-1" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", + # "aks-engine-2" => 
"C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", + # "aks-engine-3" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", + # } + + # cluster_labels = { + # 'container.azm.ms/cluster-region' => 'eastus', + # 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + # 'container.azm.ms/cluster-resource-group' => 'aks-engine-health', + # 'container.azm.ms/cluster-name' => 'aks-engine-health' + # } + + # cluster_id = 'fake_cluster_id' + + # #test + # state = HealthMonitorState.new() + # generator = HealthMissingSignalGenerator.new + + # for scenario in 1..3 + # mock_data_path = File.join(__dir__, "../../../../health_records/aks-engine/aks-engine-#{scenario}.json") + # file = File.read(mock_data_path) + # records = JSON.parse(file) + + # node_inventory = JSON.parse(File.read(nodes_file_map["aks-engine-#{scenario}"])) + # pod_inventory = JSON.parse(File.read(pods_file_map["aks-engine-#{scenario}"])) + # deployment_inventory = JSON.parse(File.read(File.join(__dir__, "../../../../inventory/aks-engine/deployments.json"))) + # resources = HealthKubernetesResources.instance + # resources.node_inventory = node_inventory + # resources.pod_inventory = pod_inventory + # resources.deployment_inventory = deployment_inventory + + # workload_names = resources.get_workload_names + # provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) + + # health_monitor_records = [] + # records.each do |record| + # monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] + # monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] + # health_monitor_record = HealthMonitorRecord.new( + # record[HealthMonitorRecordFields::MONITOR_ID], + # record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + # record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], + # 
record[HealthMonitorRecordFields::DETAILS]["state"], + # provider.get_labels(record), + # provider.get_config(monitor_id), + # record[HealthMonitorRecordFields::DETAILS] + # ) + + # state.update_state(health_monitor_record, + # provider.get_config(health_monitor_record.monitor_id) + # ) + + # # get the health state based on the monitor's operational state + # # update state calls updates the state of the monitor based on configuration and history of the the monitor records + # health_monitor_record.state = state.get_state(monitor_instance_id).new_state + # health_monitor_records.push(health_monitor_record) + # instance_state = state.get_state(monitor_instance_id) + # #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + # end + + + # #handle kube api down + # kube_api_down_handler = HealthKubeApiDownHandler.new + # health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records) + + # # Dedupe daemonset signals + # # Remove unit monitor signals for “gone” objects + # reducer = HealthSignalReducer.new() + # reduced_records = reducer.reduce_signals(health_monitor_records, resources) + + # cluster_id = 'fake_cluster_id' + + # #get the list of 'none' and 'unknown' signals + # missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider) + # #update state for missing signals + # missing_signals.each{|signal| + # state.update_state(signal, + # provider.get_config(signal.monitor_id) + # ) + # } + # generator.update_last_received_records(reduced_records) + # reduced_records.push(*missing_signals) + + # # build the health model + # all_records = reduced_records + # model_builder.process_records(all_records) + # all_monitors = model_builder.finalize_model + + # # update the state for aggregate monitors (unit monitors are updated above) + # all_monitors.each{|monitor_instance_id, monitor| + # if monitor.is_aggregate_monitor + # 
state.update_state(monitor, + # provider.get_config(monitor.monitor_id) + # ) + # end + + # instance_state = state.get_state(monitor_instance_id) + # #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + # should_send = instance_state.should_send + + # # always send cluster monitor as a heartbeat + # if !should_send && monitor_instance_id != MonitorId::CLUSTER + # all_monitors.delete(monitor_instance_id) + # end + # } + + # records_to_send = [] + # all_monitors.keys.each{|key| + # record = provider.get_record(all_monitors[key], state) + # #puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" + # } + + # if scenario == 1 + # assert_equal 58, all_monitors.size + # elsif scenario == 2 + # assert_equal 37, all_monitors.size + # elsif scenario == 3 + # assert_equal 6, all_monitors.size + # end + # # for each key in monitor.keys, + # # get the state from health_monitor_state + # # generate the record to send + # serializer = HealthStateSerializer.new(File.join(__dir__, '../../../../health_records\health_model_state_aks-engine.json')) + # serializer.serialize(state) + + # deserializer = HealthStateDeserializer.new(File.join(__dir__, '../../../../health_records\health_model_state_aks-engine.json')) + # deserialized_state = deserializer.deserialize + + # after_state = HealthMonitorState.new + # after_state.initialize_state(deserialized_state) + # end + # end + + # def test_container_memory_cpu_with_model + # health_definition_path = File.join(__dir__, '../../../../installer/conf/health_model_definition.json') + # health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) + # monitor_factory = MonitorFactory.new + # hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) + # # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. 
For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side + # state_finalizers = [AggregateMonitorStateFinalizer.new] + # monitor_set = MonitorSet.new + # model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) + + # nodes_file_map = { + # "first" => "C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json", + # "second" => "C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json", + # "third" => "C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json", + # } + + # pods_file_map = { + # "first" => "C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json", + # "second" => "C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json", + # "third" => "C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json", + # } + + # cluster_labels = { + # 'container.azm.ms/cluster-region' => 'eastus', + # 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + # 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', + # 'container.azm.ms/cluster-name' => 'dilipr-health-test' + # } + + # cluster_id = 'fake_cluster_id' + + # #test + # state = HealthMonitorState.new() + # generator = HealthMissingSignalGenerator.new + + # mock_data_path = "C:/Users/dilipr/desktop/health/container_cpu_memory/daemonset.json" + # file = File.read(mock_data_path) + # records = JSON.parse(file) + + # node_inventory = JSON.parse(File.read("C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json")) + # pod_inventory = JSON.parse(File.read("C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json")) + # deployment_inventory = JSON.parse(File.read("C:/Users/dilipr/desktop/health/container_cpu_memory/deployments.json")) + # resources = HealthKubernetesResources.instance + # resources.node_inventory = node_inventory + # resources.pod_inventory = pod_inventory + # resources.set_replicaset_inventory(deployment_inventory) + + # workload_names = 
resources.get_workload_names + # provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) + + + # #container memory cpu records + # file = File.read('C:/Users/dilipr/desktop/health/container_cpu_memory/cadvisor_perf.json') + # cadvisor_records = JSON.parse(file) + # cadvisor_records = cadvisor_records.select{|record| record['DataItems'][0]['ObjectName'] == 'K8SContainer'} + # formatted_records = [] + # formatter = HealthContainerCpuMemoryRecordFormatter.new + # cadvisor_records.each{|record| + # formatted_record = formatter.get_record_from_cadvisor_record(record) + # formatted_records.push(formatted_record) + # } + + # resources.build_pod_uid_lookup #call this in in_kube_health every min + + # cluster_labels = { + # 'container.azm.ms/cluster-region' => 'eastus', + # 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + # 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', + # 'container.azm.ms/cluster-name' => 'dilipr-health-test' + # } + + # cluster_id = 'fake_cluster_id' + + # aggregator = HealthContainerCpuMemoryAggregator.new(resources, provider) + # deduped_records = aggregator.dedupe_records(formatted_records) + # aggregator.aggregate(deduped_records) + # aggregator.compute_state + # container_cpu_memory_records = aggregator.get_records + + # records.concat(container_cpu_memory_records) + + # health_monitor_records = [] + # records.each do |record| + # monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] + # monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] + # health_monitor_record = HealthMonitorRecord.new( + # record[HealthMonitorRecordFields::MONITOR_ID], + # record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + # record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], + # record[HealthMonitorRecordFields::DETAILS]["state"], + # provider.get_labels(record), + # 
provider.get_config(monitor_id), + # record[HealthMonitorRecordFields::DETAILS] + # ) + + # state.update_state(health_monitor_record, + # provider.get_config(health_monitor_record.monitor_id) + # ) + + # # get the health state based on the monitor's operational state + # # update state calls updates the state of the monitor based on configuration and history of the the monitor records + # health_monitor_record.state = state.get_state(monitor_instance_id).new_state + # health_monitor_records.push(health_monitor_record) + # #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + # end + + # #handle kube api down + # kube_api_down_handler = HealthKubeApiDownHandler.new + # health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records) + + # # Dedupe daemonset signals + # # Remove unit monitor signals for “gone” objects + # reducer = HealthSignalReducer.new() + # reduced_records = reducer.reduce_signals(health_monitor_records, resources) + + # cluster_id = 'fake_cluster_id' + + # #get the list of 'none' and 'unknown' signals + # missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider) + # #update state for missing signals + # missing_signals.each{|signal| + # state.update_state(signal, + # provider.get_config(signal.monitor_id) + # ) + # } + # generator.update_last_received_records(reduced_records) + # reduced_records.push(*missing_signals) + + # # build the health model + # all_records = reduced_records + # model_builder.process_records(all_records) + # all_monitors = model_builder.finalize_model + + # # update the state for aggregate monitors (unit monitors are updated above) + # all_monitors.each{|monitor_instance_id, monitor| + # if monitor.is_aggregate_monitor + # state.update_state(monitor, + # provider.get_config(monitor.monitor_id) + # ) + # end + + # instance_state = state.get_state(monitor_instance_id) + # #puts 
"#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" + # should_send = instance_state.should_send + + # # always send cluster monitor as a heartbeat + # if !should_send && monitor_instance_id != MonitorId::CLUSTER + # all_monitors.delete(monitor_instance_id) + # end + # } + + # records_to_send = [] + # all_monitors.keys.each{|key| + # record = provider.get_record(all_monitors[key], state) + # #puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" + # } + # end + + def test_get_workload_name + # node_inventory = JSON.parse(File.read("C:/AzureMonitor/ContainerInsights/Docker-Provider/test/code/plugin/health/dilipr-health-test-nodes.json")) + # pod_inventory = JSON.parse(File.read('C:/AzureMonitor/ContainerInsights/Docker-Provider/test/code/plugin/health/dilipr-health-test-pods.json')) + # replicaset_inventory = JSON.parse(File.read('C:/AzureMonitor/ContainerInsights/Docker-Provider/test/code/plugin/health/dilipr-health-test-rs.json')) + node_inventory = JSON.parse(File.read("C:/AzureMonitor/ContainerInsights/Docker-Provider/test/code/plugin/health/jobyaks2-nodes.json")) + pod_inventory = JSON.parse(File.read('C:/AzureMonitor/ContainerInsights/Docker-Provider/test/code/plugin/health/jobyaks2-pods.json')) + replicaset_inventory = JSON.parse(File.read('C:/AzureMonitor/ContainerInsights/Docker-Provider/test/code/plugin/health/jobyaks2-rs.json')) resources = HealthKubernetesResources.instance resources.node_inventory = node_inventory resources.pod_inventory = pod_inventory - resources.set_deployment_inventory(deployment_inventory) - - workload_names = resources.get_workload_names - provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) - - - #container memory cpu records - file = File.read('C:/Users/dilipr/desktop/health/container_cpu_memory/cadvisor_perf.json') - cadvisor_records = 
JSON.parse(file) - cadvisor_records = cadvisor_records.select{|record| record['DataItems'][0]['ObjectName'] == 'K8SContainer'} - formatted_records = [] - formatter = HealthContainerCpuMemoryRecordFormatter.new - cadvisor_records.each{|record| - formatted_record = formatter.get_record_from_cadvisor_record(record) - formatted_records.push(formatted_record) + resources.set_replicaset_inventory(replicaset_inventory) + pod_inventory['items'].each{|pod| + workload_name = resources.get_workload_name(pod) + puts "POD #{pod['metadata']['name']} Workload Name #{workload_name}" } - resources.build_pod_uid_lookup #call this in in_kube_health every min + pods_ready_hash = HealthMonitorUtils.get_pods_ready_hash(resources) - cluster_labels = { - 'container.azm.ms/cluster-region' => 'eastus', - 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', - 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', - 'container.azm.ms/cluster-name' => 'dilipr-health-test' - } - - cluster_id = 'fake_cluster_id' - - aggregator = HealthContainerCpuMemoryAggregator.new(resources, provider) - deduped_records = aggregator.dedupe_records(formatted_records) - aggregator.aggregate(deduped_records) - aggregator.compute_state - container_cpu_memory_records = aggregator.get_records - - records.concat(container_cpu_memory_records) - - health_monitor_records = [] - records.each do |record| - monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] - monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] - health_monitor_record = HealthMonitorRecord.new( - record[HealthMonitorRecordFields::MONITOR_ID], - record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], - record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], - record[HealthMonitorRecordFields::DETAILS]["state"], - provider.get_labels(record), - provider.get_config(monitor_id), - record[HealthMonitorRecordFields::DETAILS] - ) - - state.update_state(health_monitor_record, - 
provider.get_config(health_monitor_record.monitor_id) - ) - - # get the health state based on the monitor's operational state - # update state calls updates the state of the monitor based on configuration and history of the the monitor records - health_monitor_record.state = state.get_state(monitor_instance_id).new_state - health_monitor_records.push(health_monitor_record) - #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" - end - - #handle kube api down - kube_api_down_handler = HealthKubeApiDownHandler.new - health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records) - - # Dedupe daemonset signals - # Remove unit monitor signals for “gone” objects - reducer = HealthSignalReducer.new() - reduced_records = reducer.reduce_signals(health_monitor_records, resources) - - cluster_id = 'fake_cluster_id' - - #get the list of 'none' and 'unknown' signals - missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider) - #update state for missing signals - missing_signals.each{|signal| - state.update_state(signal, - provider.get_config(signal.monitor_id) - ) - } - generator.update_last_received_records(reduced_records) - reduced_records.push(*missing_signals) - - # build the health model - all_records = reduced_records - model_builder.process_records(all_records) - all_monitors = model_builder.finalize_model - - # update the state for aggregate monitors (unit monitors are updated above) - all_monitors.each{|monitor_instance_id, monitor| - if monitor.is_aggregate_monitor - state.update_state(monitor, - provider.get_config(monitor.monitor_id) - ) - end - - instance_state = state.get_state(monitor_instance_id) - #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" - should_send = instance_state.should_send - - # always send cluster monitor as a heartbeat - if !should_send && 
monitor_instance_id != MonitorId::CLUSTER - all_monitors.delete(monitor_instance_id) - end - } - - records_to_send = [] - all_monitors.keys.each{|key| - record = provider.get_record(all_monitors[key], state) - #puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" - } + puts JSON.pretty_generate(pods_ready_hash) end end \ No newline at end of file From 22bd43da20fadd862ec52e279dc250abeec161d5 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Mon, 4 Nov 2019 18:22:43 -0800 Subject: [PATCH 136/160] Fix the bug where if a warning condition appears before fail condition, the node condition is reported as warning instead of fail. Also fix the node conditions state to consider unknown as a failure state (#292) --- source/code/plugin/health/health_monitor_utils.rb | 14 +++++++++----- source/code/plugin/in_kube_health.rb | 12 ++++++++++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index 0d297d215..2fa2d3a52 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -108,6 +108,8 @@ def get_pods_ready_hash(resources) def get_node_state_from_node_conditions(monitor_config, node_conditions) pass = false + warn = false + fail = false failtypes = ['outofdisk', 'networkunavailable'].to_set #default fail types if !monitor_config.nil? && !monitor_config["NodeConditionTypesForFailedState"].nil? failtypes = monitor_config["NodeConditionTypesForFailedState"] @@ -123,18 +125,20 @@ def get_node_state_from_node_conditions(monitor_config, node_conditions) #for each condition in the configuration, check if the type is not false. 
If yes, update state to fail if (failtypes.include?(type.downcase) && (status == 'True' || status == 'Unknown')) - return HealthMonitorStates::FAIL + fail = true elsif ((type == "DiskPressure" || type == "MemoryPressure" || type == "PIDPressure") && (status == 'True' || status == 'Unknown')) - return HealthMonitorStates::WARNING + warn = true elsif type == "Ready" && status == 'True' pass = true end end - if pass - return HealthMonitorStates::PASS - else + if fail return HealthMonitorStates::FAIL + elsif warn + return HealthMonitorStates::WARNING + else + return HealthMonitorStates::PASS end end diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index affbdd275..51ffa86d5 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -263,9 +263,17 @@ def process_node_condition_monitor(node_inventory) node_state = HealthMonitorUtils.get_node_state_from_node_conditions(monitor_config, conditions) details = {} conditions.each do |condition| - condition_state = !(condition['status'].downcase == 'true' && condition['type'].downcase != 'ready') ? 
HealthMonitorStates::PASS : HealthMonitorStates::FAIL + condition_state = HealthMonitorStates::PASS + if condition['type'].downcase != 'ready' + if (condition['status'].downcase == 'true' || condition['status'].downcase == 'unknown') + condition_state = HealthMonitorStates::FAIL + end + else #Condition == READY + if condition['status'].downcase != 'true' + condition_state = HealthMonitorStates::FAIL + end + end details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message'], "State" => condition_state} - #@@hmlog.info "Node Condition details: #{JSON.pretty_generate(details)}" end health_monitor_record = {"timestamp" => timestamp, "state" => node_state, "details" => details} monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@cluster_id, node_name]) From 40f47a9b4f16ca049857243c00ee2a455904601f Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 5 Nov 2019 15:07:09 -0800 Subject: [PATCH 137/160] Fix for Nodes Aspect not showing up in draft cluster (#294) --- source/code/plugin/health/health_model_definition_parser.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/health/health_model_definition_parser.rb b/source/code/plugin/health/health_model_definition_parser.rb index 907bc1fd1..91f8cd24f 100644 --- a/source/code/plugin/health/health_model_definition_parser.rb +++ b/source/code/plugin/health/health_model_definition_parser.rb @@ -29,6 +29,7 @@ def parse_file labels = entry['labels'] if entry['labels'] aggregation_algorithm = entry['aggregation_algorithm'] if entry['aggregation_algorithm'] aggregation_algorithm_params = entry['aggregation_algorithm_params'] if entry['aggregation_algorithm_params'] + default_parent_monitor_id = entry['default_parent_monitor_id'] if entry['default_parent_monitor_id'] if parent_monitor_id.is_a?(Array) conditions = [] parent_monitor_id.each{|condition| @@ -38,7 +39,7 @@ def parse_file parent_id = condition['id'] 
conditions.push({"key" => key, "operator" => operator, "value" => value, "parent_id" => parent_id}) } - @health_model_definition[monitor_id] = {"conditions" => conditions, "labels" => labels, "aggregation_algorithm" => aggregation_algorithm, "aggregation_algorithm_params" =>aggregation_algorithm_params} + @health_model_definition[monitor_id] = {"conditions" => conditions, "labels" => labels, "aggregation_algorithm" => aggregation_algorithm, "aggregation_algorithm_params" =>aggregation_algorithm_params, "default_parent_monitor_id" => default_parent_monitor_id} elsif parent_monitor_id.is_a?(String) @health_model_definition[monitor_id] = {"parent_monitor_id" => parent_monitor_id, "labels" => labels, "aggregation_algorithm" => aggregation_algorithm, "aggregation_algorithm_params" =>aggregation_algorithm_params} elsif parent_monitor_id.nil? From 16055bed2d7ac755301f5023bb9be28318690ed3 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 5 Nov 2019 17:16:02 -0800 Subject: [PATCH 138/160] Fix the issue where the health tree is inconsistent if a deployment is deleted (#295) --- .../health_container_cpu_memory_aggregator.rb | 92 ++++++++++++++++++- 1 file changed, 88 insertions(+), 4 deletions(-) diff --git a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb index f6b57e0ae..6d69e0213 100644 --- a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb +++ b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb @@ -49,6 +49,9 @@ class HealthContainerCpuMemoryAggregator @@limit_is_array_event_sent = {} @@WORKLOAD_CONTAINER_COUNT_EMPTY_EVENT = "WorkloadContainerCountEmptyEvent" @@LIMIT_IS_ARRAY_EVENT = "ResourceLimitIsAnArrayEvent" + @@cpu_last_sent_monitors = {} + @@memory_last_sent_monitors = {} + def initialize(resources, provider) @pod_uid_lookup = resources.get_pod_uid_lookup @workload_container_count = resources.get_workload_container_count @@ -137,7 
+140,6 @@ def aggregate(container_records) end container_instance_record = {} - pod_name = @pod_uid_lookup[lookup_key]["pod_name"] #append the record to the hash # append only if the record is not a duplicate record @@ -160,13 +162,14 @@ def compute_state() # if limits not set, set state to warning # if all records present, sort in descending order of metric, compute index based on StateThresholdPercentage, get the state (pass/fail/warn) based on monitor state (Using [Fail/Warn]ThresholdPercentage, and set the state) @memory_records.each{|k,v| + @@memory_last_sent_monitors.delete(k) #remove from last sent list if the record is present in the current set of signals calculate_monitor_state(v, @provider.get_config(MonitorId::CONTAINER_MEMORY_MONITOR_ID)) } @cpu_records.each{|k,v| + @@cpu_last_sent_monitors.delete(k) #remove from last sent list if the record is present in the current set of signals calculate_monitor_state(v, @provider.get_config(MonitorId::CONTAINER_CPU_MONITOR_ID)) } - @log.info "Finished computing state" end @@ -175,7 +178,6 @@ def get_records container_cpu_memory_records = [] @cpu_records.each{|resource_key, record| - cpu_limit_mc = 1.0 if record["limit"].is_a?(Numeric) cpu_limit_mc = record["limit"]/1000000.to_f @@ -221,6 +223,42 @@ def get_records container_cpu_memory_records.push(health_record) } + # If all records that were sent previously are present in current set, this will not be executed + if @@cpu_last_sent_monitors.keys.size != 0 + @@cpu_last_sent_monitors.keys.each{|key| + begin + @log.info "Container CPU monitor #{key} not present in current set. 
Sending none state transition" + tokens = key.split('_') + namespace = tokens[0] + workload_name = "#{tokens[0]}~~#{tokens[1]}" + container = tokens[2] + health_monitor_record = { + "timestamp" => time_now, + "state" => HealthMonitorStates::NONE, + "details" => { + "reason" => "No record received for workload #{workload_name}", + "workload_name" => workload_name, + "namespace" => namespace, + "container" => container + } + } + + monitor_instance_id = HealthMonitorHelpers.get_monitor_instance_id(MonitorId::CONTAINER_CPU_MONITOR_ID, key.split('_')) #container_cpu_utilization-namespace-workload-container + + health_record = {} + health_record[HealthMonitorRecordFields::MONITOR_ID] = MonitorId::CONTAINER_CPU_MONITOR_ID + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + container_cpu_memory_records.push(health_record) + rescue => e + @log.info "Error when trying to create NONE State transition signal for #{key} for monitor #{monitor_instance_id} #{e.message}" + next + end + } + end + @memory_records.each{|resource_key, record| health_monitor_record = { "timestamp" => time_now, @@ -245,6 +283,52 @@ def get_records health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now container_cpu_memory_records.push(health_record) } + + # If all records that were sent previously are present in current set, this will not be executed + if @@memory_last_sent_monitors.keys.size != 0 + @@memory_last_sent_monitors.keys.each{|key| + begin + @log.info "Container Memory monitor #{key} not present in current set. 
Sending none state transition" + tokens = key.split('_') + namespace = tokens[0] + workload_name = "#{tokens[0]}~~#{tokens[1]}" + container = tokens[2] + health_monitor_record = { + "timestamp" => time_now, + "state" => HealthMonitorStates::NONE, + "details" => { + "reason" => "No record received for workload #{workload_name}", + "workload_name" => workload_name, + "namespace" => namespace, + "container" => container + } + } + monitor_instance_id = HealthMonitorHelpers.get_monitor_instance_id(MonitorId::CONTAINER_MEMORY_MONITOR_ID, key.split('_')) #container_cpu_utilization-namespace-workload-container + health_record = {} + health_record[HealthMonitorRecordFields::MONITOR_ID] = MonitorId::CONTAINER_MEMORY_MONITOR_ID + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + container_cpu_memory_records.push(health_record) + rescue => e + @log.info "Error when trying to create NONE State transition signal for #{key} for monitor #{monitor_instance_id} #{e.message}" + next + end + } + end + + #reset the last sent monitors list + @@memory_last_sent_monitors = {} + @@cpu_last_sent_monitors = {} + + # add the current set of signals for comparison in next iteration + @cpu_records.keys.each{|k| + @@cpu_last_sent_monitors[k] = true + } + @memory_records.keys.each{|k| + @@memory_last_sent_monitors[k] = true + } return container_cpu_memory_records end @@ -298,4 +382,4 @@ def calculate_container_instance_state(counter_value, limit, config) end end end -end \ No newline at end of file +end From 2d861cccf20891f3150d325d3916f62883643126 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Tue, 12 Nov 2019 10:39:01 -0800 Subject: [PATCH 139/160] Rashmi/1 16 test (#297) * health deployment update * apps v1 changes for deployment * 
changes * changes to use relicasets and api groups --- source/code/plugin/KubernetesApiClient.rb | 21 +++--- source/code/plugin/in_kube_health.rb | 88 +++++++++++------------ 2 files changed, 55 insertions(+), 54 deletions(-) diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index be1a51791..7b5a1cd24 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -12,6 +12,8 @@ class KubernetesApiClient require_relative "oms_common" @@ApiVersion = "v1" + @@ApiVersionApps = "v1" + @@ApiGroupApps = "apps" @@CaFile = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" @@ClusterName = nil @@ClusterId = nil @@ -30,13 +32,12 @@ def initialize end class << self - def getKubeResourceInfo(resource, api_version: nil) + def getKubeResourceInfo(resource, api_group: nil) headers = {} response = nil - @Log.info "Getting Kube resource api_version #{api_version}" - @Log.info resource + @Log.info "Getting Kube resource: #{resource}" begin - resourceUri = getResourceUri(resource, api_version: api_version) + resourceUri = getResourceUri(resource, api_group) if !resourceUri.nil? uri = URI.parse(resourceUri) http = Net::HTTP.new(uri.host, uri.port) @@ -85,14 +86,14 @@ def getClusterRegion end end - def getResourceUri(resource, api_version: nil) + def getResourceUri(resource, api_group) begin if ENV["KUBERNETES_SERVICE_HOST"] && ENV["KUBERNETES_PORT_443_TCP_PORT"] - if !api_version.nil? - return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}/apis/" + api_version + "/" + resource - end - api_version = @@ApiVersion - return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}/api/" + api_version + "/" + resource + if api_group.nil? 
+ return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}/api/" + @@ApiVersion + "/" + resource + elsif api_group == @@ApiGroupApps + return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}/apis/apps/" + @@ApiVersionApps + "/" + resource + end else @Log.warn ("Kubernetes environment variable not set KUBERNETES_SERVICE_HOST: #{ENV["KUBERNETES_SERVICE_HOST"]} KUBERNETES_PORT_443_TCP_PORT: #{ENV["KUBERNETES_PORT_443_TCP_PORT"]}. Unable to form resourceUri") return nil diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 51ffa86d5..57ca07f64 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -7,12 +7,12 @@ require_relative "ApplicationInsightsUtility" module Fluent + Dir[File.join(__dir__, "./health", "*.rb")].each { |file| require file } - Dir[File.join(__dir__, './health', '*.rb')].each { |file| require file } class KubeHealthInput < Input Plugin.register_input("kubehealth", self) - config_param :health_monitor_config_path, :default => '/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json' + config_param :health_monitor_config_path, :default => "/etc/opt/microsoft/docker-cimprov/health/healthmonitorconfig.json" @@clusterCpuCapacity = 0.0 @@clusterMemoryCapacity = 0.0 @@ -26,6 +26,7 @@ def initialize @@cluster_id = KubernetesApiClient.getClusterId @resources = HealthKubernetesResources.instance @provider = HealthMonitorProvider.new(@@cluster_id, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) + @@ApiGroupApps = "apps" rescue => e ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) end @@ -40,25 +41,25 @@ def configure(conf) end def start - begin - if @run_interval - @finished = false - @condition = ConditionVariable.new - @mutex = Mutex.new - @thread = Thread.new(&method(:run_periodic)) - - @@hmlog = HealthMonitorUtils.get_log_handle - 
@@clusterName = KubernetesApiClient.getClusterName - @@clusterRegion = KubernetesApiClient.getClusterRegion - cluster_capacity = HealthMonitorUtils.get_cluster_cpu_memory_capacity(@@hmlog) - @@clusterCpuCapacity = cluster_capacity[0] - @@clusterMemoryCapacity = cluster_capacity[1] - @@hmlog.info "Cluster CPU Capacity: #{@@clusterCpuCapacity} Memory Capacity: #{@@clusterMemoryCapacity}" - initialize_inventory - end - rescue => e - ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + begin + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) + + @@hmlog = HealthMonitorUtils.get_log_handle + @@clusterName = KubernetesApiClient.getClusterName + @@clusterRegion = KubernetesApiClient.getClusterRegion + cluster_capacity = HealthMonitorUtils.get_cluster_cpu_memory_capacity(@@hmlog) + @@clusterCpuCapacity = cluster_capacity[0] + @@clusterMemoryCapacity = cluster_capacity[1] + @@hmlog.info "Cluster CPU Capacity: #{@@clusterCpuCapacity} Memory Capacity: #{@@clusterMemoryCapacity}" + initialize_inventory end + rescue => e + ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) + end end def shutdown @@ -73,7 +74,6 @@ def shutdown def enumerate begin - currentTime = Time.now emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 @@ -86,7 +86,7 @@ def enumerate node_inventory = JSON.parse(node_inventory_response.body) pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods") pod_inventory = JSON.parse(pod_inventory_response.body) - replicaset_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("replicasets", api_version: "extensions/v1beta1").body) + replicaset_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("replicasets", api_group: @@ApiGroupApps).body) @resources.node_inventory = node_inventory @resources.pod_inventory = pod_inventory @@ -108,8 +108,8 @@ def enumerate 
health_monitor_records.push(record) if record pods_ready_hash = HealthMonitorUtils.get_pods_ready_hash(@resources) - system_pods = pods_ready_hash.select{|k,v| v['namespace'] == 'kube-system'} - workload_pods = pods_ready_hash.select{|k,v| v['namespace'] != 'kube-system'} + system_pods = pods_ready_hash.select { |k, v| v["namespace"] == "kube-system" } + workload_pods = pods_ready_hash.select { |k, v| v["namespace"] != "kube-system" } system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, MonitorId::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) system_pods_ready_percentage_records.each do |record| @@ -147,13 +147,13 @@ def enumerate def process_cpu_oversubscribed_monitor(pod_inventory, node_inventory) timestamp = Time.now.utc.iso8601 @@clusterCpuCapacity = HealthMonitorUtils.get_cluster_cpu_memory_capacity(@@hmlog, node_inventory: node_inventory)[0] - subscription = HealthMonitorUtils.get_resource_subscription(pod_inventory,"cpu", @@clusterCpuCapacity) + subscription = HealthMonitorUtils.get_resource_subscription(pod_inventory, "cpu", @@clusterCpuCapacity) @@hmlog.info "Refreshed Cluster CPU Capacity #{@@clusterCpuCapacity}" - state = subscription > @@clusterCpuCapacity ? "fail" : "pass" + state = subscription > @@clusterCpuCapacity ? 
"fail" : "pass" #CPU monitor_id = MonitorId::WORKLOAD_CPU_OVERSUBSCRIBED_MONITOR_ID - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterCpuCapacity" => @@clusterCpuCapacity/1000000.to_f, "clusterCpuRequests" => subscription/1000000.to_f}} + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"clusterCpuCapacity" => @@clusterCpuCapacity / 1000000.to_f, "clusterCpuRequests" => subscription / 1000000.to_f}} # @@hmlog.info health_monitor_record monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@cluster_id]) @@ -163,8 +163,8 @@ def process_cpu_oversubscribed_monitor(pod_inventory, node_inventory) health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now - health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id #@@hmlog.info "Successfully processed process_cpu_oversubscribed_monitor" return health_record @@ -172,10 +172,10 @@ def process_cpu_oversubscribed_monitor(pod_inventory, node_inventory) def process_memory_oversubscribed_monitor(pod_inventory, node_inventory) timestamp = Time.now.utc.iso8601 - @@clusterMemoryCapacity = HealthMonitorUtils.get_cluster_cpu_memory_capacity(@@hmlog,node_inventory: node_inventory)[1] + @@clusterMemoryCapacity = HealthMonitorUtils.get_cluster_cpu_memory_capacity(@@hmlog, node_inventory: node_inventory)[1] @@hmlog.info "Refreshed Cluster Memory Capacity #{@@clusterMemoryCapacity}" - subscription = HealthMonitorUtils.get_resource_subscription(pod_inventory,"memory", 
@@clusterMemoryCapacity) - state = subscription > @@clusterMemoryCapacity ? "fail" : "pass" + subscription = HealthMonitorUtils.get_resource_subscription(pod_inventory, "memory", @@clusterMemoryCapacity) + state = subscription > @@clusterMemoryCapacity ? "fail" : "pass" #@@hmlog.debug "Memory Oversubscribed Monitor State : #{state}" #CPU @@ -189,8 +189,8 @@ def process_memory_oversubscribed_monitor(pod_inventory, node_inventory) health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now - health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id #@@hmlog.info "Successfully processed process_memory_oversubscribed_monitor" return health_record @@ -201,7 +201,7 @@ def process_kube_api_up_monitor(state, response) monitor_id = MonitorId::KUBE_API_STATUS details = response.each_header.to_h - details['ResponseCode'] = response.code + details["ResponseCode"] = response.code health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} hmlog = HealthMonitorUtils.get_log_handle #hmlog.info health_monitor_record @@ -213,8 +213,8 @@ def process_kube_api_up_monitor(state, response) health_record[HealthMonitorRecordFields::MONITOR_ID] = monitor_id health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now - health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + 
health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id #@@hmlog.info "Successfully processed process_kube_api_up_monitor" return health_record @@ -227,10 +227,10 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) records = [] pods_hash.keys.each do |key| workload_name = key - total_pods = pods_hash[workload_name]['totalPods'] - pods_ready = pods_hash[workload_name]['podsReady'] - namespace = pods_hash[workload_name]['namespace'] - workload_kind = pods_hash[workload_name]['kind'] + total_pods = pods_hash[workload_name]["totalPods"] + pods_ready = pods_hash[workload_name]["podsReady"] + namespace = pods_hash[workload_name]["namespace"] + workload_kind = pods_hash[workload_name]["kind"] percent = pods_ready / total_pods * 100 timestamp = Time.now.utc.iso8601 @@ -242,8 +242,8 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) health_record[HealthMonitorRecordFields::MONITOR_ID] = config_monitor_id health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now - health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id records.push(health_record) end @@ -299,7 +299,7 @@ def initialize_inventory node_inventory = JSON.parse(node_inventory_response.body) pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods") pod_inventory = JSON.parse(pod_inventory_response.body) - replicaset_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("replicasets", api_version: 
"extensions/v1beta1").body) + replicaset_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("replicasets", api_group: @@ApiGroupApps).body) @resources.node_inventory = node_inventory @resources.pod_inventory = pod_inventory From 844afbdd4bb940902e0d90717a5af7d381c30c88 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 12 Nov 2019 15:16:48 -0800 Subject: [PATCH 140/160] Fix duplicate records in container memory/cpu samples (#298) --- source/code/plugin/filter_health_model_builder.rb | 6 +++--- .../plugin/health/health_container_cpu_memory_aggregator.rb | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 47ce7a631..1724065fe 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -97,12 +97,11 @@ def filter_stream(tag, es) } end container_records_aggregator = HealthContainerCpuMemoryAggregator.new(@resources, @provider) - deduped_records = container_records_aggregator.dedupe_records(container_records) if @container_cpu_memory_records.nil? @log.info "@container_cpu_memory_records was not initialized" @container_cpu_memory_records = [] #in some clusters, this is null, so initialize it again. end - @container_cpu_memory_records.push(*deduped_records) # push the records for aggregation later + @container_cpu_memory_records.push(*container_records) # push the records for aggregation later return MultiEventStream.new elsif tag.start_with?("kubehealth.ReplicaSet") records = [] @@ -114,7 +113,8 @@ def filter_stream(tag, es) aggregated_container_records = [] if !@container_cpu_memory_records.nil? && !@container_cpu_memory_records.empty? 
container_records_aggregator = HealthContainerCpuMemoryAggregator.new(@resources, @provider) - container_records_aggregator.aggregate(@container_cpu_memory_records) + deduped_records = container_records_aggregator.dedupe_records(@container_cpu_memory_records) + container_records_aggregator.aggregate(deduped_records) container_records_aggregator.compute_state aggregated_container_records = container_records_aggregator.get_records end diff --git a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb index 6d69e0213..29ac91bde 100644 --- a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb +++ b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb @@ -84,12 +84,13 @@ def dedupe_records(container_records) else r = resource_instances[instance_name] if record["Timestamp"] > r["Timestamp"] - @log.info "Dropping older record" + @log.info "Dropping older record for instance #{instance_name} new: #{record["Timestamp"]} old: #{r["Timestamp"]}" resource_instances[instance_name] = record end end rescue => e @log.info "Exception when deduping record #{record}" + next end end return cpu_deduped_instances.values.concat(memory_deduped_instances.values) From 9a8f0f8b58d28aee68cf680bebf8094c8e1b8ea6 Mon Sep 17 00:00:00 2001 From: bragi92 Date: Thu, 14 Nov 2019 10:42:50 -0800 Subject: [PATCH 141/160] Update MDM region list to include francecentral, japaneast and australiaeast --- installer/conf/container.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index f9540bde8..696ffdb6b 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -36,7 +36,7 @@ #custom_metrics_mdm filter plugin type filter_cadvisor2mdm - custom_metrics_azure_regions 
eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes log_level info From 597b2fb3dd9a4e9a7f4f4ec8cef3a855526abbe0 Mon Sep 17 00:00:00 2001 From: bragi92 Date: Thu, 14 Nov 2019 10:48:48 -0800 Subject: [PATCH 142/160] Update MDM region list to include francecentral, japaneast and australiaeast --- installer/conf/kube.conf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 40f4ac880..49d0bf62e 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -70,14 +70,14 @@ type filter_inventory2mdm - custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast log_level info #custom_metrics_mdm filter plugin for perf data from windows nodes type filter_cadvisor2mdm - custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral + custom_metrics_azure_regions 
eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes log_level info @@ -247,4 +247,4 @@ retry_limit 10 retry_wait 30s max_retry_wait 9m - \ No newline at end of file + From cd1a37b72b1911eb657012668319763e0b3770da Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Thu, 14 Nov 2019 18:18:54 -0800 Subject: [PATCH 143/160] Send telemetry when there is error in calculation of state in percentage aggregation, and send state as unknown (#300) --- .../code/plugin/health/aggregate_monitor.rb | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/source/code/plugin/health/aggregate_monitor.rb b/source/code/plugin/health/aggregate_monitor.rb index 794f716ce..10dbdc705 100644 --- a/source/code/plugin/health/aggregate_monitor.rb +++ b/source/code/plugin/health/aggregate_monitor.rb @@ -3,6 +3,12 @@ require_relative 'health_model_constants' require 'json' +# Require only when running inside container. +# otherwise unit tests will fail due to ApplicationInsightsUtility dependency on base omsagent ruby files. If you have your dev machine starting with omsagent-rs, then GOOD LUCK! +if Socket.gethostname.start_with?('omsagent-rs') + require_relative '../ApplicationInsightsUtility' +end + module HealthModel class AggregateMonitor attr_accessor :monitor_id, :monitor_instance_id, :state, :transition_date_time, :aggregation_algorithm, :aggregation_algorithm_params, :labels, :is_aggregate_monitor, :details @@ -16,6 +22,8 @@ class AggregateMonitor MonitorState::NONE => 5 } + @@telemetry_sent_hash = {} + # constructor def initialize( monitor_id, @@ -127,17 +135,43 @@ def calculate_percentage_state(monitor_set) #sort #TODO: What if sorted_filtered is empty? is that even possible? 
+ log = HealthMonitorHelpers.get_log_handle sorted_filtered = sort_filter_member_monitors(monitor_set) state_threshold = @aggregation_algorithm_params['state_threshold'].to_f - size = sorted_filtered.size + if sorted_filtered.nil? + size = 0 + else + size = sorted_filtered.size + end + if size == 1 @state = sorted_filtered[0].state else count = ((state_threshold*size)/100).ceil index = size - count - @state = sorted_filtered[index].state + if sorted_filtered.nil? || sorted_filtered[index].nil? + @state = HealthMonitorStates::UNKNOWN + if !@@telemetry_sent_hash.key?(@monitor_instance_id) + log.debug "Adding to telemetry sent hash #{@monitor_instance_id}" + @@telemetry_sent_hash[@monitor_instance_id] = true + log.info "Index: #{index} size: #{size} Count: #{count}" + custom_error_event_map = {} + custom_error_event_map["count"] = count + custom_error_event_map["index"] = index + custom_error_event_map["size"] = size + if !sorted_filtered.nil? + sorted_filtered.each_index{|i| + custom_error_event_map[i] = sorted_filtered[i].state + } + end + ApplicationInsightsUtility.sendCustomEvent("PercentageStateCalculationErrorEvent", custom_error_event_map) + end + else + @state = sorted_filtered[index].state + end + @state end end From d6ea1896ae3f63307434fc9e37315f2c16db37d0 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Mon, 25 Nov 2019 17:26:30 -0800 Subject: [PATCH 144/160] fix exceptions (#306) --- source/code/plugin/KubernetesApiClient.rb | 6 ++++-- source/code/plugin/in_kube_podinventory.rb | 10 ++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index 7b5a1cd24..6bfdc06f1 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -58,8 +58,10 @@ def getKubeResourceInfo(resource, api_group: nil) rescue => error @Log.warn("kubernetes api request failed: #{error} for #{resource} @ #{Time.now.utc.iso8601}") end 
- if (!response.nil? && !response.body.nil? && response.body.empty?) - @Log.warn("KubernetesAPIClient::getKubeResourceInfo : Got empty response from Kube API for #{resource} @ #{Time.now.utc.iso8601}") + if (!response.nil?) + if (!response.body.nil? && response.body.empty?) + @Log.warn("KubernetesAPIClient::getKubeResourceInfo : Got empty response from Kube API for #{resource} @ #{Time.now.utc.iso8601}") + end end return response end diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 766831a66..1dd029b22 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -58,10 +58,16 @@ def enumerate(podList = nil) end begin - if (!podInventory.empty? && podInventory.key?("items") && !podInventory["items"].empty?) + if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].empty?) #get pod inventory & services $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") - serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo("services").body) + serviceList = nil + serviceInfo = KubernetesApiClient.getKubeResourceInfo("services") + + if !serviceInfo.nil? 
+ serviceList = JSON.parse(serviceInfo.body) + end + $log.info("in_kube_podinventory::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}") parse_and_emit_records(podInventory, serviceList) else From 3df0ab6567e4c39b686cfff31b5baf18013929cf Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Tue, 3 Dec 2019 16:20:40 -0800 Subject: [PATCH 145/160] Merge Branch morgan into ci_feature (#308) * Fixes : 1) Disable health (for time being) - in DS & RS 2) Disable MDM (for time being) - in DS & RS 3) Merge kubeperf into kubenode & kubepod 4) Made scheduling predictable for kubenode & kubepod 5) Enable containerlog enrichment fields (timeofcommand, containername & containerimage) as a configurable setting (default = true/ON) - Also add telemetry for it 6) Filter OUT type!=Normal events for k8s events 7) AppInsights telemetry async 8) Fix double calling bug in in_win_cadvisor_perf 9) Add connect timeout (20secs) & read timeout (40 secs) for all cadvisor api calls & also for all kubernetes api server calls 10) Fix batchTime for kubepods to be one before making api server call (rather than after making the call, which will make it fluctuate based on api server latency for the call) * fix setting issue for the new enrichcontainerlog setting * fix compilation issue * fix another compilation issue * fix emit issues * fix a nil issue * fix mising tag * * Fix all input plugins for scheduling issue * Merge kubeservices with kubepodinventory (reduce RS to API server by one more) * Remove Kubelogs (not used) * Fix liveness probe * Disable enrichment by default for container logs * Move to yajl json parser across the board for docker provier code * Remove unused files * fix removed files * fix timeofcommand and remove a duplicate entry for a health file. 
* Rashmi/http leak fixes (#301) * changes for http connection close * close socket in ensure * adding nil check * Rashmi/http leak fixes (#303) * changes for http connection close * close socket in ensure * adding nil check * adding missing end * use yajl for events & nodes parsing. * Rashmi/http leak fixes (#304) * changes for http connection close * close socket in ensure * adding nil check * Update MDM region list to include francecentral, japaneast and australiaeast * Update MDM region list to include francecentral, japaneast and australiaeast * adding missing end * Send telemetry when there is error in calculation of state in percentage aggregation, and send state as unknown (#300) * changes for chunking * telemetry changes * some fixes * bug fix * changing to have morgan changes only * add new line * use polltime for metrics and disable out_forward for health * enable mdm & health * few optimizations * do not remove time of command make kube.conf same as scale tested config * remove comments from container.conf * remove flush comment for ai telemetry * remove commented code lines * fix config * remove timeofcommand when enrichment==false * fix config * enable mdm filter * Rashmi/api chunk (#307) * changes * changes * refactor changes * changes * changes * changes * changes * node changes * changes * changes * changes * changes * adding open and read timeouts for api client * removing comments * updating chunk size --- installer/conf/container.conf | 33 +- installer/conf/kube.conf | 466 ++++++++---------- installer/datafiles/base_container.data | 5 - installer/scripts/livenessprobe.sh | 2 +- installer/scripts/tomlparser.rb | 12 + source/code/go/src/plugins/oms.go | 52 +- .../code/plugin/ApplicationInsightsUtility.rb | 44 +- .../code/plugin/CAdvisorMetricsAPIClient.rb | 66 +-- source/code/plugin/ContainerInventoryState.rb | 2 +- source/code/plugin/DockerApiClient.rb | 7 +- source/code/plugin/KubernetesApiClient.rb | 51 +- 
source/code/plugin/filter_cadvisor2mdm.rb | 2 +- .../filter_cadvisor_health_container.rb | 2 +- .../plugin/filter_cadvisor_health_node.rb | 2 +- source/code/plugin/filter_docker_log.rb | 1 + .../plugin/filter_health_model_builder.rb | 2 +- source/code/plugin/filter_inventory2mdm.rb | 2 +- .../code/plugin/health/aggregate_monitor.rb | 4 +- .../plugin/health/cluster_health_state.rb | 1 + .../health_container_cpu_memory_aggregator.rb | 6 +- ...h_container_cpu_memory_record_formatter.rb | 2 + .../plugin/health/health_hierarchy_builder.rb | 2 +- .../health/health_model_definition_parser.rb | 2 +- .../plugin/health/health_monitor_optimizer.rb | 1 + .../plugin/health/health_monitor_provider.rb | 1 + .../plugin/health/health_monitor_state.rb | 1 + .../plugin/health/health_monitor_utils.rb | 1 + source/code/plugin/health/unit_monitor.rb | 2 +- source/code/plugin/in_cadvisor_perf.rb | 26 +- source/code/plugin/in_containerinventory.rb | 20 +- .../code/plugin/in_containerlog_sudo_tail.rb | 189 ------- source/code/plugin/in_kube_events.rb | 150 +++--- source/code/plugin/in_kube_health.rb | 88 ++-- source/code/plugin/in_kube_logs.rb | 181 ------- source/code/plugin/in_kube_nodes.rb | 307 +++++++----- source/code/plugin/in_kube_perf.rb | 120 ----- source/code/plugin/in_kube_podinventory.rb | 220 ++++++--- source/code/plugin/in_kube_services.rb | 110 ----- source/code/plugin/in_win_cadvisor_perf.rb | 24 +- .../channel/contracts/json_serializable.rb | 2 +- .../channel/sender_base.rb | 2 +- source/code/plugin/out_mdm.rb | 2 +- 42 files changed, 967 insertions(+), 1248 deletions(-) mode change 100755 => 100644 installer/conf/container.conf delete mode 100644 source/code/plugin/in_containerlog_sudo_tail.rb delete mode 100644 source/code/plugin/in_kube_logs.rb delete mode 100644 source/code/plugin/in_kube_perf.rb delete mode 100644 source/code/plugin/in_kube_services.rb diff --git a/installer/conf/container.conf b/installer/conf/container.conf old mode 100755 new mode 100644 index 
696ffdb6b..93c250fbb --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -11,7 +11,7 @@ type containerinventory tag oms.containerinsights.containerinventory - run_interval 60s + run_interval 60 log_level debug @@ -19,7 +19,7 @@ type cadvisorperf tag oms.api.cadvisorperf - run_interval 60s + run_interval 60 log_level debug @@ -45,30 +45,28 @@ type out_oms log_level debug num_threads 5 - buffer_chunk_limit 20m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_containerinventory*.buffer - buffer_queue_limit 20 buffer_queue_full_action drop_oldest_chunk + buffer_chunk_limit 4m flush_interval 20s retry_limit 10 - retry_wait 30s - max_retry_wait 9m + retry_wait 5s + max_retry_wait 5m type out_oms log_level debug num_threads 5 - buffer_chunk_limit 20m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_cadvisorperf*.buffer - buffer_queue_limit 20 buffer_queue_full_action drop_oldest_chunk + buffer_chunk_limit 4m flush_interval 20s retry_limit 10 - retry_wait 30s - max_retry_wait 9m + retry_wait 5s + max_retry_wait 5m @@ -80,6 +78,14 @@ heartbeat_type tcp skip_network_error_at_init true expire_dns_cache 600s + buffer_queue_full_action drop_oldest_chunk + buffer_type file + buffer_path %STATE_DIR_WS%/out_health_forward*.buffer + buffer_chunk_limit 3m + flush_interval 20s + retry_limit 10 + retry_wait 5s + max_retry_wait 5m host "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_HOST']}" @@ -96,14 +102,13 @@ type out_mdm log_level debug num_threads 5 - buffer_chunk_limit 20m buffer_type file buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer - buffer_queue_limit 20 buffer_queue_full_action drop_oldest_chunk + buffer_chunk_limit 4m flush_interval 20s retry_limit 10 - retry_wait 30s - max_retry_wait 9m + retry_wait 5s + max_retry_wait 5m retry_mdm_post_wait_minutes 60 diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 49d0bf62e..207780442 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -1,250 +1,218 @@ # 
Fluentd config file for OMS Docker - cluster components (kubeAPI) - - type forward - port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}" - bind 0.0.0.0 - - -#Kubernetes pod inventory - - type kubepodinventory - tag oms.containerinsights.KubePodInventory - run_interval 60s - log_level debug - - -#Kubernetes events - - type kubeevents - tag oms.containerinsights.KubeEvents - run_interval 60s - log_level debug - - -#Kubernetes logs - - type kubelogs - tag oms.api.KubeLogs - run_interval 60s - - -#Kubernetes services - - type kubeservices - tag oms.containerinsights.KubeServices - run_interval 60s - log_level debug - - -#Kubernetes Nodes - - type kubenodeinventory - tag oms.containerinsights.KubeNodeInventory - run_interval 60s - log_level debug - - -#Kubernetes perf - - type kubeperf - tag oms.api.KubePerf - run_interval 60s - log_level debug - - -#Kubernetes health - - type kubehealth - tag kubehealth.ReplicaSet - run_interval 60s - log_level debug - - -#cadvisor perf- Windows nodes - - type wincadvisorperf - tag oms.api.wincadvisorperf - run_interval 60s - log_level debug - - - - type filter_inventory2mdm - custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast - log_level info - - -#custom_metrics_mdm filter plugin for perf data from windows nodes - - type filter_cadvisor2mdm - custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast - metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes - log_level info - - - - type filter_health_model_builder - - - type out_mdm - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path 
%STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - retry_mdm_post_wait_minutes 60 - - - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - - - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 5m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer - buffer_queue_limit 10 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - - - - type out_oms_api - log_level debug - buffer_chunk_limit 10m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_kubernetes_logs*.buffer - buffer_queue_limit 10 - flush_interval 20s - retry_limit 10 - retry_wait 30s - - - - - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - - - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - - - - type out_oms - log_level debug - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer - buffer_queue_limit 20 - flush_interval 20s - retry_limit 10 - retry_wait 15s - max_retry_wait 9m - - - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type 
file - buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - - - - type out_mdm - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_mdm_*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - retry_mdm_post_wait_minutes 60 - - - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_wincadvisorperf*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - - - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubehealth*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + #fluent forward plugin + + type forward + port "#{ENV['HEALTHMODEL_REPLICASET_SERVICE_SERVICE_PORT']}" + bind 0.0.0.0 + chunk_size_limit 4m + + + #Kubernetes pod inventory + + type kubepodinventory + tag oms.containerinsights.KubePodInventory + run_interval 60 + log_level debug + + + #Kubernetes events + + type kubeevents + tag oms.containerinsights.KubeEvents + run_interval 60 + log_level debug + + + #Kubernetes Nodes + + type kubenodeinventory + tag oms.containerinsights.KubeNodeInventory + run_interval 60 + log_level debug + + + #Kubernetes health + + type kubehealth + tag kubehealth.ReplicaSet + run_interval 60 + log_level debug + + + #cadvisor perf- Windows nodes + + type wincadvisorperf + tag oms.api.wincadvisorperf + run_interval 60 + log_level debug + + + + type filter_inventory2mdm + custom_metrics_azure_regions 
eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast + log_level info + + + #custom_metrics_mdm filter plugin for perf data from windows nodes + + type filter_cadvisor2mdm + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast + metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes + log_level info + + + #health model aggregation filter + + type filter_health_model_builder + + + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 4m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 5s + max_retry_wait 5m + + + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 4m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 5s + max_retry_wait 5m + + + + type out_oms + log_level debug + num_threads 2 + buffer_chunk_limit 4m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 5s + max_retry_wait 5m + + + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 4m + buffer_type file + buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 5s + max_retry_wait 5m + + + + type out_oms + log_level debug + 
num_threads 3 + buffer_chunk_limit 4m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer + buffer_queue_limit 20 + flush_interval 20s + retry_limit 10 + retry_wait 5s + max_retry_wait 5m + + + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 4m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 5s + max_retry_wait 5m + + + + type out_mdm + log_level debug + num_threads 5 + buffer_chunk_limit 4m + buffer_type file + buffer_path %STATE_DIR_WS%/out_mdm_*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 5s + max_retry_wait 5m + retry_mdm_post_wait_minutes 60 + + + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 4m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_api_wincadvisorperf*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 5s + max_retry_wait 5m + + + + type out_mdm + log_level debug + num_threads 5 + buffer_chunk_limit 4m + buffer_type file + buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 5s + max_retry_wait 5m + retry_mdm_post_wait_minutes 60 + + + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 4m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubehealth*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 5s + max_retry_wait 5m + \ No newline at end of file diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 4ebc4f338..60de5af18 100644 --- a/installer/datafiles/base_container.data +++ 
b/installer/datafiles/base_container.data @@ -26,16 +26,13 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/in_kube_podinventory.rb; source/code/plugin/in_kube_podinventory.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_events.rb; source/code/plugin/in_kube_events.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_kube_logs.rb; source/code/plugin/in_kube_logs.rb; 644; root; root /opt/microsoft/omsagent/plugin/KubernetesApiClient.rb; source/code/plugin/KubernetesApiClient.rb; 644; root; root /etc/opt/microsoft/docker-cimprov/container.conf; installer/conf/container.conf; 644; root; root /opt/microsoft/omsagent/plugin/CAdvisorMetricsAPIClient.rb; source/code/plugin/CAdvisorMetricsAPIClient.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_kube_perf.rb; source/code/plugin/in_kube_perf.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_cadvisor_perf.rb; source/code/plugin/in_cadvisor_perf.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_win_cadvisor_perf.rb; source/code/plugin/in_win_cadvisor_perf.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_kube_services.rb; source/code/plugin/in_kube_services.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_nodes.rb; source/code/plugin/in_kube_nodes.rb; 644; root; root /opt/microsoft/omsagent/plugin/filter_inventory2mdm.rb; source/code/plugin/filter_inventory2mdm.rb; 644; root; root /opt/microsoft/omsagent/plugin/CustomMetricsUtils.rb; source/code/plugin/CustomMetricsUtils.rb; 644; root; root @@ -143,12 +140,10 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/health/health_model_definition_parser.rb; source/code/plugin/health/health_model_definition_parser.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb; source/code/plugin/health/health_monitor_helpers.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_optimizer.rb; source/code/plugin/health/health_monitor_optimizer.rb; 644; root; root 
-/opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb; source/code/plugin/health/health_monitor_helpers.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_provider.rb; source/code/plugin/health/health_monitor_provider.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_record.rb; source/code/plugin/health/health_monitor_record.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_state.rb; source/code/plugin/health/health_monitor_state.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_telemetry.rb; source/code/plugin/health/health_monitor_telemetry.rb; 644; root; root -/opt/microsoft/omsagent/plugin/health/health_monitor_helpers.rb; source/code/plugin/health/health_monitor_helpers.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_monitor_utils.rb; source/code/plugin/health/health_monitor_utils.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/health_signal_reducer.rb; source/code/plugin/health/health_signal_reducer.rb; 644; root; root /opt/microsoft/omsagent/plugin/health/monitor_factory.rb; source/code/plugin/health/monitor_factory.rb; 644; root; root diff --git a/installer/scripts/livenessprobe.sh b/installer/scripts/livenessprobe.sh index cb7e8a0ba..e957b4bdf 100644 --- a/installer/scripts/livenessprobe.sh +++ b/installer/scripts/livenessprobe.sh @@ -1,7 +1,7 @@ #!/bin/bash #test to exit non zero value -(ps -ef | grep omsagent | grep -v "grep") && (ps -ef | grep td-agent-bit | grep -v "grep") +(ps -ef | grep omsagent- | grep -v "grep") && (ps -ef | grep td-agent-bit | grep -v "grep") if [ $? -eq 0 ] && [ ! 
-s "inotifyoutput.txt" ] then # inotifyoutput file is empty and the grep commands for omsagent and td-agent-bit succeeded diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index cd16cbf9b..ba67d023a 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -15,6 +15,7 @@ @logTailPath = "/var/log/containers/*.log" @logExclusionRegexPattern = "(^((?!stdout|stderr).)*$)" @excludePath = "*.csv2" #some invalid path +@enrichContainerLogs = false # Use parser to parse the configmap toml file to a ruby structure def parseConfigMap @@ -117,6 +118,16 @@ def populateSettingValuesFromConfigMap(parsedConfig) rescue => errorStr ConfigParseErrorLogger.logError("Exception while reading config map settings for cluster level environment variable collection - #{errorStr}, using defaults, please check config map for errors") end + + #Get container log enrichment setting + begin + if !parsedConfig[:log_collection_settings][:enrich_container_logs].nil? && !parsedConfig[:log_collection_settings][:enrich_container_logs][:enabled].nil? 
+ @enrichContainerLogs = parsedConfig[:log_collection_settings][:enrich_container_logs][:enabled] + puts "config::Using config map setting for cluster level container log enrichment" + end + rescue => errorStr + ConfigParseErrorLogger.logError("Exception while reading config map settings for cluster level container log enrichment - #{errorStr}, using defaults, please check config map for errors") + end end end @@ -156,6 +167,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) file.write("export AZMON_STDERR_EXCLUDED_NAMESPACES=#{@stderrExcludeNamespaces}\n") file.write("export AZMON_CLUSTER_COLLECT_ENV_VAR=#{@collectClusterEnvVariables}\n") file.write("export AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH=#{@excludePath}\n") + file.write("export AZMON_CLUSTER_CONTAINER_LOG_ENRICH=#{@enrichContainerLogs}\n") # Close file after writing all environment variables file.close puts "Both stdout & stderr log collection are turned off for namespaces: '#{@excludePath}' " diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 5a323d7e0..834726c93 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -92,6 +92,8 @@ var ( ResourceName string //KubeMonAgentEvents skip first flush skipKubeMonEventsFlush bool + // enrich container logs (when true this will add the fields - timeofcommand, containername & containerimage) + enrichContainerLogs bool ) var ( @@ -746,16 +748,30 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { stringMap["Name"] = val } - dataItem := DataItem{ - ID: stringMap["Id"], - LogEntry: stringMap["LogEntry"], - LogEntrySource: stringMap["LogEntrySource"], - LogEntryTimeStamp: stringMap["LogEntryTimeStamp"], - LogEntryTimeOfCommand: start.Format(time.RFC3339), - SourceSystem: stringMap["SourceSystem"], - Computer: Computer, - Image: stringMap["Image"], - Name: stringMap["Name"], + var dataItem DataItem + if enrichContainerLogs == true { + dataItem = DataItem{ + ID: 
stringMap["Id"], + LogEntry: stringMap["LogEntry"], + LogEntrySource: stringMap["LogEntrySource"], + LogEntryTimeStamp: stringMap["LogEntryTimeStamp"], + LogEntryTimeOfCommand: start.Format(time.RFC3339), + SourceSystem: stringMap["SourceSystem"], + Computer: Computer, + Image: stringMap["Image"], + Name: stringMap["Name"], + } + } else { // dont collect timeofcommand field as its part of container log enrivhment + dataItem = DataItem{ + ID: stringMap["Id"], + LogEntry: stringMap["LogEntry"], + LogEntrySource: stringMap["LogEntrySource"], + LogEntryTimeStamp: stringMap["LogEntryTimeStamp"], + SourceSystem: stringMap["SourceSystem"], + Computer: Computer, + Image: stringMap["Image"], + Name: stringMap["Name"], + } } FlushedRecordsSize += float64(len(stringMap["LogEntry"])) @@ -892,6 +908,15 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { // Initilizing this to true to skip the first kubemonagentevent flush since the errors are not populated at this time skipKubeMonEventsFlush = true + enrichContainerLogsSetting := os.Getenv("AZMON_CLUSTER_CONTAINER_LOG_ENRICH") + if (strings.Compare(enrichContainerLogsSetting, "true") == 0) { + enrichContainerLogs = true + Log("ContainerLogEnrichment=true \n") + } else { + enrichContainerLogs = false + Log("ContainerLogEnrichment=false \n") + } + pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { message := fmt.Sprintf("Error Reading plugin config path : %s \n", err.Error()) @@ -989,7 +1014,12 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "daemonset") == 0 { populateExcludedStdoutNamespaces() populateExcludedStderrNamespaces() - go updateContainerImageNameMaps() + if enrichContainerLogs == true { + Log("ContainerLogEnrichment=true; starting goroutine to update containerimagenamemaps \n") + go updateContainerImageNameMaps() + } else { + Log("ContainerLogEnrichment=false \n") + } // Flush config error 
records every hour go flushKubeMonAgentEventRecords() diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 85b424e69..f7bd806a0 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -6,7 +6,7 @@ class ApplicationInsightsUtility require_relative "omslog" require_relative "DockerApiClient" require_relative "oms_common" - require "json" + require 'yajl/json_gem' require "base64" @@HeartBeat = "HeartBeatEvent" @@ -73,16 +73,37 @@ def initializeUtility() @@Tc = ApplicationInsights::TelemetryClient.new elsif !encodedAppInsightsKey.nil? decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey) + #override ai endpoint if its available otherwise use default. if appInsightsEndpoint && !appInsightsEndpoint.nil? && !appInsightsEndpoint.empty? $log.info("AppInsightsUtility: Telemetry client uses overrided endpoint url : #{appInsightsEndpoint}") - telemetrySynchronousSender = ApplicationInsights::Channel::SynchronousSender.new appInsightsEndpoint - telemetrySynchronousQueue = ApplicationInsights::Channel::SynchronousQueue.new(telemetrySynchronousSender) - telemetryChannel = ApplicationInsights::Channel::TelemetryChannel.new nil, telemetrySynchronousQueue + #telemetrySynchronousSender = ApplicationInsights::Channel::SynchronousSender.new appInsightsEndpoint + #telemetrySynchronousQueue = ApplicationInsights::Channel::SynchronousQueue.new(telemetrySynchronousSender) + #telemetryChannel = ApplicationInsights::Channel::TelemetryChannel.new nil, telemetrySynchronousQueue + sender = ApplicationInsights::Channel::AsynchronousSender.new appInsightsEndpoint + queue = ApplicationInsights::Channel::AsynchronousQueue.new sender + channel = ApplicationInsights::Channel::TelemetryChannel.new nil, queue @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey, telemetryChannel else - @@Tc = ApplicationInsights::TelemetryClient.new 
decodedAppInsightsKey + sender = ApplicationInsights::Channel::AsynchronousSender.new + queue = ApplicationInsights::Channel::AsynchronousQueue.new sender + channel = ApplicationInsights::Channel::TelemetryChannel.new nil, queue + @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey, channel end + # The below are default recommended values. If you change these, ensure you test telemetry flow fully + + # flush telemetry if we have 10 or more telemetry items in our queue + #@@Tc.channel.queue.max_queue_length = 10 + + # send telemetry to the service in batches of 5 + #@@Tc.channel.sender.send_buffer_size = 5 + + # the background worker thread will be active for 5 seconds before it shuts down. if + # during this time items are picked up from the queue, the timer is reset. + #@@Tc.channel.sender.send_time = 5 + + # the background worker thread will poll the queue every 0.5 seconds for new items + #@@Tc.channel.sender.send_interval = 0.5 end rescue => errorStr $log.warn("Exception in AppInsightsUtility: initilizeUtility - error: #{errorStr}") @@ -102,8 +123,7 @@ def sendHeartBeatEvent(pluginName) eventName = pluginName + @@HeartBeat if !(@@Tc.nil?) 
@@Tc.track_event eventName, :properties => @@CustomProperties - @@Tc.flush - $log.info("AppInsights Heartbeat Telemetry sent successfully") + $log.info("AppInsights Heartbeat Telemetry put successfully into the queue") end rescue => errorStr $log.warn("Exception in AppInsightsUtility: sendHeartBeatEvent - error: #{errorStr}") @@ -116,8 +136,7 @@ def sendLastProcessedContainerInventoryCountMetric(pluginName, properties) @@Tc.track_metric "LastProcessedContainerInventoryCount", properties["ContainerCount"], :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, :properties => @@CustomProperties - @@Tc.flush - $log.info("AppInsights Container Count Telemetry sent successfully") + $log.info("AppInsights Container Count Telemetry sput successfully into the queue") end rescue => errorStr $log.warn("Exception in AppInsightsUtility: sendCustomMetric - error: #{errorStr}") @@ -138,7 +157,6 @@ def sendCustomEvent(eventName, properties) end if !(@@Tc.nil?) @@Tc.track_event eventName, :properties => telemetryProps - @@Tc.flush $log.info("AppInsights Custom Event #{eventName} sent successfully") end rescue => errorStr @@ -162,8 +180,7 @@ def sendExceptionTelemetry(errorStr, properties = nil) end if !(@@Tc.nil?) 
@@Tc.track_exception errorStr, :properties => telemetryProps - @@Tc.flush - $log.info("AppInsights Exception Telemetry sent successfully") + $log.info("AppInsights Exception Telemetry put successfully into the queue") end rescue => errorStr $log.warn("Exception in AppInsightsUtility: sendExceptionTelemetry - error: #{errorStr}") @@ -209,8 +226,7 @@ def sendMetricTelemetry(metricName, metricValue, properties) @@Tc.track_metric metricName, metricValue, :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, :properties => telemetryProps - @@Tc.flush - $log.info("AppInsights metric Telemetry #{metricName} sent successfully") + $log.info("AppInsights metric Telemetry #{metricName} put successfully into the queue") end rescue => errorStr $log.warn("Exception in AppInsightsUtility: sendMetricTelemetry - error: #{errorStr}") diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index 09499b4cf..be61b8b8f 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -2,12 +2,13 @@ # frozen_string_literal: true class CAdvisorMetricsAPIClient - require "json" + require 'yajl/json_gem' require "logger" require "net/http" require "net/https" require "uri" require "date" + require "time" require_relative "oms_common" require_relative "KubernetesApiClient" @@ -21,6 +22,7 @@ class CAdvisorMetricsAPIClient @clusterLogTailExcludPath = ENV["AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH"] @clusterLogTailPath = ENV["AZMON_LOG_TAIL_PATH"] @clusterAgentSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] + @clusterContainerLogEnrich = ENV["AZMON_CLUSTER_CONTAINER_LOG_ENRICH"] @dsPromInterval = ENV["TELEMETRY_DS_PROM_INTERVAL"] @dsPromFieldPassCount = ENV["TELEMETRY_DS_PROM_FIELDPASS_LENGTH"] @@ -64,12 +66,11 @@ def getSummaryStatsFromCAdvisor(winNode) cAdvisorUri = getCAdvisorUri(winNode) if !cAdvisorUri.nil? 
uri = URI.parse(cAdvisorUri) - http = Net::HTTP.new(uri.host, uri.port) - http.use_ssl = false - - cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) - response = http.request(cAdvisorApiRequest) - @Log.info "Got response code #{response.code} from #{uri.request_uri}" + Net::HTTP.start(uri.host, uri.port, :use_ssl => false, :open_timeout => 20, :read_timeout => 40 ) do |http| + cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) + response = http.request(cAdvisorApiRequest) + @Log.info "Got response code #{response.code} from #{uri.request_uri}" + end end rescue => error @Log.warn("CAdvisor api request failed: #{error}") @@ -103,7 +104,7 @@ def getCAdvisorUri(winNode) end end - def getMetrics(winNode = nil) + def getMetrics(winNode: nil, metricTime: Time.now.utc.iso8601 ) metricDataItems = [] begin cAdvisorStats = getSummaryStatsFromCAdvisor(winNode) @@ -122,27 +123,27 @@ def getMetrics(winNode = nil) operatingSystem = "Linux" end if !metricInfo.nil? - metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "workingSetBytes", "memoryWorkingSetBytes")) - metricDataItems.concat(getContainerStartTimeMetricItems(metricInfo, hostName, "restartTimeEpoch")) + metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "workingSetBytes", "memoryWorkingSetBytes", metricTime)) + metricDataItems.concat(getContainerStartTimeMetricItems(metricInfo, hostName, "restartTimeEpoch", metricTime)) if operatingSystem == "Linux" - metricDataItems.concat(getContainerCpuMetricItems(metricInfo, hostName, "usageNanoCores", "cpuUsageNanoCores")) - metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "rssBytes", "memoryRssBytes")) - metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "rssBytes", "memoryRssBytes")) + metricDataItems.concat(getContainerCpuMetricItems(metricInfo, hostName, "usageNanoCores", "cpuUsageNanoCores", metricTime)) + metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, 
hostName, "rssBytes", "memoryRssBytes", metricTime)) + metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "rssBytes", "memoryRssBytes", metricTime)) elsif operatingSystem == "Windows" - containerCpuUsageNanoSecondsRate = getContainerCpuMetricItemRate(metricInfo, hostName, "usageCoreNanoSeconds", "cpuUsageNanoCores") + containerCpuUsageNanoSecondsRate = getContainerCpuMetricItemRate(metricInfo, hostName, "usageCoreNanoSeconds", "cpuUsageNanoCores", metricTime) if containerCpuUsageNanoSecondsRate && !containerCpuUsageNanoSecondsRate.empty? && !containerCpuUsageNanoSecondsRate.nil? metricDataItems.concat(containerCpuUsageNanoSecondsRate) end end - cpuUsageNanoSecondsRate = getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", "cpuUsageNanoCores", operatingSystem) + cpuUsageNanoSecondsRate = getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", "cpuUsageNanoCores", operatingSystem, metricTime) if cpuUsageNanoSecondsRate && !cpuUsageNanoSecondsRate.empty? && !cpuUsageNanoSecondsRate.nil? metricDataItems.push(cpuUsageNanoSecondsRate) end - metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "workingSetBytes", "memoryWorkingSetBytes")) + metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "workingSetBytes", "memoryWorkingSetBytes", metricTime)) - metricDataItems.push(getNodeLastRebootTimeMetric(metricInfo, hostName, "restartTimeEpoch")) + metricDataItems.push(getNodeLastRebootTimeMetric(metricInfo, hostName, "restartTimeEpoch", metricTime)) # Disabling networkRxRate and networkTxRate since we dont use it as of now. 
#metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "rxBytes", "networkRxBytes")) @@ -165,7 +166,7 @@ def getMetrics(winNode = nil) return metricDataItems end - def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn) + def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn, metricPollTime) metricItems = [] clusterId = KubernetesApiClient.getClusterId timeDifference = (DateTime.now.to_time.to_i - @@telemetryCpuMetricTimeTracker).abs @@ -182,7 +183,7 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met #cpu metric containerName = container["name"] metricValue = container["cpu"][cpuMetricNameToCollect] - metricTime = container["cpu"]["time"] + metricTime = metricPollTime #container["cpu"]["time"] metricItem = {} metricItem["DataItems"] = [] @@ -219,6 +220,7 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met telemetryProps["clusterlogtailexcludepath"] = @clusterLogTailExcludPath telemetryProps["clusterLogTailPath"] = @clusterLogTailPath telemetryProps["clusterAgentSchemaVersion"] = @clusterAgentSchemaVersion + telemetryProps["clusterCLEnrich"] = @clusterContainerLogEnrich end #telemetry about prometheus metric collections settings for daemonset if (File.file?(@promConfigMountPath)) @@ -272,7 +274,7 @@ def resetWinContainerIdCache end # usageNanoCores doesnt exist for windows nodes. 
Hence need to compute this from usageCoreNanoSeconds - def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn) + def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn, metricPollTime) metricItems = [] clusterId = KubernetesApiClient.getClusterId timeDifference = (DateTime.now.to_time.to_i - @@telemetryCpuMetricTimeTracker).abs @@ -292,7 +294,7 @@ def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect, containerCount += 1 containerName = container["name"] metricValue = container["cpu"][cpuMetricNameToCollect] - metricTime = container["cpu"]["time"] + metricTime = metricPollTime #container["cpu"]["time"] metricItem = {} metricItem["DataItems"] = [] @@ -366,7 +368,7 @@ def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect, return metricItems end - def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollect, metricNametoReturn) + def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollect, metricNametoReturn, metricPollTime) metricItems = [] clusterId = KubernetesApiClient.getClusterId timeDifference = (DateTime.now.to_time.to_i - @@telemetryMemoryMetricTimeTracker).abs @@ -381,7 +383,7 @@ def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollec pod["containers"].each do |container| containerName = container["name"] metricValue = container["memory"][memoryMetricNameToCollect] - metricTime = container["memory"]["time"] + metricTime = metricPollTime #container["memory"]["time"] metricItem = {} metricItem["DataItems"] = [] @@ -431,7 +433,7 @@ def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollec return metricItems end - def getNodeMetricItem(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn) + def getNodeMetricItem(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn, metricPollTime) 
metricItem = {} clusterId = KubernetesApiClient.getClusterId begin @@ -441,7 +443,7 @@ def getNodeMetricItem(metricJSON, hostName, metricCategory, metricNameToCollect, if !node[metricCategory].nil? metricValue = node[metricCategory][metricNameToCollect] - metricTime = node[metricCategory]["time"] + metricTime = metricPollTime #node[metricCategory]["time"] metricItem["DataItems"] = [] @@ -467,7 +469,7 @@ def getNodeMetricItem(metricJSON, hostName, metricCategory, metricNameToCollect, return metricItem end - def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn, operatingSystem) + def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn, operatingSystem, metricPollTime) metricItem = {} clusterId = KubernetesApiClient.getClusterId begin @@ -477,7 +479,7 @@ def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToColl if !node[metricCategory].nil? metricValue = node[metricCategory][metricNameToCollect] - metricTime = node[metricCategory]["time"] + metricTime = metricPollTime #node[metricCategory]["time"] # if !(metricNameToCollect == "rxBytes" || metricNameToCollect == "txBytes" || metricNameToCollect == "usageCoreNanoSeconds") # @Log.warn("getNodeMetricItemRate : rateMetric is supported only for rxBytes, txBytes & usageCoreNanoSeconds and not for #{metricNameToCollect}") @@ -584,7 +586,7 @@ def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToColl return metricItem end - def getNodeLastRebootTimeMetric(metricJSON, hostName, metricNametoReturn) + def getNodeLastRebootTimeMetric(metricJSON, hostName, metricNametoReturn, metricPollTime) metricItem = {} clusterId = KubernetesApiClient.getClusterId @@ -594,7 +596,7 @@ def getNodeLastRebootTimeMetric(metricJSON, hostName, metricNametoReturn) nodeName = node["nodeName"] metricValue = node["startTime"] - metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + metricTime = metricPollTime 
#Time.now.utc.iso8601 #2018-01-30T19:36:14Z metricItem["DataItems"] = [] @@ -620,10 +622,10 @@ def getNodeLastRebootTimeMetric(metricJSON, hostName, metricNametoReturn) return metricItem end - def getContainerStartTimeMetricItems(metricJSON, hostName, metricNametoReturn) + def getContainerStartTimeMetricItems(metricJSON, hostName, metricNametoReturn, metricPollTime) metricItems = [] clusterId = KubernetesApiClient.getClusterId - currentTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + #currentTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z begin metricInfo = metricJSON metricInfo["pods"].each do |pod| @@ -632,7 +634,7 @@ def getContainerStartTimeMetricItems(metricJSON, hostName, metricNametoReturn) pod["containers"].each do |container| containerName = container["name"] metricValue = container["startTime"] - metricTime = currentTime + metricTime = metricPollTime #currentTime metricItem = {} metricItem["DataItems"] = [] diff --git a/source/code/plugin/ContainerInventoryState.rb b/source/code/plugin/ContainerInventoryState.rb index 7e5ca18e8..170fa65e3 100644 --- a/source/code/plugin/ContainerInventoryState.rb +++ b/source/code/plugin/ContainerInventoryState.rb @@ -2,7 +2,7 @@ # frozen_string_literal: true class ContainerInventoryState - require 'json' + require 'yajl/json_gem' require_relative 'omslog' @@InventoryDirectory = "/var/opt/microsoft/docker-cimprov/state/ContainerInventory/" diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index ee2742dd4..f2828b357 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -3,7 +3,7 @@ class DockerApiClient require "socket" - require "json" + require "yajl/json_gem" require "timeout" require_relative "omslog" require_relative "DockerApiRestHelper" @@ -40,7 +40,6 @@ def getResponse(request, isMultiJson, isVersion) end break if (isVersion) ? (responseChunk.length < @@ChunkSize) : (responseChunk.end_with? 
"0\r\n\r\n") end - socket.close return (isTimeOut) ? nil : parseResponse(dockerResponse, isMultiJson) rescue => errorStr $log.warn("Socket call failed for request: #{request} error: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") @@ -49,6 +48,10 @@ def getResponse(request, isMultiJson, isVersion) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end return nil + ensure + if !socket.nil? + socket.close + end end end diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index 6bfdc06f1..e52c77884 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -2,7 +2,7 @@ # frozen_string_literal: true class KubernetesApiClient - require "json" + require "yajl/json_gem" require "logger" require "net/http" require "net/https" @@ -40,20 +40,17 @@ def getKubeResourceInfo(resource, api_group: nil) resourceUri = getResourceUri(resource, api_group) if !resourceUri.nil? uri = URI.parse(resourceUri) - http = Net::HTTP.new(uri.host, uri.port) - http.use_ssl = true if !File.exist?(@@CaFile) raise "#{@@CaFile} doesnt exist" else - http.ca_file = @@CaFile if File.exist?(@@CaFile) + Net::HTTP.start(uri.host, uri.port, :use_ssl => true, :ca_file => @@CaFile, :verify_mode => OpenSSL::SSL::VERIFY_PEER, :open_timeout => 20, :read_timeout => 40) do |http| + kubeApiRequest = Net::HTTP::Get.new(uri.request_uri) + kubeApiRequest["Authorization"] = "Bearer " + getTokenStr + @Log.info "KubernetesAPIClient::getKubeResourceInfo : Making request to #{uri.request_uri} @ #{Time.now.utc.iso8601}" + response = http.request(kubeApiRequest) + @Log.info "KubernetesAPIClient::getKubeResourceInfo : Got response of #{response.code} for #{uri.request_uri} @ #{Time.now.utc.iso8601}" + end end - http.verify_mode = OpenSSL::SSL::VERIFY_PEER - - kubeApiRequest = Net::HTTP::Get.new(uri.request_uri) - kubeApiRequest["Authorization"] = "Bearer " + getTokenStr - @Log.info 
"KubernetesAPIClient::getKubeResourceInfo : Making request to #{uri.request_uri} @ #{Time.now.utc.iso8601}" - response = http.request(kubeApiRequest) - @Log.info "KubernetesAPIClient::getKubeResourceInfo : Got response of #{response.code} for #{uri.request_uri} @ #{Time.now.utc.iso8601}" end rescue => error @Log.warn("kubernetes api request failed: #{error} for #{resource} @ #{Time.now.utc.iso8601}") @@ -338,7 +335,7 @@ def getContainerLogsSinceTime(namespace, pod, container, since, showTimeStamp) return containerLogs end - def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn) + def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) metricItems = [] begin clusterId = getClusterId @@ -373,7 +370,7 @@ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName nodeName = pod["spec"]["nodeName"] podContainers.each do |container| containerName = container["name"] - metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?) 
metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect]) @@ -433,14 +430,14 @@ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName return metricItems end #getContainerResourceRequestAndLimits - def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn) + def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) metricItems = [] begin metricInfo = metricJSON clusterId = getClusterId #Since we are getting all node data at the same time and kubernetes doesnt specify a timestamp for the capacity and allocation metrics, #if we are coming up with the time it should be same for all nodes - metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z metricInfo["items"].each do |node| if (!node["status"][metricCategory].nil?) @@ -551,5 +548,29 @@ def getMetricNumericValue(metricName, metricVal) end return metricValue end # getMetricNumericValue + + def getResourcesAndContinuationToken(uri) + continuationToken = nil + resourceInventory = nil + begin + @Log.info "KubernetesApiClient::getResourcesAndContinuationToken : Getting resources from Kube API using url: #{uri} @ #{Time.now.utc.iso8601}" + resourceInfo = getKubeResourceInfo(uri) + @Log.info "KubernetesApiClient::getResourcesAndContinuationToken : Done getting resources from Kube API using url: #{uri} @ #{Time.now.utc.iso8601}" + if !resourceInfo.nil? + @Log.info "KubernetesApiClient::getResourcesAndContinuationToken:Start:Parsing data for #{uri} using yajl @ #{Time.now.utc.iso8601}" + resourceInventory = Yajl::Parser.parse(StringIO.new(resourceInfo.body)) + @Log.info "KubernetesApiClient::getResourcesAndContinuationToken:End:Parsing data for #{uri} using yajl @ #{Time.now.utc.iso8601}" + resourceInfo = nil + end + if (!resourceInventory.nil? && !resourceInventory["metadata"].nil?) 
+ continuationToken = resourceInventory["metadata"]["continue"] + end + rescue => errorStr + @Log.warn "KubernetesApiClient::getResourcesAndContinuationToken:Failed in get resources for #{uri} and continuation token: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + resourceInventory = nil + end + return continuationToken, resourceInventory + end #getResourcesAndContinuationToken end end diff --git a/source/code/plugin/filter_cadvisor2mdm.rb b/source/code/plugin/filter_cadvisor2mdm.rb index a6e643e45..f14a1369b 100644 --- a/source/code/plugin/filter_cadvisor2mdm.rb +++ b/source/code/plugin/filter_cadvisor2mdm.rb @@ -4,7 +4,7 @@ module Fluent require 'logger' - require 'json' + require 'yajl/json_gem' require_relative 'oms_common' require_relative 'CustomMetricsUtils' diff --git a/source/code/plugin/filter_cadvisor_health_container.rb b/source/code/plugin/filter_cadvisor_health_container.rb index 2eccd125f..93d50e20f 100644 --- a/source/code/plugin/filter_cadvisor_health_container.rb +++ b/source/code/plugin/filter_cadvisor_health_container.rb @@ -3,7 +3,7 @@ module Fluent require 'logger' - require 'json' + require 'yajl/json_gem' require_relative 'oms_common' require_relative "ApplicationInsightsUtility" Dir[File.join(__dir__, './health', '*.rb')].each { |file| require file } diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb index d2f735cd1..c6280db60 100644 --- a/source/code/plugin/filter_cadvisor_health_node.rb +++ b/source/code/plugin/filter_cadvisor_health_node.rb @@ -3,7 +3,7 @@ module Fluent require 'logger' - require 'json' + require 'yajl/json_gem' require_relative 'oms_common' require_relative "ApplicationInsightsUtility" require_relative "KubernetesApiClient" diff --git a/source/code/plugin/filter_docker_log.rb b/source/code/plugin/filter_docker_log.rb index 7ffd333e3..b80f4c204 100644 --- a/source/code/plugin/filter_docker_log.rb +++ 
b/source/code/plugin/filter_docker_log.rb @@ -5,6 +5,7 @@ module Fluent require 'logger' require 'socket' + require 'yajl/json_gem' class DockerLogFilter < Filter Plugin.register_filter('filter_docker_log', self) diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index 1724065fe..1c451ea38 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -4,7 +4,7 @@ module Fluent require 'logger' - require 'json' + require 'yajl/json_gem' Dir[File.join(__dir__, './health', '*.rb')].each { |file| require file } diff --git a/source/code/plugin/filter_inventory2mdm.rb b/source/code/plugin/filter_inventory2mdm.rb index 30f6f911a..422b4b54a 100644 --- a/source/code/plugin/filter_inventory2mdm.rb +++ b/source/code/plugin/filter_inventory2mdm.rb @@ -4,7 +4,7 @@ module Fluent require 'logger' - require 'json' + require 'yajl/json_gem' require_relative 'oms_common' require_relative 'CustomMetricsUtils' diff --git a/source/code/plugin/health/aggregate_monitor.rb b/source/code/plugin/health/aggregate_monitor.rb index 10dbdc705..a774478e7 100644 --- a/source/code/plugin/health/aggregate_monitor.rb +++ b/source/code/plugin/health/aggregate_monitor.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true require_relative 'health_model_constants' -require 'json' +require 'yajl/json_gem' # Require only when running inside container. # otherwise unit tests will fail due to ApplicationInsightsUtility dependency on base omsagent ruby files. If you have your dev machine starting with omsagent-rs, then GOOD LUCK! 
@@ -218,7 +218,7 @@ def sort_filter_member_monitors(monitor_set) member_monitors.push(member_monitor) } - filtered = member_monitors.select{|monitor| monitor.state != MonitorState::NONE} + filtered = member_monitors.keep_if{|monitor| monitor.state != MonitorState::NONE} sorted = filtered.sort_by{ |monitor| [@@sort_key_order[monitor.state]] } return sorted diff --git a/source/code/plugin/health/cluster_health_state.rb b/source/code/plugin/health/cluster_health_state.rb index fa9cb42b2..e46d0bf5f 100644 --- a/source/code/plugin/health/cluster_health_state.rb +++ b/source/code/plugin/health/cluster_health_state.rb @@ -3,6 +3,7 @@ require "net/http" require "net/https" require "uri" +require 'yajl/json_gem' module HealthModel class ClusterHealthState diff --git a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb index 29ac91bde..e93c66c14 100644 --- a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb +++ b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb @@ -64,8 +64,8 @@ def initialize(resources, provider) def dedupe_records(container_records) cpu_deduped_instances = {} memory_deduped_instances = {} - container_records = container_records.select{|record| record['CounterName'] == @@memory_counter_name || record['CounterName'] == @@cpu_counter_name} - + container_records = container_records.keep_if{|record| record['CounterName'] == @@memory_counter_name || record['CounterName'] == @@cpu_counter_name} + container_records.each do |record| begin instance_name = record["InstanceName"] @@ -98,7 +98,7 @@ def dedupe_records(container_records) def aggregate(container_records) #filter and select only cpuUsageNanoCores and memoryRssBytes - container_records = container_records.select{|record| record['CounterName'] == @@memory_counter_name || record['CounterName'] == @@cpu_counter_name} + container_records = container_records.keep_if{|record| 
record['CounterName'] == @@memory_counter_name || record['CounterName'] == @@cpu_counter_name} # poduid lookup has poduid/cname --> workload_name, namespace, cpu_limit, memory limit mapping # from the container records, extract the poduid/cname, get the values from poduid_lookup, and aggregate based on namespace_workload_cname container_records.each do |record| diff --git a/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb b/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb index 0c3f061f1..12c72a120 100644 --- a/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb +++ b/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'yajl/json_gem' + module HealthModel class HealthContainerCpuMemoryRecordFormatter diff --git a/source/code/plugin/health/health_hierarchy_builder.rb b/source/code/plugin/health/health_hierarchy_builder.rb index bb48e083b..a59020996 100644 --- a/source/code/plugin/health/health_hierarchy_builder.rb +++ b/source/code/plugin/health/health_hierarchy_builder.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true +require 'yajl/json_gem' -require 'json' module HealthModel class HealthHierarchyBuilder diff --git a/source/code/plugin/health/health_model_definition_parser.rb b/source/code/plugin/health/health_model_definition_parser.rb index 91f8cd24f..c185e5389 100644 --- a/source/code/plugin/health/health_model_definition_parser.rb +++ b/source/code/plugin/health/health_model_definition_parser.rb @@ -3,7 +3,7 @@ Class to parse the health model definition. 
The definition expresses the relationship between monitors, how to roll up to an aggregate monitor, and what labels to "pass on" to the parent monitor =end -require 'json' +require 'yajl/json_gem' module HealthModel class HealthModelDefinitionParser diff --git a/source/code/plugin/health/health_monitor_optimizer.rb b/source/code/plugin/health/health_monitor_optimizer.rb index a63d59abf..d87540941 100644 --- a/source/code/plugin/health/health_monitor_optimizer.rb +++ b/source/code/plugin/health/health_monitor_optimizer.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true +require 'yajl/json_gem' module HealthModel class HealthMonitorOptimizer #ctor diff --git a/source/code/plugin/health/health_monitor_provider.rb b/source/code/plugin/health/health_monitor_provider.rb index b36c46370..8e1d11143 100644 --- a/source/code/plugin/health/health_monitor_provider.rb +++ b/source/code/plugin/health/health_monitor_provider.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true require_relative 'health_model_constants' +require 'yajl/json_gem' module HealthModel class HealthMonitorProvider diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index 16f8bedc4..110793eeb 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true require_relative 'health_model_constants' +require 'yajl/json_gem' module HealthModel diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index 2fa2d3a52..13d1416b1 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -2,6 +2,7 @@ require 'logger' require 'digest' require_relative 'health_model_constants' +require 'yajl/json_gem' module HealthModel # static class that provides a bunch of utility methods diff --git a/source/code/plugin/health/unit_monitor.rb 
b/source/code/plugin/health/unit_monitor.rb index 6454007b6..8e2de210b 100644 --- a/source/code/plugin/health/unit_monitor.rb +++ b/source/code/plugin/health/unit_monitor.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true require_relative 'health_model_constants' -require 'json' +require 'yajl/json_gem' module HealthModel class UnitMonitor diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index 810fb512f..96aa66aa1 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -9,14 +9,15 @@ class CAdvisor_Perf_Input < Input def initialize super require "yaml" - require "json" + require 'yajl/json_gem' + require "time" require_relative "CAdvisorMetricsAPIClient" require_relative "oms_common" require_relative "omslog" end - config_param :run_interval, :time, :default => "1m" + config_param :run_interval, :time, :default => 60 config_param :tag, :string, :default => "oms.api.cadvisorperf" config_param :mdmtag, :string, :default => "mdm.cadvisorperf" config_param :nodehealthtag, :string, :default => "kubehealth.DaemonSet.Node" @@ -46,10 +47,12 @@ def shutdown end def enumerate() - time = Time.now.to_f + currentTime = Time.now + time = currentTime.to_f + batchTime = currentTime.utc.iso8601 begin eventStream = MultiEventStream.new - metricData = CAdvisorMetricsAPIClient.getMetrics() + metricData = CAdvisorMetricsAPIClient.getMetrics(winNode: nil, metricTime: batchTime ) metricData.each do |record| record["DataType"] = "LINUX_PERF_BLOB" record["IPName"] = "LogManagement" @@ -74,14 +77,25 @@ def enumerate() def run_periodic @mutex.lock done = @finished + @nextTimeToRun = Time.now + @waitTimeout = @run_interval until done - @condition.wait(@mutex, @run_interval) + @nextTimeToRun = @nextTimeToRun + @run_interval + @now = Time.now + if @nextTimeToRun <= @now + @waitTimeout = 1 + @nextTimeToRun = @now + else + @waitTimeout = @nextTimeToRun - @now + end + @condition.wait(@mutex, @waitTimeout) done = 
@finished @mutex.unlock if !done begin - $log.info("in_cadvisor_perf::run_periodic @ #{Time.now.utc.iso8601}") + $log.info("in_cadvisor_perf::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}") enumerate + $log.info("in_cadvisor_perf::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}") rescue => errorStr $log.warn "in_cadvisor_perf::run_periodic: enumerate Failed to retrieve cadvisor perf metrics: #{errorStr}" end diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb index ccf61ab2e..d107047b4 100644 --- a/source/code/plugin/in_containerinventory.rb +++ b/source/code/plugin/in_containerinventory.rb @@ -13,14 +13,15 @@ class Container_Inventory_Input < Input def initialize super - require "json" + require 'yajl/json_gem' + require "time" require_relative "DockerApiClient" require_relative "ContainerInventoryState" require_relative "ApplicationInsightsUtility" require_relative "omslog" end - config_param :run_interval, :time, :default => "1m" + config_param :run_interval, :time, :default => 60 config_param :tag, :string, :default => "oms.containerinsights.containerinventory" def configure(conf) @@ -259,14 +260,25 @@ def enumerate def run_periodic @mutex.lock done = @finished + @nextTimeToRun = Time.now + @waitTimeout = @run_interval until done - @condition.wait(@mutex, @run_interval) + @nextTimeToRun = @nextTimeToRun + @run_interval + @now = Time.now + if @nextTimeToRun <= @now + @waitTimeout = 1 + @nextTimeToRun = @now + else + @waitTimeout = @nextTimeToRun - @now + end + @condition.wait(@mutex, @waitTimeout) done = @finished @mutex.unlock if !done begin - $log.info("in_container_inventory::run_periodic @ #{Time.now.utc.iso8601}") + $log.info("in_container_inventory::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}") enumerate + $log.info("in_container_inventory::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}") rescue => errorStr $log.warn "in_container_inventory::run_periodic: Failed in enumerate 
container inventory: #{errorStr}" end diff --git a/source/code/plugin/in_containerlog_sudo_tail.rb b/source/code/plugin/in_containerlog_sudo_tail.rb deleted file mode 100644 index 8faa260d0..000000000 --- a/source/code/plugin/in_containerlog_sudo_tail.rb +++ /dev/null @@ -1,189 +0,0 @@ - -require 'yajl' -require 'fluent/input' -require 'fluent/event' -require 'fluent/config/error' -require 'fluent/parser' -require 'open3' -require 'json' -require_relative 'omslog' -require_relative 'KubernetesApiClient' - -module Fluent - class ContainerLogSudoTail < Input - Plugin.register_input('containerlog_sudo_tail', self) - - def initialize - super - @command = nil - @paths = [] - #Using this to construct the file path for all every container json log file. - #Example container log file path -> /var/lib/docker/containers/{ContainerID}/{ContainerID}-json.log - #We have read permission on this file but don't have execute permission on the below mentioned path. Hence wildcard character searches to find the container ID's doesn't work. - @containerLogFilePath = "/var/lib/docker/containers/" - #This folder contains a list of all the containers running/stopped and we're using it to get all the container ID's which will be needed for the log file path below - #TODO : Use generic path from docker REST endpoint and find a way to mount the correct folder in the omsagent.yaml - @containerIDFilePath = "/var/opt/microsoft/docker-cimprov/state/ContainerInventory/*" - @@systemPodsNamespace = 'kube-system' - @@getSystemPodsTimeIntervalSecs = 300 #refresh system container list every 5 minutes - @@lastSystemPodsGetTime = nil; - @@systemContainerIDList = Hash.new - @@disableKubeSystemLogCollection = ENV['DISABLE_KUBE_SYSTEM_LOG_COLLECTION'] - if !@@disableKubeSystemLogCollection.nil? && !@@disableKubeSystemLogCollection.empty? 
&& @@disableKubeSystemLogCollection.casecmp('true') == 0 - @@disableKubeSystemLogCollection = 'true' - $log.info("in_container_sudo_tail : System container log collection is disabled") - else - @@disableKubeSystemLogCollection = 'false' - $log.info("in_container_sudo_tail : System container log collection is enabled") - end - end - - attr_accessor :command - - #The format used to map the program output to the incoming event. - config_param :format, :string, default: 'none' - - #Tag of the event. - config_param :tag, :string, default: nil - - #Fluentd will record the position it last read into this file. - config_param :pos_file, :string, default: nil - - #The interval time between periodic program runs. - config_param :run_interval, :time, default: nil - - BASE_DIR = File.dirname(File.expand_path('..', __FILE__)) - RUBY_DIR = BASE_DIR + '/ruby/bin/ruby ' - TAILSCRIPT = BASE_DIR + '/plugin/containerlogtailfilereader.rb ' - - def configure(conf) - super - unless @pos_file - raise ConfigError, "'pos_file' is required to keep track of file" - end - - unless @tag - raise ConfigError, "'tag' is required on sudo tail" - end - - unless @run_interval - raise ConfigError, "'run_interval' is required for periodic tailing" - end - - @parser = Plugin.new_parser(conf['format']) - @parser.configure(conf) - end - - def start - @finished = false - @thread = Thread.new(&method(:run_periodic)) - end - - def shutdown - @finished = true - @thread.join - end - - def receive_data(line) - es = MultiEventStream.new - begin - line.chomp! # remove \n - @parser.parse(line) { |time, record| - if time && record - es.add(time, record) - else - $log.warn "pattern doesn't match: #{line.inspect}" - end - unless es.empty? 
- tag=@tag - router.emit_stream(tag, es) - end - } - rescue => e - $log.warn line.dump, error: e.to_s - $log.debug_backtrace(e.backtrace) - end - end - - def receive_log(line) - $log.warn "#{line}" if line.start_with?('WARN') - $log.error "#{line}" if line.start_with?('ERROR') - $log.info "#{line}" if line.start_with?('INFO') - end - - def readable_path(path) - if system("sudo test -r #{path}") - OMS::Log.info_once("Following tail of #{path}") - return path - else - OMS::Log.warn_once("#{path} is not readable. Cannot tail the file.") - return "" - end - end - - def set_system_command - timeNow = DateTime.now - cName = "Unkown" - tempContainerInfo = {} - paths = "" - - #if we are on agent & system containers log collection is disabled, get system containerIDs to exclude logs from containers in system containers namespace from being tailed - if !KubernetesApiClient.isNodeMaster && @@disableKubeSystemLogCollection.casecmp('true') == 0 - if @@lastSystemPodsGetTime.nil? || ((timeNow - @@lastSystemPodsGetTime)*24*60*60).to_i >= @@getSystemPodsTimeIntervalSecs - $log.info("in_container_sudo_tail : System Container list last refreshed at #{@@lastSystemPodsGetTime} - refreshing now at #{timeNow}") - sysContainers = KubernetesApiClient.getContainerIDs(@@systemPodsNamespace) - #BugBug - https://msecg.visualstudio.com/OMS/_workitems/edit/215107 - we get 200 with empty payloaf from time to time - if (!sysContainers.nil? && !sysContainers.empty?) - @@systemContainerIDList = sysContainers - else - $log.info("in_container_sudo_tail : System Container ID List is empty!!!! 
Continuing to use currently cached list.") - end - @@lastSystemPodsGetTime = timeNow - $log.info("in_container_sudo_tail : System Container ID List: #{@@systemContainerIDList}") - end - end - - Dir.glob(@containerIDFilePath).select { |p| - cName = p.split('/').last; - if !@@systemContainerIDList.key?("docker://" + cName) - p = @containerLogFilePath + cName + "/" + cName + "-json.log" - paths += readable_path(p) + " " - else - $log.info("in_container_sudo_tail : Excluding system container with ID #{cName} from tailng for log collection") - end - } - if !system("sudo test -r #{@pos_file}") - system("sudo touch #{@pos_file}") - end - @command = "sudo " << RUBY_DIR << TAILSCRIPT << paths << " -p #{@pos_file}" - end - - def run_periodic - until @finished - begin - sleep @run_interval - #if we are on master & system containers log collection is disabled, collect nothing (i.e NO COntainer log collection for ANY container) - #we will be not collection omsagent log as well in this case, but its insignificant & okay! 
- if !KubernetesApiClient.isNodeMaster || @@disableKubeSystemLogCollection.casecmp('true') != 0 - set_system_command - Open3.popen3(@command) {|writeio, readio, errio, wait_thread| - writeio.close - while line = readio.gets - receive_data(line) - end - while line = errio.gets - receive_log(line) - end - - wait_thread.value #wait until child process terminates - } - end - rescue - $log.error "containerlog_sudo_tail failed to run or shutdown child proces", error => $!.to_s, :error_class => $!.class.to_s - $log.warn_backtrace $!.backtrace - end - end - end - end - -end diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index e1fdc5df6..6116cb62d 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -9,15 +9,20 @@ class Kube_Event_Input < Input def initialize super - require "json" + require "yajl/json_gem" + require "yajl" + require "time" require_relative "KubernetesApiClient" require_relative "oms_common" require_relative "omslog" require_relative "ApplicationInsightsUtility" + + # 30000 events account to approximately 5MB + @EVENTS_CHUNK_SIZE = 30000 end - config_param :run_interval, :time, :default => "1m" + config_param :run_interval, :time, :default => 60 config_param :tag, :string, :default => "oms.containerinsights.KubeEvents" def configure(conf) @@ -43,79 +48,114 @@ def shutdown end end - def enumerate(eventList = nil) - currentTime = Time.now - emitTime = currentTime.to_f - batchTime = currentTime.utc.iso8601 + def enumerate + begin + eventList = nil + currentTime = Time.now + batchTime = currentTime.utc.iso8601 + eventQueryState = getEventQueryState + newEventQueryState = [] + + # Initializing continuation token to nil + continuationToken = nil + $log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}") + continuationToken, eventList = 
KubernetesApiClient.getResourcesAndContinuationToken("events?fieldSelector=type!=Normal&limit=#{@EVENTS_CHUNK_SIZE}") + $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}") + if (!eventList.nil? && !eventList.empty? && eventList.key?("items") && !eventList["items"].nil? && !eventList["items"].empty?) + newEventQueryState = parse_and_emit_records(eventList, eventQueryState, newEventQueryState, batchTime) + else + $log.warn "in_kube_events::enumerate:Received empty eventList" + end - events = eventList - $log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}") - eventInfo = KubernetesApiClient.getKubeResourceInfo("events") - $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}") + #If we receive a continuation token, make calls, process and flush data until we have processed all data + while (!continuationToken.nil? && !continuationToken.empty?) + continuationToken, eventList = KubernetesApiClient.getResourcesAndContinuationToken("events?fieldSelector=type!=Normal&limit=#{@EVENTS_CHUNK_SIZE}&continue=#{continuationToken}") + if (!eventList.nil? && !eventList.empty? && eventList.key?("items") && !eventList["items"].nil? && !eventList["items"].empty?) + newEventQueryState = parse_and_emit_records(eventList, eventQueryState, newEventQueryState, batchTime) + else + $log.warn "in_kube_events::enumerate:Received empty eventList" + end + end - if !eventInfo.nil? 
- events = JSON.parse(eventInfo.body) + # Setting this to nil so that we dont hold memory until GC kicks in + eventList = nil + writeEventQueryState(newEventQueryState) + rescue => errorStr + $log.warn "in_kube_events::enumerate:Failed in enumerate: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end + end # end enumerate - eventQueryState = getEventQueryState - newEventQueryState = [] + def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTime = Time.utc.iso8601) + currentTime = Time.now + emitTime = currentTime.to_f begin - if (!events.nil? && !events.empty? && !events["items"].nil?) - eventStream = MultiEventStream.new - events["items"].each do |items| - record = {} - # - Not sure if ingestion has the below mapping for this custom type. Fix it as part of fixed type conversion - record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated - eventId = items["metadata"]["uid"] + "/" + items["count"].to_s - newEventQueryState.push(eventId) - if !eventQueryState.empty? 
&& eventQueryState.include?(eventId) - next - end - record["ObjectKind"] = items["involvedObject"]["kind"] - record["Namespace"] = items["involvedObject"]["namespace"] - record["Name"] = items["involvedObject"]["name"] - record["Reason"] = items["reason"] - record["Message"] = items["message"] - record["Type"] = items["type"] - record["TimeGenerated"] = items["metadata"]["creationTimestamp"] - record["SourceComponent"] = items["source"]["component"] - record["FirstSeen"] = items["firstTimestamp"] - record["LastSeen"] = items["lastTimestamp"] - record["Count"] = items["count"] - if items["source"].key?("host") - record["Computer"] = items["source"]["host"] - else - record["Computer"] = (OMS::Common.get_hostname) - end - record['ClusterName'] = KubernetesApiClient.getClusterName - record["ClusterId"] = KubernetesApiClient.getClusterId - wrapper = { - "DataType" => "KUBE_EVENTS_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [record.each { |k, v| record[k] = v }], - } - eventStream.add(emitTime, wrapper) if wrapper + eventStream = MultiEventStream.new + events["items"].each do |items| + record = {} + # - Not sure if ingestion has the below mapping for this custom type. Fix it as part of fixed type conversion + record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + eventId = items["metadata"]["uid"] + "/" + items["count"].to_s + newEventQueryState.push(eventId) + if !eventQueryState.empty? 
&& eventQueryState.include?(eventId) + next end - router.emit_stream(@tag, eventStream) if eventStream - end - writeEventQueryState(newEventQueryState) + record["ObjectKind"] = items["involvedObject"]["kind"] + record["Namespace"] = items["involvedObject"]["namespace"] + record["Name"] = items["involvedObject"]["name"] + record["Reason"] = items["reason"] + record["Message"] = items["message"] + record["Type"] = items["type"] + record["TimeGenerated"] = items["metadata"]["creationTimestamp"] + record["SourceComponent"] = items["source"]["component"] + record["FirstSeen"] = items["firstTimestamp"] + record["LastSeen"] = items["lastTimestamp"] + record["Count"] = items["count"] + if items["source"].key?("host") + record["Computer"] = items["source"]["host"] + else + record["Computer"] = (OMS::Common.get_hostname) + end + record["ClusterName"] = KubernetesApiClient.getClusterName + record["ClusterId"] = KubernetesApiClient.getClusterId + wrapper = { + "DataType" => "KUBE_EVENTS_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [record.each { |k, v| record[k] = v }], + } + eventStream.add(emitTime, wrapper) if wrapper + end + router.emit_stream(@tag, eventStream) if eventStream rescue => errorStr $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end + end + return newEventQueryState end def run_periodic @mutex.lock done = @finished + @nextTimeToRun = Time.now + @waitTimeout = @run_interval until done - @condition.wait(@mutex, @run_interval) + @nextTimeToRun = @nextTimeToRun + @run_interval + @now = Time.now + if @nextTimeToRun <= @now + @waitTimeout = 1 + @nextTimeToRun = @now + else + @waitTimeout = @nextTimeToRun - @now + end + @condition.wait(@mutex, @waitTimeout) done = @finished @mutex.unlock if !done begin - $log.info("in_kube_events::run_periodic @ #{Time.now.utc.iso8601}") + $log.info("in_kube_events::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}") enumerate + 
$log.info("in_kube_events::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}") rescue => errorStr $log.warn "in_kube_events::run_periodic: enumerate Failed to retrieve kube events: #{errorStr}" ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 57ca07f64..0eebf395b 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -21,19 +21,22 @@ def initialize begin super require "yaml" - require "json" + require 'yajl/json_gem' + require "yajl" + require "time" @@cluster_id = KubernetesApiClient.getClusterId @resources = HealthKubernetesResources.instance @provider = HealthMonitorProvider.new(@@cluster_id, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) @@ApiGroupApps = "apps" + @@KubeInfraNamespace = "kube-system" rescue => e ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) end end include HealthModel - config_param :run_interval, :time, :default => "1m" + config_param :run_interval, :time, :default => 60 config_param :tag, :string, :default => "kubehealth.ReplicaSet" def configure(conf) @@ -83,10 +86,11 @@ def enumerate #HealthMonitorUtils.refresh_kubernetes_api_data(@@hmlog, nil) # we do this so that if the call fails, we get a response code/header etc. 
node_inventory_response = KubernetesApiClient.getKubeResourceInfo("nodes") - node_inventory = JSON.parse(node_inventory_response.body) - pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods") - pod_inventory = JSON.parse(pod_inventory_response.body) - replicaset_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("replicasets", api_group: @@ApiGroupApps).body) + node_inventory = Yajl::Parser.parse(StringIO.new(node_inventory_response.body)) + pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods?fieldSelector=metadata.namespace%3D#{@@KubeInfraNamespace}") + pod_inventory = Yajl::Parser.parse(StringIO.new(pod_inventory_response.body)) + replicaset_inventory_response = KubernetesApiClient.getKubeResourceInfo("replicasets?fieldSelector=metadata.namespace%3D#{@@KubeInfraNamespace}", api_group: @@ApiGroupApps) + replicaset_inventory = Yajl::Parser.parse(StringIO.new(replicaset_inventory_response.body)) @resources.node_inventory = node_inventory @resources.pod_inventory = pod_inventory @@ -108,8 +112,8 @@ def enumerate health_monitor_records.push(record) if record pods_ready_hash = HealthMonitorUtils.get_pods_ready_hash(@resources) - system_pods = pods_ready_hash.select { |k, v| v["namespace"] == "kube-system" } - workload_pods = pods_ready_hash.select { |k, v| v["namespace"] != "kube-system" } + system_pods = pods_ready_hash.keep_if { |k, v| v["namespace"] == @@KubeInfraNamespace } + workload_pods = Hash.new # pods_ready_hash.select{ |k, v| v["namespace"] != @@KubeInfraNamespace } system_pods_ready_percentage_records = process_pods_ready_percentage(system_pods, MonitorId::SYSTEM_WORKLOAD_PODS_READY_MONITOR_ID) system_pods_ready_percentage_records.each do |record| @@ -225,28 +229,28 @@ def process_pods_ready_percentage(pods_hash, config_monitor_id) hmlog = HealthMonitorUtils.get_log_handle records = [] - pods_hash.keys.each do |key| - workload_name = key - total_pods = pods_hash[workload_name]["totalPods"] - pods_ready = 
pods_hash[workload_name]["podsReady"] - namespace = pods_hash[workload_name]["namespace"] - workload_kind = pods_hash[workload_name]["kind"] - percent = pods_ready / total_pods * 100 - timestamp = Time.now.utc.iso8601 - - state = HealthMonitorUtils.compute_percentage_state(percent, monitor_config) - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "workload_name" => workload_name, "namespace" => namespace, "workload_kind" => workload_kind}} - monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(config_monitor_id, [@@cluster_id, namespace, workload_name]) - health_record = {} - time_now = Time.now.utc.iso8601 - health_record[HealthMonitorRecordFields::MONITOR_ID] = config_monitor_id - health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id - health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record - health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now - health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now - health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id - records.push(health_record) - end + pods_hash.keys.each do |key| + workload_name = key + total_pods = pods_hash[workload_name]["totalPods"] + pods_ready = pods_hash[workload_name]["podsReady"] + namespace = pods_hash[workload_name]["namespace"] + workload_kind = pods_hash[workload_name]["kind"] + percent = pods_ready / total_pods * 100 + timestamp = Time.now.utc.iso8601 + + state = HealthMonitorUtils.compute_percentage_state(percent, monitor_config) + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"totalPods" => total_pods, "podsReady" => pods_ready, "workload_name" => workload_name, "namespace" => namespace, "workload_kind" => workload_kind}} + monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(config_monitor_id, [@@cluster_id, namespace, workload_name]) + 
health_record = {} + time_now = Time.now.utc.iso8601 + health_record[HealthMonitorRecordFields::MONITOR_ID] = config_monitor_id + health_record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] = monitor_instance_id + health_record[HealthMonitorRecordFields::DETAILS] = health_monitor_record + health_record[HealthMonitorRecordFields::TIME_GENERATED] = time_now + health_record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED] = time_now + health_record[HealthMonitorRecordFields::CLUSTER_ID] = @@cluster_id + records.push(health_record) + end #@@hmlog.info "Successfully processed pods_ready_percentage for #{config_monitor_id} #{records.size}" return records end @@ -296,10 +300,11 @@ def process_node_condition_monitor(node_inventory) def initialize_inventory #this is required because there are other components, like the container cpu memory aggregator, that depends on the mapping being initialized node_inventory_response = KubernetesApiClient.getKubeResourceInfo("nodes") - node_inventory = JSON.parse(node_inventory_response.body) - pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods") - pod_inventory = JSON.parse(pod_inventory_response.body) - replicaset_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("replicasets", api_group: @@ApiGroupApps).body) + node_inventory = Yajl::Parser.parse(StringIO.new(node_inventory_response.body)) + pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods?fieldSelector=metadata.namespace%3D#{@@KubeInfraNamespace}") + pod_inventory = Yajl::Parser.parse(StringIO.new(pod_inventory_response.body)) + replicaset_inventory_response = KubernetesApiClient.getKubeResourceInfo("replicasets?fieldSelector=metadata.namespace%3D#{@@KubeInfraNamespace}", api_group: @@ApiGroupApps) + replicaset_inventory = Yajl::Parser.parse(StringIO.new(replicaset_inventory_response.body)) @resources.node_inventory = node_inventory @resources.pod_inventory = pod_inventory @@ -310,14 +315,25 @@ def initialize_inventory def 
run_periodic @mutex.lock done = @finished + @nextTimeToRun = Time.now + @waitTimeout = @run_interval until done - @condition.wait(@mutex, @run_interval) + @nextTimeToRun = @nextTimeToRun + @run_interval + @now = Time.now + if @nextTimeToRun <= @now + @waitTimeout = 1 + @nextTimeToRun = @now + else + @waitTimeout = @nextTimeToRun - @now + end + @condition.wait(@mutex, @waitTimeout) done = @finished @mutex.unlock if !done begin - @@hmlog.info("in_kube_health::run_periodic @ #{Time.now.utc.iso8601}") + @@hmlog.info("in_kube_health::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}") enumerate + @@hmlog.info("in_kube_health::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}") rescue => errorStr @@hmlog.warn "in_kube_health::run_periodic: enumerate Failed for kubeapi sourced data health: #{errorStr}" ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) diff --git a/source/code/plugin/in_kube_logs.rb b/source/code/plugin/in_kube_logs.rb deleted file mode 100644 index 119473819..000000000 --- a/source/code/plugin/in_kube_logs.rb +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -module Fluent - - class Kube_Logs_Input < Input - Plugin.register_input('kubelogs', self) - - @@KubeLogsStateFile = "/var/opt/microsoft/docker-cimprov/state/KubeLogQueryState.yaml" - - def initialize - super - require 'yaml' - require 'date' - require 'time' - require 'json' - - require_relative 'KubernetesApiClient' - require_relative 'oms_common' - require_relative 'omslog' - end - - config_param :run_interval, :time, :default => '1m' - config_param :tag, :string, :default => "oms.api.KubeLogs" - - def configure (conf) - super - end - - def start - if @run_interval - @finished = false - @condition = ConditionVariable.new - @mutex = Mutex.new - @thread = Thread.new(&method(:run_periodic)) - end - end - - def shutdown - if @run_interval - @mutex.synchronize { - @finished = true - @condition.signal - } - @thread.join - end - end - - def 
enumerate(podList = nil) - - namespace = ENV['OMS_KUBERNETES_LOGS_NAMESPACE'] - if namespace.nil? || namespace.empty? - return - end - - time = Time.now.to_f - if podList.nil? - pods = KubernetesApiClient.getPods(namespace) - else - pods = podList - end - logQueryState = getLogQueryState - newLogQueryState = {} - - pods.each do |pod| - record = {} - begin - pod['status']['containerStatuses'].each do |container| - - # if container['state']['running'] - # puts container['name'] + ' is running' - # end - - timeStamp = DateTime.now - - containerId = pod['metadata']['namespace'] + "_" + pod['metadata']['name'] + "_" + container['name'] - if !logQueryState.empty? && logQueryState[containerId] - timeStamp = DateTime.parse(logQueryState[containerId]) - end - - # Try to get logs for the container - begin - $log.debug "Getting logs for #{container['name']}" - logs = KubernetesApiClient.getContainerLogsSinceTime(pod['metadata']['namespace'], pod['metadata']['name'], container['name'], timeStamp.rfc3339(9), true) - $log.debug "got something back" - - # By default we don't change the timestamp (if no logs were returned or if there was a (hopefully transient) error in retrieval - newLogQueryState[containerId] = timeStamp.rfc3339(9) - - if !logs || logs.empty? 
- $log.info "no logs returned" - else - $log.debug "response size is #{logs.length}" - lines = logs.split("\n") - index = -1 - - # skip duplicates - for i in 0...lines.count - dateTime = DateTime.parse(lines[i].split(" ").first) - if (dateTime.to_time - timeStamp.to_time) > 0.0 - index = i - break - end - end - - if index >= 0 - $log.debug "starting from line #{index}" - for i in index...lines.count - record['Namespace'] = pod['metadata']['namespace'] - record['Pod'] = pod['metadata']['name'] - record['Container'] = container['name'] - record['Message'] = lines[i][(lines[i].index(' ') + 1)..(lines[i].length - 1)] - record['TimeGenerated'] = lines[i].split(" ").first - record['Node'] = pod['spec']['nodeName'] - record['Computer'] = OMS::Common.get_hostname - record['ClusterName'] = KubernetesApiClient.getClusterName - router.emit(@tag, time, record) if record - end - newLogQueryState[containerId] = lines.last.split(" ").first - else - newLogQueryState[containerId] = DateTime.now.rfc3339(9) - end - end - rescue => logException - $log.warn "Failed to retrieve logs for container: #{logException}" - $log.debug_backtrace(logException.backtrace) - end - end - # Update log query state only if logging was succesfful. 
- # TODO: May have a few duplicate lines in case of - writeLogQueryState(newLogQueryState) - rescue => errorStr - $log.warn "Exception raised in enumerate: #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) - end - end - end - - def run_periodic - @mutex.lock - done = @finished - until done - @condition.wait(@mutex, @run_interval) - done = @finished - @mutex.unlock - if !done - $log.debug "calling enumerate for KubeLogs" - enumerate - $log.debug "done with enumerate for KubeLogs" - end - @mutex.lock - end - @mutex.unlock - end - - def getLogQueryState - logQueryState = {} - begin - if File.file?(@@KubeLogsStateFile) - logQueryState = YAML.load_file(@@KubeLogsStateFile, {}) - end - rescue => errorStr - $log.warn "Failed to load query state #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) - end - return logQueryState - end - - def writeLogQueryState(logQueryState) - begin - File.write(@@KubeLogsStateFile, logQueryState.to_yaml) - rescue => errorStr - $log.warn "Failed to write query state #{errorStr.to_s}" - $log.debug_backtrace(errorStr.backtrace) - end - end - - end # Kube_Log_Input - -end # module - diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 0a0fd9d2e..fa0994f43 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -9,6 +9,7 @@ class Kube_nodeInventory_Input < Input @@MDMKubeNodeInventoryTag = "mdm.kubenodeinventory" @@promConfigMountPath = "/etc/config/settings/prometheus-data-collection-settings" @@AzStackCloudFileName = "/etc/kubernetes/host/azurestackcloud.json" + @@kubeperfTag = "oms.api.KubePerf" @@rsPromInterval = ENV["TELEMETRY_RS_PROM_INTERVAL"] @@rsPromFieldPassCount = ENV["TELEMETRY_RS_PROM_FIELDPASS_LENGTH"] @@ -21,15 +22,18 @@ class Kube_nodeInventory_Input < Input def initialize super require "yaml" - require "json" + require "yajl/json_gem" + require "yajl" + require "time" require_relative "KubernetesApiClient" require_relative 
"ApplicationInsightsUtility" require_relative "oms_common" require_relative "omslog" + @NODES_CHUNK_SIZE = "400" end - config_param :run_interval, :time, :default => "1m" + config_param :run_interval, :time, :default => 60 config_param :tag, :string, :default => "oms.containerinsights.KubeNodeInventory" def configure(conf) @@ -57,158 +61,217 @@ def shutdown end def enumerate - currentTime = Time.now - emitTime = currentTime.to_f - batchTime = currentTime.utc.iso8601 - telemetrySent = false + begin + nodeInventory = nil + currentTime = Time.now + batchTime = currentTime.utc.iso8601 - nodeInventory = nil + # Initializing continuation token to nil + continuationToken = nil + $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") + continuationToken, nodeInventory = KubernetesApiClient.getResourcesAndContinuationToken("nodes?limit=#{@NODES_CHUNK_SIZE}") + $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + if (!nodeInventory.nil? && !nodeInventory.empty? && nodeInventory.key?("items") && !nodeInventory["items"].nil? && !nodeInventory["items"].empty?) + parse_and_emit_records(nodeInventory, batchTime) + else + $log.warn "in_kube_nodes::enumerate:Received empty nodeInventory" + end - $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - nodeInfo = KubernetesApiClient.getKubeResourceInfo("nodes") - $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + #If we receive a continuation token, make calls, process and flush data until we have processed all data + while (!continuationToken.nil? && !continuationToken.empty?) + continuationToken, nodeInventory = KubernetesApiClient.getResourcesAndContinuationToken("nodes?limit=#{@NODES_CHUNK_SIZE}&continue=#{continuationToken}") + if (!nodeInventory.nil? && !nodeInventory.empty? && nodeInventory.key?("items") && !nodeInventory["items"].nil? 
&& !nodeInventory["items"].empty?) + parse_and_emit_records(nodeInventory, batchTime) + else + $log.warn "in_kube_nodes::enumerate:Received empty nodeInventory" + end + end - if !nodeInfo.nil? - nodeInventory = JSON.parse(nodeInfo.body) + # Setting this to nil so that we dont hold memory until GC kicks in + nodeInventory = nil + rescue => errorStr + $log.warn "in_kube_nodes::enumerate:Failed in enumerate: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end + end # end enumerate + def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) begin - if (!nodeInventory.nil? && !nodeInventory.empty?) - eventStream = MultiEventStream.new - containerNodeInventoryEventStream = MultiEventStream.new - if !nodeInventory["items"].nil? - #get node inventory - nodeInventory["items"].each do |items| - record = {} - # Sending records for ContainerNodeInventory - containerNodeInventoryRecord = {} - containerNodeInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated - containerNodeInventoryRecord["Computer"] = items["metadata"]["name"] - - record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated - record["Computer"] = items["metadata"]["name"] - record["ClusterName"] = KubernetesApiClient.getClusterName - record["ClusterId"] = KubernetesApiClient.getClusterId - record["CreationTimeStamp"] = items["metadata"]["creationTimestamp"] - record["Labels"] = [items["metadata"]["labels"]] - record["Status"] = "" - - if !items["spec"]["providerID"].nil? && !items["spec"]["providerID"].empty? 
- if File.file?(@@AzStackCloudFileName) # existence of this file indicates agent running on azstack - record["KubernetesProviderID"] = "azurestack" - else - record["KubernetesProviderID"] = items["spec"]["providerID"] - end - else - record["KubernetesProviderID"] = "onprem" - end + currentTime = Time.now + emitTime = currentTime.to_f + telemetrySent = false + eventStream = MultiEventStream.new + containerNodeInventoryEventStream = MultiEventStream.new + #get node inventory + nodeInventory["items"].each do |items| + record = {} + # Sending records for ContainerNodeInventory + containerNodeInventoryRecord = {} + containerNodeInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + containerNodeInventoryRecord["Computer"] = items["metadata"]["name"] - # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions. - # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we - # populate the KubeNodeInventory Status field. A possible value for this field could be "Ready OutofDisk" - # implying that the node is ready for hosting pods, however its out of disk. - - if items["status"].key?("conditions") && !items["status"]["conditions"].empty? - allNodeConditions = "" - items["status"]["conditions"].each do |condition| - if condition["status"] == "True" - if !allNodeConditions.empty? - allNodeConditions = allNodeConditions + "," + condition["type"] - else - allNodeConditions = condition["type"] - end - end - #collect last transition to/from ready (no matter ready is true/false) - if condition["type"] == "Ready" && !condition["lastTransitionTime"].nil? 
- record["LastTransitionTimeReady"] = condition["lastTransitionTime"] - end - end + record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + record["Computer"] = items["metadata"]["name"] + record["ClusterName"] = KubernetesApiClient.getClusterName + record["ClusterId"] = KubernetesApiClient.getClusterId + record["CreationTimeStamp"] = items["metadata"]["creationTimestamp"] + record["Labels"] = [items["metadata"]["labels"]] + record["Status"] = "" + + if !items["spec"]["providerID"].nil? && !items["spec"]["providerID"].empty? + if File.file?(@@AzStackCloudFileName) # existence of this file indicates agent running on azstack + record["KubernetesProviderID"] = "azurestack" + else + record["KubernetesProviderID"] = items["spec"]["providerID"] + end + else + record["KubernetesProviderID"] = "onprem" + end + + # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions. + # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we + # populate the KubeNodeInventory Status field. A possible value for this field could be "Ready OutofDisk" + # implying that the node is ready for hosting pods, however its out of disk. + + if items["status"].key?("conditions") && !items["status"]["conditions"].empty? + allNodeConditions = "" + items["status"]["conditions"].each do |condition| + if condition["status"] == "True" if !allNodeConditions.empty? - record["Status"] = allNodeConditions + allNodeConditions = allNodeConditions + "," + condition["type"] + else + allNodeConditions = condition["type"] end end - - nodeInfo = items["status"]["nodeInfo"] - record["KubeletVersion"] = nodeInfo["kubeletVersion"] - record["KubeProxyVersion"] = nodeInfo["kubeProxyVersion"] - containerNodeInventoryRecord["OperatingSystem"] = nodeInfo["osImage"] - dockerVersion = nodeInfo["containerRuntimeVersion"] - dockerVersion.slice! 
"docker://" - containerNodeInventoryRecord["DockerVersion"] = dockerVersion - # ContainerNodeInventory data for docker version and operating system. - containerNodeInventoryWrapper = { - "DataType" => "CONTAINER_NODE_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }], - } - containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper - - wrapper = { - "DataType" => "KUBE_NODE_INVENTORY_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [record.each { |k, v| record[k] = v }], - } - eventStream.add(emitTime, wrapper) if wrapper - # Adding telemetry to send node telemetry every 5 minutes - timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference / 60 - if (timeDifferenceInMinutes >= 10) - properties = {} - properties["Computer"] = record["Computer"] - properties["KubeletVersion"] = record["KubeletVersion"] - properties["OperatingSystem"] = nodeInfo["operatingSystem"] - properties["DockerVersion"] = dockerVersion - properties["KubernetesProviderID"] = record["KubernetesProviderID"] - properties["KernelVersion"] = nodeInfo["kernelVersion"] - properties["OSImage"] = nodeInfo["osImage"] - - capacityInfo = items["status"]["capacity"] - ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties) - - #telemetry about prometheus metric collections settings for replicaset - if (File.file?(@@promConfigMountPath)) - properties["rsPromInt"] = @@rsPromInterval - properties["rsPromFPC"] = @@rsPromFieldPassCount - properties["rsPromFDC"] = @@rsPromFieldDropCount - properties["rsPromServ"] = @@rsPromK8sServiceCount - properties["rsPromUrl"] = @@rsPromUrlCount - properties["rsPromMonPods"] = @@rsPromMonitorPods - properties["rsPromMonPodsNs"] = @@rsPromMonitorPodsNamespaceLength - end - 
ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties) - telemetrySent = true + #collect last transition to/from ready (no matter ready is true/false) + if condition["type"] == "Ready" && !condition["lastTransitionTime"].nil? + record["LastTransitionTimeReady"] = condition["lastTransitionTime"] end end + if !allNodeConditions.empty? + record["Status"] = allNodeConditions + end end - router.emit_stream(@tag, eventStream) if eventStream - router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream - router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream - if telemetrySent == true - @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i + + nodeInfo = items["status"]["nodeInfo"] + record["KubeletVersion"] = nodeInfo["kubeletVersion"] + record["KubeProxyVersion"] = nodeInfo["kubeProxyVersion"] + containerNodeInventoryRecord["OperatingSystem"] = nodeInfo["osImage"] + dockerVersion = nodeInfo["containerRuntimeVersion"] + dockerVersion.slice! "docker://" + containerNodeInventoryRecord["DockerVersion"] = dockerVersion + # ContainerNodeInventory data for docker version and operating system. 
+ containerNodeInventoryWrapper = { + "DataType" => "CONTAINER_NODE_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }], + } + containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper + + wrapper = { + "DataType" => "KUBE_NODE_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [record.each { |k, v| record[k] = v }], + } + eventStream.add(emitTime, wrapper) if wrapper + # Adding telemetry to send node telemetry every 10 minutes + timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= 10) + properties = {} + properties["Computer"] = record["Computer"] + properties["KubeletVersion"] = record["KubeletVersion"] + properties["OperatingSystem"] = nodeInfo["operatingSystem"] + properties["DockerVersion"] = dockerVersion + properties["KubernetesProviderID"] = record["KubernetesProviderID"] + properties["KernelVersion"] = nodeInfo["kernelVersion"] + properties["OSImage"] = nodeInfo["osImage"] + + capacityInfo = items["status"]["capacity"] + ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties) + + #telemetry about prometheus metric collections settings for replicaset + if (File.file?(@@promConfigMountPath)) + properties["rsPromInt"] = @@rsPromInterval + properties["rsPromFPC"] = @@rsPromFieldPassCount + properties["rsPromFDC"] = @@rsPromFieldDropCount + properties["rsPromServ"] = @@rsPromK8sServiceCount + properties["rsPromUrl"] = @@rsPromUrlCount + properties["rsPromMonPods"] = @@rsPromMonitorPods + properties["rsPromMonPodsNs"] = @@rsPromMonitorPodsNamespaceLength + end + ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties) + telemetrySent = true end - @@istestvar = ENV["ISTEST"] - if (!@@istestvar.nil? 
&& !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0) - $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream + router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream + if telemetrySent == true + @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i + end + @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0) + $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + #:optimize:kubeperf merge + begin + #if(!nodeInventory.empty?) + nodeMetricDataItems = [] + #allocatable metrics @ node level + nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "cpu", "cpuAllocatableNanoCores", batchTime)) + nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "memory", "memoryAllocatableBytes", batchTime)) + #capacity metrics @ node level + nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores", batchTime)) + nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes", batchTime)) + + kubePerfEventStream = MultiEventStream.new + + nodeMetricDataItems.each do |record| + record["DataType"] = "LINUX_PERF_BLOB" + record["IPName"] = "LogManagement" + kubePerfEventStream.add(emitTime, record) if record end + #end + router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream + rescue => errorStr + $log.warn "Failed in enumerate for KubePerf from in_kube_nodes : #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end + #:optimize:end kubeperf 
merge + rescue => errorStr $log.warn "Failed to retrieve node inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end + $log.warn "in_kube_nodes::parse_and_emit_records:End #{Time.now.utc.iso8601}" end def run_periodic @mutex.lock done = @finished + @nextTimeToRun = Time.now + @waitTimeout = @run_interval until done - @condition.wait(@mutex, @run_interval) + @nextTimeToRun = @nextTimeToRun + @run_interval + @now = Time.now + if @nextTimeToRun <= @now + @waitTimeout = 1 + @nextTimeToRun = @now + else + @waitTimeout = @nextTimeToRun - @now + end + @condition.wait(@mutex, @waitTimeout) done = @finished @mutex.unlock if !done begin - $log.info("in_kube_nodes::run_periodic @ #{Time.now.utc.iso8601}") + $log.info("in_kube_nodes::run_periodic.enumerate.start #{Time.now.utc.iso8601}") enumerate + $log.info("in_kube_nodes::run_periodic.enumerate.end #{Time.now.utc.iso8601}") rescue => errorStr $log.warn "in_kube_nodes::run_periodic: enumerate Failed to retrieve node inventory: #{errorStr}" ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) diff --git a/source/code/plugin/in_kube_perf.rb b/source/code/plugin/in_kube_perf.rb deleted file mode 100644 index 8b571139d..000000000 --- a/source/code/plugin/in_kube_perf.rb +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -module Fluent - - class Kube_Perf_Input < Input - Plugin.register_input('kubeperf', self) - - def initialize - super - require 'yaml' - require 'json' - - require_relative 'KubernetesApiClient' - require_relative 'oms_common' - require_relative 'omslog' - end - - config_param :run_interval, :time, :default => '1m' - config_param :tag, :string, :default => "oms.api.KubePerf" - - def configure (conf) - super - end - - def start - if @run_interval - @finished = false - @condition = ConditionVariable.new - @mutex = Mutex.new - @thread = Thread.new(&method(:run_periodic)) - end - end - - def shutdown 
- if @run_interval - @mutex.synchronize { - @finished = true - @condition.signal - } - @thread.join - end - end - - def enumerate() - time = Time.now.to_f - begin - eventStream = MultiEventStream.new - - $log.info("in_kube_perf::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}") - #get resource requests & resource limits per container as perf data - podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo('pods').body) - $log.info("in_kube_perf::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") - if(!podInventory.empty?) - containerMetricDataItems = [] - hostName = (OMS::Common.get_hostname) - containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "cpu","cpuRequestNanoCores")) - containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "memory","memoryRequestBytes")) - containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "cpu","cpuLimitNanoCores")) - containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "memory","memoryLimitBytes")) - - containerMetricDataItems.each do |record| - record['DataType'] = "LINUX_PERF_BLOB" - record['IPName'] = "LogManagement" - eventStream.add(time, record) if record - #router.emit(@tag, time, record) if record - end - end - - #get allocatable limits per node as perf data - # Node capacity is different from node allocatable. Allocatable is what is avaialble for allocating pods. 
- # In theory Capacity = Allocatable + kube-reserved + system-reserved + eviction-threshold - # For more details refer to https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/#node-allocatable - $log.info("in_kube_perf::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo('nodes').body) - $log.info("in_kube_perf::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") - if(!nodeInventory.empty?) - nodeMetricDataItems = [] - #allocatable metrics @ node level - nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "cpu", "cpuAllocatableNanoCores")) - nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "memory", "memoryAllocatableBytes")) - #capacity metrics @ node level - nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores")) - nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes")) - - nodeMetricDataItems.each do |record| - record['DataType'] = "LINUX_PERF_BLOB" - record['IPName'] = "LogManagement" - eventStream.add(time, record) if record - #router.emit(@tag, time, record) if record - end - end - router.emit_stream(@tag, eventStream) if eventStream - rescue => errorStr - $log.warn "Failed to retrieve metric data: #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) - end - end - - def run_periodic - @mutex.lock - done = @finished - until done - @condition.wait(@mutex, @run_interval) - done = @finished - @mutex.unlock - if !done - begin - $log.info("in_kube_perf::run_periodic @ #{Time.now.utc.iso8601}") - enumerate - rescue => errorStr - $log.warn "in_kube_perf::run_periodic: enumerate Failed to retrieve kube perf metrics: #{errorStr}" - end - end - @mutex.lock - end - @mutex.unlock - end - end # Kube_Perf_Input -end # module 
diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 1dd029b22..28b20bfc0 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -7,20 +7,30 @@ class Kube_PodInventory_Input < Input @@MDMKubePodInventoryTag = "mdm.kubepodinventory" @@hostName = (OMS::Common.get_hostname) + @@kubeperfTag = "oms.api.KubePerf" + @@kubeservicesTag = "oms.containerinsights.KubeServices" def initialize super require "yaml" - require "json" + require "yajl/json_gem" + require "yajl" require "set" + require "time" require_relative "KubernetesApiClient" require_relative "ApplicationInsightsUtility" require_relative "oms_common" require_relative "omslog" + + @PODS_CHUNK_SIZE = "1500" + @podCount = 0 + @controllerSet = Set.new [] + @winContainerCount = 0 + @controllerData = {} end - config_param :run_interval, :time, :default => "1m" + config_param :run_interval, :time, :default => 60 config_param :tag, :string, :default => "oms.containerinsights.KubePodInventory" def configure(conf) @@ -48,33 +58,77 @@ def shutdown end def enumerate(podList = nil) - podInventory = podList - $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInfo = KubernetesApiClient.getKubeResourceInfo("pods") - $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") + begin + podInventory = podList + telemetryFlush = false + @podCount = 0 + @controllerSet = Set.new [] + @winContainerCount = 0 + @controllerData = {} + currentTime = Time.now + batchTime = currentTime.utc.iso8601 - if !podInfo.nil? 
- podInventory = JSON.parse(podInfo.body) - end + # Get services first so that we dont need to make a call for very chunk + $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") + serviceInfo = KubernetesApiClient.getKubeResourceInfo("services") + # serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo("services").body) + $log.info("in_kube_podinventory::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}") - begin - if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].empty?) - #get pod inventory & services - $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") - serviceList = nil - serviceInfo = KubernetesApiClient.getKubeResourceInfo("services") - - if !serviceInfo.nil? - serviceList = JSON.parse(serviceInfo.body) - end - - $log.info("in_kube_podinventory::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}") - parse_and_emit_records(podInventory, serviceList) + if !serviceInfo.nil? + $log.info("in_kube_podinventory::enumerate:Start:Parsing services data using yajl @ #{Time.now.utc.iso8601}") + serviceList = Yajl::Parser.parse(StringIO.new(serviceInfo.body)) + $log.info("in_kube_podinventory::enumerate:End:Parsing services data using yajl @ #{Time.now.utc.iso8601}") + serviceInfo = nil + end + + # Initializing continuation token to nil + continuationToken = nil + $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}") + continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}") + $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") + if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) 
+ parse_and_emit_records(podInventory, serviceList, batchTime) else - $log.warn "Received empty podInventory" + $log.warn "in_kube_podinventory::enumerate:Received empty podInventory" + end + + #If we receive a continuation token, make calls, process and flush data until we have processed all data + while (!continuationToken.nil? && !continuationToken.empty?) + continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}&continue=#{continuationToken}") + if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) + parse_and_emit_records(podInventory, serviceList, batchTime) + else + $log.warn "in_kube_podinventory::enumerate:Received empty podInventory" + end + end + + # Setting these to nil so that we dont hold memory until GC kicks in + podInventory = nil + serviceList = nil + + # Adding telemetry to send pod telemetry every 5 minutes + timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= 5) + telemetryFlush = true + end + + # Flush AppInsights telemetry once all the processing is done + if telemetryFlush == true + telemetryProperties = {} + telemetryProperties["Computer"] = @@hostName + ApplicationInsightsUtility.sendCustomEvent("KubePodInventoryHeartBeatEvent", telemetryProperties) + ApplicationInsightsUtility.sendMetricTelemetry("PodCount", @podCount, {}) + telemetryProperties["ControllerData"] = @controllerData.to_json + ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", @controllerSet.length, telemetryProperties) + if @winContainerCount > 0 + telemetryProperties["ClusterWideWindowsContainersCount"] = @winContainerCount + ApplicationInsightsUtility.sendCustomEvent("WindowsContainerInventoryEvent", telemetryProperties) + end + @@podTelemetryTimeTracker = DateTime.now.to_time.to_i end rescue => errorStr - 
$log.warn "Failed in enumerate pod inventory: #{errorStr}" + $log.warn "in_kube_podinventory::enumerate:Failed in enumerate: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end @@ -192,15 +246,12 @@ def getContainerEnvironmentVariables(pod, clusterCollectEnvironmentVar) end end - def parse_and_emit_records(podInventory, serviceList) + def parse_and_emit_records(podInventory, serviceList, batchTime = Time.utc.iso8601) currentTime = Time.now emitTime = currentTime.to_f - batchTime = currentTime.utc.iso8601 + #batchTime = currentTime.utc.iso8601 eventStream = MultiEventStream.new - controllerSet = Set.new [] - controllerData = {} - telemetryFlush = false - winContainerCount = 0 + begin #begin block start # Getting windows nodes from kubeapi winNodes = KubernetesApiClient.getWindowsNodesArray @@ -283,24 +334,17 @@ def parse_and_emit_records(podInventory, serviceList) record["ClusterId"] = KubernetesApiClient.getClusterId record["ClusterName"] = KubernetesApiClient.getClusterName record["ServiceName"] = getServiceNameFromLabels(items["metadata"]["namespace"], items["metadata"]["labels"], serviceList) - # Adding telemetry to send pod telemetry every 5 minutes - timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference / 60 - if (timeDifferenceInMinutes >= 5) - telemetryFlush = true - end + if !items["metadata"]["ownerReferences"].nil? record["ControllerKind"] = items["metadata"]["ownerReferences"][0]["kind"] record["ControllerName"] = items["metadata"]["ownerReferences"][0]["name"] - if telemetryFlush == true - controllerSet.add(record["ControllerKind"] + record["ControllerName"]) - #Adding controller kind to telemetry ro information about customer workload - if (controllerData[record["ControllerKind"]].nil?) 
- controllerData[record["ControllerKind"]] = 1 - else - controllerValue = controllerData[record["ControllerKind"]] - controllerData[record["ControllerKind"]] += 1 - end + @controllerSet.add(record["ControllerKind"] + record["ControllerName"]) + #Adding controller kind to telemetry ro information about customer workload + if (@controllerData[record["ControllerKind"]].nil?) + @controllerData[record["ControllerKind"]] = 1 + else + controllerValue = @controllerData[record["ControllerKind"]] + @controllerData[record["ControllerKind"]] += 1 end end podRestartCount = 0 @@ -418,7 +462,7 @@ def parse_and_emit_records(podInventory, serviceList) end end # Send container inventory records for containers on windows nodes - winContainerCount += containerInventoryRecords.length + @winContainerCount += containerInventoryRecords.length containerInventoryRecords.each do |cirecord| if !cirecord.nil? ciwrapper = { @@ -433,19 +477,66 @@ def parse_and_emit_records(podInventory, serviceList) router.emit_stream(@tag, eventStream) if eventStream router.emit_stream(@@MDMKubePodInventoryTag, eventStream) if eventStream - if telemetryFlush == true - telemetryProperties = {} - telemetryProperties["Computer"] = @@hostName - ApplicationInsightsUtility.sendCustomEvent("KubePodInventoryHeartBeatEvent", telemetryProperties) - ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory["items"].length, {}) - telemetryProperties["ControllerData"] = controllerData.to_json - ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length, telemetryProperties) - if winContainerCount > 0 - telemetryProperties["ClusterWideWindowsContainersCount"] = winContainerCount - ApplicationInsightsUtility.sendCustomEvent("WindowsContainerInventoryEvent", telemetryProperties) + #:optimize:kubeperf merge + begin + #if(!podInventory.empty?) 
+ containerMetricDataItems = [] + #hostName = (OMS::Common.get_hostname) + containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "cpu", "cpuRequestNanoCores", batchTime)) + containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "memory", "memoryRequestBytes", batchTime)) + containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "cpu", "cpuLimitNanoCores", batchTime)) + containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "memory", "memoryLimitBytes", batchTime)) + + kubePerfEventStream = MultiEventStream.new + + containerMetricDataItems.each do |record| + record["DataType"] = "LINUX_PERF_BLOB" + record["IPName"] = "LogManagement" + kubePerfEventStream.add(emitTime, record) if record end - @@podTelemetryTimeTracker = DateTime.now.to_time.to_i + #end + router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream + rescue => errorStr + $log.warn "Failed in parse_and_emit_record for KubePerf from in_kube_podinventory : #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end + #:optimize:end kubeperf merge + + #:optimize:start kubeservices merge + begin + if (!serviceList.nil? && !serviceList.empty?) 
+ kubeServicesEventStream = MultiEventStream.new + serviceList["items"].each do |items| + kubeServiceRecord = {} + kubeServiceRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + kubeServiceRecord["ServiceName"] = items["metadata"]["name"] + kubeServiceRecord["Namespace"] = items["metadata"]["namespace"] + kubeServiceRecord["SelectorLabels"] = [items["spec"]["selector"]] + kubeServiceRecord["ClusterId"] = KubernetesApiClient.getClusterId + kubeServiceRecord["ClusterName"] = KubernetesApiClient.getClusterName + kubeServiceRecord["ClusterIP"] = items["spec"]["clusterIP"] + kubeServiceRecord["ServiceType"] = items["spec"]["type"] + # : Add ports and status fields + kubeServicewrapper = { + "DataType" => "KUBE_SERVICES_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [kubeServiceRecord.each { |k, v| kubeServiceRecord[k] = v }], + } + kubeServicesEventStream.add(emitTime, kubeServicewrapper) if kubeServicewrapper + end + router.emit_stream(@@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream + end + rescue => errorStr + $log.warn "Failed in parse_and_emit_record for KubeServices from in_kube_podinventory : #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + #:optimize:end kubeservices merge + + #Updating value for AppInsights telemetry + @podCount += podInventory["items"].length + @@istestvar = ENV["ISTEST"] if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && eventStream.count > 0) $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") @@ -460,14 +551,25 @@ def parse_and_emit_records(podInventory, serviceList) def run_periodic @mutex.lock done = @finished + @nextTimeToRun = Time.now + @waitTimeout = @run_interval until done - @condition.wait(@mutex, @run_interval) + @nextTimeToRun = @nextTimeToRun + @run_interval + @now = Time.now + if @nextTimeToRun <= @now + @waitTimeout = 1 + @nextTimeToRun = @now + else + @waitTimeout = @nextTimeToRun - @now + end + @condition.wait(@mutex, @waitTimeout) done = @finished @mutex.unlock if !done begin - $log.info("in_kube_podinventory::run_periodic @ #{Time.now.utc.iso8601}") + $log.info("in_kube_podinventory::run_periodic.enumerate.start #{Time.now.utc.iso8601}") enumerate + $log.info("in_kube_podinventory::run_periodic.enumerate.end #{Time.now.utc.iso8601}") rescue => errorStr $log.warn "in_kube_podinventory::run_periodic: enumerate Failed to retrieve pod inventory: #{errorStr}" ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb deleted file mode 100644 index 7cd703620..000000000 --- a/source/code/plugin/in_kube_services.rb +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -module Fluent - class Kube_Services_Input < Input - Plugin.register_input("kubeservices", self) - - def initialize - super - require "yaml" - require "json" - - require_relative "KubernetesApiClient" - require_relative "oms_common" - require_relative "omslog" - require_relative "ApplicationInsightsUtility" - end - - config_param :run_interval, :time, :default => "1m" - config_param :tag, :string, :default => "oms.containerinsights.KubeServices" - - def configure(conf) - super - end - - def start - if @run_interval - @finished = false - @condition = ConditionVariable.new - @mutex = Mutex.new - @thread = 
Thread.new(&method(:run_periodic)) - end - end - - def shutdown - if @run_interval - @mutex.synchronize { - @finished = true - @condition.signal - } - @thread.join - end - end - - def enumerate - currentTime = Time.now - emitTime = currentTime.to_f - batchTime = currentTime.utc.iso8601 - - serviceList = nil - - $log.info("in_kube_services::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") - serviceInfo = KubernetesApiClient.getKubeResourceInfo("services") - $log.info("in_kube_services::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}") - - if !serviceInfo.nil? - serviceList = JSON.parse(serviceInfo.body) - end - - begin - if (!serviceList.nil? && !serviceList.empty?) - eventStream = MultiEventStream.new - serviceList["items"].each do |items| - record = {} - record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated - record["ServiceName"] = items["metadata"]["name"] - record["Namespace"] = items["metadata"]["namespace"] - record["SelectorLabels"] = [items["spec"]["selector"]] - record["ClusterId"] = KubernetesApiClient.getClusterId - record["ClusterName"] = KubernetesApiClient.getClusterName - record["ClusterIP"] = items["spec"]["clusterIP"] - record["ServiceType"] = items["spec"]["type"] - # : Add ports and status fields - wrapper = { - "DataType" => "KUBE_SERVICES_BLOB", - "IPName" => "ContainerInsights", - "DataItems" => [record.each { |k, v| record[k] = v }], - } - eventStream.add(emitTime, wrapper) if wrapper - end - router.emit_stream(@tag, eventStream) if eventStream - end - rescue => errorStr - $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - end - - def run_periodic - @mutex.lock - done = @finished - until done - @condition.wait(@mutex, @run_interval) - done = @finished - @mutex.unlock - if !done - begin - $log.info("in_kube_services::run_periodic @ #{Time.now.utc.iso8601}") - enumerate - rescue => errorStr - 
$log.warn "in_kube_services::run_periodic: enumerate Failed to kube services: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - end - @mutex.lock - end - @mutex.unlock - end - end # Kube_Services_Input -end # module diff --git a/source/code/plugin/in_win_cadvisor_perf.rb b/source/code/plugin/in_win_cadvisor_perf.rb index 2e5f839e6..695a686cf 100644 --- a/source/code/plugin/in_win_cadvisor_perf.rb +++ b/source/code/plugin/in_win_cadvisor_perf.rb @@ -10,7 +10,8 @@ class Win_CAdvisor_Perf_Input < Input def initialize super require "yaml" - require "json" + require 'yajl/json_gem' + require "time" require_relative "CAdvisorMetricsAPIClient" require_relative "KubernetesApiClient" @@ -18,7 +19,7 @@ def initialize require_relative "omslog" end - config_param :run_interval, :time, :default => "1m" + config_param :run_interval, :time, :default => 60 config_param :tag, :string, :default => "oms.api.wincadvisorperf" config_param :mdmtag, :string, :default => "mdm.cadvisorperf" @@ -60,13 +61,13 @@ def enumerate() $log.info "in_win_cadvisor_perf: Getting windows nodes" nodes = KubernetesApiClient.getWindowsNodes() if !nodes.nil? - @@winNodes = KubernetesApiClient.getWindowsNodes() + @@winNodes = nodes end $log.info "in_win_cadvisor_perf : Successuly got windows nodes after 5 minute interval" @@winNodeQueryTimeTracker = DateTime.now.to_time.to_i end @@winNodes.each do |winNode| - metricData = CAdvisorMetricsAPIClient.getMetrics(winNode) + metricData = CAdvisorMetricsAPIClient.getMetrics(winNode: winNode, metricTime: Time.now.utc.iso8601) metricData.each do |record| if !record.empty? 
record["DataType"] = "LINUX_PERF_BLOB" @@ -100,14 +101,25 @@ def enumerate() def run_periodic @mutex.lock done = @finished + @nextTimeToRun = Time.now + @waitTimeout = @run_interval until done - @condition.wait(@mutex, @run_interval) + @nextTimeToRun = @nextTimeToRun + @run_interval + @now = Time.now + if @nextTimeToRun <= @now + @waitTimeout = 1 + @nextTimeToRun = @now + else + @waitTimeout = @nextTimeToRun - @now + end + @condition.wait(@mutex, @waitTimeout) done = @finished @mutex.unlock if !done begin - $log.info("in_win_cadvisor_perf::run_periodic @ #{Time.now.utc.iso8601}") + $log.info("in_win_cadvisor_perf::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}") enumerate + $log.info("in_win_cadvisor_perf::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}") rescue => errorStr $log.warn "in_win_cadvisor_perf::run_periodic: enumerate Failed to retrieve cadvisor perf metrics for windows nodes: #{errorStr}" end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb b/source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb index 8f4677044..60838e215 100644 --- a/source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb +++ b/source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb @@ -1,4 +1,4 @@ -require 'json' +require 'yajl/json_gem' module ApplicationInsights module Channel diff --git a/source/code/plugin/lib/application_insights/channel/sender_base.rb b/source/code/plugin/lib/application_insights/channel/sender_base.rb index 2431bf748..004b4722f 100644 --- a/source/code/plugin/lib/application_insights/channel/sender_base.rb +++ b/source/code/plugin/lib/application_insights/channel/sender_base.rb @@ -1,4 +1,4 @@ -require 'json' +require 'yajl/json_gem' require 'net/http' require 'openssl' require 'stringio' diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index b8d10090d..0a4e601b2 100644 --- 
a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -12,7 +12,7 @@ def initialize require "net/http" require "net/https" require "uri" - require "json" + require 'yajl/json_gem' require_relative "KubernetesApiClient" require_relative "ApplicationInsightsUtility" From 852680238a0675e67de45ccd5ba55b3f6610706c Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Tue, 3 Dec 2019 16:41:01 -0800 Subject: [PATCH 146/160] Update Readme --- README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/README.md b/README.md index 4674700c4..ff3e2890c 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,31 @@ additional questions or comments. Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) +### 12/03/2019 - +##### Version microsoft/oms:ciprod12032019 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod12032019 +- Fix scheduler for all input plugins +- Fix liveness probe +- Reduce chunk sizes for all fluentD buffers to support larger clusters (nodes & pods) +- Chunk Kubernetes API calls (pods,nodes,events) +- Use HTTP.start instead of HTTP.new +- Merge KubePerf into KubePods & KubeNodes +- Merge KubeServices into KubePod +- Use stream based yajl for JSON parsing +- Health - Query only kube-system pods +- Health - Use keep_if instead of select +- Container log enrichment (turned OFF by default for TimeOfCommand, ContainerName & ContainerImage) +- Application Insights Telemetry - Async +- Fix metricTime to be batch time for all metric input plugins +- Close socket connections properly for DockerAPIClient +- Fix top un handled exceptions in Kubernetes API Client and pod inventory +- Fix retries, wait between retries, chunk size, thread counts to be consistent for all FluentD workflows +- Back-off for containerlog enrichment K8S API calls +- Add new regions (3) for Azure Monitor Custom metrics +- Increase the cpu & memory limits for replica-set to support larger 
clusters (nodes & pods) +- Move to Ubuntu 18.04 LTS +- Support for Kubernetes 1.16 +- Use ifconfig for detecting network connectivity issues + ### 10/11/2019 - ##### Version microsoft/oms:ciprod10112019 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10112019 - Update prometheus config scraping capability to restrict collecting metrics from pods in specific namespaces. From c766d73ccbc55e3098a72f3c6b55a7c68ed06bab Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 4 Dec 2019 11:46:18 -0800 Subject: [PATCH 147/160] add back timeofcommand (#310) --- source/code/go/src/plugins/oms.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 834726c93..8dfaf0e7e 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -761,12 +761,13 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { Image: stringMap["Image"], Name: stringMap["Name"], } - } else { // dont collect timeofcommand field as its part of container log enrivhment + } else { // dont collect timeofcommand field as its part of container log enrichment [But currently we dont know the ux behavior , so waiting for ux fix (LA ux)] dataItem = DataItem{ ID: stringMap["Id"], LogEntry: stringMap["LogEntry"], LogEntrySource: stringMap["LogEntrySource"], LogEntryTimeStamp: stringMap["LogEntryTimeStamp"], + LogEntryTimeOfCommand: start.Format(time.RFC3339), SourceSystem: stringMap["SourceSystem"], Computer: Computer, Image: stringMap["Image"], From 8dfa313161f17151b25040870797d0f4938b20df Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 4 Dec 2019 12:06:09 -0800 Subject: [PATCH 148/160] update readme for timeofcommand fix (#314) --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ff3e2890c..49c6d1fe4 100644 --- a/README.md +++ b/README.md @@ -11,8 +11,8 @@ additional questions or comments. 
Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) -### 12/03/2019 - -##### Version microsoft/oms:ciprod12032019 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod12032019 +### 12/04/2019 - +##### Version microsoft/oms:ciprod12042019 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod12042019 - Fix scheduler for all input plugins - Fix liveness probe - Reduce chunk sizes for all fluentD buffers to support larger clusters (nodes & pods) @@ -23,7 +23,7 @@ Note : The agent version(s) below has dates (ciprod), which indicate t - Use stream based yajl for JSON parsing - Health - Query only kube-system pods - Health - Use keep_if instead of select -- Container log enrichment (turned OFF by default for TimeOfCommand, ContainerName & ContainerImage) +- Container log enrichment (turned OFF by default for ContainerName & ContainerImage) - Application Insights Telemetry - Async - Fix metricTime to be batch time for all metric input plugins - Close socket connections properly for DockerAPIClient From a0984af9984d04e6f8a364a778e9a8a412365ab4 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 4 Dec 2019 12:18:02 -0800 Subject: [PATCH 149/160] Merge from ci_feature_prod into ci_feature (fix put back timeofcommand) (#311) (#316) * Updatng release history * fixing the plugin logs for emit stream * updating log message * Remove Log Processing from fluentd configuration * Remove plugin references from base_container.data * Dilipr/fluent bit log processing (#126) * Build out_oms.so and include in docker-cimprov package * Adding fluent-bit-config file to base container * PR Feedback * Adding out_oms.conf to base_container.data * PR Feedback * Making the critical section as small as possible * PR Feedback * Fixing the newline bug for Computer, and changing containerId to Id * Dilipr/glide updates (#127) * Updating glide.* files to include lumberjack * containerID="" for pull issues * 
Using KubeAPI for getting image,name. Adding more logs (#129) * Using KubeAPI for getting image,name. Adding more logs * Moving log file and state file to within the omsagent container * Changing log and state paths * Dilipr/mark comments (#130) * Marks Comments + Error Handling * Drop records from files that are not in k8s format * Remove unnecessary log line' * Adding Log to the file that doesn't conform to the expected format * Rashmi/segfault latest (#132) * adding null checks in all providers * fixing type * fixing type * adding more null checks * update cjson * Adding a missed null check (#135) * reusing some variables (#136) * Rashmi/cjson delete null check (#138) * adding null check for cjson-delete * null chk * removing null check * updating log level to debug for some provider workflows (#139) * Fixing CPU Utilization and removing Fluent-bit filters (#140) Removing fluent-bit filters, CPU optimizations * Minor tweaks 1. Remove some logging 2. Added more Error Handling 3. Continue when there is an error with k8s api (#141) * Removing some logs, added more error checking, continue on kube-api error * Return FLB OK for json Marshall error, instead of RETRY * * Change FluentBit flush interval to 30 secs (from 5 secs) * Remove ContainerPerf, ContainerServiceLog,ContainerProcess (OMI workflows) for Daemonset * Container Log Telemetry * Fixing an issue with Send Init Event if Telemetry is not initialized properly, tab to whitespace in conf file * PR feedback * PR feedback * Sending an event every 5 mins(Heartbeat) (#146) * PR feedback to cleanup removed workflows * updating agent version for telemetry * updating agent version * Telemetry Updates (#149) * Telemetry Fixes 1. Added Log Generation Rate 2. Fixed parsing bugs 3. 
Added code to send Exceptions/errors * PR Feedback * Changes to send omsagent/omsagent-rs kubectl logs to App Insights (#159) * Changes to send omsagent/omsagent-rs kubectl logs to App Insights * PR Feedback * Rashmi/fluentd docker inventory (#160) * first stab * changes * changes * docker util changes * working tested util * input plugin and conf * changes * changes * changes * changes * changes * working containerinventory * fixing omi removal from container.conf * removing comments * file write and read * deleted containers working * changes * changes * socket timeout * deleting test files * adding log * fixing comment * appinsights changes * changes * tel changes * changes * changes * changes * changes * lib changes * changes * changes * fixes * PR comments * changes * updating the ownership * changes * changes * changes to container data * removing comment * changes * adding collection time * bug fix * env string truncation * changes for acs-engine test * Fix Telemetry Bug -- Initialize Telemetry Client after Initializing all required properties (#162) * Fix kube events memory leak due to yaml serialization for > 5k events (#163) * Setting Timeout for HTTP Client in PostDataHelper in outoms go plugin(#164) * Vishwa/perftelemetry 2 (#165) * add cpu usage telemetry for ds & rs * add cpu & memory usage telemetry for ds & rs * environment variable fix (#166) * environment variable fix * updating agent version * Fixing a bug where we were crashing due to container statuses not present when not was lost (#167) * Updating title * updating right versions for last release * Updating the break condition to look for end of response (#168) * Updating the break condition to look for end of response * changes for docker response * updating AgentVersion for telemetry * Updating readme for latest release changes * Changes - (#173) * use /var/log for state * new metric ContainerLogsAgentSideLatencyMs * new field 'timeOfComand' * Rashmi/kubenodeinventory (#174) * 
containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * Get cpuusage from usageseconds (#175) * Rashmi/kubenodeinventory (#176) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs * Rashmi/kubenodeinventory (#178) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs * changes to fixed type * removing comments * changes for fixed type * Fixing an issue on the cpurate metric, which happens for the first time (when cache is empty) (#179) * Rashmi/kubenodeinventory (#180) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs * changes to fixed type * removing comments * changes for fixed type * adding kubelet version as a dimension * Exclude docker containers from container inventory 
(#181) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs * changes to fixed type * removing comments * changes for fixed type * adding kubelet version as a dimension * Excluding raw docker containers from container inventory * making labels key case insensitive * make poduid label case insensitive * Exclude pauseamd64 containers from container inventory (#182) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs * changes to fixed type * removing comments * changes for fixed type * adding kubelet version as a dimension * Excluding raw docker containers from container inventory * making labels key case insensitive * make poduid label case insensitive * changes to exclude pause amd 64 containers * Update agent version * Updating readme for the latest release * Fix indentation in kube.conf and update readme (#184) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get 
controller type from env * fixing bugs * changes to fixed type * removing comments * changes for fixed type * adding kubelet version as a dimension * Excluding raw docker containers from container inventory * making labels key case insensitive * make poduid label case insensitive * changes to exclude pause amd 64 containers * fixing indentation so that kube.conf contents can be used in config map in the yaml * updating readme to fix date and agent version * updating agent tag * Get Pods for current Node Only (#185) * Fix KubeAPI Calls to filter to get pods for current node * Reinstate log line * changes for container node inventory fixed type (#186) * Fix for mooncake (disable telemetry optionally) (#191) * disable telemetry option * fix a typo * CustomMetrics to ci_feature (#193) Custom Metrics changes to ci_feature * add ContainerNotRunning column to KubePodInventory * merge pr feedback: update name to ContainerStatusReason * Zero Fill for Missing Pod Phases, Change Namespace Dimension to Kubernetes namespace, as it might be confused with metrics namespace in Metrics Explorer (#194) * Zero Fill for Pod Counts by Phase * Change namespace dimension to Kubernetes namespace * No Retries for non 404 4xx errors (#196) * Update agent version for telemetry * Update readme for upcoming (ciprod01202019) release * fix readme formatting * fix formatting for readme * fix formatting for readme * fix readme * fix readme * fix agent version for telemetry * fix date in readme * update readme * Restart logs every 10MB instead of weekly (#198) * Rotate logs every 10MB instead of weekly * Removing some logging, fixed log rotation * update agent version for telemetry * update readme * Update kube.conf to use %STATE_DIR_WS% instead of hardcoded path * Fix AKSEngine Crash (#200) * hotfix * close resp.Body * remove chatty logs * membuf=5m and ignore files not updated since 5 mins * fix readme for new version * Fix the pod count in mdm agent plugin (#203) * Update readme * string freeze 
for out_mdm plugin * Vishwa/resourcecentric (#208) * resourceid fix (for AKS only) * fix name * Rashmi/win nodepool - PR (#206) * changes for win nodes enumeration * changes * changes * changes * node cpu metric rate changes * container cpu rate * changes * changes * changes * changes * changes * changes to include in_win_cadvisor_perf.rb file * send containerinventoryheartbeatevent * changes * cahnges for mdm metrics * changes * cahnges * changes * container states * changes * changes * changes for env variables * changes * changes * changes * changes * delete comments * changes * mutex changes * changes * changes * changes * telemetry fix for docker version * removing hardcoded values for mdm * update docker version * telemetry for windows cadvisor timeouts * exeception key update to computer * PR comments * adding os to container inventory for windows nodes (#210) * Fix omsagent crash Error when kube-api returns non-200, send events for HTTP Errors (#211) * Fix omsagent crash Error when kube-api returns non-200, send events for HTTP Errors * Fixing the bug, deferring telemetry changes for later * updating to lowercase compare for units (#212) * Merge from vishwa/telegraftcp to ci_feature for telegraf changes (#214) * merge from Vishwa/telegraf to Vishwa/telegraftcp for telegraf changes (#207) * add configuration for telegraf * fix for perms * fix telegraf config. 
* fix file location & config * update to config * fix namespace * trying different namespace and also debug=true * add placeholder for nodename * change namespace * updated config * fix uri * fix azMon settings * remove aad settings * add custom metrics regions * fix config * add support for replica-set config * fix oomkilled * Add telegraf 403 metric telemetry & non 403 trace telemetry * fix type * fix package * fix package import * fix filename * delete unused file * conf file for rs; fix 403counttotal metric for telegraf, remove host and use nodeName consistently, rename metrics * fix statefulsets * fix typo. * fix another typo. * fix telemetry * fix casing issue * fix comma issue. * disable telemetry for rs ; fix stateful set name * worksround for namespace fix * telegraf integration - v1 * telemetry changes for telegraf * telemetry & other changes * remove custom metric regions as we dont need anymore * remove un-needed files * fixes * exclude certain volumes and fix telemetry to not have computer & nodename as dimensions (redundant) * Vishwa/resourcecentric (#208) (#209) * resourceid fix (for AKS only) * fix name * near final metric shape * change from customlog to fixed type (InsightsMetrics) * fix PR feedback * fix pr feedback * Fix telemetry error for telegraf err count metric (#215) * Fix Unscheduled Pod bug, remove excess telemetry (#218) * Fix Unscheduled Pod bug, remove excess telemetry * Send Success Telemetry only once after startup for a node in a cluster for MDM Post * Sending telemetry for successful push to MDM every hour * Merge from Vishwa/promstandardmetrics into ci_feature (#220) * enable prometheus metrics collection in replica-set * fixing typos * fix config file path for replicaset * fix configuration * config changes * merge config/settings to ci_feature (#221) * updating fluentbit to use LOG_TAIL_PATH * changes * log exclusion pattern * changes * removing comments * adding enviornment varibale collection/disable * disable env var for 
cluster variable change * changes * toml parser changes * adding directory tomlrb * changes for container inventory * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * Telemetry for config overrides * add schema version telemetry * reduce the number of api calls for namespace filtering add more telemetry for config processing move liveness probe & parser to this repo * optimize for default kube-system namespace log collection exclusion * Fix Scenario when Controller name is empty (#222) * fix ; * ContainerLog collection optimizations (#223) * * derive k8s namespace from file (rather than making a api call) * optimize perf by not tailing excluded namespaces in stdout & stderr * Tuning fluentbit settings based on Cortana teams findings * making db sync off * buffer chunk and max as 1m so that we dont flush > 1m payloads * increasing rotatte wait from 5 secs to 30 secs * decreasing refresh interval from 60 secs to 30 secs * adding retry limit as 10 so that items get dropped in 50 secs rather than infinetely trying * changing flush to 5 secs from 30 secs * merge final changes for release from Vishwa/june2019agentrel to ci_feature (#224) * * derive k8s namespace from file (rather than making a api call) * optimize perf by not tailing excluded namespaces in stdout & stderr * Tuning fluentbit settings based on Cortana teams findings * making db sync off * buffer chunk and max as 1m so that we dont flush > 1m payloads * increasing rotatte wait from 5 secs to 30 secs * decreasing refresh interval from 60 secs to 30 secs * adding retry limit as 10 so that items get dropped in 50 secs rather than infinetely trying * changing flush to 5 secs from 30 secs * fix a minor comment * * change flush from 5 to 10 secs based on perf findings * fix fluent bit tuning for perf run (#226) * fix fluent bit tuning for perf run * stop 
collecting our own partition * fix merge issue * add release notes for june release in ci_feature branch * fix title * update * fix title * Trim spaces in AKS_REGION (#233) This is not an issue for normal AKS Monitoring Addon Onboarding. ONLY an issue for backdoor onboarding * Add Logs Size To Telemetry (#234) * Add Logs to telemetry * Using len instead of unsafe.Sizeof * Merge Vishwa/promcustommetrics to ci_feature (#237) * hard code config for UST CCP team * fix config * fix config after discussion * fix error log to get errros * fix config * update config * Add telemetry * Rashmi/promcustomconfig (#231) * changes * formatting changes * changes * changes * changes * changes * changes * changes * changes * changes * adding telemetry * changes * changes * changes * changes * changes * changes * changes * cahnges * changes * Rashmi/promcustomconfig (#236) * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * fix exceptions * changes to remove some exceptions * exception fixes * changes * changes for poduid nil check * Fix Region space error (#239) * Trim spaces in AKS_REGION This is not an issue for normal AKS Monitoring Addon Onboarding. 
ONLY an issue for backdoor onboarding * Fix out_mdm parsing error * Removing buffer chunk size and buffer max size from fluentbit conf (#240) * hard code config for UST CCP team * fix config * fix config after discussion * fix error log to get errros * fix config * update config * Add telemetry * Rashmi/promcustomconfig (#231) * changes * formatting changes * changes * changes * changes * changes * changes * changes * changes * changes * adding telemetry * changes * changes * changes * changes * changes * changes * changes * cahnges * changes * Rashmi/promcustomconfig (#236) * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * fix exceptions * changes to remove some exceptions * exception fixes * changes * changes for poduid nil check * removing buffer chunk size and buffer max size from fluentbit conf * changes (#243) * Collect container last state (#235) * updating the OMS agent to also collect container last state * changed a comment * git surrounded ContainerLastStatus code in a begin/rescue block * added a lot of error checking and logging * Rashmi/fix prom telemetry (#247) * fix prom telemetry * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * Merge Health Model work into ci_feature behind a feature flag Pending perf testing (#246) Merge Health to ci_feature * Fix Deserialization Bug (#249) * Fix the bug where capacity is not updated and cached value was being used (#251) * Fix the Capacity computation * fix node cpu and memory limits calculation * changes (#250) * Added new Custom Metrics Regions, fixed MDM plugin crash bug (#253) Added new regions, added handler for MDM plugin start * Add Missing Handlers (#254) * Added Missing Handlers * Return MultiEventStream.new instead of empty array (#256) * Added explicit require_relative to avoid 
loading errors (#258) * Adding explicit require_relative * Gangams/enable ai telemetry in mc (#252) * enable ai telemetry to configure different ikey and endpoint per cloud * Fixing null check out_mdm bug, tomlparser bug, exposing Replica Set service name as an ENV variable (#261) * Expose replica set service as an env variable * Fixing null check out_mdm bug, and tomlparser bug * Updating the env variable name to be more specific to health model * Changes for creating custom plugins with namespace settings for prometheus scraping (#262) * changes * changes * changes * changes * changes * changes * chnages * changes * telemetry changes * changes * Cherry-pick hotfix 09092019 to ci_feature (#265) * Gangams/add telemetry hybrid (#264) * add telemetry to detect the cloud, distro and kernel version * add null check since providerId optional * detect azurestack cloud * rename to KubernetesProviderID since ProviderID name already used in LA * capture workspaceCloud to the telemetry * trim the domain read from file * KubeMonAgentEvents changes to collect configuration events (#267) * changes * changes * changes * changes * changes * changes * env changes * changes * changes * changes * reverting * changes * cahnges * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * chnages * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * 
changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * changes * Fix the Dupe Perf Data Issue from the DaemonSet (#266) * Dupe Perf Record Fix * PR for 1. Container Memory CPU monitor 2. Configuration for Node Conditions 3. Fixed Type Changes 4. Use Env variable, and health_forward (that handles network errors at init) 5. Unit Tests (#268) * init containers fix and other bug fixes (#269) * init container - KPI and kubeperf changes * changes * changes * changes * changes for empty array fix * changes * changes * pod inventory exception fix * nil check changes * changes * fixing typo * changes * changes * PR - feedback * remove comment * tag pass changes * changes * tagdrop changes * changes * changes * Send agg monitor signal on details change (#270) send when an agg monitor details change, but state did not change * bug fixes for error (#274) * Fix to use declaration and assignment instead of assignment (#275) * bug fixes for error * adding declaration to assignment * 1. Added telemetry (#277) 2. Configuration property changes 3. Bug fixes for a. unscheduled pods returning green 3b. Sometimes, the details hash of agg monitors are different because the order of elements inside the array is different, causing the records to be sent * Bug fix to remove unused variable (#281) * bug fixes for error * adding declaration to assignment * removing unused variable * Fix the WARN<->WARNING typo (#282) * Bug Fixes 1. telemetry send throwing exception if records not initialized 2. permissions error in on-prem clusters (#284) * Bug fixes 1. 
not writeable, telemetry error * Change to state_WS_dir * Fix Require relative revert (#287) * Bug Fixes for exceptions in telemetry, remove limit set check (#289) * Bug Fixes 10222019 * Initialize container_cpu_memory_records in fhmb * Added telemetry to investigate health exceptions * Set frozen_string_literal to true * Send event once per container when lookup is empty, or limit is an array * Unit Tests, Use RS and POD to determine workload * Fixed Node Condition Bug, added exception handling to return get_rs_owner_ref * Fix the bug where if a warning condition appears before fail condition, the node condition is reported as warning instead of fail. Also fix the node conditions state to consider unknown as a failure state (#292) * Fix for Nodes Aspect not showing up in draft cluster (#294) * Fix the issue where the health tree is inconsistent if a deployment is deleted (#295) * Rashmi/1 16 test (#297) * health deployment update * apps v1 changes for deployment * changes * changes to use relicasets and api groups * Fix duplicate records in container memory/cpu samples (#298) * Update MDM region list to include francecentral, japaneast and australiaeast * Update MDM region list to include francecentral, japaneast and australiaeast * Send telemetry when there is error in calculation of state in percentage aggregation, and send state as unknown (#300) * fix exceptions (#306) * Merge Branch morgan into ci_feature (#308) * Fixes : 1) Disable health (for time being) - in DS & RS 2) Disable MDM (for time being) - in DS & RS 3) Merge kubeperf into kubenode & kubepod 4) Made scheduling predictable for kubenode & kubepod 5) Enable containerlog enrichment fields (timeofcommand, containername & containerimage) as a configurable setting (default = true/ON) - Also add telemetry for it 6) Filter OUT type!=Normal events for k8s events 7) AppInsights telemetry async 8) Fix double calling bug in in_win_cadvisor_perf 9) Add connect timeout (20secs) & read timeout (40 secs) for all 
cadvisor api calls & also for all kubernetes api server calls 10) Fix batchTime for kubepods to be one before making api server call (rather than after making the call, which will make it fluctuate based on api server latency for the call) * fix setting issue for the new enrichcontainerlog setting * fix compilation issue * fix another compilation issue * fix emit issues * fix a nil issue * fix mising tag * * Fix all input plugins for scheduling issue * Merge kubeservices with kubepodinventory (reduce RS to API server by one more) * Remove Kubelogs (not used) * Fix liveness probe * Disable enrichment by default for container logs * Move to yajl json parser across the board for docker provier code * Remove unused files * fix removed files * fix timeofcommand and remove a duplicate entry for a health file. * Rashmi/http leak fixes (#301) * changes for http connection close * close socket in ensure * adding nil check * Rashmi/http leak fixes (#303) * changes for http connection close * close socket in ensure * adding nil check * adding missing end * use yajl for events & nodes parsing. 
* Rashmi/http leak fixes (#304) * changes for http connection close * close socket in ensure * adding nil check * Update MDM region list to include francecentral, japaneast and australiaeast * Update MDM region list to include francecentral, japaneast and australiaeast * adding missing end * Send telemetry when there is error in calculation of state in percentage aggregation, and send state as unknown (#300) * changes for chunking * telemetry changes * some fixes * bug fix * changing to have morgan changes only * add new line * use polltime for metrics and disable out_forward for health * enable mdm & health * few optimizations * do not remove time of command make kube.conf same as scale tested config * remove comments from container.conf * remove flush comment for ai telemetry * remove commented code lines * fix config * remove timeofcommand when enrichment==false * fix config * enable mdm filter * Rashmi/api chunk (#307) * changes * changes * refactor changes * changes * changes * changes * changes * node changes * changes * changes * changes * changes * adding open and read timeouts for api client * removing comments * updating chunk size * Update Readme * add back timeofcommand (#310) From deff7ace376c2265520e58fef0da6dfd26b9aa6d Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 6 Dec 2019 16:25:11 -0800 Subject: [PATCH 150/160] Adding new cpu and memory limits to readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 49c6d1fe4..007f92d92 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Note : The agent version(s) below has dates (ciprod), which indicate t - Fix retries, wait between retries, chunk size, thread counts to be consistent for all FluentD workflows - Back-off for containerlog enrichment K8S API calls - Add new regions (3) for Azure Monitor Custom metrics -- Increase the cpu & memory limits for replica-set to support larger clusters (nodes & pods) +- Increase the cpu(1 
core) & memory(750Mi) limits for replica-set to support larger clusters (nodes & pods) - Move to Ubuntu 18.04 LTS - Support for Kubernetes 1.16 - Use ifconfig for detecting network connectivity issues From 4b1ef9c7123b3b52a7460cb30fc45f9b0c0244e1 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Mon, 6 Jan 2020 17:34:48 -0800 Subject: [PATCH 151/160] CAdvisor to use 10255/10250 based on env variable (#321) * CAdvisor secure port changes (#320) * cadvsior secure port changes * update to use secure/insecure port for cadvisor * telemetry changes * fix bug * bug fix * changes * Adding cadvisor uri log * switching defaults * update readme * changes --- README.md | 7 +++ installer/conf/telegraf.conf | 2 +- .../code/plugin/CAdvisorMetricsAPIClient.rb | 53 +++++++++++++++---- 3 files changed, 51 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 007f92d92..75b2d8665 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,13 @@ additional questions or comments. Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) +### 01/07/2020 - +##### Version microsoft/oms:ciprod01072020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod01072020 +## Code change log +- Switch between 10255(old) and 10250(new) ports for cadvisor for older and newer versions of kubernetes +## Customer Impact +- Node cpu, node memory, container cpu and container memory metrics were obtained earlier by querying kubelet readonly port(http://$NODE_IP:10255). Agent now supports getting these metrics from kubelet port(https://$NODE_IP:10250) as well. During the agent startup, it checks for connectivity to kubelet port(https://$NODE_IP:10250), and if it fails the metrics source is defaulted to readonly port(http://$NODE_IP:10255). 
+ ### 12/04/2019 - ##### Version microsoft/oms:ciprod12042019 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod12042019 - Fix scheduler for all input plugins diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index cd22a56b4..f9dc3fb6a 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -531,7 +531,7 @@ [[inputs.prometheus]] name_prefix="container.azm.ms/" ## An array of urls to scrape metrics from. - urls = ["http://$NODE_IP:10255/metrics"] + urls = ["$CADVISOR_METRICS_URL"] fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"] metric_version = 2 diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index be61b8b8f..8b0105a6f 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -2,7 +2,7 @@ # frozen_string_literal: true class CAdvisorMetricsAPIClient - require 'yajl/json_gem' + require "yajl/json_gem" require "logger" require "net/http" require "net/https" @@ -29,6 +29,8 @@ class CAdvisorMetricsAPIClient @dsPromFieldDropCount = ENV["TELEMETRY_DS_PROM_FIELDDROP_LENGTH"] @dsPromUrlCount = ENV["TELEMETRY_DS_PROM_URLS_LENGTH"] + @cAdvisorMetricsSecurePort = ENV["IS_SECURE_CADVISOR_PORT"] + @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt" @Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M # @@rxBytesLast = nil @@ -63,13 +65,34 @@ def getSummaryStatsFromCAdvisor(winNode) response = nil @Log.info "Getting CAdvisor Uri" begin - cAdvisorUri = getCAdvisorUri(winNode) + cAdvisorSecurePort = false + # Check to see if omsagent needs to use 10255(insecure) port or 10250(secure) port + if !@cAdvisorMetricsSecurePort.nil? 
&& @cAdvisorMetricsSecurePort == "true" + cAdvisorSecurePort = true + end + + cAdvisorUri = getCAdvisorUri(winNode, cAdvisorSecurePort) + bearerToken = File.read("/var/run/secrets/kubernetes.io/serviceaccount/token") + @Log.info "cAdvisorUri: #{cAdvisorUri}" + if !cAdvisorUri.nil? uri = URI.parse(cAdvisorUri) - Net::HTTP.start(uri.host, uri.port, :use_ssl => false, :open_timeout => 20, :read_timeout => 40 ) do |http| - cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) - response = http.request(cAdvisorApiRequest) - @Log.info "Got response code #{response.code} from #{uri.request_uri}" + if !!cAdvisorSecurePort == true + Net::HTTP.start(uri.host, uri.port, + :use_ssl => true, :open_timeout => 20, :read_timeout => 40, + :ca_file => "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", + :verify_mode => OpenSSL::SSL::VERIFY_NONE) do |http| + cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) + cAdvisorApiRequest["Authorization"] = "Bearer #{bearerToken}" + response = http.request(cAdvisorApiRequest) + @Log.info "Got response code #{response.code} from #{uri.request_uri}" + end + else + Net::HTTP.start(uri.host, uri.port, :use_ssl => false, :open_timeout => 20, :read_timeout => 40) do |http| + cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) + response = http.request(cAdvisorApiRequest) + @Log.info "Got response code #{response.code} from #{uri.request_uri}" + end end end rescue => error @@ -81,9 +104,14 @@ def getSummaryStatsFromCAdvisor(winNode) return response end - def getCAdvisorUri(winNode) + def getCAdvisorUri(winNode, cAdvisorSecurePort) begin - defaultHost = "http://localhost:10255" + if !!cAdvisorSecurePort == true + defaultHost = "https://localhost:10250" + else + defaultHost = "http://localhost:10255" + end + relativeUri = "/stats/summary" if !winNode.nil? nodeIP = winNode["InternalIP"] @@ -92,7 +120,11 @@ def getCAdvisorUri(winNode) end if !nodeIP.nil? 
@Log.info("Using #{nodeIP + relativeUri} for CAdvisor Uri") - return "http://#{nodeIP}:10255" + relativeUri + if !!cAdvisorSecurePort == true + return "https://#{nodeIP}:10250" + relativeUri + else + return "http://#{nodeIP}:10255" + relativeUri + end else @Log.warn ("NODE_IP environment variable not set. Using default as : #{defaultHost + relativeUri} ") if !winNode.nil? @@ -104,7 +136,7 @@ def getCAdvisorUri(winNode) end end - def getMetrics(winNode: nil, metricTime: Time.now.utc.iso8601 ) + def getMetrics(winNode: nil, metricTime: Time.now.utc.iso8601) metricDataItems = [] begin cAdvisorStats = getSummaryStatsFromCAdvisor(winNode) @@ -211,6 +243,7 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met telemetryProps["PodName"] = podName telemetryProps["ContainerName"] = containerName telemetryProps["Computer"] = hostName + telemetryProps["CAdvisorIsSecure"] = @cAdvisorMetricsSecurePort #telemetry about log collections settings if (File.file?(@configMapMountPath)) telemetryProps["clustercustomsettings"] = true From 6dc93e8828800f68063423bd322ee3918d1412ef Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Mon, 6 Jan 2020 17:42:51 -0800 Subject: [PATCH 152/160] changing font for code change and customer impact --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 75b2d8665..1898bd17c 100644 --- a/README.md +++ b/README.md @@ -13,9 +13,10 @@ Note : The agent version(s) below has dates (ciprod), which indicate t ### 01/07/2020 - ##### Version microsoft/oms:ciprod01072020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod01072020 -## Code change log +##### Code change log - Switch between 10255(old) and 10250(new) ports for cadvisor for older and newer versions of kubernetes -## Customer Impact + +##### Customer Impact - Node cpu, node memory, container cpu and container memory metrics were obtained earlier by querying kubelet readonly 
port(http://$NODE_IP:10255). Agent now supports getting these metrics from kubelet port(https://$NODE_IP:10250) as well. During the agent startup, it checks for connectivity to kubelet port(https://$NODE_IP:10250), and if it fails the metrics source is defaulted to readonly port(http://$NODE_IP:10255). ### 12/04/2019 - From 044f13db72dfa7c3a63cc28a466a0b924745a7e0 Mon Sep 17 00:00:00 2001 From: ganga1980 Date: Thu, 23 Jan 2020 21:51:03 -0800 Subject: [PATCH 153/160] For ARO, stop collecting inventory of master and infra (#323) * filter out infra and master nodes inventory for aro * filterout pods info scheduled master and infra nodes * fix redundant KubernetesApiClient name * filter out events sourced from master and infra nodes * fix in kubeapi * add the comments * fix pr feedback * minor updates * fix pr feedback * encode special characters in query * some refactoring --- source/code/plugin/KubernetesApiClient.rb | 42 ++++++++++++++++++- source/code/plugin/filter_cadvisor2mdm.rb | 3 +- .../plugin/health/health_monitor_utils.rb | 9 ++-- source/code/plugin/in_kube_events.rb | 16 ++++--- source/code/plugin/in_kube_health.rb | 6 ++- source/code/plugin/in_kube_nodes.rb | 6 ++- source/code/plugin/in_kube_podinventory.rb | 7 ++++ 7 files changed, 74 insertions(+), 15 deletions(-) diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index e52c77884..6f108ec92 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -18,6 +18,7 @@ class KubernetesApiClient @@ClusterName = nil @@ClusterId = nil @@IsNodeMaster = nil + @@IsAROV3Cluster = nil #@@IsValidRunningNode = nil #@@IsLinuxCluster = nil @@KubeSystemNamespace = "kube-system" @@ -152,6 +153,20 @@ def getClusterId return @@ClusterId end + def isAROV3Cluster + return @@IsAROV3Cluster if !@@IsAROV3Cluster.nil? + @@IsAROV3Cluster = false + begin + cluster = getClusterId + if !cluster.nil? && !cluster.empty? 
&& cluster.downcase.include?("/microsoft.containerservice/openshiftmanagedclusters") + @@IsAROV3Cluster = true + end + rescue => error + @Log.warn("KubernetesApiClient::IsAROV3Cluster : IsAROV3Cluster failed #{error}") + end + return @@IsAROV3Cluster + end + def isNodeMaster return @@IsNodeMaster if !@@IsNodeMaster.nil? @@IsNodeMaster = false @@ -177,6 +192,22 @@ def isNodeMaster return @@IsNodeMaster end + def getNodesResourceUri(nodesResourceUri) + begin + # For ARO v3 cluster, filter out all other node roles other than compute + if IsAROV3Cluster + if !nodesResourceUri.nil? && !nodesResourceUri.index("?").nil? + nodesResourceUri = nodesResourceUri + "&labelSelector=node-role.kubernetes.io%2Fcompute%3Dtrue" + else + nodesResourceUri = nodesResourceUri + "labelSelector=node-role.kubernetes.io%2Fcompute%3Dtrue" + end + end + rescue => error + @Log.warn("getNodesResourceUri failed: #{error}") + end + return nodesResourceUri + end + #def isValidRunningNode # return @@IsValidRunningNode if !@@IsValidRunningNode.nil? # @@IsValidRunningNode = false @@ -240,7 +271,8 @@ def getPods(namespace) def getWindowsNodes winNodes = [] begin - nodeInventory = JSON.parse(getKubeResourceInfo("nodes").body) + resourceUri = getNodesResourceUri("nodes") + nodeInventory = JSON.parse(getKubeResourceInfo(resourceUri).body) @Log.info "KubernetesAPIClient::getWindowsNodes : Got nodes from kube api" # Resetting the windows node cache @@WinNodeArray.clear @@ -357,6 +389,14 @@ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName podUid = pod["metadata"]["uid"] end + # For ARO, skip the pods scheduled on to master or infra nodes to ingest + if IsAROV3Cluster && !pod["spec"].nil? && !pod["spec"]["nodeName"].nil? && + ( pod["spec"]["nodeName"].downcase.start_with?("infra-") || + pod["spec"]["nodeName"].downcase.start_with?("master-") ) + next + end + + podContainers = [] if !pod["spec"]["containers"].nil? && !pod["spec"]["containers"].empty? 
podContainers = podContainers + pod["spec"]["containers"] diff --git a/source/code/plugin/filter_cadvisor2mdm.rb b/source/code/plugin/filter_cadvisor2mdm.rb index f14a1369b..bc26532a5 100644 --- a/source/code/plugin/filter_cadvisor2mdm.rb +++ b/source/code/plugin/filter_cadvisor2mdm.rb @@ -145,7 +145,8 @@ def ensure_cpu_memory_capacity_set end begin - nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes?fieldSelector=metadata.name%3D#{@@hostName}").body) + resourceUri = KubernetesApiClient.getNodesResourceUri("nodes?fieldSelector=metadata.name%3D#{@@hostName}") + nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo(resourceUri).body) rescue Exception => e @log.info "Error when getting nodeInventory from kube API. Exception: #{e.class} Message: #{e.message} " ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index 13d1416b1..2b5bd85b5 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -161,7 +161,8 @@ def get_resource_subscription(pod_inventory, metric_name, metric_capacity) def get_cluster_cpu_memory_capacity(log, node_inventory: nil) begin if node_inventory.nil? - node_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + resourceUri = KubernetesApiClient.getNodesResourceUri("nodes") + node_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo(resourceUri).body) end cluster_cpu_capacity = 0.0 cluster_memory_capacity = 0.0 @@ -207,7 +208,8 @@ def refresh_kubernetes_api_data(log, hostName, force: false) end begin - @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + resourceUri = KubernetesApiClient.getNodesResourceUri("nodes") + @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo(resourceUri).body) if !hostName.nil? 
podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods?fieldSelector=spec.nodeName%3D#{hostName}").body) else @@ -272,7 +274,8 @@ def ensure_cpu_memory_capacity_set(log, cpu_capacity, memory_capacity, hostname) log.info "CPU and Memory Capacity Not set" begin - @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + resourceUri = KubernetesApiClient.getNodesResourceUri("nodes") + @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo(resourceUri).body) rescue Exception => e log.info "Error when getting nodeInventory from kube API. Exception: #{e.class} Message: #{e.message} " ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index 6116cb62d..b405afde9 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -17,7 +17,7 @@ def initialize require_relative "oms_common" require_relative "omslog" require_relative "ApplicationInsightsUtility" - + # 30000 events account to approximately 5MB @EVENTS_CHUNK_SIZE = 30000 end @@ -101,6 +101,14 @@ def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTim if !eventQueryState.empty? && eventQueryState.include?(eventId) next end + + nodeName = items["source"].key?("host") ? items["source"]["host"] : (OMS::Common.get_hostname) + # For ARO v3 cluster, drop the master and infra node sourced events to ingest + if KubernetesApiClient.isAROV3Cluster && !nodeName.nil? && !nodeName.empty? 
&& + ( nodeName.downcase.start_with?("infra-") || nodeName.downcase.start_with?("master-") ) + next + end + record["ObjectKind"] = items["involvedObject"]["kind"] record["Namespace"] = items["involvedObject"]["namespace"] record["Name"] = items["involvedObject"]["name"] @@ -112,11 +120,7 @@ def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTim record["FirstSeen"] = items["firstTimestamp"] record["LastSeen"] = items["lastTimestamp"] record["Count"] = items["count"] - if items["source"].key?("host") - record["Computer"] = items["source"]["host"] - else - record["Computer"] = (OMS::Common.get_hostname) - end + record["Computer"] = nodeName record["ClusterName"] = KubernetesApiClient.getClusterName record["ClusterId"] = KubernetesApiClient.getClusterId wrapper = { diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 0eebf395b..c54545e04 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -85,7 +85,8 @@ def enumerate #HealthMonitorUtils.refresh_kubernetes_api_data(@@hmlog, nil) # we do this so that if the call fails, we get a response code/header etc. 
- node_inventory_response = KubernetesApiClient.getKubeResourceInfo("nodes") + resourceUri = KubernetesApiClient.getNodesResourceUri("nodes") + node_inventory_response = KubernetesApiClient.getKubeResourceInfo(resourceUri) node_inventory = Yajl::Parser.parse(StringIO.new(node_inventory_response.body)) pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods?fieldSelector=metadata.namespace%3D#{@@KubeInfraNamespace}") pod_inventory = Yajl::Parser.parse(StringIO.new(pod_inventory_response.body)) @@ -299,7 +300,8 @@ def process_node_condition_monitor(node_inventory) def initialize_inventory #this is required because there are other components, like the container cpu memory aggregator, that depends on the mapping being initialized - node_inventory_response = KubernetesApiClient.getKubeResourceInfo("nodes") + resourceUri = KubernetesApiClient.getNodesResourceUri("nodes") + node_inventory_response = KubernetesApiClient.getKubeResourceInfo(resourceUri) node_inventory = Yajl::Parser.parse(StringIO.new(node_inventory_response.body)) pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods?fieldSelector=metadata.namespace%3D#{@@KubeInfraNamespace}") pod_inventory = Yajl::Parser.parse(StringIO.new(pod_inventory_response.body)) diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index fa0994f43..706c3ad13 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -69,7 +69,9 @@ def enumerate # Initializing continuation token to nil continuationToken = nil $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - continuationToken, nodeInventory = KubernetesApiClient.getResourcesAndContinuationToken("nodes?limit=#{@NODES_CHUNK_SIZE}") + resourceUri = KubernetesApiClient.getNodesResourceUri("nodes?limit=#{@NODES_CHUNK_SIZE}") + continuationToken, nodeInventory = KubernetesApiClient.getResourcesAndContinuationToken(resourceUri) + 
$log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") if (!nodeInventory.nil? && !nodeInventory.empty? && nodeInventory.key?("items") && !nodeInventory["items"].nil? && !nodeInventory["items"].empty?) parse_and_emit_records(nodeInventory, batchTime) @@ -79,7 +81,7 @@ def enumerate #If we receive a continuation token, make calls, process and flush data until we have processed all data while (!continuationToken.nil? && !continuationToken.empty?) - continuationToken, nodeInventory = KubernetesApiClient.getResourcesAndContinuationToken("nodes?limit=#{@NODES_CHUNK_SIZE}&continue=#{continuationToken}") + continuationToken, nodeInventory = KubernetesApiClient.getResourcesAndContinuationToken(resourceUri + "&continue=#{continuationToken}") if (!nodeInventory.nil? && !nodeInventory.empty? && nodeInventory.key?("items") && !nodeInventory["items"].nil? && !nodeInventory["items"].empty?) parse_and_emit_records(nodeInventory, batchTime) else diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 28b20bfc0..c709edbc2 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -265,6 +265,13 @@ def parse_and_emit_records(podInventory, serviceList, batchTime = Time.utc.iso86 record["Name"] = items["metadata"]["name"] podNameSpace = items["metadata"]["namespace"] + # For ARO v3 cluster, skip the pods scheduled on to master or infra nodes + if KubernetesApiClient.isAROV3Cluster && !items["spec"].nil? && !items["spec"]["nodeName"].nil? 
&& + ( items["spec"]["nodeName"].downcase.start_with?("infra-") || + items["spec"]["nodeName"].downcase.start_with?("master-") ) + next + end + if podNameSpace.eql?("kube-system") && !items["metadata"].key?("ownerReferences") # The above case seems to be the only case where you have horizontal scaling of pods # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash From acc1d278279ff393dd528bda87306d21acfdb064 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 28 Jan 2020 15:55:51 -0800 Subject: [PATCH 154/160] MDM plugin support for large scale clusters (#324) * Batch Commit * WIP: Committing move logic from filter to input * WIP : MDM plugins for scale clusters * Bug fixes 1. cpu percentage 2. bytesize on array. Remove log line * Fixing metric value in cadvisor2mdm plugin * WIP to laptop * Working version with cadvisor changes * Fix Health cpu usage * Added uri for cadvisor failure --- installer/datafiles/base_container.data | 2 + .../code/plugin/CAdvisorMetricsAPIClient.rb | 141 +++++++------ source/code/plugin/filter_cadvisor2mdm.rb | 52 +++-- .../plugin/filter_cadvisor_health_node.rb | 4 +- source/code/plugin/filter_inventory2mdm.rb | 4 +- .../plugin/health/health_monitor_utils.rb | 40 +--- source/code/plugin/in_kube_podinventory.rb | 26 ++- source/code/plugin/kubelet_utils.rb | 23 +++ source/code/plugin/out_mdm.rb | 12 +- source/code/plugin/podinventory_to_mdm.rb | 190 ++++++++++++++++++ 10 files changed, 365 insertions(+), 129 deletions(-) create mode 100644 source/code/plugin/kubelet_utils.rb create mode 100644 source/code/plugin/podinventory_to_mdm.rb diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 60de5af18..f976454f9 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -35,6 +35,8 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/in_win_cadvisor_perf.rb; source/code/plugin/in_win_cadvisor_perf.rb; 644; 
root; root /opt/microsoft/omsagent/plugin/in_kube_nodes.rb; source/code/plugin/in_kube_nodes.rb; 644; root; root /opt/microsoft/omsagent/plugin/filter_inventory2mdm.rb; source/code/plugin/filter_inventory2mdm.rb; 644; root; root +/opt/microsoft/omsagent/plugin/podinventory_to_mdm.rb; source/code/plugin/podinventory_to_mdm.rb; 644; root; root +/opt/microsoft/omsagent/plugin/kubelet_utils.rb; source/code/plugin/kubelet_utils.rb; 644; root; root /opt/microsoft/omsagent/plugin/CustomMetricsUtils.rb; source/code/plugin/CustomMetricsUtils.rb; 644; root; root diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index 8b0105a6f..54e7e5fd9 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -55,85 +55,58 @@ class CAdvisorMetricsAPIClient # Keeping track of containers so that can delete the container from the container cpu cache when the container is deleted # as a part of the cleanup routine @@winContainerIdCache = [] - + #cadvisor ports + @@CADVISOR_SECURE_PORT = "10250" + @@CADVISOR_NON_SECURE_PORT = "10255" def initialize end class << self def getSummaryStatsFromCAdvisor(winNode) - headers = {} - response = nil - @Log.info "Getting CAdvisor Uri" - begin - cAdvisorSecurePort = false - # Check to see if omsagent needs to use 10255(insecure) port or 10250(secure) port - if !@cAdvisorMetricsSecurePort.nil? && @cAdvisorMetricsSecurePort == "true" - cAdvisorSecurePort = true - end - - cAdvisorUri = getCAdvisorUri(winNode, cAdvisorSecurePort) - bearerToken = File.read("/var/run/secrets/kubernetes.io/serviceaccount/token") - @Log.info "cAdvisorUri: #{cAdvisorUri}" + relativeUri = "/stats/summary" + return getResponse(winNode, relativeUri) + end - if !cAdvisorUri.nil? 
- uri = URI.parse(cAdvisorUri) - if !!cAdvisorSecurePort == true - Net::HTTP.start(uri.host, uri.port, - :use_ssl => true, :open_timeout => 20, :read_timeout => 40, - :ca_file => "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", - :verify_mode => OpenSSL::SSL::VERIFY_NONE) do |http| - cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) - cAdvisorApiRequest["Authorization"] = "Bearer #{bearerToken}" - response = http.request(cAdvisorApiRequest) - @Log.info "Got response code #{response.code} from #{uri.request_uri}" - end - else - Net::HTTP.start(uri.host, uri.port, :use_ssl => false, :open_timeout => 20, :read_timeout => 40) do |http| - cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) - response = http.request(cAdvisorApiRequest) - @Log.info "Got response code #{response.code} from #{uri.request_uri}" - end - end - end - rescue => error - @Log.warn("CAdvisor api request failed: #{error}") - telemetryProps = {} - telemetryProps["Computer"] = winNode["Hostname"] - ApplicationInsightsUtility.sendExceptionTelemetry(error, telemetryProps) - end - return response + def getNodeCapacityFromCAdvisor(winNode: nil) + relativeUri = "/spec/" + return getResponse(winNode, relativeUri) end - def getCAdvisorUri(winNode, cAdvisorSecurePort) - begin + def getBaseCAdvisorUri(winNode) + cAdvisorSecurePort = isCAdvisorOnSecurePort() + if !!cAdvisorSecurePort == true - defaultHost = "https://localhost:10250" + defaultHost = "https://localhost:#{@@CADVISOR_SECURE_PORT}" else - defaultHost = "http://localhost:10255" + defaultHost = "http://localhost:#{@@CADVISOR_NON_SECURE_PORT}" end - relativeUri = "/stats/summary" if !winNode.nil? - nodeIP = winNode["InternalIP"] + nodeIP = winNode["InternalIP"] else - nodeIP = ENV["NODE_IP"] + nodeIP = ENV["NODE_IP"] end + if !nodeIP.nil? 
- @Log.info("Using #{nodeIP + relativeUri} for CAdvisor Uri") - if !!cAdvisorSecurePort == true - return "https://#{nodeIP}:10250" + relativeUri - else - return "http://#{nodeIP}:10255" + relativeUri - end + @Log.info("Using #{nodeIP} for CAdvisor Host") + if !!cAdvisorSecurePort == true + return "https://#{nodeIP}:#{@@CADVISOR_SECURE_PORT}" + else + return "http://#{nodeIP}:#{@@CADVISOR_NON_SECURE_PORT}" + end else - @Log.warn ("NODE_IP environment variable not set. Using default as : #{defaultHost + relativeUri} ") - if !winNode.nil? - return nil - else - return defaultHost + relativeUri - end + @Log.warn ("NODE_IP environment variable not set. Using default as : #{defaultHost}") + if !winNode.nil? + return nil + else + return defaultHost + end end - end + end + + def getCAdvisorUri(winNode, relativeUri) + baseUri = getBaseCAdvisorUri(winNode) + return baseUri + relativeUri end def getMetrics(winNode: nil, metricTime: Time.now.utc.iso8601) @@ -696,5 +669,51 @@ def getContainerStartTimeMetricItems(metricJSON, hostName, metricNametoReturn, m end return metricItems end + + def getResponse(winNode, relativeUri) + response = nil + @Log.info "Getting CAdvisor Uri Response" + bearerToken = File.read("/var/run/secrets/kubernetes.io/serviceaccount/token") + begin + cAdvisorUri = getCAdvisorUri(winNode, relativeUri) + @Log.info "cAdvisorUri: #{cAdvisorUri}" + + if !cAdvisorUri.nil? 
+ uri = URI.parse(cAdvisorUri) + if isCAdvisorOnSecurePort() + Net::HTTP.start(uri.host, uri.port, + :use_ssl => true, :open_timeout => 20, :read_timeout => 40, + :ca_file => "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", + :verify_mode => OpenSSL::SSL::VERIFY_NONE) do |http| + cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) + cAdvisorApiRequest["Authorization"] = "Bearer #{bearerToken}" + response = http.request(cAdvisorApiRequest) + @Log.info "Got response code #{response.code} from #{uri.request_uri}" + end + else + Net::HTTP.start(uri.host, uri.port, :use_ssl => false, :open_timeout => 20, :read_timeout => 40) do |http| + cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) + response = http.request(cAdvisorApiRequest) + @Log.info "Got response code #{response.code} from #{uri.request_uri}" + end + end + end + rescue => error + @Log.warn("CAdvisor api request for #{cAdvisorUri} failed: #{error}") + telemetryProps = {} + telemetryProps["Computer"] = winNode["Hostname"] + ApplicationInsightsUtility.sendExceptionTelemetry(error, telemetryProps) + end + return response + end + + def isCAdvisorOnSecurePort + cAdvisorSecurePort = false + # Check to see whether omsagent needs to use 10255(insecure) port or 10250(secure) port + if !@cAdvisorMetricsSecurePort.nil? 
&& @cAdvisorMetricsSecurePort == "true" + cAdvisorSecurePort = true + end + return cAdvisorSecurePort + end end end diff --git a/source/code/plugin/filter_cadvisor2mdm.rb b/source/code/plugin/filter_cadvisor2mdm.rb index bc26532a5..45f0d9d6f 100644 --- a/source/code/plugin/filter_cadvisor2mdm.rb +++ b/source/code/plugin/filter_cadvisor2mdm.rb @@ -7,6 +7,7 @@ module Fluent require 'yajl/json_gem' require_relative 'oms_common' require_relative 'CustomMetricsUtils' + require_relative 'kubelet_utils' class CAdvisor2MdmFilter < Filter Fluent::Plugin.register_filter('filter_cadvisor2mdm', self) @@ -110,9 +111,10 @@ def filter(tag, time, record) metric_value = record['DataItems'][0]['Collections'][0]['Value'] if counter_name.downcase == @@cpu_usage_nano_cores metric_name = @@cpu_usage_milli_cores - metric_value = metric_value/1000000 + metric_value /= 1000000 #cadvisor record is in nanocores. Convert to mc + @log.info "Metric_value: #{metric_value} CPU Capacity #{@cpu_capacity}" if @cpu_capacity != 0.0 - percentage_metric_value = (metric_value*1000000)*100/@cpu_capacity + percentage_metric_value = (metric_value)*100/@cpu_capacity end end @@ -138,34 +140,42 @@ def filter(tag, time, record) def ensure_cpu_memory_capacity_set - @log.info "ensure_cpu_memory_capacity_set @cpu_capacity #{@cpu_capacity} @memory_capacity #{@memory_capacity}" if @cpu_capacity != 0.0 && @memory_capacity != 0.0 @log.info "CPU And Memory Capacity are already set" return end - begin + controller_type = ENV["CONTROLLER_TYPE"] + if controller_type.downcase == 'replicaset' + @log.info "ensure_cpu_memory_capacity_set @cpu_capacity #{@cpu_capacity} @memory_capacity #{@memory_capacity}" + + begin resourceUri = KubernetesApiClient.getNodesResourceUri("nodes?fieldSelector=metadata.name%3D#{@@hostName}") nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo(resourceUri).body) - rescue Exception => e - @log.info "Error when getting nodeInventory from kube API. 
Exception: #{e.class} Message: #{e.message} " - ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) - end - if !nodeInventory.nil? - cpu_capacity_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") - if !cpu_capacity_json.nil? && !cpu_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'].to_s.nil? - @cpu_capacity = cpu_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'] - @log.info "CPU Limit #{@cpu_capacity}" - else - @log.info "Error getting cpu_capacity" + rescue Exception => e + @log.info "Error when getting nodeInventory from kube API. Exception: #{e.class} Message: #{e.message} " + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) end - memory_capacity_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes") - if !memory_capacity_json.nil? && !memory_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'].to_s.nil? - @memory_capacity = memory_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'] - @log.info "Memory Limit #{@memory_capacity}" - else - @log.info "Error getting memory_capacity" + if !nodeInventory.nil? + cpu_capacity_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") + if !cpu_capacity_json.nil? && !cpu_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'].to_s.nil? + @cpu_capacity = cpu_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'] + @log.info "CPU Limit #{@cpu_capacity}" + else + @log.info "Error getting cpu_capacity" + end + memory_capacity_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes") + if !memory_capacity_json.nil? && !memory_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'].to_s.nil? 
+ @memory_capacity = memory_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'] + @log.info "Memory Limit #{@memory_capacity}" + else + @log.info "Error getting memory_capacity" + end end + elsif controller_type.downcase == 'daemonset' + capacity_from_kubelet = KubeletUtils.get_node_capacity + @cpu_capacity = capacity_from_kubelet[0] + @memory_capacity = capacity_from_kubelet[1] end end diff --git a/source/code/plugin/filter_cadvisor_health_node.rb b/source/code/plugin/filter_cadvisor_health_node.rb index c6280db60..4106b4d82 100644 --- a/source/code/plugin/filter_cadvisor_health_node.rb +++ b/source/code/plugin/filter_cadvisor_health_node.rb @@ -131,13 +131,13 @@ def process_node_cpu_record(record, metric_value) else instance_name = record['DataItems'][0]['InstanceName'] #@log.info "CPU capacity #{@cpu_capacity}" - + metric_value /= 1000000 percent = (metric_value.to_f/@cpu_capacity*100).round(2) #@log.debug "Percentage of CPU limit: #{percent}" state = HealthMonitorUtils.compute_percentage_state(percent, @provider.get_config(MonitorId::NODE_CPU_MONITOR_ID)) #@log.debug "Computed State : #{state}" timestamp = record['DataItems'][0]['Timestamp'] - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value/1000000.to_f, "cpuUtilizationPercentage" => percent}} + health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => {"cpuUsageMillicores" => metric_value, "cpuUtilizationPercentage" => percent}} monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@clusterId, @@hostName]) # temp = record.nil? ? 
"Nil" : record["MonitorInstanceId"] diff --git a/source/code/plugin/filter_inventory2mdm.rb b/source/code/plugin/filter_inventory2mdm.rb index 422b4b54a..16f2bb148 100644 --- a/source/code/plugin/filter_inventory2mdm.rb +++ b/source/code/plugin/filter_inventory2mdm.rb @@ -156,7 +156,7 @@ def process_pod_inventory_records(es) no_phase_dim_values_hash = Hash.new total_pod_count = 0 pod_count_by_phase = {} - podUids = {} + podUids = {} record_count = 0 begin records = [] @@ -165,7 +165,7 @@ def process_pod_inventory_records(es) timestamp = record['DataItems'][0]['CollectionTime'] podUid = record['DataItems'][0]['PodUid'] - if podUids.key?(podUid) + if podUids.key?(podUid) #@log.info "pod with #{podUid} already counted" next end diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index 2b5bd85b5..c23d8824a 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -3,6 +3,7 @@ require 'digest' require_relative 'health_model_constants' require 'yajl/json_gem' +require_relative '../kubelet_utils' module HealthModel # static class that provides a bunch of utility methods @@ -265,50 +266,13 @@ def get_monitor_instance_id(monitor_id, args = []) end def ensure_cpu_memory_capacity_set(log, cpu_capacity, memory_capacity, hostname) - log.info "ensure_cpu_memory_capacity_set cpu_capacity #{cpu_capacity} memory_capacity #{memory_capacity}" if cpu_capacity != 1.0 && memory_capacity != 1.0 log.info "CPU And Memory Capacity are already set" return [cpu_capacity, memory_capacity] end - log.info "CPU and Memory Capacity Not set" - begin - resourceUri = KubernetesApiClient.getNodesResourceUri("nodes") - @@nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo(resourceUri).body) - rescue Exception => e - log.info "Error when getting nodeInventory from kube API. 
Exception: #{e.class} Message: #{e.message} " - ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) - end - if !@@nodeInventory.nil? - cpu_capacity_json = KubernetesApiClient.parseNodeLimits(@@nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") - if !cpu_capacity_json.nil? - cpu_capacity_json.each do |cpu_info_node| - if !cpu_info_node['DataItems'][0]['Host'].nil? && cpu_info_node['DataItems'][0]['Host'] == hostname - if !cpu_info_node['DataItems'][0]['Collections'][0]['Value'].nil? - cpu_capacity = cpu_info_node['DataItems'][0]['Collections'][0]['Value'] - end - end - end - log.info "CPU Limit #{cpu_capacity}" - else - log.info "Error getting cpu_capacity" - end - memory_capacity_json = KubernetesApiClient.parseNodeLimits(@@nodeInventory, "capacity", "memory", "memoryCapacityBytes") - if !memory_capacity_json.nil? - memory_capacity_json.each do |memory_info_node| - if !memory_info_node['DataItems'][0]['Host'].nil? && memory_info_node['DataItems'][0]['Host'] == hostname - if !memory_info_node['DataItems'][0]['Collections'][0]['Value'].nil? 
- memory_capacity = memory_info_node['DataItems'][0]['Collections'][0]['Value'] - end - end - end - log.info "memory Limit #{memory_capacity}" - else - log.info "Error getting memory_capacity" - end - return [cpu_capacity, memory_capacity] - end + return KubeletUtils.get_node_capacity end def build_metrics_hash(metrics_to_collect) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index c709edbc2..3a8ad2761 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -2,6 +2,9 @@ # frozen_string_literal: true module Fluent + + require_relative "podinventory_to_mdm" + class Kube_PodInventory_Input < Input Plugin.register_input("kubepodinventory", self) @@ -32,9 +35,12 @@ def initialize config_param :run_interval, :time, :default => 60 config_param :tag, :string, :default => "oms.containerinsights.KubePodInventory" + config_param :custom_metrics_azure_regions, :string + def configure(conf) super + @inventoryToMdmConvertor = Inventory2MdmConvertor.new(@custom_metrics_azure_regions) end def start @@ -87,7 +93,7 @@ def enumerate(podList = nil) continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}") $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) - parse_and_emit_records(podInventory, serviceList, batchTime) + parse_and_emit_records(podInventory, serviceList, continuationToken, batchTime) else $log.warn "in_kube_podinventory::enumerate:Received empty podInventory" end @@ -96,7 +102,7 @@ def enumerate(podList = nil) while (!continuationToken.nil? && !continuationToken.empty?) 
continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}&continue=#{continuationToken}") if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?) - parse_and_emit_records(podInventory, serviceList, batchTime) + parse_and_emit_records(podInventory, serviceList, continuationToken, batchTime) else $log.warn "in_kube_podinventory::enumerate:Received empty podInventory" end @@ -246,7 +252,7 @@ def getContainerEnvironmentVariables(pod, clusterCollectEnvironmentVar) end end - def parse_and_emit_records(podInventory, serviceList, batchTime = Time.utc.iso8601) + def parse_and_emit_records(podInventory, serviceList, continuationToken, batchTime = Time.utc.iso8601) currentTime = Time.now emitTime = currentTime.to_f #batchTime = currentTime.utc.iso8601 @@ -466,6 +472,7 @@ def parse_and_emit_records(podInventory, serviceList, batchTime = Time.utc.iso86 "DataItems" => [record.each { |k, v| record[k] = v }], } eventStream.add(emitTime, wrapper) if wrapper + @inventoryToMdmConvertor.process_pod_inventory_record(wrapper) end end # Send container inventory records for containers on windows nodes @@ -483,7 +490,18 @@ def parse_and_emit_records(podInventory, serviceList, batchTime = Time.utc.iso86 end #podInventory block end router.emit_stream(@tag, eventStream) if eventStream - router.emit_stream(@@MDMKubePodInventoryTag, eventStream) if eventStream + + if continuationToken.nil? 
#no more chunks in this batch to be sent, get all pod inventory records to send + @log.info "Sending pod inventory mdm records to out_mdm" + pod_inventory_mdm_records = @inventoryToMdmConvertor.get_pod_inventory_mdm_records(batchTime) + @log.info "pod_inventory_mdm_records.size #{pod_inventory_mdm_records.size}" + mdm_pod_inventory_es = MultiEventStream.new + pod_inventory_mdm_records.each {|pod_inventory_mdm_record| + mdm_pod_inventory_es.add(batchTime, pod_inventory_mdm_record) if pod_inventory_mdm_record + } if pod_inventory_mdm_records + router.emit_stream(@@MDMKubePodInventoryTag, mdm_pod_inventory_es) if mdm_pod_inventory_es + end + #:optimize:kubeperf merge begin #if(!podInventory.empty?) diff --git a/source/code/plugin/kubelet_utils.rb b/source/code/plugin/kubelet_utils.rb new file mode 100644 index 000000000..6d97e30a9 --- /dev/null +++ b/source/code/plugin/kubelet_utils.rb @@ -0,0 +1,23 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +#!/usr/local/bin/ruby +# frozen_string_literal: true + +require_relative 'CAdvisorMetricsAPIClient' + +class KubeletUtils + class << self + def get_node_capacity + + cpu_capacity = 1.0 + memory_capacity = 1.0 + + response = CAdvisorMetricsAPIClient.getNodeCapacityFromCAdvisor(winNode: nil) + if !response.nil? && !response.body.nil? + cpu_capacity = JSON.parse(response.body)["num_cores"].nil? ? 1.0 : (JSON.parse(response.body)["num_cores"] * 1000.0) + memory_capacity = JSON.parse(response.body)["memory_capacity"].nil? ? 
1.0 : JSON.parse(response.body)["memory_capacity"].to_f + $log.info "CPU = #{cpu_capacity}mc Memory = #{memory_capacity/1024/1024}MB" + return [cpu_capacity, memory_capacity] + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 0a4e601b2..308eb6c68 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -22,6 +22,7 @@ def initialize @@post_request_url_template = "https://%{aks_region}.monitoring.azure.com%{aks_resource_id}/metrics" @@token_url_template = "https://login.microsoftonline.com/%{tenant_id}/oauth2/token" @@plugin_name = "AKSCustomMetricsMDM" + @@record_batch_size = 2600 @data_hash = {} @token_url = nil @@ -136,7 +137,14 @@ def write(chunk) chunk.msgpack_each { |(tag, record)| post_body.push(record.to_json) } - send_to_mdm post_body + # the limit of the payload is 1MB. Each record is ~300 bytes. using a batch size of 2600, so that + # the pay load size becomes approximately 800 Kb. 
+ count = post_body.size + while count > 0 + current_batch = post_body.first(@@record_batch_size) + count -= current_batch.size + send_to_mdm current_batch + end else if !@can_send_data_to_mdm @log.info "Cannot send data to MDM since all required conditions were not met" @@ -157,7 +165,9 @@ def send_to_mdm(post_body) request = Net::HTTP::Post.new(@post_request_uri.request_uri) request["Content-Type"] = "application/x-ndjson" request["Authorization"] = "Bearer #{access_token}" + request.body = post_body.join("\n") + @log.info "REQUEST BODY SIZE #{request.body.bytesize/1024}" response = @http_client.request(request) response.value # this throws for non 200 HTTP response code @log.info "HTTP Post Response Code : #{response.code}" diff --git a/source/code/plugin/podinventory_to_mdm.rb b/source/code/plugin/podinventory_to_mdm.rb new file mode 100644 index 000000000..21ef12c34 --- /dev/null +++ b/source/code/plugin/podinventory_to_mdm.rb @@ -0,0 +1,190 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. 
+ +# frozen_string_literal: true + +require 'logger' +require 'yajl/json_gem' +require_relative 'oms_common' +require_relative 'CustomMetricsUtils' + + +class Inventory2MdmConvertor + + @@node_count_metric_name = 'nodesCount' + @@pod_count_metric_name = 'podCount' + @@pod_inventory_tag = 'mdm.kubepodinventory' + @@node_inventory_tag = 'mdm.kubenodeinventory' + @@node_status_ready = 'Ready' + @@node_status_not_ready = 'NotReady' + + @@node_inventory_custom_metrics_template = ' + { + "time": "%{timestamp}", + "data": { + "baseData": { + "metric": "%{metricName}", + "namespace": "insights.container/nodes", + "dimNames": [ + "status" + ], + "series": [ + { + "dimValues": [ + "%{statusValue}" + ], + "min": %{node_status_count}, + "max": %{node_status_count}, + "sum": %{node_status_count}, + "count": 1 + } + ] + } + } + }' + + @@pod_inventory_custom_metrics_template = ' + { + "time": "%{timestamp}", + "data": { + "baseData": { + "metric": "%{metricName}", + "namespace": "insights.container/pods", + "dimNames": [ + "phase", + "Kubernetes namespace", + "node", + "controllerName" + ], + "series": [ + { + "dimValues": [ + "%{phaseDimValue}", + "%{namespaceDimValue}", + "%{nodeDimValue}", + "%{controllerNameDimValue}" + ], + "min": %{podCountMetricValue}, + "max": %{podCountMetricValue}, + "sum": %{podCountMetricValue}, + "count": 1 + } + ] + } + } + }' + + @@pod_phase_values = ['Running', 'Pending', 'Succeeded', 'Failed', 'Unknown'] + @process_incoming_stream = false + + def initialize(custom_metrics_azure_regions) + @log_path = '/var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log' + @log = Logger.new(@log_path, 1, 5000000) + @pod_count_hash = {} + @no_phase_dim_values_hash = {} + @pod_count_by_phase = {} + @pod_uids = {} + @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability(custom_metrics_azure_regions) + @log.debug "After check_custom_metrics_availability process_incoming_stream #{@process_incoming_stream}" + @log.debug {'Starting 
filter_inventory2mdm plugin'} + end + + def get_pod_inventory_mdm_records(batch_time) + begin + # generate all possible values of non_phase_dim_values X pod Phases and zero-fill the ones that are not already present + @no_phase_dim_values_hash.each {|key, value| + @@pod_phase_values.each{|phase| + pod_key = [key, phase].join('~~') + if !@pod_count_hash.key?(pod_key) + @pod_count_hash[pod_key] = 0 + #@log.info "Zero filled #{pod_key}" + else + next + end + } + } + records = [] + @pod_count_hash.each {|key, value| + key_elements = key.split('~~') + if key_elements.length != 4 + next + end + + # get dimension values by key + podNodeDimValue = key_elements[0] + podNamespaceDimValue = key_elements[1] + podControllerNameDimValue = key_elements[2] + podPhaseDimValue = key_elements[3] + + record = @@pod_inventory_custom_metrics_template % { + timestamp: batch_time, + metricName: @@pod_count_metric_name, + phaseDimValue: podPhaseDimValue, + namespaceDimValue: podNamespaceDimValue, + nodeDimValue: podNodeDimValue, + controllerNameDimValue: podControllerNameDimValue, + podCountMetricValue: value + } + records.push(JSON.parse(record)) + } + rescue Exception => e + @log.info "Error processing pod inventory record Exception: #{e.class} Message: #{e.message}" + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + return [] + end + @log.info "Pod Count To Phase #{@pod_count_by_phase} " + @log.info "resetting convertor state " + @pod_count_hash = {} + @no_phase_dim_values_hash = {} + @pod_count_by_phase = {} + @pod_uids = {} + return records + end + + def process_pod_inventory_record(record) + if @process_incoming_stream + begin + records = [] + + podUid = record['DataItems'][0]['PodUid'] + if @pod_uids.key?(podUid) + #@log.info "pod with #{podUid} already counted" + return + end + + @pod_uids[podUid] = true + podPhaseDimValue = record['DataItems'][0]['PodStatus'] + podNamespaceDimValue = record['DataItems'][0]['Namespace'] + podControllerNameDimValue = 
record['DataItems'][0]['ControllerName'] + podNodeDimValue = record['DataItems'][0]['Computer'] + + if podControllerNameDimValue.nil? || podControllerNameDimValue.empty? + podControllerNameDimValue = 'No Controller' + end + + if podNodeDimValue.empty? && podPhaseDimValue.downcase == 'pending' + podNodeDimValue = 'unscheduled' + elsif podNodeDimValue.empty? + podNodeDimValue = 'unknown' + end + + # group by distinct dimension values + pod_key = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue, podPhaseDimValue].join('~~') + + @pod_count_by_phase[podPhaseDimValue] = @pod_count_by_phase.key?(podPhaseDimValue) ? @pod_count_by_phase[podPhaseDimValue] + 1 : 1 + @pod_count_hash[pod_key] = @pod_count_hash.key?(pod_key) ? @pod_count_hash[pod_key] + 1 : 1 + + # Collect all possible combinations of dimension values other than pod phase + key_without_phase_dim_value = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue].join('~~') + if @no_phase_dim_values_hash.key?(key_without_phase_dim_value) + return + else + @no_phase_dim_values_hash[key_without_phase_dim_value] = true + end + rescue Exception => e + @log.info "Error processing pod inventory record Exception: #{e.class} Message: #{e.message}" + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + end + end + end +end + From 0ea6c6e05cd33efdd5c6c2b15fee5182a7827a25 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 28 Jan 2020 16:00:55 -0800 Subject: [PATCH 155/160] Add Null check for kube api responses in in_kube_health (#325) --- source/code/plugin/in_kube_health.rb | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index c54545e04..f9b211f11 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -87,16 +87,24 @@ def enumerate # we do this so that if the call fails, we get a response code/header etc. 
resourceUri = KubernetesApiClient.getNodesResourceUri("nodes") node_inventory_response = KubernetesApiClient.getKubeResourceInfo(resourceUri) - node_inventory = Yajl::Parser.parse(StringIO.new(node_inventory_response.body)) + if !node_inventory_response.nil? && !node_inventory_response.body.nil? + node_inventory = Yajl::Parser.parse(StringIO.new(node_inventory_response.body)) + @resources.node_inventory = node_inventory + end + pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods?fieldSelector=metadata.namespace%3D#{@@KubeInfraNamespace}") - pod_inventory = Yajl::Parser.parse(StringIO.new(pod_inventory_response.body)) + if !pod_inventory_response.nil? && !pod_inventory_response.body.nil? + pod_inventory = Yajl::Parser.parse(StringIO.new(pod_inventory_response.body)) + @resources.pod_inventory = pod_inventory + @resources.build_pod_uid_lookup + end + replicaset_inventory_response = KubernetesApiClient.getKubeResourceInfo("replicasets?fieldSelector=metadata.namespace%3D#{@@KubeInfraNamespace}", api_group: @@ApiGroupApps) - replicaset_inventory = Yajl::Parser.parse(StringIO.new(replicaset_inventory_response.body)) + if !replicaset_inventory_response.nil? && !replicaset_inventory_response.body.nil? 
+ replicaset_inventory = Yajl::Parser.parse(StringIO.new(replicaset_inventory_response.body)) + @resources.set_replicaset_inventory(replicaset_inventory) + end - @resources.node_inventory = node_inventory - @resources.pod_inventory = pod_inventory - @resources.set_replicaset_inventory(replicaset_inventory) - @resources.build_pod_uid_lookup if node_inventory_response.code.to_i != 200 record = process_kube_api_up_monitor("fail", node_inventory_response) From 843100c23387d9ad15be5abad49205a67602a33c Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 4 Feb 2020 15:06:35 -0800 Subject: [PATCH 156/160] Fix casing bug (#326) --- source/code/plugin/KubernetesApiClient.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index 6f108ec92..91b76bbf1 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -195,7 +195,7 @@ def isNodeMaster def getNodesResourceUri(nodesResourceUri) begin # For ARO v3 cluster, filter out all other node roles other than compute - if IsAROV3Cluster + if isAROV3Cluster() if !nodesResourceUri.nil? && !nodesResourceUri.index("?").nil? nodesResourceUri = nodesResourceUri + "&labelSelector=node-role.kubernetes.io%2Fcompute%3Dtrue" else @@ -390,7 +390,7 @@ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName end # For ARO, skip the pods scheduled on to master or infra nodes to ingest - if IsAROV3Cluster && !pod["spec"].nil? && !pod["spec"]["nodeName"].nil? && + if isAROV3Cluster() && !pod["spec"].nil? && !pod["spec"]["nodeName"].nil? 
&& ( pod["spec"]["nodeName"].downcase.start_with?("infra-") || pod["spec"]["nodeName"].downcase.start_with?("master-") ) next From 2c32e5797a6deb38beb275b4fdc8e1490efa0fcc Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Fri, 7 Feb 2020 09:51:48 -0800 Subject: [PATCH 157/160] Missed kube.conf update (#327) --- installer/conf/kube.conf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 207780442..77c8454a6 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -13,6 +13,7 @@ tag oms.containerinsights.KubePodInventory run_interval 60 log_level debug + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast #Kubernetes events @@ -47,7 +48,7 @@ log_level debug - + type filter_inventory2mdm custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast log_level info @@ -140,7 +141,7 @@ max_retry_wait 5m - + type out_oms log_level debug num_threads 5 From b10fee9b3ea1a899ae4ed7ac7b02171dce8ae04c Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Thu, 20 Feb 2020 16:49:48 -0800 Subject: [PATCH 158/160] changes to use msi if service principal does not exist (#328) changes to use msi if service principal does not exist (#328) --- installer/conf/container.conf | 2 +- installer/scripts/tomlparser.rb | 12 ++ source/code/plugin/in_kube_events.rb | 34 +++++- source/code/plugin/in_kube_podinventory.rb | 49 ++++---- source/code/plugin/out_mdm.rb | 125 +++++++++++++++------ 5 files changed, 164 insertions(+), 58 deletions(-) diff --git a/installer/conf/container.conf 
b/installer/conf/container.conf index 93c250fbb..0e088e7f7 100644 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -110,5 +110,5 @@ retry_limit 10 retry_wait 5s max_retry_wait 5m - retry_mdm_post_wait_minutes 60 + retry_mdm_post_wait_minutes 30 diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index ba67d023a..5f2596bca 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -16,6 +16,7 @@ @logExclusionRegexPattern = "(^((?!stdout|stderr).)*$)" @excludePath = "*.csv2" #some invalid path @enrichContainerLogs = false +@collectAllKubeEvents = false # Use parser to parse the configmap toml file to a ruby structure def parseConfigMap @@ -128,6 +129,16 @@ def populateSettingValuesFromConfigMap(parsedConfig) rescue => errorStr ConfigParseErrorLogger.logError("Exception while reading config map settings for cluster level container log enrichment - #{errorStr}, using defaults, please check config map for errors") end + + #Get kube events enrichment setting + begin + if !parsedConfig[:log_collection_settings][:collect_all_kube_events].nil? && !parsedConfig[:log_collection_settings][:collect_all_kube_events][:enabled].nil? 
+ @collectAllKubeEvents = parsedConfig[:log_collection_settings][:collect_all_kube_events][:enabled] + puts "config::Using config map setting for kube event collection" + end + rescue => errorStr + ConfigParseErrorLogger.logError("Exception while reading config map settings for kube event collection - #{errorStr}, using defaults, please check config map for errors") + end end end @@ -168,6 +179,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) file.write("export AZMON_CLUSTER_COLLECT_ENV_VAR=#{@collectClusterEnvVariables}\n") file.write("export AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH=#{@excludePath}\n") file.write("export AZMON_CLUSTER_CONTAINER_LOG_ENRICH=#{@enrichContainerLogs}\n") + file.write("export AZMON_CLUSTER_COLLECT_ALL_KUBE_EVENTS=#{@collectAllKubeEvents}\n") # Close file after writing all environment variables file.close puts "Both stdout & stderr log collection are turned off for namespaces: '#{@excludePath}' " diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index b405afde9..bb0ab6f05 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -4,7 +4,6 @@ module Fluent class Kube_Event_Input < Input Plugin.register_input("kubeevents", self) - @@KubeEventsStateFile = "/var/opt/microsoft/docker-cimprov/state/KubeEventQueryState.yaml" def initialize @@ -20,6 +19,12 @@ def initialize # 30000 events account to approximately 5MB @EVENTS_CHUNK_SIZE = 30000 + + # Initializing events count for telemetry + @eventsCount = 0 + + # Initilize enable/disable normal event collection + @collectAllKubeEvents = false end config_param :run_interval, :time, :default => 60 @@ -35,6 +40,16 @@ def start @condition = ConditionVariable.new @mutex = Mutex.new @thread = Thread.new(&method(:run_periodic)) + collectAllKubeEventsSetting = ENV["AZMON_CLUSTER_COLLECT_ALL_KUBE_EVENTS"] + if !collectAllKubeEventsSetting.nil? && !collectAllKubeEventsSetting.empty? 
+ if collectAllKubeEventsSetting.casecmp("false") == 0 + @collectAllKubeEvents = false + $log.warn("Normal kube events collection disabled for cluster") + else + @collectAllKubeEvents = true + $log.warn("Normal kube events collection enabled for cluster") + end + end end end @@ -55,11 +70,16 @@ def enumerate batchTime = currentTime.utc.iso8601 eventQueryState = getEventQueryState newEventQueryState = [] + @eventsCount = 0 # Initializing continuation token to nil continuationToken = nil $log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}") - continuationToken, eventList = KubernetesApiClient.getResourcesAndContinuationToken("events?fieldSelector=type!=Normal&limit=#{@EVENTS_CHUNK_SIZE}") + if @collectAllKubeEvents + continuationToken, eventList = KubernetesApiClient.getResourcesAndContinuationToken("events?limit=#{@EVENTS_CHUNK_SIZE}") + else + continuationToken, eventList = KubernetesApiClient.getResourcesAndContinuationToken("events?fieldSelector=type!=Normal&limit=#{@EVENTS_CHUNK_SIZE}") + end $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}") if (!eventList.nil? && !eventList.empty? && eventList.key?("items") && !eventList["items"].nil? && !eventList["items"].empty?) 
newEventQueryState = parse_and_emit_records(eventList, eventQueryState, newEventQueryState, batchTime) @@ -80,6 +100,13 @@ def enumerate # Setting this to nil so that we dont hold memory until GC kicks in eventList = nil writeEventQueryState(newEventQueryState) + + # Flush AppInsights telemetry once all the processing is done, only if the number of events flushed is greater than 0 + if (@eventsCount > 0) + telemetryProperties = {} + telemetryProperties["CollectAllKubeEvents"] = @collectAllKubeEvents + ApplicationInsightsUtility.sendMetricTelemetry("EventCount", @eventsCount, {}) + end rescue => errorStr $log.warn "in_kube_events::enumerate:Failed in enumerate: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) @@ -105,7 +132,7 @@ def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTim nodeName = items["source"].key?("host") ? items["source"]["host"] : (OMS::Common.get_hostname) # For ARO v3 cluster, drop the master and infra node sourced events to ingest if KubernetesApiClient.isAROV3Cluster && !nodeName.nil? && !nodeName.empty? 
&& - ( nodeName.downcase.start_with?("infra-") || nodeName.downcase.start_with?("master-") ) + (nodeName.downcase.start_with?("infra-") || nodeName.downcase.start_with?("master-")) next end @@ -129,6 +156,7 @@ def parse_and_emit_records(events, eventQueryState, newEventQueryState, batchTim "DataItems" => [record.each { |k, v| record[k] = v }], } eventStream.add(emitTime, wrapper) if wrapper + @eventsCount += 1 end router.emit_stream(@tag, eventStream) if eventStream rescue => errorStr diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 3a8ad2761..3a78d4c05 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -2,7 +2,6 @@ # frozen_string_literal: true module Fluent - require_relative "podinventory_to_mdm" class Kube_PodInventory_Input < Input @@ -37,7 +36,6 @@ def initialize config_param :tag, :string, :default => "oms.containerinsights.KubePodInventory" config_param :custom_metrics_azure_regions, :string - def configure(conf) super @inventoryToMdmConvertor = Inventory2MdmConvertor.new(@custom_metrics_azure_regions) @@ -149,18 +147,25 @@ def populateWindowsContainerInventoryRecord(container, record, containerEnvVaria containerInventoryRecord["Computer"] = record["Computer"] containerInventoryRecord["ContainerHostname"] = record["Computer"] containerInventoryRecord["ElementName"] = containerName - image = container["image"] - repoInfo = image.split("/") - if !repoInfo.nil? - containerInventoryRecord["Repository"] = repoInfo[0] - if !repoInfo[1].nil? - imageInfo = repoInfo[1].split(":") - if !imageInfo.nil? - containerInventoryRecord["Image"] = imageInfo[0] - containerInventoryRecord["ImageTag"] = imageInfo[1] + + # Find delimiters in the string of format repository/image:imagetag + imageValue = container["image"] + if !imageValue.empty? + slashLocation = imageValue.index("/") + colonLocation = imageValue.index(":") + if !colonLocation.nil? 
+ if slashLocation.nil? + # image:imagetag + containerInventoryRecord["Image"] = imageValue[0..(colonLocation - 1)] + else + # repository/image:imagetag + containerInventoryRecord["Repository"] = imageValue[0..(slashLocation - 1)] + containerInventoryRecord["Image"] = imageValue[(slashLocation + 1)..(colonLocation - 1)] end + containerInventoryRecord["ImageTag"] = imageValue[(colonLocation + 1)..-1] end end + imageIdInfo = container["imageID"] imageIdSplitInfo = imageIdInfo.split("@") if !imageIdSplitInfo.nil? @@ -273,8 +278,8 @@ def parse_and_emit_records(podInventory, serviceList, continuationToken, batchTi # For ARO v3 cluster, skip the pods scheduled on to master or infra nodes if KubernetesApiClient.isAROV3Cluster && !items["spec"].nil? && !items["spec"]["nodeName"].nil? && - ( items["spec"]["nodeName"].downcase.start_with?("infra-") || - items["spec"]["nodeName"].downcase.start_with?("master-") ) + (items["spec"]["nodeName"].downcase.start_with?("infra-") || + items["spec"]["nodeName"].downcase.start_with?("master-")) next end @@ -491,15 +496,15 @@ def parse_and_emit_records(podInventory, serviceList, continuationToken, batchTi router.emit_stream(@tag, eventStream) if eventStream - if continuationToken.nil? #no more chunks in this batch to be sent, get all pod inventory records to send - @log.info "Sending pod inventory mdm records to out_mdm" - pod_inventory_mdm_records = @inventoryToMdmConvertor.get_pod_inventory_mdm_records(batchTime) - @log.info "pod_inventory_mdm_records.size #{pod_inventory_mdm_records.size}" - mdm_pod_inventory_es = MultiEventStream.new - pod_inventory_mdm_records.each {|pod_inventory_mdm_record| - mdm_pod_inventory_es.add(batchTime, pod_inventory_mdm_record) if pod_inventory_mdm_record - } if pod_inventory_mdm_records - router.emit_stream(@@MDMKubePodInventoryTag, mdm_pod_inventory_es) if mdm_pod_inventory_es + if continuationToken.nil? 
#no more chunks in this batch to be sent, get all pod inventory records to send + @log.info "Sending pod inventory mdm records to out_mdm" + pod_inventory_mdm_records = @inventoryToMdmConvertor.get_pod_inventory_mdm_records(batchTime) + @log.info "pod_inventory_mdm_records.size #{pod_inventory_mdm_records.size}" + mdm_pod_inventory_es = MultiEventStream.new + pod_inventory_mdm_records.each { |pod_inventory_mdm_record| + mdm_pod_inventory_es.add(batchTime, pod_inventory_mdm_record) if pod_inventory_mdm_record + } if pod_inventory_mdm_records + router.emit_stream(@@MDMKubePodInventoryTag, mdm_pod_inventory_es) if mdm_pod_inventory_es end #:optimize:kubeperf merge diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 308eb6c68..2f90b89ee 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -12,7 +12,7 @@ def initialize require "net/http" require "net/https" require "uri" - require 'yajl/json_gem' + require "yajl/json_gem" require_relative "KubernetesApiClient" require_relative "ApplicationInsightsUtility" @@ -20,12 +20,19 @@ def initialize @@grant_type = "client_credentials" @@azure_json_path = "/etc/kubernetes/host/azure.json" @@post_request_url_template = "https://%{aks_region}.monitoring.azure.com%{aks_resource_id}/metrics" - @@token_url_template = "https://login.microsoftonline.com/%{tenant_id}/oauth2/token" + @@aad_token_url_template = "https://login.microsoftonline.com/%{tenant_id}/oauth2/token" + + # msiEndpoint is the well known endpoint for getting MSI authentications tokens + @@msi_endpoint_template = "http://169.254.169.254/metadata/identity/oauth2/token?api-version=2018-02-01&client_id=%{user_assigned_client_id}&resource=%{resource}" + @@userAssignedClientId = ENV["USER_ASSIGNED_IDENTITY_CLIENT_ID"] + @@plugin_name = "AKSCustomMetricsMDM" @@record_batch_size = 2600 + @@tokenRefreshBackoffInterval = 30 + @data_hash = {} - @token_url = nil + @parsed_token_uri = nil @http_client = nil @token_expiry_time 
= Time.now @cached_access_token = String.new @@ -33,6 +40,10 @@ def initialize @first_post_attempt_made = false @can_send_data_to_mdm = true @last_telemetry_sent_time = nil + # Setting useMsi to false by default + @useMsi = false + + @get_access_token_backoff_expiry = Time.now end def configure(conf) @@ -57,51 +68,102 @@ def start @log.info "Environment Variable AKS_REGION is not set.. " @can_send_data_to_mdm = false else - aks_region = aks_region.gsub(" ","") + aks_region = aks_region.gsub(" ", "") end if @can_send_data_to_mdm @log.info "MDM Metrics supported in #{aks_region} region" - @token_url = @@token_url_template % {tenant_id: @data_hash["tenantId"]} - @cached_access_token = get_access_token + @@post_request_url = @@post_request_url_template % {aks_region: aks_region, aks_resource_id: aks_resource_id} @post_request_uri = URI.parse(@@post_request_url) @http_client = Net::HTTP.new(@post_request_uri.host, @post_request_uri.port) @http_client.use_ssl = true @log.info "POST Request url: #{@@post_request_url}" ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMPluginStart", {}) + + # Check to see if SP exists, if it does use SP. Else, use msi + sp_client_id = @data_hash["aadClientId"] + sp_client_secret = @data_hash["aadClientSecret"] + + if (!sp_client_id.nil? && !sp_client_id.empty? 
&& sp_client_id != "msi") + @useMsi = false + aad_token_url = @@aad_token_url_template % {tenant_id: @data_hash["tenantId"]} + @parsed_token_uri = URI.parse(aad_token_url) + else + @useMsi = true + msi_endpoint = @@msi_endpoint_template % {user_assigned_client_id: @@userAssignedClientId, resource: @@token_resource_url} + @parsed_token_uri = URI.parse(msi_endpoint) + end + + @cached_access_token = get_access_token end rescue => e @log.info "exception when initializing out_mdm #{e}" ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "MDM"}) - @can_send_data_to_mdm = false return end - end - # get the access token only if the time to expiry is less than 5 minutes + # get the access token only if the time to expiry is less than 5 minutes and get_access_token_backoff has expired def get_access_token - if @cached_access_token.to_s.empty? || (Time.now + 5 * 60 > @token_expiry_time) # token is valid for 60 minutes. Refresh token 5 minutes from expiration - @log.info "Refreshing access token for out_mdm plugin.." - token_uri = URI.parse(@token_url) - http_access_token = Net::HTTP.new(token_uri.host, token_uri.port) - http_access_token.use_ssl = true - token_request = Net::HTTP::Post.new(token_uri.request_uri) - token_request.set_form_data( - { - "grant_type" => @@grant_type, - "client_id" => @data_hash["aadClientId"], - "client_secret" => @data_hash["aadClientSecret"], - "resource" => @@token_resource_url, - } - ) - - token_response = http_access_token.request(token_request) - # Handle the case where the response is not 200 - parsed_json = JSON.parse(token_response.body) - @token_expiry_time = Time.now + 59 * 60 # set the expiry time to be ~one hour from current time - @cached_access_token = parsed_json["access_token"] + if (Time.now > @get_access_token_backoff_expiry) + http_access_token = nil + retries = 0 + begin + if @cached_access_token.to_s.empty? 
|| (Time.now + 5 * 60 > @token_expiry_time) # Refresh token 5 minutes from expiration + @log.info "Refreshing access token for out_mdm plugin.." + + if (!!@useMsi) + @log.info "Using msi to get the token to post MDM data" + ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMToken-MSI", {}) + @log.info "Opening TCP connection" + http_access_token = Net::HTTP.start(@parsed_token_uri.host, @parsed_token_uri.port, :use_ssl => false) + # http_access_token.use_ssl = false + token_request = Net::HTTP::Get.new(@parsed_token_uri.request_uri) + token_request["Metadata"] = true + else + @log.info "Using SP to get the token to post MDM data" + ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMToken-SP", {}) + @log.info "Opening TCP connection" + http_access_token = Net::HTTP.start(@parsed_token_uri.host, @parsed_token_uri.port, :use_ssl => true) + # http_access_token.use_ssl = true + token_request = Net::HTTP::Post.new(@parsed_token_uri.request_uri) + token_request.set_form_data( + { + "grant_type" => @@grant_type, + "client_id" => @data_hash["aadClientId"], + "client_secret" => @data_hash["aadClientSecret"], + "resource" => @@token_resource_url, + } + ) + end + + @log.info "making request to get token.." 
+ token_response = http_access_token.request(token_request) + # Handle the case where the response is not 200 + parsed_json = JSON.parse(token_response.body) + @token_expiry_time = Time.now + @@tokenRefreshBackoffInterval * 60 # set the expiry time to be ~thirty minutes from current time + @cached_access_token = parsed_json["access_token"] + @log.info "Successfully got access token" + end + rescue => err + @log.info "Exception in get_access_token: #{err}" + if (retries < 2) + retries += 1 + @log.info "Retrying request to get token - retry number: #{retries}" + sleep(retries) + retry + else + @get_access_token_backoff_expiry = Time.now + @@tokenRefreshBackoffInterval * 60 + @log.info "@get_access_token_backoff_expiry set to #{@get_access_token_backoff_expiry}" + ApplicationInsightsUtility.sendExceptionTelemetry(err, {"FeatureArea" => "MDM"}) + end + ensure + if http_access_token + @log.info "Closing http connection" + http_access_token.finish + end + end end @cached_access_token end @@ -172,10 +234,9 @@ def send_to_mdm(post_body) response.value # this throws for non 200 HTTP response code @log.info "HTTP Post Response Code : #{response.code}" if @last_telemetry_sent_time.nil? 
|| @last_telemetry_sent_time + 60 * 60 < Time.now - ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMSendSuccessful", {}) - @last_telemetry_sent_time = Time.now + ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMSendSuccessful", {}) + @last_telemetry_sent_time = Time.now end - rescue Net::HTTPServerException => e @log.info "Failed to Post Metrics to MDM : #{e} Response: #{response}" @log.debug_backtrace(e.backtrace) From f820075ef14d71751ad7702e011cbc44accff7c4 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Mon, 24 Feb 2020 15:41:37 -0800 Subject: [PATCH 159/160] Adding caseinsensitive compare (#330) Adding case insensitive compare --- source/code/plugin/out_mdm.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 2f90b89ee..243251bca 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -85,7 +85,7 @@ def start sp_client_id = @data_hash["aadClientId"] sp_client_secret = @data_hash["aadClientSecret"] - if (!sp_client_id.nil? && !sp_client_id.empty? && sp_client_id != "msi") + if (!sp_client_id.nil? && !sp_client_id.empty? 
&& sp_client_id.downcase != "msi") @useMsi = false aad_token_url = @@aad_token_url_template % {tenant_id: @data_hash["tenantId"]} @parsed_token_uri = URI.parse(aad_token_url) From 03d90dec9a293831bfbf2361f3f2ea699ba47482 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Tue, 25 Feb 2020 10:46:21 -0800 Subject: [PATCH 160/160] gpu monitoring (#329) * gpu monitoring * Emit info log for tests for the new insightsmetrics data stream --- installer/conf/container.conf | 14 ++ installer/conf/kube.conf | 15 ++ installer/datafiles/base_container.data | 2 +- .../code/plugin/CAdvisorMetricsAPIClient.rb | 96 +++++++++++++ source/code/plugin/KubernetesApiClient.rb | 131 ++++++++++++++++++ source/code/plugin/constants.rb | 15 ++ source/code/plugin/in_cadvisor_perf.rb | 33 ++++- source/code/plugin/in_kube_nodes.rb | 48 ++++++- source/code/plugin/in_kube_podinventory.rb | 37 ++++- source/code/plugin/in_win_cadvisor_perf.rb | 31 ++++- 10 files changed, 417 insertions(+), 5 deletions(-) create mode 100644 source/code/plugin/constants.rb diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 0e088e7f7..16acd6353 100644 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -112,3 +112,17 @@ max_retry_wait 5m retry_mdm_post_wait_minutes 30 + + + type out_oms + log_level debug + num_threads 5 + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_insightsmetrics*.buffer + buffer_queue_full_action drop_oldest_chunk + buffer_chunk_limit 4m + flush_interval 20s + retry_limit 10 + retry_wait 5s + max_retry_wait 5m + diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 77c8454a6..98a2fbb63 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -216,4 +216,19 @@ retry_limit 10 retry_wait 5s max_retry_wait 5m + + + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 4m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_insightsmetrics*.buffer + buffer_queue_limit 20 + 
buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 5s + max_retry_wait 5m \ No newline at end of file diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index f976454f9..e011dddf9 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -38,7 +38,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/podinventory_to_mdm.rb; source/code/plugin/podinventory_to_mdm.rb; 644; root; root /opt/microsoft/omsagent/plugin/kubelet_utils.rb; source/code/plugin/kubelet_utils.rb; 644; root; root /opt/microsoft/omsagent/plugin/CustomMetricsUtils.rb; source/code/plugin/CustomMetricsUtils.rb; 644; root; root - +/opt/microsoft/omsagent/plugin/constants.rb; source/code/plugin/constants.rb; 644; root; root /opt/microsoft/omsagent/plugin/ApplicationInsightsUtility.rb; source/code/plugin/ApplicationInsightsUtility.rb; 644; root; root /opt/microsoft/omsagent/plugin/ContainerInventoryState.rb; source/code/plugin/ContainerInventoryState.rb; 644; root; root diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index 54e7e5fd9..53139ea4e 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -13,6 +13,7 @@ class CAdvisorMetricsAPIClient require_relative "oms_common" require_relative "KubernetesApiClient" require_relative "ApplicationInsightsUtility" + require_relative "constants" @configMapMountPath = "/etc/config/settings/log-data-collection-settings" @promConfigMountPath = "/etc/config/settings/prometheus-data-collection-settings" @@ -255,6 +256,101 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met return metricItems end + def getInsightsMetrics(winNode: nil, metricTime: Time.now.utc.iso8601) + metricDataItems = [] + begin + cAdvisorStats = getSummaryStatsFromCAdvisor(winNode) + if 
!cAdvisorStats.nil? + metricInfo = JSON.parse(cAdvisorStats.body) + end + if !winNode.nil? + hostName = winNode["Hostname"] + operatingSystem = "Windows" + else + if !metricInfo.nil? && !metricInfo["node"].nil? && !metricInfo["node"]["nodeName"].nil? + hostName = metricInfo["node"]["nodeName"] + else + hostName = (OMS::Common.get_hostname) + end + operatingSystem = "Linux" + end + if !metricInfo.nil? + metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "memoryTotal", "containerGpumemoryTotalBytes", metricTime)) + metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "memoryUsed","containerGpumemoryUsedBytes", metricTime)) + metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "dutyCycle","containerGpuDutyCycle", metricTime)) + else + @Log.warn("Couldn't get Insights metrics information for host: #{hostName} os:#{operatingSystem}") + end + rescue => error + @Log.warn("CAdvisorMetricsAPIClient::getInsightsMetrics failed: #{error}") + return metricDataItems + end + return metricDataItems + end + + def getContainerGpuMetricsAsInsightsMetrics(metricJSON, hostName, metricNameToCollect, metricNametoReturn, metricPollTime) + metricItems = [] + clusterId = KubernetesApiClient.getClusterId + clusterName = KubernetesApiClient.getClusterName + begin + metricInfo = metricJSON + metricInfo["pods"].each do |pod| + podUid = pod["podRef"]["uid"] + podName = pod["podRef"]["name"] + podNamespace = pod["podRef"]["namespace"] + + if (!pod["containers"].nil?) + pod["containers"].each do |container| + #gpu metrics + if (!container["accelerators"].nil?) + container["accelerators"].each do |accelerator| + if (!accelerator[metricNameToCollect].nil?) 
#empty check is invalid for non-strings + containerName = container["name"] + metricValue = accelerator[metricNameToCollect] + + + metricItem = {} + metricItem["CollectionTime"] = metricPollTime + metricItem["Computer"] = hostName + metricItem["Name"] = metricNametoReturn + metricItem["Value"] = metricValue + metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN + metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_GPU_NAMESPACE + + metricTags = {} + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID ] = clusterId + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = clusterName + metricTags[Constants::INSIGHTSMETRICS_TAGS_CONTAINER_NAME] = podUid + "/" + containerName + #metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = podNameSpace + + if (!accelerator["make"].nil? && !accelerator["make"].empty?) + metricTags[Constants::INSIGHTSMETRICS_TAGS_GPU_VENDOR] = accelerator["make"] + end + + if (!accelerator["model"].nil? && !accelerator["model"].empty?) + metricTags[Constants::INSIGHTSMETRICS_TAGS_GPU_MODEL] = accelerator["model"] + end + + if (!accelerator["id"].nil? && !accelerator["id"].empty?) 
+ metricTags[Constants::INSIGHTSMETRICS_TAGS_GPU_ID] = accelerator["id"] + end + + metricItem["Tags"] = metricTags + + metricItems.push(metricItem) + end + end + end + end + end + end + rescue => errorStr + @Log.warn("getContainerGpuMetricsAsInsightsMetrics failed: #{errorStr} for metric #{metricNameToCollect}") + return metricItems + end + return metricItems + end + def clearDeletedWinContainersFromCache() begin winCpuUsageNanoSecondsKeys = @@winContainerCpuUsageNanoSecondsLast.keys diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index 91b76bbf1..b864ef718 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -10,6 +10,7 @@ class KubernetesApiClient require "time" require_relative "oms_common" + require_relative "constants" @@ApiVersion = "v1" @@ApiVersionApps = "v1" @@ -470,6 +471,87 @@ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName return metricItems end #getContainerResourceRequestAndLimits + def getContainerResourceRequestsAndLimitsAsInsightsMetrics(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) + metricItems = [] + begin + clusterId = getClusterId + clusterName = getClusterName + + metricInfo = metricJSON + metricInfo["items"].each do |pod| + podNameSpace = pod["metadata"]["namespace"] + if podNameSpace.eql?("kube-system") && !pod["metadata"].key?("ownerReferences") + # The above case seems to be the only case where you have horizontal scaling of pods + # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash + # instead of the actual poduid. Since this uid is not being surface into the UX + # its ok to use this. + # Use kubernetes.io/config.hash to be able to correlate with cadvisor data + if pod["metadata"]["annotations"].nil? 
+ next + else + podUid = pod["metadata"]["annotations"]["kubernetes.io/config.hash"] + end + else + podUid = pod["metadata"]["uid"] + end + + podContainers = [] + if !pod["spec"]["containers"].nil? && !pod["spec"]["containers"].empty? + podContainers = podContainers + pod["spec"]["containers"] + end + # Adding init containers to the record list as well. + if !pod["spec"]["initContainers"].nil? && !pod["spec"]["initContainers"].empty? + podContainers = podContainers + pod["spec"]["initContainers"] + end + + if (!podContainers.nil? && !podContainers.empty?) + if (!pod["spec"]["nodeName"].nil?) + nodeName = pod["spec"]["nodeName"] + else + nodeName = "" #unscheduled pod. We still want to collect limits & requests for GPU + end + podContainers.each do |container| + metricValue = nil + containerName = container["name"] + #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?) + metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect]) + else + #No container level limit for the given metric, so default to node level limit for non-gpu metrics + if (metricNameToCollect.downcase != "nvidia.com/gpu") && (metricNameToCollect.downcase != "amd.com/gpu") + nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect + metricValue = @@NodeMetrics[nodeMetricsHashKey] + end + end + if (!metricValue.nil?) 
+ metricItem = {} + metricItem["CollectionTime"] = metricTime + metricItem["Computer"] = nodeName + metricItem["Name"] = metricNametoReturn + metricItem["Value"] = metricValue + metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN + metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_GPU_NAMESPACE + + metricTags = {} + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID ] = clusterId + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = clusterName + metricTags[Constants::INSIGHTSMETRICS_TAGS_CONTAINER_NAME] = podUid + "/" + containerName + #metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = podNameSpace + + metricItem["Tags"] = metricTags + + metricItems.push(metricItem) + end + end + end + end + rescue => error + @Log.warn("getcontainerResourceRequestsAndLimitsAsInsightsMetrics failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") + return metricItems + end + return metricItems + end #getContainerResourceRequestAndLimitsAsInsightsMetrics + def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) metricItems = [] begin @@ -513,6 +595,51 @@ def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNamet return metricItems end #parseNodeLimits + def parseNodeLimitsAsInsightsMetrics(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601) + metricItems = [] + begin + metricInfo = metricJSON + clusterId = getClusterId + clusterName = getClusterName + #Since we are getting all node data at the same time and kubernetes doesnt specify a timestamp for the capacity and allocation metrics, + #if we are coming up with the time it should be same for all nodes + #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + metricInfo["items"].each do |node| + if (!node["status"][metricCategory].nil?) && (!node["status"][metricCategory][metricNameToCollect].nil?) 
+ + # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" or "amd.com/gpu" or "nvidia.com/gpu" + metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect]) + + metricItem = {} + metricItem["CollectionTime"] = metricTime + metricItem["Computer"] = node["metadata"]["name"] + metricItem["Name"] = metricNametoReturn + metricItem["Value"] = metricValue + metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN + metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_GPU_NAMESPACE + + metricTags = {} + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID ] = clusterId + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = clusterName + metricTags[Constants::INSIGHTSMETRICS_TAGS_GPU_VENDOR] = metricNameToCollect + + metricItem["Tags"] = metricTags + + metricItems.push(metricItem) + #push node level metrics (except gpu ones) to a inmem hash so that we can use it looking up at container level. 
+ #Currently if container level cpu & memory limits are not defined we default to node level limits + if (metricNameToCollect.downcase != "nvidia.com/gpu") && (metricNameToCollect.downcase != "amd.com/gpu") + @@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue + #@Log.info ("Node metric hash: #{@@NodeMetrics}") + end + end + end + rescue => error + @Log.warn("parseNodeLimitsAsInsightsMetrics failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") + end + return metricItems + end + def getMetricNumericValue(metricName, metricVal) metricValue = metricVal.downcase begin @@ -578,6 +705,10 @@ def getMetricNumericValue(metricName, metricVal) else #assuming no units specified, it is cores that we are converting to nanocores (the below conversion will fail for other unsupported 'units') metricValue = Float(metricValue) * 1000.0 ** 3 end + when "nvidia.com/gpu" + metricValue = Float(metricValue) * 1.0 + when "amd.com/gpu" + metricValue = Float(metricValue) * 1.0 else @Log.warn("getMetricNumericValue: Unsupported metric #{metricName}. 
Returning 0 for metric value") metricValue = 0 diff --git a/source/code/plugin/constants.rb b/source/code/plugin/constants.rb new file mode 100644 index 000000000..20114ea2b --- /dev/null +++ b/source/code/plugin/constants.rb @@ -0,0 +1,15 @@ +class Constants + INSIGHTSMETRICS_TAGS_ORIGIN = "container.azm.ms" + INSIGHTSMETRICS_TAGS_CLUSTERID = "container.azm.ms/clusterId" + INSIGHTSMETRICS_TAGS_CLUSTERNAME = "container.azm.ms/clusterName" + INSIGHTSMETRICS_TAGS_GPU_VENDOR = "gpuVendor" + INSIGHTSMETRICS_TAGS_GPU_NAMESPACE = "container.azm.ms/gpu" + INSIGHTSMETRICS_TAGS_GPU_MODEL = "gpuModel" + INSIGHTSMETRICS_TAGS_GPU_ID = "gpuId" + INSIGHTSMETRICS_TAGS_CONTAINER_NAME = "containerName" + INSIGHTSMETRICS_TAGS_CONTAINER_ID = "containerId" + INSIGHTSMETRICS_TAGS_K8SNAMESPACE = "k8sNamespace" + INSIGHTSMETRICS_TAGS_CONTROLLER_NAME = "controllerName" + INSIGHTSMETRICS_TAGS_CONTROLLER_KIND = "controllerKind" + INSIGHTSMETRICS_FLUENT_TAG = "oms.api.InsightsMetrics" +end \ No newline at end of file diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index 96aa66aa1..a44365e9d 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -15,6 +15,7 @@ def initialize require_relative "CAdvisorMetricsAPIClient" require_relative "oms_common" require_relative "omslog" + require_relative "constants" end config_param :run_interval, :time, :default => 60 @@ -50,8 +51,10 @@ def enumerate() currentTime = Time.now time = currentTime.to_f batchTime = currentTime.utc.iso8601 + @@istestvar = ENV["ISTEST"] begin eventStream = MultiEventStream.new + insightsMetricsEventStream = MultiEventStream.new metricData = CAdvisorMetricsAPIClient.getMetrics(winNode: nil, metricTime: batchTime ) metricData.each do |record| record["DataType"] = "LINUX_PERF_BLOB" @@ -64,10 +67,38 @@ def enumerate() router.emit_stream(@containerhealthtag, eventStream) if eventStream router.emit_stream(@nodehealthtag, eventStream) if 
eventStream - @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0) $log.info("cAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") end + + #start GPU InsightsMetrics items + begin + containerGPUusageInsightsMetricsDataItems = [] + containerGPUusageInsightsMetricsDataItems.concat(CAdvisorMetricsAPIClient.getInsightsMetrics(winNode: nil, metricTime: batchTime)) + + + containerGPUusageInsightsMetricsDataItems.each do |insightsMetricsRecord| + wrapper = { + "DataType" => "INSIGHTS_METRICS_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], + } + insightsMetricsEventStream.add(time, wrapper) if wrapper + end + + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0) + $log.info("cAdvisorInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + rescue => errorStr + $log.warn "Failed when processing GPU Usage metrics in_cadvisor_perf : #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + #end GPU InsightsMetrics items + rescue => errorStr $log.warn "Failed to retrieve cadvisor metric data: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 706c3ad13..4242a8dba 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -31,6 +31,7 @@ def initialize require_relative "oms_common" require_relative "omslog" @NODES_CHUNK_SIZE = "400" + require_relative "constants" end config_param :run_interval, :time, :default => 60 @@ -105,6 +106,8 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) telemetrySent 
= false eventStream = MultiEventStream.new containerNodeInventoryEventStream = MultiEventStream.new + insightsMetricsEventStream = MultiEventStream.new + @@istestvar = ENV["ISTEST"] #get node inventory nodeInventory["items"].each do |items| record = {} @@ -193,6 +196,20 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) capacityInfo = items["status"]["capacity"] ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties) + begin + if (!capacityInfo["nvidia.com/gpu"].nil?) && (!capacityInfo["nvidia.com/gpu"].empty?) + properties["nvigpus"] = capacityInfo["nvidia.com/gpu"] + end + + if (!capacityInfo["amd.com/gpu"].nil?) && (!capacityInfo["amd.com/gpu"].empty?) + properties["amdgpus"] = capacityInfo["amd.com/gpu"] + end + rescue => errorStr + $log.warn "Failed in getting GPU telemetry in_kube_nodes : #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + #telemetry about prometheus metric collections settings for replicaset if (File.file?(@@promConfigMountPath)) properties["rsPromInt"] = @@rsPromInterval @@ -213,7 +230,7 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) if telemetrySent == true @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i end - @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && eventStream.count > 0) $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end @@ -237,6 +254,35 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601) end #end router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream + + #start GPU InsightsMetrics items + begin + nodeGPUInsightsMetricsDataItems = [] + nodeGPUInsightsMetricsDataItems.concat(KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(nodeInventory, "allocatable", "nvidia.com/gpu", "nodeGpuAllocatable", batchTime)) + nodeGPUInsightsMetricsDataItems.concat(KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(nodeInventory, "capacity", "nvidia.com/gpu", "nodeGpuCapacity", batchTime)) + + nodeGPUInsightsMetricsDataItems.concat(KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(nodeInventory, "allocatable", "amd.com/gpu", "nodeGpuAllocatable", batchTime)) + nodeGPUInsightsMetricsDataItems.concat(KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(nodeInventory, "capacity", "amd.com/gpu", "nodeGpuCapacity", batchTime)) + + nodeGPUInsightsMetricsDataItems.each do |insightsMetricsRecord| + wrapper = { + "DataType" => "INSIGHTS_METRICS_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], + } + insightsMetricsEventStream.add(emitTime, wrapper) if wrapper + end + + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0) + $log.info("kubeNodeInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + rescue => errorStr + $log.warn "Failed when processing GPU metrics in_kube_nodes : #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + #end GPU InsightsMetrics items rescue => errorStr $log.warn "Failed in enumerate for KubePerf from in_kube_nodes : #{errorStr}" $log.debug_backtrace(errorStr.backtrace) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 3a78d4c05..29438d076 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -24,6 +24,7 @@ def initialize require_relative "ApplicationInsightsUtility" require_relative "oms_common" require_relative "omslog" + require_relative "constants" @PODS_CHUNK_SIZE = "1500" @podCount = 0 @@ -262,6 +263,7 @@ def parse_and_emit_records(podInventory, serviceList, continuationToken, batchTi emitTime = currentTime.to_f #batchTime = currentTime.utc.iso8601 eventStream = MultiEventStream.new + @@istestvar = ENV["ISTEST"] begin #begin block start # Getting windows nodes from kubeapi @@ -518,6 +520,7 @@ def parse_and_emit_records(podInventory, serviceList, continuationToken, batchTi containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "memory", "memoryLimitBytes", batchTime)) kubePerfEventStream = MultiEventStream.new + insightsMetricsEventStream = MultiEventStream.new containerMetricDataItems.each do |record| record["DataType"] = "LINUX_PERF_BLOB" @@ -526,6 +529,38 @@ def parse_and_emit_records(podInventory, serviceList, continuationToken, batchTi end #end router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream + + begin + #start GPU InsightsMetrics items + + containerGPUInsightsMetricsDataItems = [] + 
containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(podInventory, "requests", "nvidia.com/gpu", "containerGpuRequests", batchTime)) + containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(podInventory, "limits", "nvidia.com/gpu", "containerGpuLimits", batchTime)) + + containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(podInventory, "requests", "amd.com/gpu", "containerGpuRequests", batchTime)) + containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(podInventory, "limits", "amd.com/gpu", "containerGpuLimits", batchTime)) + + containerGPUInsightsMetricsDataItems.each do |insightsMetricsRecord| + wrapper = { + "DataType" => "INSIGHTS_METRICS_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], + } + insightsMetricsEventStream.add(emitTime, wrapper) if wrapper + + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0) + $log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + + end + + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + #end GPU InsightsMetrics items + rescue => errorStr + $log.warn "Failed when processing GPU metrics in_kube_podinventory : #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end rescue => errorStr $log.warn "Failed in parse_and_emit_record for KubePerf from in_kube_podinventory : #{errorStr}" $log.debug_backtrace(errorStr.backtrace) @@ -567,7 +602,7 @@ def parse_and_emit_records(podInventory, serviceList, continuationToken, batchTi #Updating value for AppInsights telemetry @podCount += podInventory["items"].length - @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0) $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end diff --git a/source/code/plugin/in_win_cadvisor_perf.rb b/source/code/plugin/in_win_cadvisor_perf.rb index 695a686cf..38868f2f5 100644 --- a/source/code/plugin/in_win_cadvisor_perf.rb +++ b/source/code/plugin/in_win_cadvisor_perf.rb @@ -17,6 +17,7 @@ def initialize require_relative "KubernetesApiClient" require_relative "oms_common" require_relative "omslog" + require_relative "constants" end config_param :run_interval, :time, :default => 60 @@ -52,8 +53,10 @@ def enumerate() time = Time.now.to_f begin eventStream = MultiEventStream.new + insightsMetricsEventStream = MultiEventStream.new timeDifference = (DateTime.now.to_time.to_i - @@winNodeQueryTimeTracker).abs timeDifferenceInMinutes = timeDifference / 60 + @@istestvar = ENV["ISTEST"] #Resetting this cache so that it is populated with the current set of containers with every call CAdvisorMetricsAPIClient.resetWinContainerIdCache() @@ 
-78,10 +81,36 @@ def enumerate() router.emit_stream(@tag, eventStream) if eventStream router.emit_stream(@mdmtag, eventStream) if eventStream - @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0) $log.info("winCAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") end + + #start GPU InsightsMetrics items + begin + containerGPUusageInsightsMetricsDataItems = [] + containerGPUusageInsightsMetricsDataItems.concat(CAdvisorMetricsAPIClient.getInsightsMetrics(winNode: winNode, metricTime: Time.now.utc.iso8601)) + + containerGPUusageInsightsMetricsDataItems.each do |insightsMetricsRecord| + wrapper = { + "DataType" => "INSIGHTS_METRICS_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }], + } + insightsMetricsEventStream.add(time, wrapper) if wrapper + end + + router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0) + $log.info("winCAdvisorInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + rescue => errorStr + $log.warn "Failed when processing GPU Usage metrics in_win_cadvisor_perf : #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + #end GPU InsightsMetrics items + end # Cleanup routine to clear deleted containers from cache